"""Helpers for building the models/providers configuration file and for
persisting job results to a Hugging Face dataset."""

import os
from datetime import datetime
from typing import List, Tuple

import pandas as pd
from datasets import Dataset, load_dataset
from huggingface_hub import list_models, model_info

import globals
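# NOTE: `globals` here is a project-local module, not the builtin namespace. Based
# on how it is used below, it is assumed to provide at least:
#   LOCAL_CONFIG_FILE      - default path of the models/providers config file
#   RESULTS_DATASET_NAME   - Hub dataset repo id used to persist job results
#   results_lock           - a threading.Lock guarding job_results
#   job_results            - dict of result records keyed by model/provider
#   get_model_provider_key - helper that builds the job_results key
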
def get_models_providers() -> List[Tuple[str, List[str]]]:
    """Get popular text-generation models and their associated inference providers from the Hugging Face Hub."""
    models = list_models(
        filter="text-generation",
        sort="likes",
        direction=-1,  # descending, i.e. most-liked first
        limit=1,       # only the single most-liked model is fetched
        expand="inferenceProviderMapping",
    )

    # Keep only models that expose an inference provider mapping.
    model_providers = [
        (model.id, [p.provider for p in model.inference_provider_mapping])
        for model in models
        if hasattr(model, 'inference_provider_mapping') and model.inference_provider_mapping
    ]
    return model_providers


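# get_models_providers() yields pairs shaped like the following (model and provider
# names here are purely illustrative); initialize_models_providers_file() flattens
# each pair into one "model provider" line per provider:
#   [("some-org/some-model", ["provider-a", "provider-b"])]
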
def initialize_models_providers_file(file_path: str = globals.LOCAL_CONFIG_FILE) -> str:
    """Initialize the models/providers config file with popular models and their providers."""
    model_to_providers = get_models_providers()

    with open(file_path, 'w') as f:
        f.write("# Models and Providers Configuration\n")
        f.write("# Format: model_name provider_name\n")
        f.write(f"# Auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        count = 0
        for model_id, providers in model_to_providers:
            try:
                # One line per (model, provider) combination.
                for provider in providers:
                    f.write(f"{model_id} {provider}\n")
                    count += 1
            except Exception as e:
                print(f"Error processing model {model_id}: {e}")
                continue

    print(f"Successfully wrote {count} model-provider combinations to {file_path}")
    return f"Initialized {count} model-provider combinations"


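# Example of what the generated config file looks like (model/provider names are
# illustrative only):
#   # Models and Providers Configuration
#   # Format: model_name provider_name
#   some-org/some-model provider-a
#   some-org/some-model provider-b
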
def load_models_providers(file_path: str = "models_providers.txt") -> List[Tuple[str, str]]:
    """Load (model, provider) pairs from the config text file."""
    models_providers = []
    try:
        with open(file_path, 'r') as f:
            for line in f:
                line = line.strip()
                # Skip blank lines and comment lines.
                if line and not line.startswith('#'):
                    parts = line.split()
                    if len(parts) >= 2:
                        model = parts[0]
                        provider = parts[1]
                        models_providers.append((model, provider))
    except Exception as e:
        print(f"Error loading {file_path}: {str(e)}")
    return models_providers


def save_results() -> None:
    """Persist job results to HuggingFace dataset."""
    try:
        with globals.results_lock:
            if not globals.job_results:
                print("No results to save")
                return

            records = list(globals.job_results.values())
            df = pd.DataFrame(records)
            dataset = Dataset.from_pandas(df)

            dataset.push_to_hub(
                globals.RESULTS_DATASET_NAME,
                token=os.getenv("HF_TOKEN"),
                private=False
            )
            print(f"Saved {len(records)} results to dataset")

    except Exception as e:
        print(f"Error saving results to dataset: {e}")


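# save_results() pushes whatever is currently in globals.job_results. Based on
# load_results() and get_results_table() below, each record is assumed to contain
# at least: model, provider, last_run, status, current_score, previous_score, job_id.
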
def load_results() -> None:
    """Load job results from HuggingFace dataset."""
    try:
        dataset = load_dataset(
            globals.RESULTS_DATASET_NAME,
            split="train",
            token=os.getenv("HF_TOKEN")
        )

        for row in dataset:
            key = globals.get_model_provider_key(row["model"], row["provider"])
            globals.job_results[key] = {
                "model": row["model"],
                "provider": row["provider"],
                "last_run": row["last_run"],
                "status": row["status"],
                "current_score": row["current_score"],
                "previous_score": row["previous_score"],
                "job_id": row["job_id"]
            }

        print(f"Loaded {len(globals.job_results)} results from dataset")

    except Exception as e:
        print(f"No existing dataset found or error loading: {e}")
        print("Starting with empty results")


def get_results_table() -> List[List[str]]:
    """Return job results as a list for Gradio DataFrame."""
    with globals.results_lock:
        if not globals.job_results:
            return []

        table_data = []
        for key, info in globals.job_results.items():
            current_score = info.get("current_score", "N/A")
            if current_score is not None and isinstance(current_score, (int, float)):
                current_score = f"{current_score:.4f}"

            previous_score = info.get("previous_score", "N/A")
            if previous_score is not None and isinstance(previous_score, (int, float)):
                previous_score = f"{previous_score:.4f}"

            table_data.append([
                info["model"],
                info["provider"],
                info["last_run"],
                info["status"],
                current_score,
                previous_score
            ])

        return table_data


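# Minimal usage sketch (not part of the original app flow): regenerate the config
# file, read it back, pull any previously saved results, and print the table rows.
# Assumes HF_TOKEN is set in the environment and the project `globals` module is
# importable with the attributes listed at the top of this file.
if __name__ == "__main__":
    initialize_models_providers_file()
    for model, provider in load_models_providers(globals.LOCAL_CONFIG_FILE):
        print(f"{model} via {provider}")

    load_results()
    for row in get_results_table():
        print(row)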