Clémentine
wip
7f5506e
raw
history blame
5.21 kB
from huggingface_hub import list_models, model_info
from datetime import datetime
from datasets import Dataset, load_dataset
import pandas as pd
import os
import globals
from typing import List, Tuple
def get_models_providers() -> List[Tuple[str, List[str]]]:
    """Fetch liked text-generation models and their inference providers.

    Calls ``list_models`` on the Hugging Face Hub with the
    ``text-generation`` filter, sorted by likes (``direction=-1``) and
    limited to a single model, asking the Hub to expand the
    ``inferenceProviderMapping`` field.

    Returns:
        A list of ``(model_id, [provider, ...])`` tuples. Models without an
        ``inference_provider_mapping`` attribute, or with an empty one, are
        left out.
    """
    hub_listing = list_models(
        filter="text-generation",
        sort="likes",
        direction=-1,
        limit=1,
        expand="inferenceProviderMapping",
    )

    collected = []
    for entry in hub_listing:
        # Missing attribute and empty mapping are both treated as "no providers".
        mapping = getattr(entry, "inference_provider_mapping", None)
        if not mapping:
            continue
        collected.append((entry.id, [item.provider for item in mapping]))
    return collected
def initialize_models_providers_file(file_path: str = globals.LOCAL_CONFIG_FILE) -> str:
    """Create the model/provider configuration file from the Hub listing.

    The file starts with a three-line comment header (title, format hint,
    generation timestamp) followed by a blank line, then one
    ``model_name provider_name`` line per model/provider combination
    returned by ``get_models_providers``.

    Args:
        file_path: Destination path; any existing file is overwritten.

    Returns:
        A short status message with the number of combinations written.
    """
    entries = get_models_providers()
    count = 0

    with open(file_path, 'w') as handle:
        handle.write("# Models and Providers Configuration\n")
        handle.write("# Format: model_name provider_name\n")
        handle.write(f"# Auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        for model_id, providers in entries:
            # A failure on one model is reported and the remaining models
            # are still processed.
            try:
                for provider in providers:
                    handle.write(f"{model_id} {provider}\n")
                    count += 1
            except Exception as e:
                print(f"Error processing model {model_id}: {e}")

    print(f"Successfully wrote {count} model-provider combinations to {file_path}")
    return f"Initialized {count} model-provider combinations"
def load_models_providers(file_path: str = "models_providers.txt") -> List[Tuple[str, str]]:
    """Load (model, provider) pairs from a whitespace-separated text file.

    Blank lines and lines starting with ``#`` are ignored. For every other
    line the first two whitespace-separated fields are taken as the model
    and the provider; additional fields are ignored and lines with fewer
    than two fields are skipped.

    Args:
        file_path: Path of the configuration file to read.

    Returns:
        The list of ``(model, provider)`` tuples in file order. If the file
        cannot be read, the error is printed (not raised) and whatever was
        parsed before the failure is returned, which is an empty list when
        the file is missing.
    """
    models_providers: List[Tuple[str, str]] = []
    try:
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(file_path, 'r', encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                # Skip empty lines and comments
                if not line or line.startswith('#'):
                    continue
                parts = line.split()
                if len(parts) >= 2:
                    models_providers.append((parts[0], parts[1]))
    except Exception as e:
        # Deliberate best-effort: report the failure and fall through with
        # what we have. The message names the path actually used rather
        # than a hard-coded file name.
        print(f"Error loading {file_path}: {e}")
    return models_providers
def save_results() -> None:
    """Push the in-memory job results to the Hugging Face results dataset.

    The values of ``globals.job_results`` are turned into a pandas
    DataFrame, converted to a ``Dataset`` and pushed (non-private) to
    ``globals.RESULTS_DATASET_NAME`` using the ``HF_TOKEN`` environment
    variable. When there is nothing to save a message is printed instead.
    Any exception is caught and printed; nothing is raised to the caller.
    """
    # NOTE(review): the whole save runs while holding results_lock; confirm
    # that this matches the intended lock scope.
    try:
        with globals.results_lock:
            if globals.job_results:
                records = list(globals.job_results.values())
                frame = pd.DataFrame(records)
                # Push to HuggingFace Hub
                Dataset.from_pandas(frame).push_to_hub(
                    globals.RESULTS_DATASET_NAME,
                    token=os.getenv("HF_TOKEN"),
                    private=False,
                )
                print(f"Saved {len(records)} results to dataset")
            else:
                print("No results to save")
    except Exception as e:
        print(f"Error saving results to dataset: {e}")
def load_results() -> None:
    """Populate ``globals.job_results`` from the Hugging Face results dataset.

    Loads the ``train`` split of ``globals.RESULTS_DATASET_NAME`` (using the
    ``HF_TOKEN`` environment variable) and stores one record per row, keyed
    by ``globals.get_model_provider_key(model, provider)``. If loading or
    conversion fails for any reason, the error is printed and the function
    returns without raising.
    """
    columns = (
        "model",
        "provider",
        "last_run",
        "status",
        "current_score",
        "previous_score",
        "job_id",
    )
    try:
        # Try to load existing dataset
        stored = load_dataset(
            globals.RESULTS_DATASET_NAME,
            split="train",
            token=os.getenv("HF_TOKEN"),
        )

        # Convert dataset rows into job_results entries
        for row in stored:
            key = globals.get_model_provider_key(row["model"], row["provider"])
            globals.job_results[key] = {name: row[name] for name in columns}

        print(f"Loaded {len(globals.job_results)} results from dataset")
    except Exception as e:
        print(f"No existing dataset found or error loading: {e}")
        print("Starting with empty results")
def _format_score(score):
    """Render one score cell: numbers with 4 decimals, None as "N/A"."""
    if score is None:
        return "N/A"
    if isinstance(score, (int, float)):
        return f"{score:.4f}"
    # Anything else (e.g. an already formatted string) is shown unchanged.
    return score


def get_results_table() -> List[List[str]]:
    """Return job results as a list of rows for a Gradio DataFrame.

    Each row is ``[model, provider, last_run, status, current_score,
    previous_score]``. Numeric scores are formatted with four decimals.
    A score that is missing or stored as ``None`` is shown as ``"N/A"``:
    results loaded from the dataset always carry the score keys, so a
    missing-key default alone would never apply to them.

    Returns:
        One row per entry of ``globals.job_results``; an empty list when
        there are no results.
    """
    with globals.results_lock:
        return [
            [
                info["model"],
                info["provider"],
                info["last_run"],
                info["status"],
                _format_score(info.get("current_score")),
                _format_score(info.get("previous_score")),
            ]
            for info in globals.job_results.values()
        ]