from huggingface_hub import list_models, model_info
from datetime import datetime
from datasets import Dataset, load_dataset
import pandas as pd
import os
import globals
from typing import List, Tuple


def get_models_providers(limit: int = 1) -> List[Tuple[str, List[str]]]:
    """Get list of popular text generation models and associated providers from Hugging Face.

    Queries the Hub for text-generation models sorted by likes (descending),
    expanded with their inference-provider mapping.

    Args:
        limit: Maximum number of models to request from the Hub.
            Defaults to 1, preserving the original behavior.

    Returns:
        A list of ``(model_id, [provider_name, ...])`` tuples, one entry per
        model that advertises at least one inference provider.
    """
    models = list_models(
        filter="text-generation",
        sort="likes",
        direction=-1,
        limit=limit,
        expand="inferenceProviderMapping",
    )
    # Keep only models that actually expose a non-empty provider mapping;
    # getattr(..., None) covers both "attribute missing" and "empty list".
    return [
        (model.id, [p.provider for p in model.inference_provider_mapping])
        for model in models
        if getattr(model, "inference_provider_mapping", None)
    ]


def initialize_models_providers_file(file_path: str = globals.LOCAL_CONFIG_FILE) -> str:
    """Initialize the models_providers.txt file with popular models and their providers.

    Writes one ``model_name provider_name`` line per (model, provider)
    combination, preceded by a commented header with a generation timestamp.

    Args:
        file_path: Destination path; defaults to ``globals.LOCAL_CONFIG_FILE``.

    Returns:
        A human-readable summary of how many combinations were written.
    """
    model_to_providers = get_models_providers()
    count = 0
    with open(file_path, 'w') as f:
        f.write("# Models and Providers Configuration\n")
        f.write("# Format: model_name provider_name\n")
        f.write(f"# Auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        for (model_id, providers) in model_to_providers:
            try:
                for provider in providers:
                    f.write(f"{model_id} {provider}\n")
                    count += 1
            except Exception as e:
                # Best-effort: skip a malformed entry rather than abort the file.
                print(f"Error processing model {model_id}: {e}")
                continue
    print(f"Successfully wrote {count} model-provider combinations to {file_path}")
    return f"Initialized {count} model-provider combinations"


def load_models_providers(file_path: str = "models_providers.txt") -> List[Tuple[str, str]]:
    """Load models and providers from text file.

    Parses lines of the form ``model_name provider_name``, skipping blank
    lines and ``#`` comments. Lines with fewer than two whitespace-separated
    fields are ignored; extra fields beyond the second are discarded.

    NOTE(review): the default path here is the literal "models_providers.txt"
    while the writer (initialize_models_providers_file) defaults to
    globals.LOCAL_CONFIG_FILE — confirm these refer to the same file.

    Args:
        file_path: Path of the configuration file to read.

    Returns:
        A list of ``(model, provider)`` tuples; empty (or partial) on read
        errors, which are reported via ``print`` rather than raised.
    """
    models_providers: List[Tuple[str, str]] = []
    try:
        with open(file_path, 'r') as f:
            for line in f:
                line = line.strip()
                # Skip empty lines and comments
                if line and not line.startswith('#'):
                    parts = line.split()
                    if len(parts) >= 2:
                        models_providers.append((parts[0], parts[1]))
    except Exception as e:
        print(f"Error loading models_providers.txt: {str(e)}")
    return models_providers


def save_results() -> None:
    """Persist job results to HuggingFace dataset.

    Snapshots ``globals.job_results`` under ``globals.results_lock`` and
    pushes it as a public dataset to the Hub using the ``HF_TOKEN``
    environment variable for authentication. Errors are reported via
    ``print`` and never raised to the caller.
    """
    try:
        with globals.results_lock:
            if not globals.job_results:
                print("No results to save")
                return
            records = list(globals.job_results.values())
        df = pd.DataFrame(records)
        dataset = Dataset.from_pandas(df)
        # Push to HuggingFace Hub
        dataset.push_to_hub(
            globals.RESULTS_DATASET_NAME,
            token=os.getenv("HF_TOKEN"),
            private=False,
        )
        print(f"Saved {len(records)} results to dataset")
    except Exception as e:
        print(f"Error saving results to dataset: {e}")


def load_results() -> None:
    """Load job results from HuggingFace dataset.

    Populates ``globals.job_results`` in place, keyed by
    ``globals.get_model_provider_key(model, provider)``. If the dataset does
    not exist or loading fails, starts with whatever is already in
    ``globals.job_results`` (typically empty) and reports the error.

    NOTE(review): unlike save_results, this does not take results_lock while
    mutating job_results — confirm it only runs before workers start.
    """
    try:
        # Try to load existing dataset
        dataset = load_dataset(
            globals.RESULTS_DATASET_NAME,
            split="train",
            token=os.getenv("HF_TOKEN"),
        )
        # Convert dataset rows back into the in-memory job_results dict.
        for row in dataset:
            key = globals.get_model_provider_key(row["model"], row["provider"])
            globals.job_results[key] = {
                "model": row["model"],
                "provider": row["provider"],
                "last_run": row["last_run"],
                "status": row["status"],
                "current_score": row["current_score"],
                "previous_score": row["previous_score"],
                "job_id": row["job_id"],
            }
        print(f"Loaded {len(globals.job_results)} results from dataset")
    except Exception as e:
        print(f"No existing dataset found or error loading: {e}")
        print("Starting with empty results")


def get_results_table() -> List[List[str]]:
    """Return job results as a list for Gradio DataFrame.

    Reads ``globals.job_results`` under ``globals.results_lock`` and formats
    numeric scores to four decimal places; missing scores appear as "N/A".

    Returns:
        Rows of ``[model, provider, last_run, status, current_score,
        previous_score]``; an empty list when there are no results.
    """
    with globals.results_lock:
        if not globals.job_results:
            return []
        table_data = []
        for key, info in globals.job_results.items():
            current_score = info.get("current_score", "N/A")
            if current_score is not None and isinstance(current_score, (int, float)):
                current_score = f"{current_score:.4f}"
            previous_score = info.get("previous_score", "N/A")
            if previous_score is not None and isinstance(previous_score, (int, float)):
                previous_score = f"{previous_score:.4f}"
            table_data.append([
                info["model"],
                info["provider"],
                info["last_run"],
                info["status"],
                current_score,
                previous_score,
            ])
        return table_data