from huggingface_hub import list_models, model_info
from datetime import datetime
from datasets import Dataset, load_dataset
import pandas as pd
import os
import globals
from typing import List, Tuple


def get_models_providers() -> List[Tuple[str, List[str]]]:
    """Get list of popular text generation models and associated providers from Hugging Face."""
    models = list_models(
        filter="text-generation",
        sort="likes",
        direction=-1,
        limit=globals.NUM_MODELS_RUN,
        expand="inferenceProviderMapping"
    )
    model_providers = [
        (model.id, [p.provider for p in model.inference_provider_mapping])
        for model in models
        if hasattr(model, 'inference_provider_mapping') and model.inference_provider_mapping
    ]
    return model_providers


def initialize_models_providers_file(file_path: str = globals.LOCAL_CONFIG_FILE) -> Tuple[str, str]:
    """Initialize the models_providers.txt file with popular models and their providers."""
    model_to_providers = get_models_providers()
    with open(file_path, 'w') as f:
        f.write("# Models and Providers Configuration\n")
        f.write("# Format: model_name provider_name\n")
        f.write(f"# Auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        count = 0
        for (model_id, providers) in model_to_providers:
            try:
                for provider in providers:
                    f.write(f"{model_id} {provider}\n")
                    count += 1
            except Exception as e:
                print(f"Error processing model {model_id}: {e}")
                continue
    print(f"Successfully wrote {count} model-provider combinations to {file_path}")
    return f"Initialized {count} model-provider combinations", load_models_providers_str()


def load_models_providers_str(file_path: str = globals.LOCAL_CONFIG_FILE) -> str:
    """Return the configured model/provider pairs as a display string, one pair per line."""
    mp_list = load_models_providers(file_path)
    return "\n".join([f"{model} : {provider}" for (model, provider) in mp_list])


def load_models_providers(file_path: str = globals.LOCAL_CONFIG_FILE) -> List[Tuple[str, str]]:
    """Load models and providers from text file. Creates file if it doesn't exist."""
    models_providers = []
    try:
        # Check if file exists; if not, create it
        if not os.path.exists(file_path):
            print(f"Config file {file_path} not found. Initializing...")
Initializing...") initialize_models_providers_file(file_path) with open(file_path, 'r') as f: for line in f: line = line.strip() # Skip empty lines and comments if line and not line.startswith('#'): parts = line.split() if len(parts) >= 2: model = parts[0] provider = parts[1] models_providers.append((model, provider)) except Exception as e: print(f"Error loading model providers: {str(e)}") import traceback traceback.print_exc() return models_providers def save_results() -> None: """Persist job results to HuggingFace dataset.""" try: if not globals.job_results: print("No results to save") return records = list(globals.job_results.values()) df = pd.DataFrame(records) dataset = Dataset.from_pandas(df) # Push to HuggingFace Hub dataset.push_to_hub( globals.RESULTS_DATASET_NAME, token=os.getenv("HF_TOKEN"), private=False ) print(f"Saved {len(records)} results to dataset") except Exception as e: print(f"Error saving results to dataset: {e}") def load_results() -> None: """Load job results from HuggingFace dataset.""" try: # Try to load existing dataset dataset = load_dataset( globals.RESULTS_DATASET_NAME, split="train", token=os.getenv("HF_TOKEN") ) # Convert dataset to job_results dict for row in dataset: key = globals.get_model_provider_key(row["model"], row["provider"]) globals.job_results[key] = { "model": row["model"], "provider": row["provider"], "last_run": row["last_run"], "status": row["status"], "current_score": row["current_score"], "previous_score": row["previous_score"], "job_id": row["job_id"], "start_time": row.get("start_time"), "duration": row.get("duration"), "completed_at": row.get("completed_at"), "runs": row.get("runs", []), "score_variance": row.get("score_variance") } print(f"Loaded {len(globals.job_results)} results from dataset") except Exception as e: print(f"No existing dataset found or error loading: {e}") print("Starting with empty results") def style_status(val): """Style function for status column.""" if val == "COMPLETED": return 'background-color: green' elif val == "ERROR": return 'background-color: red' elif val == "RUNNING": return 'background-color: blue' return '' def get_summary_stats(): """Get summary statistics of job results.""" if not globals.job_results: return "📊 **Status:** No jobs yet" total = len(globals.job_results) running = sum(1 for info in globals.job_results.values() if info.get("status") == "RUNNING") completed = sum(1 for info in globals.job_results.values() if info.get("status") == "COMPLETED") failed = sum(1 for info in globals.job_results.values() if info.get("status") in ["ERROR", "FAILED"]) return f"📊 **Total:** {total} | 🔵 **Running:** {running} | ✅ **Completed:** {completed} | ❌ **Failed:** {failed}" def get_results_table(): """Return job results as a styled pandas DataFrame for Gradio DataFrame.""" if not globals.job_results: return pd.DataFrame(columns=["Model", "Provider", "Runs", "Last Run", "Status", "Mean Score", "Variance", "Previous Score", "Duration", "Completed At", "Latest Job Id"]) table_data = [] for key, info in globals.job_results.items(): # Format mean score current_score = info.get("current_score", "N/A") if current_score is not None and isinstance(current_score, (int, float)): current_score = f"{current_score:.4f}" # Format variance variance = info.get("score_variance", "N/A") if variance is not None and isinstance(variance, (int, float)): variance = f"{variance:.6f}" # Format previous score previous_score = info.get("previous_score", "N/A") if previous_score is not None and isinstance(previous_score, (int, float)): 
previous_score = f"{previous_score:.4f}" # Count runs runs = info.get("runs", []) completed_runs = sum(1 for run in runs if run.get("status") == "COMPLETED") total_runs = len(runs) runs_str = f"{completed_runs}/{total_runs}" if runs else "0/0" # Format duration duration = info.get("duration") if duration is not None and isinstance(duration, (int, float)): # Convert seconds to minutes and seconds minutes = int(duration // 60) seconds = int(duration % 60) duration_str = f"{minutes}m {seconds}s" else: duration_str = "N/A" # Get completion time completed_at = info.get("completed_at", "N/A") job_id = info.get("job_id", "N/A") # Create a clickable link for the job ID if job_id != "N/A": job_url = f"https://hf.co/jobs/{globals.NAMESPACE}/{job_id}" job_link = f'{job_id}: 📄 ' else: job_link = job_id # Create relaunch link with data attributes for JavaScript to access model = info["model"] provider = info["provider"] relaunch_link = '🔄 Relaunch' table_data.append([ model, provider, runs_str, info["last_run"], info["status"], current_score, variance, previous_score, duration_str, completed_at, job_link, relaunch_link ]) df = pd.DataFrame(table_data, columns=["Model", "Provider", "Runs", "Last Run", "Status", "Mean Score", "Variance", "Previous Score", "Duration", "Completed At", "Job Id and Logs", "Actions"]) # Apply styling to the Status column styled_df = df.style.map(style_status, subset=['Status']) return styled_df