File size: 12,701 Bytes
fc10172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52cb539
fc10172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52cb539
fc10172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
import gradio as gr
import requests
import json
import re
import time
from typing import List, Dict, Tuple, Optional
from urllib.parse import quote

class GGUFModelFinder:
    def __init__(self):
        self.popular_gguf_creators = [
            "TheBloke",
            "bartowski", 
            "mradermacher",
            "microsoft",
            "QuantFactory",
            "lmstudio-ai",
            "huggingface",
            "mlabonne",
            "NousResearch",
            "MaziyarPanahi"
        ]
        
        self.api_base = "https://huggingface.co/api"
        self.headers = {
            "User-Agent": "GGUF-Model-Finder/1.0"
        }
    
    def clean_model_name(self, model_name: str) -> str:
        """Clean and normalize model name for better searching"""
        # Remove common prefixes and suffixes
        cleaned = model_name.strip()
        
        # Remove author/organization prefix if present
        if "/" in cleaned:
            cleaned = cleaned.split("/")[-1]
        
        # Remove common suffixes
        suffixes_to_remove = [
            "-GGUF", "-gguf", "-GPTQ", "-gptq", "-AWQ", "-awq", 
            "-HF", "-hf", "-chat", "-instruct", "-base", "-v1", 
            "-v2", "-v3", "-uncensored", "-finetune"
        ]
        
        for suffix in suffixes_to_remove:
            if cleaned.lower().endswith(suffix.lower()):
                cleaned = cleaned[:-len(suffix)]
        
        return cleaned.strip()
    
    def search_models(self, query: str, author: str = None, limit: int = 20) -> List[Dict]:
        """Search for models using HuggingFace API"""
        try:
            search_url = f"{self.api_base}/models"
            params = {
                "search": query,
                "filter": "gguf",
                "limit": limit,
                "sort": "downloads"
            }
            
            if author:
                params["author"] = author
            
            response = requests.get(search_url, params=params, headers=self.headers, timeout=10)
            
            if response.status_code == 200:
                return response.json()
            else:
                return []
        except Exception as e:
            print(f"Error searching models: {e}")
            return []
    
    def search_gguf_variants(self, model_name: str) -> List[Dict]:
        """Search for GGUF variants of a given model"""
        cleaned_name = self.clean_model_name(model_name)
        all_results = []
        
        # Search with different query variations
        search_terms = [
            cleaned_name,
            f"{cleaned_name} GGUF",
            f"{cleaned_name}-GGUF",
            f"{cleaned_name}_GGUF"
        ]
        
        # Search through popular GGUF creators
        for creator in self.popular_gguf_creators:
            for term in search_terms:
                results = self.search_models(term, author=creator, limit=10)
                all_results.extend(results)
                time.sleep(0.1)  # Rate limiting
        
        # Also search generally without author filter
        for term in search_terms:
            results = self.search_models(term, limit=15)
            all_results.extend(results)
            time.sleep(0.1)
        
        # Remove duplicates and filter relevant results
        seen_ids = set()
        filtered_results = []
        
        for model in all_results:
            model_id = model.get('id', '')
            if model_id not in seen_ids and 'gguf' in model_id.lower():
                seen_ids.add(model_id)
                
                # Check if model name is relevant
                model_name_clean = self.clean_model_name(model_id)
                if self.is_relevant_match(cleaned_name, model_name_clean):
                    filtered_results.append(model)
        
        # Sort by downloads (descending)
        filtered_results.sort(key=lambda x: x.get('downloads', 0), reverse=True)
        
        return filtered_results[:20]  # Return top 20 results
    
    def is_relevant_match(self, original: str, candidate: str) -> bool:
        """Check if candidate model is a relevant match for original"""
        original_lower = original.lower()
        candidate_lower = candidate.lower()
        
        # Direct substring match
        if original_lower in candidate_lower or candidate_lower in original_lower:
            return True
        
        # Check word overlap
        original_words = set(re.findall(r'\w+', original_lower))
        candidate_words = set(re.findall(r'\w+', candidate_lower))
        
        # If most words overlap, it's likely a match
        if len(original_words) > 0:
            overlap_ratio = len(original_words.intersection(candidate_words)) / len(original_words)
            return overlap_ratio >= 0.6
        
        return False
    
    def get_model_details(self, model_id: str) -> Dict:
        """Get detailed information about a specific model"""
        try:
            url = f"{self.api_base}/models/{model_id}"
            response = requests.get(url, headers=self.headers, timeout=10)
            
            if response.status_code == 200:
                return response.json()
            return {}
        except Exception as e:
            print(f"Error getting model details: {e}")
            return {}
    
    def format_model_info(self, model: Dict) -> str:
        """Format model information for display"""
        model_id = model.get('id', 'Unknown')
        downloads = model.get('downloads', 0)
        likes = model.get('likes', 0)
        updated = model.get('lastModified', 'Unknown')
        
        # Format the date
        if updated != 'Unknown':
            try:
                from datetime import datetime
                date_obj = datetime.fromisoformat(updated.replace('Z', '+00:00'))
                updated = date_obj.strftime('%Y-%m-%d')
            except:
                pass
        
        # Get model size info if available
        size_info = ""
        if 'siblings' in model:
            total_size = 0
            file_count = 0
            for sibling in model['siblings']:
                if sibling.get('rfilename', '').endswith('.gguf'):
                    file_count += 1
                    if 'size' in sibling:
                        total_size += sibling['size']
            
            if total_size > 0:
                size_gb = total_size / (1024**3)
                size_info = f" | Size: {size_gb:.1f}GB ({file_count} GGUF files)"
        
        model_url = f"https://huggingface.co/{model_id}"
        
        return f"""
**[{model_id}]({model_url})**
- Downloads: {downloads:,} | Likes: {likes} | Updated: {updated}{size_info}
"""

def find_gguf_models(model_name: str, progress=gr.Progress()) -> Tuple[str, str]:
    """Look up GGUF variants of *model_name* and render two Markdown panes.

    Returns:
        (results_markdown, additional_info_markdown). The second element is
        empty when nothing was found or the input was blank.
    """
    # Guard clause: blank input needs no API traffic at all.
    if not model_name.strip():
        return "Please enter a model name to search for.", ""

    progress(0.1, desc="Initializing search...")

    finder = GGUFModelFinder()

    progress(0.3, desc="Searching for GGUF variants...")
    matches = finder.search_gguf_variants(model_name)

    progress(0.8, desc="Formatting results...")

    if not matches:
        # Nothing found — return a help page instead of an empty list.
        no_results = f"""
# No GGUF Models Found 😞

Could not find any GGUF variants for **{model_name}**.

## Suggestions:
1. **Check the spelling** of the model name
2. **Try a simpler name** (e.g., just "llama-2-7b" instead of "meta-llama/Llama-2-7b-chat-hf")
3. **Search manually** on [Hugging Face](https://huggingface.co/models?other=gguf) with the GGUF filter
4. **Check popular GGUF creators**:
   - [ReallyFloppyPenguin](https://huggingface.co/ReallyFloppyPenguin)
   - [TheBloke](https://huggingface.co/TheBloke)
   - [bartowski](https://huggingface.co/bartowski)
   - [mradermacher](https://huggingface.co/mradermacher)
   - [QuantFactory](https://huggingface.co/QuantFactory)

The model you're looking for might not have been converted to GGUF format yet, or might be available under a different name.
"""
        return no_results, ""

    # Results pane: header followed by one numbered entry per match.
    header = f"""
# GGUF Models Found for "{model_name}" 🎯

Found **{len(matches)}** GGUF variant(s):

"""
    entries = [
        f"{rank}. {finder.format_model_info(m)}\n"
        for rank, m in enumerate(matches, 1)
    ]
    results_md = header + "".join(entries)

    # Side pane: static usage guidance shown next to any successful search.
    additional_info = f"""
## πŸ“‹ What is GGUF?

GGUF (GPT-Generated Unified Format) is a file format for storing models for inference with GGML and llama.cpp. It's designed to be fast to load and save, and to be extensible.

## πŸ”§ How to Use These Models

### With llama.cpp:
```bash
./main -m model.gguf -p "Your prompt here"
```

### With Ollama:
```bash
ollama create mymodel -f Modelfile
ollama run mymodel
```

### With Python (llama-cpp-python):
```python
from llama_cpp import Llama

llm = Llama(model_path="model.gguf")
output = llm("Your prompt here")
```

## πŸ’‘ Tips for Choosing a Model

- **Q4_K_M**: Good balance of quality and size
- **Q5_K_M**: Higher quality, larger size  
- **Q6_K**: Even higher quality, larger size
- **Q8_0**: Highest quality, largest size

Lower numbers = smaller file size but lower quality
Higher numbers = larger file size but higher quality

## 🌟 Popular GGUF Model Creators

The results above are from trusted model quantizers who regularly convert popular models to GGUF format.
"""

    progress(1.0, desc="Complete!")

    return results_md, additional_info

# Create the Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI for the GGUF model finder."""
    with gr.Blocks(
        title="GGUF Model Finder",
        theme=gr.themes.Soft(),
        css="""
        .container { max-width: 1200px; margin: auto; }
        .header { text-align: center; margin: 20px 0; }
        .search-box { margin: 20px 0; }
        """
    ) as ui:

        # Page header.
        gr.HTML("""
        <div class="header">
            <h1>πŸ” GGUF Model Finder</h1>
            <p>Find GGUF (quantized) versions of your favorite language models for local inference</p>
        </div>
        """)

        # Search row: text input next to the search button.
        with gr.Row():
            with gr.Column(scale=3):
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="e.g., llama-2-7b, mistral-7b, codellama-34b, deepseek-coder-6.7b",
                    info="Enter the name of the model you want to find GGUF versions for",
                    lines=1
                )

            with gr.Column(scale=1):
                search_btn = gr.Button("πŸ” Search GGUF Models", variant="primary", size="lg")

        # Static example box.
        gr.HTML("""
        <div style="margin: 20px 0; padding: 15px; background-color: #374151; border-radius: 8px;">
            <strong>πŸ’‘ Quick Examples:</strong><br>
            β€’ <code>llama-2-7b</code> - Meta's Llama 2 7B model<br>
            β€’ <code>mistral-7b</code> - Mistral AI's 7B model<br>
            β€’ <code>codellama-34b</code> - Code Llama 34B model<br>
            β€’ <code>neural-chat-7b</code> - Intel's Neural Chat model<br>
            β€’ <code>deepseek-coder</code> - DeepSeek Coder model
        </div>
        """)

        # Output row: search results beside the static info pane.
        with gr.Row():
            with gr.Column(scale=2):
                results_output = gr.Markdown(
                    label="Search Results",
                    value="Enter a model name above and click 'Search GGUF Models' to find quantized versions.",
                    height=400
                )

            with gr.Column(scale=1):
                info_output = gr.Markdown(
                    label="Additional Information",
                    value="",
                    height=400
                )

        # Both the button click and pressing Enter in the textbox run the
        # same search; wire them identically.
        for register in (search_btn.click, model_input.submit):
            register(
                fn=find_gguf_models,
                inputs=[model_input],
                outputs=[results_output, info_output],
                show_progress=True
            )

        # Footer with attribution links.
        gr.HTML("""
        <div style="margin-top: 30px; text-align: center; color: #666;">
            <p>Made with ❀️ using Gradio | Data from <a href="https://huggingface.co">Hugging Face</a></p>
            <p>GGUF format by the <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> team</p>
        </div>
        """)

    return ui

if __name__ == "__main__":
    # Create and launch the interface
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",  # bind on all interfaces (container/Spaces friendly)
        server_port=7860,       # Gradio's conventional default port
        share=True,             # also create a public gradio.live tunnel URL
    )