import re
import time
from datetime import datetime
from typing import Dict, List, Tuple

import gradio as gr
import requests


class GGUFModelFinder:
    def __init__(self):
        self.popular_gguf_creators = [
            "TheBloke",
            "bartowski",
            "mradermacher",
            "microsoft",
            "QuantFactory",
            "lmstudio-ai",
            "huggingface",
            "mlabonne",
            "NousResearch",
            "MaziyarPanahi",
        ]
        self.api_base = "https://huggingface.co/api"
        self.headers = {
            "User-Agent": "GGUF-Model-Finder/1.0"
        }
    def clean_model_name(self, model_name: str) -> str:
        """Clean and normalize a model name for better searching."""
        cleaned = model_name.strip()
        # Remove the author/organization prefix if present
        if "/" in cleaned:
            cleaned = cleaned.split("/")[-1]
        # Remove common quantization/variant suffixes
        suffixes_to_remove = [
            "-GGUF", "-gguf", "-GPTQ", "-gptq", "-AWQ", "-awq",
            "-HF", "-hf", "-chat", "-instruct", "-base", "-v1",
            "-v2", "-v3", "-uncensored", "-finetune",
        ]
        # Strip repeatedly so stacked suffixes (e.g. "-chat-hf") are all removed,
        # regardless of the order they appear in the list above.
        stripped = True
        while stripped:
            stripped = False
            for suffix in suffixes_to_remove:
                if cleaned.lower().endswith(suffix.lower()):
                    cleaned = cleaned[:-len(suffix)]
                    stripped = True
        return cleaned.strip()
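
    # Illustrative behaviour of the cleaner above:
    #   "meta-llama/Llama-2-7b-chat-hf"     -> "Llama-2-7b"
    #   "TheBloke/Mistral-7B-Instruct-GGUF" -> "Mistral-7B"
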
    def search_models(self, query: str, author: str = None, limit: int = 20) -> List[Dict]:
        """Search for models using the Hugging Face Hub API."""
        try:
            search_url = f"{self.api_base}/models"
            params = {
                "search": query,
                "filter": "gguf",
                "limit": limit,
                "sort": "downloads",
            }
            if author:
                params["author"] = author
            response = requests.get(search_url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return []
        except Exception as e:
            print(f"Error searching models: {e}")
            return []
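
    # The call above boils down to a plain HTTP request along these lines (illustrative query):
    #   GET https://huggingface.co/api/models?search=llama-2-7b&filter=gguf&limit=20&sort=downloads
    # which returns a JSON list of model summaries (id, downloads, likes, lastModified, ...).
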
    def search_gguf_variants(self, model_name: str) -> List[Dict]:
        """Search for GGUF variants of a given model."""
        cleaned_name = self.clean_model_name(model_name)
        all_results = []
        # Search with different query variations
        search_terms = [
            cleaned_name,
            f"{cleaned_name} GGUF",
            f"{cleaned_name}-GGUF",
            f"{cleaned_name}_GGUF",
        ]
        # Search through popular GGUF creators
        for creator in self.popular_gguf_creators:
            for term in search_terms:
                results = self.search_models(term, author=creator, limit=10)
                all_results.extend(results)
                time.sleep(0.1)  # Rate limiting
        # Also search generally, without an author filter
        for term in search_terms:
            results = self.search_models(term, limit=15)
            all_results.extend(results)
            time.sleep(0.1)
        # Remove duplicates and keep only relevant GGUF repos
        seen_ids = set()
        filtered_results = []
        for model in all_results:
            model_id = model.get('id', '')
            if model_id not in seen_ids and 'gguf' in model_id.lower():
                seen_ids.add(model_id)
                # Check whether the repo name is actually related to the query
                model_name_clean = self.clean_model_name(model_id)
                if self.is_relevant_match(cleaned_name, model_name_clean):
                    filtered_results.append(model)
        # Sort by downloads (descending) and return the top 20
        filtered_results.sort(key=lambda x: x.get('downloads', 0), reverse=True)
        return filtered_results[:20]
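
    # Note on cost: with 10 creators x 4 search terms plus 4 unfiltered searches,
    # each lookup issues 44 API requests, so the 0.1 s sleeps alone add ~4.4 s.
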
    def is_relevant_match(self, original: str, candidate: str) -> bool:
        """Check whether a candidate model is a relevant match for the original."""
        original_lower = original.lower()
        candidate_lower = candidate.lower()
        # Direct substring match
        if original_lower in candidate_lower or candidate_lower in original_lower:
            return True
        # Otherwise fall back to word overlap
        original_words = set(re.findall(r'\w+', original_lower))
        candidate_words = set(re.findall(r'\w+', candidate_lower))
        # If most of the original's words appear in the candidate, treat it as a match
        if len(original_words) > 0:
            overlap_ratio = len(original_words.intersection(candidate_words)) / len(original_words)
            return overlap_ratio >= 0.6
        return False
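
    # Worked example where the substring check fails but the overlap check passes:
    #   original  "mistral-7b-instruct" -> words {"mistral", "7b", "instruct"}
    #   candidate "mistral-7b-v0.2"     -> words {"mistral", "7b", "v0", "2"}
    #   overlap_ratio = 2 / 3 = 0.67 >= 0.6 -> relevant
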
    def get_model_details(self, model_id: str) -> Dict:
        """Get detailed information about a specific model."""
        try:
            url = f"{self.api_base}/models/{model_id}"
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return {}
        except Exception as e:
            print(f"Error getting model details: {e}")
            return {}
    def format_model_info(self, model: Dict) -> str:
        """Format model information for display."""
        model_id = model.get('id', 'Unknown')
        downloads = model.get('downloads', 0)
        likes = model.get('likes', 0)
        updated = model.get('lastModified', 'Unknown')
        # Format the ISO timestamp as a plain date
        if updated != 'Unknown':
            try:
                date_obj = datetime.fromisoformat(updated.replace('Z', '+00:00'))
                updated = date_obj.strftime('%Y-%m-%d')
            except ValueError:
                pass
        # Add size info when file metadata is available
        size_info = ""
        if 'siblings' in model:
            total_size = 0
            file_count = 0
            for sibling in model['siblings']:
                if sibling.get('rfilename', '').endswith('.gguf'):
                    file_count += 1
                    if 'size' in sibling:
                        total_size += sibling['size']
            if total_size > 0:
                size_gb = total_size / (1024 ** 3)
                size_info = f" | Size: {size_gb:.1f}GB ({file_count} GGUF files)"
        model_url = f"https://huggingface.co/{model_id}"
        return f"""
**[{model_id}]({model_url})**
- Downloads: {downloads:,} | Likes: {likes} | Updated: {updated}{size_info}
"""


def find_gguf_models(model_name: str, progress=gr.Progress()) -> Tuple[str, str]:
    """Main entry point: find GGUF variants and format them for the UI."""
    if not model_name.strip():
        return "Please enter a model name to search for.", ""
    progress(0.1, desc="Initializing search...")
    finder = GGUFModelFinder()
    progress(0.3, desc="Searching for GGUF variants...")
    results = finder.search_gguf_variants(model_name)
    progress(0.8, desc="Formatting results...")
    if not results:
        no_results = f"""
# No GGUF Models Found 🔍
Could not find any GGUF variants for **{model_name}**.
## Suggestions:
1. **Check the spelling** of the model name
2. **Try a simpler name** (e.g., just "llama-2-7b" instead of "meta-llama/Llama-2-7b-chat-hf")
3. **Search manually** on [Hugging Face](https://huggingface.co/models?other=gguf) with the GGUF filter
4. **Check popular GGUF creators**:
   - [ReallyFloppyPenguin](https://huggingface.co/ReallyFloppyPenguin)
   - [TheBloke](https://huggingface.co/TheBloke)
   - [bartowski](https://huggingface.co/bartowski)
   - [mradermacher](https://huggingface.co/mradermacher)
   - [QuantFactory](https://huggingface.co/QuantFactory)

The model you're looking for might not have been converted to GGUF format yet, or might be available under a different name.
"""
        return no_results, ""
    # Build the main results list
    results_md = f"""
# GGUF Models Found for "{model_name}" 🎯
Found **{len(results)}** GGUF variant(s):

"""
    for i, model in enumerate(results, 1):
        results_md += f"{i}. {finder.format_model_info(model)}\n"
    # Build the sidebar with background information
    additional_info = """
## 📋 What is GGUF?
GGUF (GPT-Generated Unified Format) is a file format for storing models for inference with GGML and llama.cpp. It's designed to be fast to load and save, and to be extensible.

## 🔧 How to Use These Models
### With llama.cpp:
```bash
./main -m model.gguf -p "Your prompt here"
```
### With Ollama:
```bash
ollama create mymodel -f Modelfile
ollama run mymodel
```
### With Python (llama-cpp-python):
```python
from llama_cpp import Llama

llm = Llama(model_path="model.gguf")
output = llm("Your prompt here")
```
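Install the bindings first with `pip install llama-cpp-python` if you haven't already.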

## 💡 Tips for Choosing a Quantization
- **Q4_K_M**: Good balance of quality and size
- **Q5_K_M**: Higher quality, larger size
- **Q6_K**: Even higher quality, larger size
- **Q8_0**: Highest quality, largest size

Lower numbers mean smaller files but lower quality; higher numbers mean larger files but higher quality.

## 🌟 Popular GGUF Model Creators
The results above come from trusted quantizers who regularly convert popular models to GGUF format.
"""
    progress(1.0, desc="Complete!")
    return results_md, additional_info


# Create the Gradio interface
def create_interface():
    with gr.Blocks(
        title="GGUF Model Finder",
        theme=gr.themes.Soft(),
        css="""
        .container { max-width: 1200px; margin: auto; }
        .header { text-align: center; margin: 20px 0; }
        .search-box { margin: 20px 0; }
        """,
    ) as iface:
        gr.HTML("""
        <div class="header">
            <h1>🔍 GGUF Model Finder</h1>
            <p>Find GGUF (quantized) versions of your favorite language models for local inference</p>
        </div>
        """)
        with gr.Row():
            with gr.Column(scale=3):
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="e.g., llama-2-7b, mistral-7b, codellama-34b, deepseek-coder-6.7b",
                    info="Enter the name of the model you want to find GGUF versions for",
                    lines=1,
                )
            with gr.Column(scale=1):
                search_btn = gr.Button("🔍 Search GGUF Models", variant="primary", size="lg")
        gr.HTML("""
        <div style="margin: 20px 0; padding: 15px; background-color: #374151; border-radius: 8px;">
            <strong>💡 Quick Examples:</strong><br>
            • <code>llama-2-7b</code> - Meta's Llama 2 7B model<br>
            • <code>mistral-7b</code> - Mistral AI's 7B model<br>
            • <code>codellama-34b</code> - Code Llama 34B model<br>
            • <code>neural-chat-7b</code> - Intel's Neural Chat model<br>
            • <code>deepseek-coder</code> - DeepSeek Coder model
        </div>
        """)
        with gr.Row():
            with gr.Column(scale=2):
                results_output = gr.Markdown(
                    label="Search Results",
                    value="Enter a model name above and click 'Search GGUF Models' to find quantized versions.",
                    height=400,
                )
            with gr.Column(scale=1):
                info_output = gr.Markdown(
                    label="Additional Information",
                    value="",
                    height=400,
                )
        # Event handlers: clicking the button or pressing Enter in the textbox
        search_btn.click(
            fn=find_gguf_models,
            inputs=[model_input],
            outputs=[results_output, info_output],
            show_progress=True,
        )
        model_input.submit(
            fn=find_gguf_models,
            inputs=[model_input],
            outputs=[results_output, info_output],
            show_progress=True,
        )
        gr.HTML("""
        <div style="margin-top: 30px; text-align: center; color: #666;">
            <p>Made with ❤️ using Gradio | Data from <a href="https://huggingface.co">Hugging Face</a></p>
            <p>GGUF format by the <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> team</p>
        </div>
        """)
    return iface


if __name__ == "__main__":
    # Build and launch the app. share=True creates a public tunnel when run
    # locally; hosted platforms such as Hugging Face Spaces ignore it.
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
    )