# GGUF-Finder / app.py
# Author: ReallyFloppyPenguin
# Last change: "Update app.py" (commit 52cb539, verified)
import gradio as gr
import requests
import json
import re
import time
from typing import List, Dict, Tuple, Optional
from urllib.parse import quote
class GGUFModelFinder:
    """Find GGUF-quantized variants of Hugging Face models via the Hub API."""

    def __init__(self):
        # Accounts known for regularly publishing GGUF quantizations;
        # these are searched individually before the general search.
        self.popular_gguf_creators = [
            "TheBloke",
            "bartowski",
            "mradermacher",
            "microsoft",
            "QuantFactory",
            "lmstudio-ai",
            "huggingface",
            "mlabonne",
            "NousResearch",
            "MaziyarPanahi"
        ]
        self.api_base = "https://huggingface.co/api"
        self.headers = {
            "User-Agent": "GGUF-Model-Finder/1.0"
        }

    def clean_model_name(self, model_name: str) -> str:
        """Clean and normalize a model name for better searching.

        Drops any "org/" prefix and strips known format/variant suffixes
        (``-GGUF``, ``-chat``, ``-v1`` ...). Suffixes are stripped
        repeatedly until none remain, so the result does not depend on the
        order they appear in the name (a single pass over the suffix list
        missed names like ``model-chat-v1``).
        """
        cleaned = model_name.strip()
        # Remove author/organization prefix if present.
        if "/" in cleaned:
            cleaned = cleaned.split("/")[-1]
        suffixes_to_remove = [
            "-GGUF", "-gguf", "-GPTQ", "-gptq", "-AWQ", "-awq",
            "-HF", "-hf", "-chat", "-instruct", "-base", "-v1",
            "-v2", "-v3", "-uncensored", "-finetune"
        ]
        # Keep stripping until a full pass removes nothing.
        stripped = True
        while stripped:
            stripped = False
            for suffix in suffixes_to_remove:
                if cleaned.lower().endswith(suffix.lower()):
                    cleaned = cleaned[:-len(suffix)]
                    stripped = True
        return cleaned.strip()

    def search_models(self, query: str, author: Optional[str] = None, limit: int = 20) -> List[Dict]:
        """Search the Hugging Face model API, restricted to GGUF repos.

        Args:
            query: Free-text search string.
            author: Optional account name to restrict results to.
            limit: Maximum number of results to request.

        Returns:
            The decoded JSON list of model records, or an empty list on
            any non-200 status or request failure (best-effort search).
        """
        try:
            search_url = f"{self.api_base}/models"
            params = {
                "search": query,
                "filter": "gguf",
                "limit": limit,
                "sort": "downloads"
            }
            if author:
                params["author"] = author
            response = requests.get(search_url, params=params, headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return []
        except Exception as e:
            # Boundary: network/JSON failures are logged and degrade to [].
            print(f"Error searching models: {e}")
            return []

    def search_gguf_variants(self, model_name: str) -> List[Dict]:
        """Search for GGUF variants of *model_name*.

        Queries each popular creator plus a general (unfiltered) search
        with several name variations, de-duplicates by repo id, keeps only
        relevant GGUF repos, and returns the top 20 by download count.
        """
        cleaned_name = self.clean_model_name(model_name)
        all_results = []
        # Query variations — repos name their GGUF conversions inconsistently.
        search_terms = [
            cleaned_name,
            f"{cleaned_name} GGUF",
            f"{cleaned_name}-GGUF",
            f"{cleaned_name}_GGUF"
        ]
        # Search through popular GGUF creators first.
        for creator in self.popular_gguf_creators:
            for term in search_terms:
                results = self.search_models(term, author=creator, limit=10)
                all_results.extend(results)
                time.sleep(0.1)  # Rate limiting
        # Also search generally without an author filter.
        for term in search_terms:
            results = self.search_models(term, limit=15)
            all_results.extend(results)
            time.sleep(0.1)
        # Remove duplicates and keep only relevant repos whose id mentions GGUF.
        seen_ids = set()
        filtered_results = []
        for model in all_results:
            model_id = model.get('id', '')
            if model_id not in seen_ids and 'gguf' in model_id.lower():
                seen_ids.add(model_id)
                model_name_clean = self.clean_model_name(model_id)
                if self.is_relevant_match(cleaned_name, model_name_clean):
                    filtered_results.append(model)
        # Most-downloaded first.
        filtered_results.sort(key=lambda x: x.get('downloads', 0), reverse=True)
        return filtered_results[:20]  # Return top 20 results

    def is_relevant_match(self, original: str, candidate: str) -> bool:
        """Return True if *candidate* looks like a variant of *original*.

        Matches on case-insensitive substring containment in either
        direction, or on >= 60% of the original's words appearing in the
        candidate.
        """
        original_lower = original.lower()
        candidate_lower = candidate.lower()
        # Direct substring match.
        if original_lower in candidate_lower or candidate_lower in original_lower:
            return True
        # Fall back to word-overlap ratio.
        original_words = set(re.findall(r'\w+', original_lower))
        candidate_words = set(re.findall(r'\w+', candidate_lower))
        if len(original_words) > 0:
            overlap_ratio = len(original_words.intersection(candidate_words)) / len(original_words)
            return overlap_ratio >= 0.6
        return False

    def get_model_details(self, model_id: str) -> Dict:
        """Fetch full metadata for one model; returns {} on any failure."""
        try:
            url = f"{self.api_base}/models/{model_id}"
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return {}
        except Exception as e:
            print(f"Error getting model details: {e}")
            return {}

    def format_model_info(self, model: Dict) -> str:
        """Render one API model record as a Markdown list entry."""
        model_id = model.get('id', 'Unknown')
        downloads = model.get('downloads', 0)
        likes = model.get('likes', 0)
        updated = model.get('lastModified', 'Unknown')
        # Shorten the ISO timestamp to YYYY-MM-DD when possible.
        if updated != 'Unknown':
            try:
                from datetime import datetime
                date_obj = datetime.fromisoformat(updated.replace('Z', '+00:00'))
                updated = date_obj.strftime('%Y-%m-%d')
            except (ValueError, TypeError, AttributeError):
                # Unexpected date format/type: show the raw value instead.
                pass
        # Sum the sizes of .gguf files when the API returned file metadata.
        size_info = ""
        if 'siblings' in model:
            total_size = 0
            file_count = 0
            for sibling in model['siblings']:
                if sibling.get('rfilename', '').endswith('.gguf'):
                    file_count += 1
                    if 'size' in sibling:
                        total_size += sibling['size']
            if total_size > 0:
                size_gb = total_size / (1024**3)
                size_info = f" | Size: {size_gb:.1f}GB ({file_count} GGUF files)"
        model_url = f"https://huggingface.co/{model_id}"
        return f"""
**[{model_id}]({model_url})**
- Downloads: {downloads:,} | Likes: {likes} | Updated: {updated}{size_info}
"""
def find_gguf_models(model_name: str, progress=gr.Progress()) -> Tuple[str, str]:
    """Look up GGUF variants of *model_name* and render two Markdown panes.

    Returns a ``(results_markdown, additional_info_markdown)`` pair; the
    second pane is empty when the query is blank or nothing was found.
    """
    # Guard: nothing to search for.
    if not model_name.strip():
        return "Please enter a model name to search for.", ""
    progress(0.1, desc="Initializing search...")
    searcher = GGUFModelFinder()
    progress(0.3, desc="Searching for GGUF variants...")
    matches = searcher.search_gguf_variants(model_name)
    progress(0.8, desc="Formatting results...")
    if not matches:
        # No hits: offer troubleshooting hints and manual-search pointers.
        no_results = f"""
# No GGUF Models Found 😞
Could not find any GGUF variants for **{model_name}**.
## Suggestions:
1. **Check the spelling** of the model name
2. **Try a simpler name** (e.g., just "llama-2-7b" instead of "meta-llama/Llama-2-7b-chat-hf")
3. **Search manually** on [Hugging Face](https://huggingface.co/models?other=gguf) with the GGUF filter
4. **Check popular GGUF creators**:
- [ReallyFloppyPenguin](https://huggingface.co/ReallyFloppyPenguin)
- [TheBloke](https://huggingface.co/TheBloke)
- [bartowski](https://huggingface.co/bartowski)
- [mradermacher](https://huggingface.co/mradermacher)
- [QuantFactory](https://huggingface.co/QuantFactory)
The model you're looking for might not have been converted to GGUF format yet, or might be available under a different name.
"""
        return no_results, ""
    # Header followed by one numbered entry per match.
    header = f"""
# GGUF Models Found for "{model_name}" 🎯
Found **{len(matches)}** GGUF variant(s):
"""
    listing = "".join(
        f"{rank}. {searcher.format_model_info(entry)}\n"
        for rank, entry in enumerate(matches, 1)
    )
    results_md = header + listing
    # Static usage/quantization primer shown next to the results.
    additional_info = """
## πŸ“‹ What is GGUF?
GGUF (GPT-Generated Unified Format) is a file format for storing models for inference with GGML and llama.cpp. It's designed to be fast to load and save, and to be extensible.
## πŸ”§ How to Use These Models
### With llama.cpp:
```bash
./main -m model.gguf -p "Your prompt here"
```
### With Ollama:
```bash
ollama create mymodel -f Modelfile
ollama run mymodel
```
### With Python (llama-cpp-python):
```python
from llama_cpp import Llama
llm = Llama(model_path="model.gguf")
output = llm("Your prompt here")
```
## πŸ’‘ Tips for Choosing a Model
- **Q4_K_M**: Good balance of quality and size
- **Q5_K_M**: Higher quality, larger size
- **Q6_K**: Even higher quality, larger size
- **Q8_0**: Highest quality, largest size
Lower numbers = smaller file size but lower quality
Higher numbers = larger file size but higher quality
## 🌟 Popular GGUF Model Creators
The results above are from trusted model quantizers who regularly convert popular models to GGUF format.
"""
    progress(1.0, desc="Complete!")
    return results_md, additional_info
# Create the Gradio interface
# Create the Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI for the GGUF finder.

    Layout: header HTML, a search row (textbox + button), example hints,
    and a results row with two Markdown panes. Both the button click and
    pressing Enter in the textbox trigger `find_gguf_models`.
    """
    with gr.Blocks(
        title="GGUF Model Finder",
        theme=gr.themes.Soft(),
        # Light page-level styling; class names referenced in the HTML below.
        css="""
.container { max-width: 1200px; margin: auto; }
.header { text-align: center; margin: 20px 0; }
.search-box { margin: 20px 0; }
"""
    ) as iface:
        # Page header.
        gr.HTML("""
<div class="header">
<h1>πŸ” GGUF Model Finder</h1>
<p>Find GGUF (quantized) versions of your favorite language models for local inference</p>
</div>
""")
        # Search controls: wide textbox next to the search button.
        with gr.Row():
            with gr.Column(scale=3):
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="e.g., llama-2-7b, mistral-7b, codellama-34b, deepseek-coder-6.7b",
                    info="Enter the name of the model you want to find GGUF versions for",
                    lines=1
                )
            with gr.Column(scale=1):
                search_btn = gr.Button("πŸ” Search GGUF Models", variant="primary", size="lg")
        # Static example queries.
        gr.HTML("""
<div style="margin: 20px 0; padding: 15px; background-color: #374151; border-radius: 8px;">
<strong>πŸ’‘ Quick Examples:</strong><br>
β€’ <code>llama-2-7b</code> - Meta's Llama 2 7B model<br>
β€’ <code>mistral-7b</code> - Mistral AI's 7B model<br>
β€’ <code>codellama-34b</code> - Code Llama 34B model<br>
β€’ <code>neural-chat-7b</code> - Intel's Neural Chat model<br>
β€’ <code>deepseek-coder</code> - DeepSeek Coder model
</div>
""")
        # Output panes: search results (wide) and GGUF background info (narrow).
        with gr.Row():
            with gr.Column(scale=2):
                results_output = gr.Markdown(
                    label="Search Results",
                    value="Enter a model name above and click 'Search GGUF Models' to find quantized versions.",
                    height=400
                )
            with gr.Column(scale=1):
                info_output = gr.Markdown(
                    label="Additional Information",
                    value="",
                    height=400
                )
        # Event handlers: both button click and textbox Enter run the search.
        search_btn.click(
            fn=find_gguf_models,
            inputs=[model_input],
            outputs=[results_output, info_output],
            show_progress=True
        )
        model_input.submit(
            fn=find_gguf_models,
            inputs=[model_input],
            outputs=[results_output, info_output],
            show_progress=True
        )
        # Footer with attribution links.
        gr.HTML("""
<div style="margin-top: 30px; text-align: center; color: #666;">
<p>Made with ❀️ using Gradio | Data from <a href="https://huggingface.co">Hugging Face</a></p>
<p>GGUF format by the <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> team</p>
</div>
""")
    return iface
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces at port 7860,
    # also requesting a public share link.
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860, share=True)