File size: 12,701 Bytes
fc10172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52cb539
fc10172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52cb539
fc10172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
import gradio as gr
import requests
import json
import re
import time
from typing import List, Dict, Tuple, Optional
from urllib.parse import quote

class GGUFModelFinder:
    def __init__(self):
        self.popular_gguf_creators = [
            "TheBloke",
            "bartowski", 
            "mradermacher",
            "microsoft",
            "QuantFactory",
            "lmstudio-ai",
            "huggingface",
            "mlabonne",
            "NousResearch",
            "MaziyarPanahi"
        ]
        
        self.api_base = "https://huggingface.co/api"
        self.headers = {
            "User-Agent": "GGUF-Model-Finder/1.0"
        }
    
    def clean_model_name(self, model_name: str) -> str:
        """Clean and normalize model name for better searching"""
        # Remove common prefixes and suffixes
        cleaned = model_name.strip()
        
        # Remove author/organization prefix if present
        if "/" in cleaned:
            cleaned = cleaned.split("/")[-1]
        
        # Remove common suffixes
        suffixes_to_remove = [
            "-GGUF", "-gguf", "-GPTQ", "-gptq", "-AWQ", "-awq", 
            "-HF", "-hf", "-chat", "-instruct", "-base", "-v1", 
            "-v2", "-v3", "-uncensored", "-finetune"
        ]
        
        for suffix in suffixes_to_remove:
            if cleaned.lower().endswith(suffix.lower()):
                cleaned = cleaned[:-len(suffix)]
        
        return cleaned.strip()
    
    def search_models(self, query: str, author: str = None, limit: int = 20) -> List[Dict]:
        """Search for models using HuggingFace API"""
        try:
            search_url = f"{self.api_base}/models"
            params = {
                "search": query,
                "filter": "gguf",
                "limit": limit,
                "sort": "downloads"
            }
            
            if author:
                params["author"] = author
            
            response = requests.get(search_url, params=params, headers=self.headers, timeout=10)
            
            if response.status_code == 200:
                return response.json()
            else:
                return []
        except Exception as e:
            print(f"Error searching models: {e}")
            return []
    
    def search_gguf_variants(self, model_name: str) -> List[Dict]:
        """Search for GGUF variants of a given model"""
        cleaned_name = self.clean_model_name(model_name)
        all_results = []
        
        # Search with different query variations
        search_terms = [
            cleaned_name,
            f"{cleaned_name} GGUF",
            f"{cleaned_name}-GGUF",
            f"{cleaned_name}_GGUF"
        ]
        
        # Search through popular GGUF creators
        for creator in self.popular_gguf_creators:
            for term in search_terms:
                results = self.search_models(term, author=creator, limit=10)
                all_results.extend(results)
                time.sleep(0.1)  # Rate limiting
        
        # Also search generally without author filter
        for term in search_terms:
            results = self.search_models(term, limit=15)
            all_results.extend(results)
            time.sleep(0.1)
        
        # Remove duplicates and filter relevant results
        seen_ids = set()
        filtered_results = []
        
        for model in all_results:
            model_id = model.get('id', '')
            if model_id not in seen_ids and 'gguf' in model_id.lower():
                seen_ids.add(model_id)
                
                # Check if model name is relevant
                model_name_clean = self.clean_model_name(model_id)
                if self.is_relevant_match(cleaned_name, model_name_clean):
                    filtered_results.append(model)
        
        # Sort by downloads (descending)
        filtered_results.sort(key=lambda x: x.get('downloads', 0), reverse=True)
        
        return filtered_results[:20]  # Return top 20 results
    
    def is_relevant_match(self, original: str, candidate: str) -> bool:
        """Check if candidate model is a relevant match for original"""
        original_lower = original.lower()
        candidate_lower = candidate.lower()
        
        # Direct substring match
        if original_lower in candidate_lower or candidate_lower in original_lower:
            return True
        
        # Check word overlap
        original_words = set(re.findall(r'\w+', original_lower))
        candidate_words = set(re.findall(r'\w+', candidate_lower))
        
        # If most words overlap, it's likely a match
        if len(original_words) > 0:
            overlap_ratio = len(original_words.intersection(candidate_words)) / len(original_words)
            return overlap_ratio >= 0.6
        
        return False
    
    def get_model_details(self, model_id: str) -> Dict:
        """Get detailed information about a specific model"""
        try:
            url = f"{self.api_base}/models/{model_id}"
            response = requests.get(url, headers=self.headers, timeout=10)
            
            if response.status_code == 200:
                return response.json()
            return {}
        except Exception as e:
            print(f"Error getting model details: {e}")
            return {}
    
    def format_model_info(self, model: Dict) -> str:
        """Format model information for display"""
        model_id = model.get('id', 'Unknown')
        downloads = model.get('downloads', 0)
        likes = model.get('likes', 0)
        updated = model.get('lastModified', 'Unknown')
        
        # Format the date
        if updated != 'Unknown':
            try:
                from datetime import datetime
                date_obj = datetime.fromisoformat(updated.replace('Z', '+00:00'))
                updated = date_obj.strftime('%Y-%m-%d')
            except:
                pass
        
        # Get model size info if available
        size_info = ""
        if 'siblings' in model:
            total_size = 0
            file_count = 0
            for sibling in model['siblings']:
                if sibling.get('rfilename', '').endswith('.gguf'):
                    file_count += 1
                    if 'size' in sibling:
                        total_size += sibling['size']
            
            if total_size > 0:
                size_gb = total_size / (1024**3)
                size_info = f" | Size: {size_gb:.1f}GB ({file_count} GGUF files)"
        
        model_url = f"https://huggingface.co/{model_id}"
        
        return f"""
**[{model_id}]({model_url})**
- Downloads: {downloads:,} | Likes: {likes} | Updated: {updated}{size_info}
"""

def find_gguf_models(model_name: str, progress=gr.Progress()) -> Tuple[str, str]:
    """Look up GGUF variants of *model_name* and render two Markdown panes.

    Returns:
        (results_markdown, additional_info_markdown). The second element is
        empty when nothing was found or the input was blank.
    """
    # Guard clause: blank input needs no API traffic at all.
    if not model_name.strip():
        return "Please enter a model name to search for.", ""

    progress(0.1, desc="Initializing search...")

    finder = GGUFModelFinder()

    progress(0.3, desc="Searching for GGUF variants...")
    matches = finder.search_gguf_variants(model_name)

    progress(0.8, desc="Formatting results...")

    if not matches:
        # Nothing found — return a help page instead of an empty list.
        no_results = f"""
# No GGUF Models Found 😞

Could not find any GGUF variants for **{model_name}**.

## Suggestions:
1. **Check the spelling** of the model name
2. **Try a simpler name** (e.g., just "llama-2-7b" instead of "meta-llama/Llama-2-7b-chat-hf")
3. **Search manually** on [Hugging Face](https://huggingface.co/models?other=gguf) with the GGUF filter
4. **Check popular GGUF creators**:
   - [ReallyFloppyPenguin](https://huggingface.co/ReallyFloppyPenguin)
   - [TheBloke](https://huggingface.co/TheBloke)
   - [bartowski](https://huggingface.co/bartowski)
   - [mradermacher](https://huggingface.co/mradermacher)
   - [QuantFactory](https://huggingface.co/QuantFactory)

The model you're looking for might not have been converted to GGUF format yet, or might be available under a different name.
"""
        return no_results, ""

    # Results pane: header followed by one numbered entry per match.
    header = f"""
# GGUF Models Found for "{model_name}" 🎯

Found **{len(matches)}** GGUF variant(s):

"""
    entries = [
        f"{rank}. {finder.format_model_info(m)}\n"
        for rank, m in enumerate(matches, 1)
    ]
    results_md = header + "".join(entries)

    # Side pane: static usage guidance shown next to any successful search.
    additional_info = f"""
## πŸ“‹ What is GGUF?

GGUF (GPT-Generated Unified Format) is a file format for storing models for inference with GGML and llama.cpp. It's designed to be fast to load and save, and to be extensible.

## πŸ”§ How to Use These Models

### With llama.cpp:
```bash
./main -m model.gguf -p "Your prompt here"
```

### With Ollama:
```bash
ollama create mymodel -f Modelfile
ollama run mymodel
```

### With Python (llama-cpp-python):
```python
from llama_cpp import Llama

llm = Llama(model_path="model.gguf")
output = llm("Your prompt here")
```

## πŸ’‘ Tips for Choosing a Model

- **Q4_K_M**: Good balance of quality and size
- **Q5_K_M**: Higher quality, larger size  
- **Q6_K**: Even higher quality, larger size
- **Q8_0**: Highest quality, largest size

Lower numbers = smaller file size but lower quality
Higher numbers = larger file size but higher quality

## 🌟 Popular GGUF Model Creators

The results above are from trusted model quantizers who regularly convert popular models to GGUF format.
"""

    progress(1.0, desc="Complete!")

    return results_md, additional_info

# Create the Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI for the GGUF model finder."""
    with gr.Blocks(
        title="GGUF Model Finder",
        theme=gr.themes.Soft(),
        css="""
        .container { max-width: 1200px; margin: auto; }
        .header { text-align: center; margin: 20px 0; }
        .search-box { margin: 20px 0; }
        """
    ) as ui:

        # Page header.
        gr.HTML("""
        <div class="header">
            <h1>πŸ” GGUF Model Finder</h1>
            <p>Find GGUF (quantized) versions of your favorite language models for local inference</p>
        </div>
        """)

        # Search row: text input next to the search button.
        with gr.Row():
            with gr.Column(scale=3):
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="e.g., llama-2-7b, mistral-7b, codellama-34b, deepseek-coder-6.7b",
                    info="Enter the name of the model you want to find GGUF versions for",
                    lines=1
                )

            with gr.Column(scale=1):
                search_btn = gr.Button("πŸ” Search GGUF Models", variant="primary", size="lg")

        # Static example box.
        gr.HTML("""
        <div style="margin: 20px 0; padding: 15px; background-color: #374151; border-radius: 8px;">
            <strong>πŸ’‘ Quick Examples:</strong><br>
            β€’ <code>llama-2-7b</code> - Meta's Llama 2 7B model<br>
            β€’ <code>mistral-7b</code> - Mistral AI's 7B model<br>
            β€’ <code>codellama-34b</code> - Code Llama 34B model<br>
            β€’ <code>neural-chat-7b</code> - Intel's Neural Chat model<br>
            β€’ <code>deepseek-coder</code> - DeepSeek Coder model
        </div>
        """)

        # Output row: search results beside the static info pane.
        with gr.Row():
            with gr.Column(scale=2):
                results_output = gr.Markdown(
                    label="Search Results",
                    value="Enter a model name above and click 'Search GGUF Models' to find quantized versions.",
                    height=400
                )

            with gr.Column(scale=1):
                info_output = gr.Markdown(
                    label="Additional Information",
                    value="",
                    height=400
                )

        # Both the button click and pressing Enter in the textbox run the
        # same search; wire them identically.
        for register in (search_btn.click, model_input.submit):
            register(
                fn=find_gguf_models,
                inputs=[model_input],
                outputs=[results_output, info_output],
                show_progress=True
            )

        # Footer with attribution links.
        gr.HTML("""
        <div style="margin-top: 30px; text-align: center; color: #666;">
            <p>Made with ❀️ using Gradio | Data from <a href="https://huggingface.co">Hugging Face</a></p>
            <p>GGUF format by the <a href="https://github.com/ggerganov/llama.cpp">llama.cpp</a> team</p>
        </div>
        """)

    return ui

if __name__ == "__main__":
    # Create and launch the interface
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",  # bind on all interfaces (container/Spaces friendly)
        server_port=7860,       # Gradio's conventional default port
        share=True,             # also create a public gradio.live tunnel URL
    )