import gradio as gr
import requests
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Globals
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
vector_index = None
indexed_chunks = []
url_cache = {} # url -> (chunks, faiss index)
FIRECRAWL_API_KEY = "YOUR_FIRECRAWL_API_KEY"  # Replace with your Firecrawl API key


# Ollama call helper
def ollama_generate(system_prompt: str, user_prompt: str, model: str = "llama2") -> str:
    """
    Calls the local Ollama server and returns the generated text.

    Uses Ollama's HTTP API (/api/generate) rather than the CLI: the CLI has no
    `generate` subcommand or `--system`/`--json` flags, so shelling out would fail.
    """
    try:
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": model,
                "system": system_prompt,
                "prompt": user_prompt,
                "stream": False,  # return a single JSON object instead of a stream
            },
            timeout=120,
        )
        response.raise_for_status()
        # Non-streaming responses look like {"response": "generated text...", ...}
        return response.json().get("response", "").strip()
    except Exception as e:
        return f"Error calling Ollama: {str(e)}"


# Scrape URL and embed content
def scrape_and_embed(url: str):
    global vector_index, indexed_chunks
    if url in url_cache:
        indexed_chunks, vector_index = url_cache[url]
        return f"✅ Loaded cached content for {url}"

    # Firecrawl scrape (the v1 API returns the page under "data", markdown by default)
    response = requests.post(
        "https://api.firecrawl.dev/v1/scrape",
        headers={"Authorization": f"Bearer {FIRECRAWL_API_KEY}"},
        json={"url": url, "formats": ["markdown"]}
    )
    if response.status_code != 200:
        return f"❌ Failed to scrape URL: {response.status_code}"
    content = response.json().get("data", {}).get("markdown", "")

    # Keep only reasonably long lines as chunks, capped at 100
    chunks = [line.strip() for line in content.split("\n") if len(line.strip()) > 50]
    indexed_chunks = chunks[:100]
    if not indexed_chunks:
        return f"❌ No usable text extracted from {url}"

    # Embeddings + FAISS index
    embeddings = embedding_model.encode(indexed_chunks)
    vector_index = faiss.IndexFlatL2(embeddings.shape[1])
    vector_index.add(np.array(embeddings))

    # Cache it
    url_cache[url] = (indexed_chunks, vector_index)
    return f"✅ Scraped and indexed {len(indexed_chunks)} chunks from {url}"


# Main RAG + Ollama Q&A function
def web_rag_ollama(combined_input: str) -> str:
    """
    Expects input: "<URL> || <question>"
    Scrapes the URL (cached), embeds it, retrieves context, then asks Ollama to answer.
    """
    global vector_index, indexed_chunks
    if "||" not in combined_input:
        return "❌ Input format must be: <URL> || <your question>"
    url, question = [part.strip() for part in combined_input.split("||", 1)]

    # Scrape and embed
    scrape_status = scrape_and_embed(url)
    if scrape_status.startswith("❌"):
        return scrape_status

    # Retrieval
    if not indexed_chunks or vector_index is None:
        return "⚠️ No indexed content available."
    query_emb = embedding_model.encode([question])
    k = min(3, len(indexed_chunks))
    D, I = vector_index.search(np.array(query_emb), k)
    context = "\n\n".join(indexed_chunks[i] for i in I[0] if i != -1)

    # Ollama prompt engineering
    system_prompt = (
        "You are a helpful assistant. Use the provided context to answer the question. "
        "If the answer is not contained in the context, say you don't know."
    )
    user_prompt = f"Context:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"

    # Call Ollama
    answer = ollama_generate(system_prompt, user_prompt)
    return f"**Scrape status:** {scrape_status}\n\n**Answer:**\n{answer}"


# Gradio interface with MCP support
demo = gr.Interface(
    fn=web_rag_ollama,
    inputs=gr.Textbox(
        label="Input",
        placeholder="Enter input in format:\nhttps://example.com || What is this page about?"
    ),
    outputs=gr.Textbox(label="Answer"),
    title="🌐 Web RAG Q&A with Ollama (MCP-ready)",
    description="Scrape a URL, embed its content, and answer questions using a local Ollama LLM."
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)
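
# To run this app (assuming Ollama is installed and serving locally, the "llama2"
# model has been pulled, and FIRECRAWL_API_KEY holds a valid key), something like:
#
#   ollama pull llama2
#   python app.py        # assuming this file is saved as app.py
#
# With mcp_server=True, recent Gradio versions also expose web_rag_ollama as an
# MCP tool; see the launch output for the MCP server URL that clients can connect to.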