Create app.py
app.py
ADDED
import gradio as gr
import requests
import os
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Globals
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
vector_index = None
indexed_chunks = []
url_cache = {}  # url -> (chunks, faiss index)

# Read the Firecrawl key from the environment (e.g. a Space secret) instead of hardcoding it
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY", "")

# Ollama call helper
def ollama_generate(system_prompt: str, user_prompt: str, model: str = "llama2") -> str:
    """
    Calls the local Ollama server via its REST API and returns the generated text.
    """
    try:
        # Ollama listens on http://localhost:11434 by default
        resp = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": model,
                "system": system_prompt,
                "prompt": user_prompt,
                "stream": False,
            },
            timeout=300,
        )
        resp.raise_for_status()
        # Non-streaming response format: {"response": "generated text...", ...}
        return resp.json().get("response", "").strip()
    except Exception as e:
        return f"Error calling Ollama: {str(e)}"

# Scrape URL and embed content
def scrape_and_embed(url: str):
    global vector_index, indexed_chunks

    if url in url_cache:
        indexed_chunks, vector_index = url_cache[url]
        return f"✅ Loaded cached content for {url}"

    # Firecrawl scrape (the v1 API returns the page as markdown under data.markdown)
    response = requests.post(
        "https://api.firecrawl.dev/v1/scrape",
        headers={"Authorization": f"Bearer {FIRECRAWL_API_KEY}"},
        json={"url": url, "formats": ["markdown"]}
    )
    if response.status_code != 200:
        return f"❌ Failed to scrape URL: {response.status_code}"

    content = response.json().get("data", {}).get("markdown", "")
    chunks = [line.strip() for line in content.split("\n") if len(line.strip()) > 50]
    indexed_chunks = chunks[:100]

    if not indexed_chunks:
        return f"❌ No usable text found at {url}"

    # Embeddings + FAISS index
    embeddings = embedding_model.encode(indexed_chunks)
    vector_index = faiss.IndexFlatL2(embeddings.shape[1])
    vector_index.add(np.asarray(embeddings, dtype="float32"))

    # Cache it
    url_cache[url] = (indexed_chunks, vector_index)

    return f"✅ Scraped and indexed {len(indexed_chunks)} chunks from {url}"

# Main RAG + Ollama Q&A function
def web_rag_ollama(combined_input: str) -> str:
    """
    Expects input: "<URL> || <question>"
    Scrapes URL (cached), embeds, retrieves context, then asks Ollama to answer.
    """
    global vector_index, indexed_chunks

    if "||" not in combined_input:
        return "❌ Input format must be: <URL> || <your question>"

    url, question = [part.strip() for part in combined_input.split("||", 1)]

    # Scrape and embed
    scrape_status = scrape_and_embed(url)
    if scrape_status.startswith("❌"):
        return scrape_status

    # Retrieval
    if not indexed_chunks or vector_index is None:
        return "⚠️ No indexed content available."

    query_emb = embedding_model.encode([question])
    D, I = vector_index.search(np.array(query_emb), k=3)
    context = "\n\n".join([indexed_chunks[i] for i in I[0]])

    # Ollama prompt engineering
    system_prompt = (
        "You are a helpful assistant. Use the provided context to answer the question. "
        "If the answer is not contained in the context, say you don't know."
    )
    user_prompt = f"Context:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"

    # Call Ollama
    answer = ollama_generate(system_prompt, user_prompt)

    return f"**Scrape status:** {scrape_status}\n\n**Answer:**\n{answer}"

# Gradio interface with MCP support
demo = gr.Interface(
    fn=web_rag_ollama,
    inputs=gr.Textbox(
        label="Input",
        placeholder="Enter input in format:\nhttps://example.com || What is this page about?"
    ),
    outputs=gr.Textbox(label="Answer"),
    title="Web RAG Q&A with Ollama (MCP-ready)",
    description="Scrape URL, embed content, and answer questions using local Ollama LLM."
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)
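
Example usage: once the app is running and an Ollama model (e.g. llama2) has been pulled locally with `ollama pull llama2`, the interface can be exercised from Python with gradio_client. This is a minimal sketch under stated assumptions: the local URL and port (http://127.0.0.1:7860) and the /predict endpoint name come from Gradio's defaults for a gr.Interface, not from anything pinned down in app.py.

# Minimal client-side sketch (assumes the app runs locally on Gradio's default port
# and that the chosen Ollama model has already been pulled).
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")  # assumed local URL; adjust if the Space is deployed elsewhere
result = client.predict(
    "https://example.com || What is this page about?",  # "<URL> || <question>" format expected by web_rag_ollama
    api_name="/predict",  # default endpoint name for a gr.Interface
)
print(result)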