PraveenVellingiri committed on
Commit a8d5350 · verified · 1 Parent(s): 859fae9

Create app.py

Files changed (1)
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
import os

import faiss
import gradio as gr
import numpy as np
import requests
from sentence_transformers import SentenceTransformer

# Globals
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
vector_index = None
indexed_chunks = []
url_cache = {}  # url -> (chunks, faiss index)

# Read the Firecrawl key from the environment rather than hardcoding a secret
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY", "")

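# The key is assumed to be supplied via the environment at launch, e.g.:
#   FIRECRAWL_API_KEY="fc-..." python app.py
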
# Ollama call helper
def ollama_generate(system_prompt: str, user_prompt: str, model: str = "llama2") -> str:
    """
    Calls a local Ollama server via its HTTP API and returns the generated text.
    """
    try:
        # With "stream": False, /api/generate returns a single JSON object
        # whose "response" field holds the full completion.
        result = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": model,
                "system": system_prompt,
                "prompt": user_prompt,
                "stream": False,
            },
            timeout=300,
        )
        result.raise_for_status()
        return result.json().get("response", "").strip()
    except Exception as e:
        return f"Error calling Ollama: {str(e)}"

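# For reference, a non-streaming /api/generate reply is shaped roughly like
# this (fields abbreviated; values illustrative):
#   {"model": "llama2", "response": "...", "done": true}
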
# Scrape URL and embed content
def scrape_and_embed(url: str):
    global vector_index, indexed_chunks

    if url in url_cache:
        indexed_chunks, vector_index = url_cache[url]
        return f"✅ Loaded cached content for {url}"

    # Firecrawl scrape
    response = requests.post(
        "https://api.firecrawl.dev/v1/scrape",
        headers={"Authorization": f"Bearer {FIRECRAWL_API_KEY}"},
        json={"url": url, "formats": ["markdown"]}
    )
    if response.status_code != 200:
        return f"❌ Failed to scrape URL: {response.status_code}"

    # The v1 API nests the page text under data.markdown
    content = response.json().get("data", {}).get("markdown", "")
    chunks = [line.strip() for line in content.split("\n") if len(line.strip()) > 50]
    if not chunks:
        return f"❌ No usable text extracted from {url}"
    indexed_chunks = chunks[:100]

    # Embeddings + FAISS index
    embeddings = embedding_model.encode(indexed_chunks)
    vector_index = faiss.IndexFlatL2(embeddings.shape[1])
    vector_index.add(np.array(embeddings))

    # Cache it
    url_cache[url] = (indexed_chunks, vector_index)

    return f"✅ Scraped and indexed {len(indexed_chunks)} chunks from {url}"

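# For reference, a successful scrape response is shaped roughly like this
# (assumed from the Firecrawl v1 API; values illustrative):
#   {"success": true, "data": {"markdown": "...", "metadata": {...}}}
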
# Main RAG + Ollama Q&A function
def web_rag_ollama(combined_input: str) -> str:
    """
    Expects input: "<URL> || <question>"
    Scrapes the URL (cached), embeds it, retrieves context, then asks Ollama to answer.
    """
    global vector_index, indexed_chunks

    if "||" not in combined_input:
        return "❌ Input format must be: <URL> || <your question>"

    url, question = [part.strip() for part in combined_input.split("||", 1)]

    # Scrape and embed
    scrape_status = scrape_and_embed(url)
    if scrape_status.startswith("❌"):
        return scrape_status

    # Retrieval
    if not indexed_chunks or vector_index is None:
        return "⚠️ No indexed content available."

    query_emb = embedding_model.encode([question])
    # Never ask FAISS for more neighbors than there are indexed chunks
    D, I = vector_index.search(np.array(query_emb), k=min(3, len(indexed_chunks)))
    context = "\n\n".join([indexed_chunks[i] for i in I[0]])

    # Ollama prompt engineering
    system_prompt = (
        "You are a helpful assistant. Use the provided context to answer the question. "
        "If the answer is not contained in the context, say you don't know."
    )
    user_prompt = f"Context:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"

    # Call Ollama
    answer = ollama_generate(system_prompt, user_prompt)

    return f"**Scrape status:** {scrape_status}\n\n**Answer:**\n{answer}"

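# Quick sanity check (assumes a running Ollama server and a reachable URL;
# the example URL is a placeholder):
#   print(web_rag_ollama("https://example.com || What is this page about?"))
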
# Gradio interface with MCP support
demo = gr.Interface(
    fn=web_rag_ollama,
    inputs=gr.Textbox(
        label="Input",
        placeholder="Enter input in format:\nhttps://example.com || What is this page about?"
    ),
    outputs=gr.Textbox(label="Answer"),
    title="🌐 Web RAG Q&A with Ollama (MCP-ready)",
    description="Scrape a URL, embed its content, and answer questions using a local Ollama LLM."
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)
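
A quick way to exercise the running app from Python, sketched under two assumptions not stated in this commit: the default local Gradio address, and the gradio_client package being installed.

from gradio_client import Client

# Connect to the locally running app (default Gradio address assumed)
client = Client("http://127.0.0.1:7860/")

# A single gr.Interface exposes its function under the default "/predict" endpoint
result = client.predict(
    "https://example.com || What is this page about?",  # placeholder input
    api_name="/predict",
)
print(result)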