import os

import gradio as gr
import requests
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Globals
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
vector_index = None
indexed_chunks = []
url_cache = {}  # url -> (chunks, faiss index)

# Read the Firecrawl key from the environment instead of hardcoding it:
#   export FIRECRAWL_API_KEY="..."
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY", "")

# Ollama call helper
def ollama_generate(system_prompt: str, user_prompt: str, model: str = "llama2") -> str:
    """
    Calls a local Ollama server via its HTTP API and returns the generated text.
    Assumes Ollama is running on the default port (11434) with the model pulled.
    """
    try:
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": model,
                "system": system_prompt,
                "prompt": user_prompt,
                "stream": False,  # single JSON response instead of a token stream
            },
            timeout=120,
        )
        response.raise_for_status()
        # Non-streaming responses look like {"response": "generated text...", ...}
        return response.json().get("response", "").strip()
    except Exception as e:
        return f"Error calling Ollama: {str(e)}"

# Scrape URL and embed content
def scrape_and_embed(url: str):
    global vector_index, indexed_chunks

    if url in url_cache:
        indexed_chunks, vector_index = url_cache[url]
        return f"✅ Loaded cached content for {url}"

    # Firecrawl scrape (v1 API, markdown output)
    response = requests.post(
        "https://api.firecrawl.dev/v1/scrape",
        headers={"Authorization": f"Bearer {FIRECRAWL_API_KEY}"},
        json={"url": url, "formats": ["markdown"]}
    )
    if response.status_code != 200:
        return f"❌ Failed to scrape URL: {response.status_code}"

    content = response.json().get("data", {}).get("markdown", "")
    # Keep reasonably long lines as chunks; cap at 100 to bound the index size
    chunks = [line.strip() for line in content.split("\n") if len(line.strip()) > 50]
    if not chunks:
        return f"❌ No usable text extracted from {url}"
    indexed_chunks = chunks[:100]

    # Embeddings + FAISS index (exact L2 search over float32 vectors)
    embeddings = embedding_model.encode(indexed_chunks)
    vector_index = faiss.IndexFlatL2(embeddings.shape[1])
    vector_index.add(np.asarray(embeddings, dtype=np.float32))

    # Cache it
    url_cache[url] = (indexed_chunks, vector_index)

    return f"✅ Scraped and indexed {len(indexed_chunks)} chunks from {url}"
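
# Note: IndexFlatL2 ranks chunks by Euclidean distance. For cosine similarity
# instead, a minimal variant (assuming the same embeddings array as above)
# normalizes the vectors and uses an inner-product index:
#   faiss.normalize_L2(embeddings)
#   index = faiss.IndexFlatIP(embeddings.shape[1])
#   index.add(embeddings)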

# Main RAG + Ollama Q&A function
def web_rag_ollama(combined_input: str) -> str:
    """
    Expects input: "<URL> || <question>"
    Scrapes URL (cached), embeds, retrieves context, then asks Ollama to answer.
    """
    global vector_index, indexed_chunks

    if "||" not in combined_input:
        return "❌ Input format must be: <URL> || <your question>"

    url, question = [part.strip() for part in combined_input.split("||", 1)]

    # Scrape and embed
    scrape_status = scrape_and_embed(url)
    if scrape_status.startswith("❌"):
        return scrape_status

    # Retrieval
    if not indexed_chunks or vector_index is None:
        return "⚠️ No indexed content available."

    query_emb = embedding_model.encode([question])
    k = min(3, len(indexed_chunks))  # avoid padded -1 ids when fewer than 3 chunks exist
    D, I = vector_index.search(np.array(query_emb), k)
    context = "\n\n".join(indexed_chunks[i] for i in I[0])

    # Ollama prompt engineering
    system_prompt = (
        "You are a helpful assistant. Use the provided context to answer the question. "
        "If the answer is not contained in the context, say you don't know."
    )
    user_prompt = f"Context:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"

    # Call Ollama
    answer = ollama_generate(system_prompt, user_prompt)

    return f"**Scrape status:** {scrape_status}\n\n**Answer:**\n{answer}"
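
# Example direct call (hypothetical URL; assumes Firecrawl and Ollama are configured):
#   print(web_rag_ollama("https://example.com || What is this page about?"))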

# Gradio interface with MCP support
demo = gr.Interface(
    fn=web_rag_ollama,
    inputs=gr.Textbox(
        label="Input",
        placeholder="Enter input in format:\nhttps://example.com || What is this page about?"
    ),
    outputs=gr.Textbox(label="Answer"),
    title="🌐 Web RAG Q&A with Ollama (MCP-ready)",
    description="Scrape a URL, embed its content, and answer questions using a local Ollama LLM."
)
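
# mcp_server=True exposes web_rag_ollama as an MCP tool endpoint; this assumes
# a recent Gradio installed with MCP support (e.g. pip install "gradio[mcp]").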

if __name__ == "__main__":
    demo.launch(mcp_server=True)