import gradio as gr
import requests
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Globals
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
vector_index = None
indexed_chunks = []
url_cache = {}  # url -> (chunks, faiss index)

FIRECRAWL_API_KEY = "YOUR_FIRECRAWL_API_KEY"  # Replace with your Firecrawl API key
OLLAMA_URL = "http://localhost:11434/api/generate"  # Default local Ollama endpoint


# Ollama call helper
def ollama_generate(system_prompt: str, user_prompt: str, model: str = "llama2") -> str:
    """
    Calls the local Ollama server via its HTTP API and returns the generated text.
    """
    try:
        response = requests.post(
            OLLAMA_URL,
            json={
                "model": model,
                "system": system_prompt,
                "prompt": user_prompt,
                "stream": False,  # return one JSON object instead of a token stream
            },
            timeout=300,
        )
        response.raise_for_status()
        # Non-streaming responses look like {"response": "generated text...", ...}
        return response.json().get("response", "").strip()
    except Exception as e:
        return f"Error calling Ollama: {str(e)}"


# Scrape URL and embed content
def scrape_and_embed(url: str):
    global vector_index, indexed_chunks

    if url in url_cache:
        indexed_chunks, vector_index = url_cache[url]
        return f"✅ Loaded cached content for {url}"

    # Firecrawl scrape (v1 API): request markdown output for the page
    response = requests.post(
        "https://api.firecrawl.dev/v1/scrape",
        headers={"Authorization": f"Bearer {FIRECRAWL_API_KEY}"},
        json={"url": url, "formats": ["markdown"]},
    )
    if response.status_code != 200:
        return f"❌ Failed to scrape URL: {response.status_code}"

    # The v1 API nests the scraped content under "data"
    content = response.json().get("data", {}).get("markdown", "")
    chunks = [line.strip() for line in content.split("\n") if len(line.strip()) > 50]
    indexed_chunks = chunks[:100]

    if not indexed_chunks:
        return f"❌ No usable text content found at {url}"

    # Embeddings + FAISS index
    embeddings = embedding_model.encode(indexed_chunks)
    vector_index = faiss.IndexFlatL2(embeddings.shape[1])
    vector_index.add(np.array(embeddings, dtype="float32"))

    # Cache it
    url_cache[url] = (indexed_chunks, vector_index)

    return f"✅ Scraped and indexed {len(indexed_chunks)} chunks from {url}"


# Main RAG + Ollama Q&A function
def web_rag_ollama(combined_input: str) -> str:
    """
    Expects input: "<url> || <question>"
    Scrapes the URL (cached), embeds it, retrieves context, then asks Ollama to answer.
    """
    global vector_index, indexed_chunks

    if "||" not in combined_input:
        return "❌ Input format must be: <url> || <question>"

    url, question = [part.strip() for part in combined_input.split("||", 1)]

    # Scrape and embed
    scrape_status = scrape_and_embed(url)
    if scrape_status.startswith("❌"):
        return scrape_status

    # Retrieval
    if not indexed_chunks or vector_index is None:
        return "⚠️ No indexed content available."

    query_emb = embedding_model.encode([question])
    k = min(3, len(indexed_chunks))
    D, I = vector_index.search(np.array(query_emb, dtype="float32"), k)
    context = "\n\n".join(indexed_chunks[i] for i in I[0] if i >= 0)

    # Ollama prompt engineering
    system_prompt = (
        "You are a helpful assistant. Use the provided context to answer the question. "
        "If the answer is not contained in the context, say you don't know."
    )
    user_prompt = f"Context:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"

    # Call Ollama
    answer = ollama_generate(system_prompt, user_prompt)

    return f"**Scrape status:** {scrape_status}\n\n**Answer:**\n{answer}"


# Gradio interface with MCP support
demo = gr.Interface(
    fn=web_rag_ollama,
    inputs=gr.Textbox(
        label="Input",
        placeholder="Enter input in format:\nhttps://example.com || What is this page about?",
    ),
    outputs=gr.Textbox(label="Answer"),
    title="🌐 Web RAG Q&A with Ollama (MCP-ready)",
    description="Scrape a URL, embed its content, and answer questions using a local Ollama LLM.",
)

if __name__ == "__main__":
    # mcp_server=True also exposes the function as an MCP tool (requires the gradio[mcp] extra)
    demo.launch(mcp_server=True)
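
# Example client usage (a minimal sketch, not part of the app itself): once the app is
# running, it can also be called programmatically with the separately installed
# gradio_client package. The URL below assumes Gradio's default local port; gr.Interface
# exposes its function under the "/predict" endpoint.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(
#       "https://example.com || What is this page about?",
#       api_name="/predict",
#   )
#   print(result)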