import subprocess
import sys

# Pin llama-cpp-python at startup; invoking pip via the running interpreter keeps
# the install in the same environment as this script
subprocess.run([sys.executable, "-m", "pip", "install", "llama-cpp-python==0.3.15"], check=True)

import gradio as gr
import hopsworks
from sentence_transformers import SentenceTransformer
from llama_cpp import Llama
import faiss
import numpy as np
import os
import json
import yaml
from dotenv import load_dotenv

# Load environment variables & validate required settings
load_dotenv()

HOPSWORKS_API_KEY = os.getenv("HOPSWORKS_API_KEY")
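# load_dotenv() above lets a local .env file supply this, e.g.:
#   HOPSWORKS_API_KEY=<your Hopsworks API key>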

if not HOPSWORKS_API_KEY:
    raise ValueError("HOPSWORKS_API_KEY not found in environment variables.")

# Load models configuration
with open("models_config.json", "r") as f:
    models_config = json.load(f)
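
# The keys accessed throughout this file imply models_config.json is shaped
# roughly like the sketch below (illustrative only; names and values here are
# assumptions, not the actual file contents):
# {
#   "repositories": [
#     {
#       "name": "Example Repo",
#       "repo_id": "someorg/some-model-GGUF",
#       "models": [
#         {
#           "name": "Example Model (Q4)",
#           "filename": "model.Q4_K_M.gguf",
#           "description": "Short blurb shown in the UI."
#         }
#       ]
#     }
#   ]
# }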

# Load RAG prompt configuration
with open("prompts/rag_prompt.yml", "r") as f:
    prompt_config = yaml.safe_load(f)
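
# The keys read from prompt_config below imply prompts/rag_prompt.yml looks
# roughly like this (illustrative sketch; the concrete values are assumptions):
#
# template: |
#   Answer the question using only the context below.
#
#   Context:
#   {context}
#
#   Question: {question}
#   Answer:
# rag:
#   num_retrieved_chunks: 3
#   context_separator: "\n\n---\n\n"
# generation:
#   max_tokens: 512
#   temperature: 0.7
#   stop_sequences: ["Question:"]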

# Currently loaded LLM; stays None until the user loads one via load_model()
llm = None

print("Initializing embeddings and connecting to Hopsworks...")

try:
    embeddings = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY)
    fs = project.get_feature_store()
    book_fg = fs.get_feature_group("book_embeddings", version=1)

    df = book_fg.read()
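    # The feature group is expected to expose a 'text' column (chunk text) and
    # an 'embedding' column (one vector per chunk); both are read below.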
    
    if df.empty:
        raise ValueError("Feature group 'book_embeddings' is empty.")

    texts = df['text'].tolist()
    raw_embeddings = [emb if isinstance(emb, list) else emb.tolist() for emb in df['embedding']]
    embedding_vectors = np.array(raw_embeddings, dtype='float32')

    dimension = embedding_vectors.shape[1]
    index = faiss.IndexFlatIP(dimension)
    
    # L2-normalize so the inner-product index (IndexFlatIP) ranks by cosine similarity
    faiss.normalize_L2(embedding_vectors)
    index.add(embedding_vectors)

    print("Embeddings and FAISS index initialized.")

except Exception as e:
    print(f"Critical Error during initialization: {e}")
    index = None  # downstream code treats a None index as "retrieval unavailable"

# Function to load a model dynamically
def load_model(repo_name, model_name, progress=gr.Progress()):
    global llm
    try:
        progress(0, desc="Initializing...")

        # Find the repository
        repo = next((r for r in models_config["repositories"] if r["name"] == repo_name), None)
        if not repo:
            return f"Error: Repository '{repo_name}' not found in config."

        # Find the model within the repository
        model = next((m for m in repo["models"] if m["name"] == model_name), None)
        if not model:
            return f"Error: Model '{model_name}' not found in repository."

        print(f"Loading model: {model['name']}...")
        print(f"Repo: {repo['repo_id']}, File: {model['filename']}")

        progress(0.3, desc=f"Downloading/Loading {model['name']}...")

        llm = Llama.from_pretrained(
            repo_id=repo["repo_id"],
            filename=model["filename"],
            n_ctx=2048,        # context window, in tokens
            n_threads=4,       # CPU threads used for inference
            n_gpu_layers=-1,   # offload all layers to GPU when one is available
            verbose=False
        )

        progress(1.0, desc="Complete!")
        return f"✅ Model '{model_name}' loaded successfully!"

    except Exception as e:
        llm = None
        return f"❌ Error loading model: {str(e)}"

def retrieve_context(query, k=None):
    if index is None:
        return "Error: Search index not initialized."

    # Use k from prompt config if not specified
    if k is None:
        k = prompt_config["rag"]["num_retrieved_chunks"]

    query_embedding = embeddings.encode(query).astype('float32').reshape(1, -1)
    faiss.normalize_L2(query_embedding)

    distances, indices = index.search(query_embedding, k)
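    # Because the index is IndexFlatIP over L2-normalized vectors, the
    # "distances" returned above are cosine similarities (higher = closer).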

    retrieved_texts = []
    for i in indices[0]:
        # FAISS pads the result with -1 when the index holds fewer than k vectors
        if 0 <= i < len(texts):
            retrieved_texts.append(texts[i])

    # Use separator from prompt config
    separator = prompt_config["rag"]["context_separator"]

    print(f"Retrieved {len(retrieved_texts)} context chunks for the query.")
    print("Similarities:", distances)
    return separator.join(retrieved_texts)

def respond(message, history):
    """
    Generator function for streaming response.
    gr.ChatInterface passes 'message' and 'history' automatically.
    """
    if llm is None:
        yield "⚠️ No model is loaded yet. Select a repository and model above, then click 'Load Model'."
        return
    if index is None:
        yield "System Error: the search index failed to initialize. Check the console logs."
        return

    # Retrieve context using config settings
    context = retrieve_context(message)

    # Build prompt from template
    prompt = prompt_config["template"].format(
        context=context,
        question=message
    )

    # Get generation parameters from config
    gen_params = prompt_config["generation"]

    output = llm(
        prompt,
        max_tokens=gen_params["max_tokens"],
        temperature=gen_params["temperature"],
        stop=gen_params["stop_sequences"],
        stream=True
    )

    partial_message = ""
    for chunk in output:
        text_chunk = chunk["choices"][0]["text"]
        partial_message += text_chunk
        yield partial_message

with gr.Blocks(title="Hopsworks RAG ChatBot") as demo:
    gr.Markdown("<h1 style='text-align: center; color: #1EB382'>Hopsworks ChatBot</h1>")

    # Model Selection Section
    with gr.Row():
        repo_dropdown = gr.Dropdown(
            choices=[r["name"] for r in models_config["repositories"]],
            label="Select Repository",
            value=models_config["repositories"][0]["name"],
            scale=2
        )
        model_dropdown = gr.Dropdown(
            choices=[m["name"] for m in models_config["repositories"][0]["models"]],
            label="Select Model",
            value=models_config["repositories"][0]["models"][0]["name"],
            scale=2
        )
        load_button = gr.Button("Load Model", variant="primary", scale=1)

    status_box = gr.Textbox(
        label="Status",
        value="⚠️ Please select a repository and model, then click 'Load Model'",
        interactive=False
    )

    # Model info display
    model_info = gr.Markdown("")

    # Chat Interface
    chat_interface = gr.ChatInterface(
        fn=respond,
        chatbot=gr.Chatbot(height=400),
        textbox=gr.Textbox(placeholder="Ask a question about your documents...", container=False, scale=7),
        examples=["What is the main topic of the documents?", "Summarize the key points."],
        cache_examples=False,
    )

    # Function to update model dropdown when repository changes
    def update_model_choices(repo_name):
        repo = next((r for r in models_config["repositories"] if r["name"] == repo_name), None)
        if repo and repo["models"]:
            model_choices = [m["name"] for m in repo["models"]]
            return gr.Dropdown(choices=model_choices, value=model_choices[0])
        return gr.Dropdown(choices=[], value=None)

    # Function to update model info display
    def update_model_info(repo_name, model_name):
        repo = next((r for r in models_config["repositories"] if r["name"] == repo_name), None)
        if not repo:
            return ""

        model = next((m for m in repo["models"] if m["name"] == model_name), None)
        if model:
            return f"**{model['name']}**\n\n{model['description']}\n\n Repository: `{repo['repo_id']}`\n\n File: `{model['filename']}`"
        return ""

    # Event handlers. Chain the info refresh after the dropdown update so it
    # reads the newly selected model rather than the stale value.
    repo_dropdown.change(
        update_model_choices, inputs=[repo_dropdown], outputs=[model_dropdown]
    ).then(
        update_model_info, inputs=[repo_dropdown, model_dropdown], outputs=[model_info]
    )
    model_dropdown.change(update_model_info, inputs=[repo_dropdown, model_dropdown], outputs=[model_info])
    load_button.click(load_model, inputs=[repo_dropdown, model_dropdown], outputs=[status_box])

    # Load default model info on startup
    demo.load(
        lambda: update_model_info(
            models_config["repositories"][0]["name"],
            models_config["repositories"][0]["models"][0]["name"]
        ),
        outputs=[model_info]
    )

if __name__ == "__main__":
    # share=True additionally exposes a temporary public Gradio URL
    demo.launch(share=True)