Ultronprime committed on
Commit 1b5d6e1 · verified · 1 Parent(s): 6c3e7e8

Update app.py

Files changed (1)
  1. app.py +288 -208
app.py CHANGED
@@ -1,234 +1,314 @@
  import os
- import time
- import json
- from pathlib import Path
- from typing import List
  import spaces

- import gradio as gr
  import torch
- from huggingface_hub import HfApi, hf_hub_download, create_repo, upload_file, CommitOperationAdd, login
- from transformers import pipeline, AutoTokenizer
- from datasets import Dataset
- from sklearn.decomposition import PCA
  import numpy as np
- import plotly.graph_objects as go
- from sklearn.manifold import TSNE
- import traceback

- # --- User Configuration ---
- HF_USERNAME = os.getenv("HF_USERNAME")
- DATASET_ID = f"{HF_USERNAME}/rag-embeddings"  # Dataset repo name
- MODEL_ID = f"{HF_USERNAME}/my-test-model"  # Model repo name
- API_TOKEN = os.getenv("HF_TOKEN")  # Read from environment variable
-
- if not HF_USERNAME:
-     raise ValueError("Please set the HF_USERNAME environment variable with your Hugging Face username.")
- if not API_TOKEN:
-     raise ValueError("Please set the HF_TOKEN environment variable with your Hugging Face API token.")
-
- # --- Helper Functions ---
- def get_text_from_files(file_paths):
-     all_text = []
-     for filepath in file_paths:
-         try:
-             with open(filepath.name, "r", encoding="utf-8") as file:
-                 all_text.append(file.read())
-         except Exception as e:
-             print(f"Error reading file {filepath.name}: {e}. Skipping file.")
-     return all_text
-
- def get_embeddings(texts, model_id="sentence-transformers/all-mpnet-base-v2"):
      try:
-         model = pipeline('feature-extraction', model=model_id, device="cuda")
-         embeddings = model(texts)
      except Exception as e:
-         print(f"Error during embeddings: {e}. Please check your GPU configuration and model.")
-         return None
-     return embeddings

- def get_llm_response(query, context, model_id="HuggingFaceH4/zephyr-7b-beta"):
      try:
-         # The text-generation pipeline handles tokenization and decoding itself,
-         # so the prompt string is passed directly.
-         model = pipeline("text-generation", model=model_id, device="cuda")
-         prompt = f"""
- Answer the following question according to the provided context.
-
- Question: {query}
- Context: {context}
- Answer:
- """
-         output = model(
-             prompt,
-             max_new_tokens=250,
-             do_sample=True,
-             top_p=0.9,
-             temperature=0.2,
          )
-         return output[0]["generated_text"]

      except Exception as e:
-         print(f"Error during text generation: {e}. Please check your settings.")
-         return f"There was an error. Please check settings and if the models are available: {str(e)}"

- def format_output(output):
-     return output.strip()

- def fetch_from_store(query_embeddings, dataset_id):
-     try:
-         file_path = hf_hub_download(repo_id=dataset_id, filename="embeddings.json", repo_type="dataset", token=API_TOKEN)
-     except Exception as e:
-         return f"Couldn't find the embeddings on the Hub! Did you save them before? {str(e)}"
-
-     with open(file_path, 'r') as f:
-         dataset = json.load(f)
-
-     all_similarities = []
-     for text_embedding in dataset["embeddings"]:
-         try:
-             sim = torch.nn.functional.cosine_similarity(torch.tensor(query_embeddings), torch.tensor(text_embedding), dim=0)
-             all_similarities.append(sim.item())
-         except Exception as e:
-             print(f"Error calculating similarity: {e}. Skipping text entry.")
-
-     if not all_similarities:
-         return None
-     most_similar_index = all_similarities.index(max(all_similarities))
-     return dataset["texts"][most_similar_index]

- @spaces.GPU
- def rag_chain(question, files):
-     # If files were uploaded, embed them and push the embeddings to the Hub first.
-     if files is not None:
-         texts = get_text_from_files(files)
-         if texts:
-             embeddings = get_embeddings(texts)
-             if embeddings:
-                 upload_embeddings_to_hub(texts, embeddings, dataset_id=DATASET_ID)
-             else:
-                 return "There was an error generating embeddings for the uploaded files."
-
-     # Generate an embedding for the user input.
-     input_embedding = get_embeddings(texts=[question])
-     # Get the most relevant text:
-     if input_embedding:
-         context = fetch_from_store(input_embedding[0], dataset_id=DATASET_ID)
-         if context:
-             # Get the final output
-             output = get_llm_response(question, context)
-             return format_output(output)
-         else:
-             return "There was an error. Couldn't fetch a matching context. Are there embeddings in the Hub?"
-     else:
-         return "There was an error generating the embeddings. Try again."
-
- # --- Upload embeddings to the Hub (only run one time) ---
- def upload_embeddings_to_hub(texts, embeddings, dataset_id):
-     api = HfApi(token=API_TOKEN)
-     try:
-         create_repo(repo_id=dataset_id, repo_type="dataset", private=False, token=API_TOKEN)
-         print(f"Dataset repo {dataset_id} created successfully!")
      except Exception as e:
-         print(f"Dataset repo {dataset_id} already exists: {e}")
-
-     dataset = {
-         "texts": texts,
-         "embeddings": embeddings,
-     }
-
-     with open("embeddings.json", "w") as outfile:
-         json.dump(dataset, outfile)
-
-     upload_file(
-         path_or_fileobj="embeddings.json",
-         path_in_repo="embeddings.json",
-         repo_id=dataset_id,
-         repo_type="dataset",
-         token=API_TOKEN,
-     )
-     print("Finished embeddings upload")
-
- def reduce_dimension_pca(embeddings, n_components=2):
-     pca = PCA(n_components=n_components)
-     reduced_embeddings = pca.fit_transform(np.array(embeddings))
-     return reduced_embeddings
-
- def reduce_dimension_tsne(embeddings, n_components=2, perplexity=30, n_iter=300):
-     tsne = TSNE(n_components=n_components, perplexity=perplexity, n_iter=n_iter, random_state=42)
-     reduced_embeddings = tsne.fit_transform(np.array(embeddings))
-     return reduced_embeddings
-
- def get_plotly_plot(texts, embeddings, method='PCA'):
-     if method == 'PCA':
-         reduced_embeddings = reduce_dimension_pca(embeddings)
-     elif method == 'TSNE':
-         reduced_embeddings = reduce_dimension_tsne(embeddings)
-
-     fig = go.Figure(data=[go.Scatter(
-         x=reduced_embeddings[:, 0],
-         y=reduced_embeddings[:, 1],
-         mode='markers+text',
-         text=texts,
-         textposition="bottom center",
-         marker=dict(
-             size=10,
-             color=list(range(len(texts))),
-             colorscale='Viridis',
-             showscale=True,
-         ),
-     )])
-
-     fig.update_layout(title=f'Document Embeddings Visualization using {method}')
-     return fig
  @spaces.GPU
- def visualize_data(files, dataset_id=DATASET_ID):
-     if not files:
-         print("No files uploaded to visualize")
-         return None, None
-
      try:
-         file_path = hf_hub_download(repo_id=dataset_id, filename="embeddings.json", repo_type="dataset", token=API_TOKEN)
      except Exception as e:
-         print(f"Couldn't find the embeddings on the Hub! Did you save them before? {str(e)}")
-         return None, None
-
-     with open(file_path, 'r') as f:
-         dataset = json.load(f)
-
-     texts = dataset["texts"]
-     embeddings = dataset["embeddings"]
-
-     fig_pca = get_plotly_plot(texts, embeddings, method='PCA')
-     fig_tsne = get_plotly_plot(texts, embeddings, method='TSNE')
-
-     return fig_pca, fig_tsne
-
- # --- Main Gradio Interface ---
- with gr.Blocks() as demo:
-     with gr.Tab("Chat"):
-         chatbot_input = gr.Textbox(placeholder="Ask me something...")
-         chatbot_output = gr.Textbox()
-         with gr.Row():
-             chatbot_files = gr.File(file_types=['.txt'], file_count="multiple", label="Upload text files")
-             chatbot_button = gr.Button("Submit")
-         chatbot_button.click(rag_chain, inputs=[chatbot_input, chatbot_files], outputs=chatbot_output)
-     with gr.Tab("Visualization"):
-         visualization_files = gr.File(file_types=['.txt'], file_count="multiple", label="Upload text files")
-         with gr.Row():
-             submit_button = gr.Button("Visualize data")
-         with gr.Row():
-             plotly_output_pca = gr.Plot()
-         with gr.Row():
-             plotly_output_tsne = gr.Plot()
-
-         submit_button.click(visualize_data, inputs=visualization_files, outputs=[plotly_output_pca, plotly_output_tsne])
-
- demo.launch(server_name="0.0.0.0")
-
- # --- Upload embeddings to the Hub (one-time execution) ---
- # local_data_path = "data"  # Please set this path to where your data is!
- # texts = get_text_from_files(os.listdir(local_data_path))
- # embeddings = get_embeddings(texts)
- # upload_embeddings_to_hub(texts, embeddings, dataset_id=DATASET_ID)

  import os
+ import gradio as gr
+ import logging
+ import traceback

  import spaces
+ from typing import Optional, List
+ from dataclasses import dataclass
+ from datetime import datetime
+ from pathlib import Path
+ import gc

  import torch
+ from torch.cuda.amp import autocast
+ from transformers import AutoModel, AutoTokenizer
+ from sentence_transformers import SentenceTransformer
+ from charset_normalizer import from_bytes
  import numpy as np
+ import requests
+
+ # Custom Exception Class
+ class GPUQuotaExceededError(Exception):
+     pass
+
+ # Constants
+ EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+ CHUNK_SIZE = 500
+ BATCH_SIZE = 32
+ CACHE_DIR = os.getenv("CACHE_DIR", "/tmp/cache")
+ PERSISTENT_PATH = os.getenv("PERSISTENT_PATH", "/data")
+
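+ # Note: CHUNK_SIZE is measured in characters, not tokens, and BATCH_SIZE
+ # bounds how many chunks are sent to the encoder per GPU call.
+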
+ # Create directories
+ os.makedirs(CACHE_DIR, exist_ok=True)
+ os.makedirs(PERSISTENT_PATH, exist_ok=True)
+
+ # Logging Setup
+ LOG_DIR = os.getenv("LOG_DIR", "/data/logs")
+ os.makedirs(LOG_DIR, exist_ok=True)
+ LOG_FILE = Path(LOG_DIR) / "app.log"
+
+ logging.basicConfig(
+     filename=str(LOG_FILE),
+     level=logging.INFO,
+     format="%(asctime)s - %(levelname)s - %(message)s",
+ )
+ logger = logging.getLogger(__name__)
+
+ # Model initialization
+ model = None
+
+ def initialize_model():
+     global model
+     try:
+         if model is None:
+             model = SentenceTransformer(EMBEDDING_MODEL_NAME, cache_folder=CACHE_DIR)
+             logger.info(f"Initialized model: {EMBEDDING_MODEL_NAME}")
+         return True
+     except requests.exceptions.ConnectionError as e:
+         logger.error(f"Connection error during model download: {str(e)}\n{traceback.format_exc()}")
+         return False
+     except Exception as e:
+         logger.error(f"Model initialization failed: {str(e)}\n{traceback.format_exc()}")
+         return False

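+ # All GPU work below funnels through handle_gpu_operation, which wraps a
+ # zero-argument callable with timing, autocast, and OOM/quota error handling.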
+ @spaces.GPU
+ def handle_gpu_operation(func):
      try:
+         start_time = datetime.now()
+         with autocast(enabled=torch.cuda.is_available()):
+             result = func()
+         end_time = datetime.now()
+         duration = (end_time - start_time).total_seconds()
+         logger.info(f"GPU operation completed in {duration:.2f}s")
+         return result
+     except RuntimeError as e:
+         if "CUDA out of memory" in str(e):
+             torch.cuda.empty_cache()
+             logger.error(f"GPU memory error: {str(e)}")
+             raise GPUQuotaExceededError("GPU memory limit exceeded. Please try with a smaller batch.")
+         else:
+             logger.error(f"GPU runtime error: {str(e)}")
+             raise
      except Exception as e:
+         if "quota exceeded" in str(e).lower():
+             logger.error(f"GPU quota exceeded: {str(e)}")
+             raise GPUQuotaExceededError("GPU quota exceeded. Please wait a few minutes before trying again.")
+         else:
+             logger.error(f"Unexpected GPU error: {str(e)}")
+             raise
+
+ def get_model():
+     global model
+     if model is None:
+         if torch.cuda.is_available():
+             initialize_model()
+         else:
+             logger.warning("Attempted to initialize model outside GPU context, deferring.")
+             return None
+     return model
+
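+ # On ZeroGPU Spaces, CUDA is only visible inside @spaces.GPU calls, so
+ # get_model() defers loading rather than initializing on CPU.
+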
+ @spaces.GPU
+ def process_files(files):
+     if not files:
+         return "Please upload one or more .txt files.", "", ""

      try:
+         if not initialize_model():
+             return "Failed to initialize the model. Please try again.", "", ""
+
+         valid_files = [f for f in files if f.name.lower().endswith('.txt')]
+         if not valid_files:
+             return "No .txt files found in upload. Please ensure you upload .txt files.", "", ""
+
+         all_chunks = []
+         processed_files = 0
+
+         for file in valid_files:
+             try:
+                 with open(file.name, 'rb') as f:
+                     content = f.read()
+                 best_match = from_bytes(content).best()
+                 detected_encoding = best_match.encoding if best_match else "utf-8"
+                 decoded_content = content.decode(detected_encoding, errors='ignore')
+
+                 chunks = [decoded_content[i:i+CHUNK_SIZE] for i in range(0, len(decoded_content), CHUNK_SIZE)]
+                 all_chunks.extend(chunks)
+                 processed_files += 1
+                 logger.info(f"Processed file: {file.name}")
+             except Exception as e:
+                 logger.error(f"Error processing file {file.name}: {str(e)}")
+
+         if not all_chunks:
+             return "No valid content found in the uploaded .txt files.", "", ""
+
+         # Generate embeddings in batches
+         all_embeddings = []
+         for i in range(0, len(all_chunks), BATCH_SIZE):
+             batch = all_chunks[i:i+BATCH_SIZE]
+             embeddings = handle_gpu_operation(lambda: get_model().encode(batch))
+             all_embeddings.extend(embeddings)
+
+         # Save results
+         np.save(f"{PERSISTENT_PATH}/embeddings.npy", np.array(all_embeddings))
+
+         with open(f"{PERSISTENT_PATH}/chunks.txt", "w", encoding="utf-8") as f:
+             for chunk in all_chunks:
+                 f.write(chunk + "\n===CHUNK_SEPARATOR===\n")
+
+         return (
+             f"Successfully processed {processed_files} files. Generated {len(all_embeddings)} embeddings from {len(all_chunks)} chunks.",
+             "",
+             ""
          )

      except Exception as e:
+         logger.error(f"Processing failed: {str(e)}")
+         return f"Error processing files: {str(e)}", "", ""
+
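+ # Storage layout under PERSISTENT_PATH: embeddings.npy holds one vector per
+ # chunk, and chunks.txt stores the chunk texts joined by ===CHUNK_SEPARATOR===.
+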
+ @spaces.GPU
+ def semantic_search(query, top_k=5):
+     global model
+     if model is None:  # Check if model is initialized
+         if not initialize_model():  # Initialize only if needed and within GPU context
+             return "Model initialization failed. Please try again."
+
+     try:
+         # Load saved embeddings
+         stored_embeddings = np.load(f"{PERSISTENT_PATH}/embeddings.npy")
+
+         # Load stored chunks
+         with open(f"{PERSISTENT_PATH}/chunks.txt", "r", encoding="utf-8") as f:
+             chunks = f.read().split("\n===CHUNK_SEPARATOR===\n")
+         chunks = [c for c in chunks if c.strip()]  # Remove empty chunks
+
+         # Get query embedding
+         query_embedding = handle_gpu_operation(lambda: get_model().encode([query]))[0]  # Use get_model() to get the model
+
+         # Calculate similarities (cosine: dot product over the L2 norms)
+         similarities = np.dot(stored_embeddings, query_embedding) / (
+             np.linalg.norm(stored_embeddings, axis=1) * np.linalg.norm(query_embedding)
+         )
+
+         # Get top results
+         top_indices = np.argsort(similarities)[-top_k:][::-1]
+
+         # Format results
+         results = []
+         for idx in top_indices:
+             results.append(f"""
+ Similarity: {similarities[idx]:.3f}
+ Content: {chunks[idx]}
+ -------------------
+ """)
+
+         return "\n".join(results)

      except Exception as e:
+         logger.error(f"Search error: {str(e)}")
+         return f"Search error occurred: {str(e)}"
+
+ def search_and_format(query, num_results):
+     if not query.strip():
+         return "Please enter a search query"
+     return semantic_search(query, top_k=int(num_results))  # slider values may arrive as floats
+
+ def download_results(text):
+     if not text:
+         return None
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"search_results_{timestamp}.txt"
+     with open(filename, "w", encoding="utf-8") as f:
+         f.write(text)
+     return filename

  @spaces.GPU
+ def safe_generate_embedding(text):
+     global model
+     if model is None:  # Check if model is initialized
+         initialize_model()  # Initialize only if needed and within GPU context

      try:
+         embedding = handle_gpu_operation(
+             lambda: get_model().encode([text])[0].tolist()  # Use get_model() to get the model
+         )
+         return embedding, "", False
+     except GPUQuotaExceededError as e:
+         error_msg = str(e)
+         logger.error(error_msg)
+         return "", error_msg, True
      except Exception as e:
+         error_msg = f"Error generating embedding: {str(e)}"
+         logger.error(error_msg)
+         return "", error_msg, True
+
+ def download_embeddings():
+     embeddings_path = f"{PERSISTENT_PATH}/embeddings.npy"
+     if not os.path.exists(embeddings_path):
+         return None
+     return embeddings_path
+
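+ # Note: process_files and safe_generate_embedding return three values
+ # (result, error message, error flag) to match the three output slots wired below.
+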
+ def create_gradio_interface():
+     with gr.Blocks() as demo:
+         gr.Markdown("## Text Chunk Embeddings Generator")
+
+         error_box = gr.Textbox(visible=False, label="Status/Error Messages")
+
+         with gr.Row():
+             file_input = gr.File(
+                 label="Upload Text Files",
+                 file_count="multiple",
+                 file_types=[".txt"]
+             )
+
+         process_button = gr.Button("Generate Embeddings")
+         output_text = gr.Textbox(label="Status")
+
+         with gr.Tab("Search"):
+             query_input = gr.Textbox(
+                 label="Enter your search query",
+                 placeholder="Enter text to search through your documents..."
+             )
+             top_k = gr.Slider(
+                 minimum=1,
+                 maximum=20,
+                 value=5,
+                 step=1,
+                 label="Number of results to return"
+             )
+             search_button = gr.Button("🔍 Search")
+             results_output = gr.Textbox(
+                 label="Search Results",
+                 lines=10,
+                 show_copy_button=True
+             )
+             download_button = gr.Button("⬇️ Download Results")
+
+             search_button.click(
+                 fn=search_and_format,
+                 inputs=[query_input, top_k],
+                 outputs=results_output
+             )
+
+             download_button.click(
+                 fn=download_results,
+                 inputs=[results_output],
+                 outputs=[gr.File(label="Download Search Results")]
+             )
+
+         with gr.Tab("Inspect Embeddings"):
+             embed_input = gr.Textbox(label="Enter Text for Embedding")
+             embed_button = gr.Button("Generate Embedding")
+             embed_output = gr.Textbox(label="Embedding Vector", lines=5)
+
+             embed_button.click(
+                 safe_generate_embedding,
+                 inputs=[embed_input],
+                 outputs=[embed_output, error_box, error_box]
+             )
+
+             download_embeddings_button = gr.Button("⬇️ Download Embeddings")
+             download_embeddings_button.click(
+                 fn=download_embeddings,
+                 outputs=[gr.File(label="Download Embeddings")]
+             )
+
+         process_button.click(
+             process_files,
+             inputs=[file_input],
+             outputs=[output_text, error_box, error_box]
+         )
+
+     return demo
+
+ if __name__ == "__main__":
+     demo = create_gradio_interface()
+     demo.launch(server_name="0.0.0.0")
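
A quick way to sanity-check the new retrieval path outside Gradio is to rerun the same cosine-similarity ranking against the files the app writes. This is a minimal sketch, not part of the commit: it assumes embeddings.npy and chunks.txt already exist under PERSISTENT_PATH (i.e. process_files has run), and the query string is only an example.

    import numpy as np
    from sentence_transformers import SentenceTransformer

    PERSISTENT_PATH = "/data"  # same default the app uses
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    stored = np.load(f"{PERSISTENT_PATH}/embeddings.npy")  # one row per chunk
    with open(f"{PERSISTENT_PATH}/chunks.txt", encoding="utf-8") as f:
        chunks = [c for c in f.read().split("\n===CHUNK_SEPARATOR===\n") if c.strip()]

    query = model.encode(["example query"])[0]
    sims = stored @ query / (np.linalg.norm(stored, axis=1) * np.linalg.norm(query))
    for idx in np.argsort(sims)[-5:][::-1]:  # top 5, best match first
        print(f"{sims[idx]:.3f}  {chunks[idx][:80]!r}")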