Ultronprime committed
Commit 9c81028 · 1 Parent(s): 8a3af63

Update app.py

Files changed (1):
  1. app.py +106 -238
app.py CHANGED
@@ -1,60 +1,30 @@
  import os
  import gradio as gr
  import logging
- import traceback
- import spaces
- from typing import Optional, List
- from dataclasses import dataclass
- from datetime import datetime
- from pathlib import Path
- import gc
- import torch
- from torch.amp import autocast
+ import numpy as np
  from transformers import AutoModel, AutoTokenizer
  from sentence_transformers import SentenceTransformer
- import numpy as np
- import requests
- from charset_normalizer import from_bytes
- import zipfile
- import tempfile
- import shutil
-
- # Custom Exception Class
- class GPUQuotaExceededError(Exception):
-     pass
+ import torch
+ from torch.cuda.amp import autocast
+ from spaces import GPU

  # Constants
  EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
- CHUNK_SIZE = 500
- BATCH_SIZE = 32
-
- # Set Persistent Storage Path
- PERSISTENT_PATH = os.getenv("PERSISTENT_PATH", "/data")
- os.makedirs(PERSISTENT_PATH, exist_ok=True, mode=0o777)
+ CACHE_DIR = os.getenv("CACHE_DIR", "/tmp/cache")
+ PERSISTENT_PATH = os.getenv("PERSISTENT_PATH", "/tmp/data")
+ HF_TOKEN = "YOUR_HF_TOKEN"  # Replace with your Hugging Face token

- # Define Subdirectories
- TEMP_DIR = os.path.join(PERSISTENT_PATH, "temp")
- os.makedirs(TEMP_DIR, exist_ok=True, mode=0o777)
-
- OUTPUTS_DIR = os.path.join(PERSISTENT_PATH, "outputs")
- os.makedirs(OUTPUTS_DIR, exist_ok=True, mode=0o777)
-
- NPY_CACHE = os.path.join(PERSISTENT_PATH, "npy_cache")
- os.makedirs(NPY_CACHE, exist_ok=True, mode=0o777)
-
- LOG_DIR = os.getenv("LOG_DIR", os.path.join(PERSISTENT_PATH, "logs"))
- os.makedirs(LOG_DIR, exist_ok=True, mode=0o777)
-
- # Set Hugging Face cache directory to persistent storage
- os.environ["HF_HOME"] = os.path.join(PERSISTENT_PATH, ".huggingface")
- os.makedirs(os.environ["HF_HOME"], exist_ok=True, mode=0o777)
-
- # Set Hugging Face token
- HF_TOKEN = os.getenv("HF_TOKEN")
+ # Create directories
+ os.makedirs(CACHE_DIR, exist_ok=True)
+ os.makedirs(PERSISTENT_PATH, exist_ok=True)

  # Logging Setup
+ LOG_DIR = os.getenv("LOG_DIR", "/data/logs")
+ os.makedirs(LOG_DIR, exist_ok=True)
+ LOG_FILE = LOG_DIR + "/app.log"
+
  logging.basicConfig(
-     filename=os.path.join(LOG_DIR, "app.log"),
+     filename=LOG_FILE,
      level=logging.INFO,
      format="%(asctime)s - %(levelname)s - %(message)s",
  )
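A note on this first hunk: the old code had already moved to the non-deprecated import, `from torch.amp import autocast`, with an explicit `device_type`; the new code reverts to `torch.cuda.amp` and later calls `autocast("cuda")`, which appears to bind the string to the context manager's first positional parameter (`enabled`) rather than selecting a device. A minimal sketch of the non-deprecated form, assuming fp16 autocast on CUDA is still the intent:

import torch
from torch.amp import autocast

# device_type selects the backend; dtype pins the autocast precision.
with autocast(device_type="cuda", dtype=torch.float16):
    ...  # e.g. model.encode(batch)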
@@ -64,230 +34,128 @@ logger = logging.getLogger(__name__)
  model = None

  def initialize_model():
-     """
-     Initialize the sentence transformer model.
-     Returns:
-         bool: Whether the model was successfully initialized.
-     """
      global model
      try:
          if model is None:
-             model_cache = os.path.join(PERSISTENT_PATH, "models")
-             os.makedirs(model_cache, exist_ok=True, mode=0o777)
-             # Use the HF_TOKEN to load the model
-             model = SentenceTransformer(EMBEDDING_MODEL_NAME, cache_folder=model_cache, use_auth_token=HF_TOKEN)
+             model = SentenceTransformer(EMBEDDING_MODEL_NAME, cache_folder=CACHE_DIR, use_auth_token=HF_TOKEN)
          logger.info(f"Initialized model: {EMBEDDING_MODEL_NAME}")
          return True
-     except requests.exceptions.RequestException as e:
-         logger.error(f"Connection error during model download: {str(e)}\n{traceback.format_exc()}")
-         return False
      except Exception as e:
-         logger.error(f"Model initialization failed: {str(e)}\n{traceback.format_exc()}")
+         logger.error(f"Model initialization failed: {str(e)}")
          return False

- @spaces.GPU
- def handle_gpu_operation(func):
-     try:
-         start_time = datetime.now()
-         # Updated autocast usage as per deprecation notice
-         with autocast(device_type='cuda', dtype=torch.float16):
-             result = func()
-         end_time = datetime.now()
-         duration = (end_time - start_time).total_seconds()
-         logger.info(f"GPU operation completed in {duration:.2f}s")
-         return result
-     except RuntimeError as e:
-         if "CUDA out of memory" in str(e):
-             torch.cuda.empty_cache()
-             logger.error(f"GPU memory error: {str(e)}")
-             raise GPUQuotaExceededError("GPU memory limit exceeded. Please try with a smaller batch.")
-         else:
-             logger.error(f"GPU runtime error: {str(e)}")
-             raise
-     except Exception as e:
-         if "quota exceeded" in str(e).lower():
-             logger.error(f"GPU quota exceeded: {str(e)}")
-             raise GPUQuotaExceededError("GPU quota exceeded. Please wait a few minutes before trying again.")
-         else:
-             logger.error(f"Unexpected GPU error: {str(e)}")
-             raise
-
- def get_model():
+ @GPU()
+ def generate_embedding(text, focus):
      global model
-     if model is None:
-         if torch.cuda.is_available():
-             initialize_model()
-         else:
-             logger.warning("Attempted to initialize model outside GPU context, deferring.")
-             return None
-     return model
-
- @spaces.GPU
- def process_files(files):
-     if not files:
-         return "Please upload one or more .txt files.", ""
+     if model is None:
+         initialize_model()

      try:
-         if not initialize_model():
-             return "Failed to initialize the model. Please try again.", ""
-
-         valid_files = [f for f in files if f.name.lower().endswith('.txt')]
-         if not valid_files:
-             return "No .txt files found. Please upload valid .txt files.", ""
-
-         all_chunks = []
-         processed_files = 0
-
-         for file in valid_files:
-             try:
-                 with open(file.name, 'rb') as f:
-                     content = f.read()
-                 detected_encoding = from_bytes(content).best().encoding
-                 decoded_content = content.decode(detected_encoding, errors='ignore')
-
-                 # Split content into chunks
-                 chunks = [decoded_content[i:i+CHUNK_SIZE] for i in range(0, len(decoded_content), CHUNK_SIZE)]
-                 all_chunks.extend(chunks)
-                 processed_files += 1
-                 logger.info(f"Processed file: {file.name}")
-             except Exception as e:
-                 logger.error(f"Error processing file {file.name}: {str(e)}")
-
-         if not all_chunks:
-             return "No valid content found in the uploaded files.", ""
-
-         # Generate embeddings in batches
-         all_embeddings = []
-         for i in range(0, len(all_chunks), BATCH_SIZE):
-             batch = all_chunks[i:i+BATCH_SIZE]
-             if model:
-                 embeddings = handle_gpu_operation(lambda: model.encode(batch))
-                 all_embeddings.extend(embeddings)
-             else:
-                 return "Model not initialized. Please check model initialization.", ""
-
-         # Save results to OUTPUTS_DIR
-         embeddings_path = os.path.join(OUTPUTS_DIR, "embeddings.npy")
-         np.save(embeddings_path, np.array(all_embeddings))
-         chunks_path = os.path.join(OUTPUTS_DIR, "chunks.txt")
-         with open(chunks_path, "w", encoding="utf-8") as f:
-             for chunk in all_chunks:
-                 f.write(chunk + "\n===CHUNK_SEPARATOR===\n")
-
-         return (
-             f"Successfully processed {processed_files} files. Generated {len(all_embeddings)} embeddings from {len(all_chunks)} chunks.",
-             ""
-         )
-
+         with autocast("cuda"):
+             embedding = model.encode([text])[0].tolist()
+         return embedding, ""
      except Exception as e:
-         logger.error(f"Processing failed: {str(e)}")
-         return f"Error processing files: {str(e)}", ""
-
- @spaces.GPU
- def semantic_search(query, top_k=5):
-     global model
-     if model is None:
-         return "Model not initialized. Please process files first."
+         error_msg = f"Error generating embedding: {str(e)}"
+         logger.error(error_msg)
+         return "", error_msg

+ @GPU()
+ def save_embedding(embedding, name):
      try:
-         # Load saved embeddings and chunks from OUTPUTS_DIR
-         embeddings_file = os.path.join(OUTPUTS_DIR, "embeddings.npy")
-         chunks_file = os.path.join(OUTPUTS_DIR, "chunks.txt")
-         stored_embeddings = np.load(embeddings_file)
-         with open(chunks_file, "r", encoding="utf-8") as f:
-             chunks = f.read().split("\n===CHUNK_SEPARATOR===\n")
-         chunks = [c for c in chunks if c.strip()]
-
-         # Get query embedding
-         query_embedding = model.encode([query])[0]
-
-         # Calculate similarities
-         similarities = np.dot(stored_embeddings, query_embedding) / (
-             np.linalg.norm(stored_embeddings, axis=1) * np.linalg.norm(query_embedding)
-         )
+         np.save(f"{PERSISTENT_PATH}/{name}.npy", np.array(embedding))
+         return f"Embedding saved as {name}.npy"
+     except Exception as e:
+         error_msg = f"Error saving embedding: {str(e)}"
+         logger.error(error_msg)
+         return error_msg

-         # Get top results
-         top_indices = np.argsort(similarities)[-top_k:][::-1]
-         results = []
-         for idx in top_indices:
-             results.append(f"""
- Similarity: {similarities[idx]:.3f}
- Content: {chunks[idx]}
- -------------------
- """)
-         return "\n".join(results)
+ @GPU()
+ def convert_to_json(embedding, name):
+     try:
+         import json
+         with open(f"{PERSISTENT_PATH}/{name}.json", "w") as f:
+             json.dump(embedding, f)
+         return f"Embedding saved as {name}.json"
      except Exception as e:
-         logger.error(f"Search error: {str(e)}")
-         return f"Search error occurred: {str(e)}"
+         error_msg = f"Error converting to JSON: {str(e)}"
+         logger.error(error_msg)
+         return error_msg

- def search_and_format(query, num_results):
-     if not query.strip():
-         return "Please enter a search query"
-     return semantic_search(query, top_k=num_results)
+ @GPU()
+ def process_files(files, focus):
+     global model
+     if model is None:
+         initialize_model()

- def copy_embeddings_to_workspace():
      try:
-         embeddings_path = os.path.join(OUTPUTS_DIR, "embeddings.npy")
-         chunks_path = os.path.join(OUTPUTS_DIR, "chunks.txt")
-         workspace_dir = os.getcwd()
-         shutil.copy(embeddings_path, workspace_dir)
-         shutil.copy(chunks_path, workspace_dir)
-         return "Embeddings copied to workspace directory."
+         all_embeddings = []
+         for file in files:
+             with open(file.name, 'r') as f:
+                 text = f.read()
+             with autocast("cuda"):
+                 embedding = model.encode([text])[0].tolist()
+             all_embeddings.append(embedding)
+         return all_embeddings, ""
      except Exception as e:
-         logger.error(f"Error copying embeddings: {str(e)}")
-         return f"Error copying embeddings: {str(e)}"
+         error_msg = f"Error processing files: {str(e)}"
+         logger.error(error_msg)
+         return "", error_msg

  def create_gradio_interface():
      with gr.Blocks() as demo:
-         gr.Markdown("## Text Chunk Embeddings Generator")
+         gr.Markdown("## Text Embedding Generator")

-         error_box = gr.Textbox(visible=False, label="Status/Error Messages")
+         with gr.Row():
+             text_input = gr.Textbox(label="Enter Text")
+             focus_input = gr.Textbox(label="Main Focus of Embedding (e.g., company structure, staff positions, etc.)")

          with gr.Row():
-             file_input = gr.File(
-                 label="Upload Text Files",
-                 file_count="multiple",
-                 file_types=[".txt"]
-             )
+             file_input = gr.File(label="Upload Files", file_count="multiple")

-         process_button = gr.Button("Generate Embeddings")
-         output_text = gr.Textbox(label="Status")
+         generate_button = gr.Button("Generate Embedding")
+         embedding_output = gr.Textbox(label="Embedding Vector", lines=5)
+         error_box = gr.Textbox(label="Status/Error Messages")

-         process_button.click(
-             fn=process_files,
-             inputs=[file_input],
-             outputs=[output_text, error_box]
+         save_name_input = gr.Textbox(label="Save Embedding As")
+         save_button = gr.Button("Save Embedding")
+         save_status = gr.Textbox(label="Save Status")
+
+         convert_button = gr.Button("Convert to JSON")
+         convert_status = gr.Textbox(label="Convert Status")
+         download_button = gr.Button("Download JSON")
+         download_output = gr.File(label="Download JSON")
+
+         process_button = gr.Button("Process Files")
+         process_output = gr.Textbox(label="Processed Files", lines=5)
+
+         generate_button.click(
+             generate_embedding,
+             inputs=[text_input, focus_input],
+             outputs=[embedding_output, error_box]
          )

-         with gr.Tab("Search"):
-             query_input = gr.Textbox(
-                 label="Enter your search query",
-                 placeholder="Enter text to search through your documents..."
-             )
-             top_k_slider = gr.Slider(
-                 minimum=1,
-                 maximum=20,
-                 value=5,
-                 step=1,
-                 label="Number of results to return"
-             )
-             search_button = gr.Button("Search")
-             results_output = gr.Textbox(
-                 label="Search Results",
-                 lines=10,
-                 show_copy_button=True
-             )
-             search_button.click(
-                 fn=search_and_format,
-                 inputs=[query_input, top_k_slider],
-                 outputs=results_output
-             )
+         save_button.click(
+             save_embedding,
+             inputs=[embedding_output, save_name_input],
+             outputs=[save_status]
+         )
+
+         convert_button.click(
+             convert_to_json,
+             inputs=[embedding_output, save_name_input],
+             outputs=[convert_status]
+         )

-         copy_button = gr.Button("Copy Embeddings to Workspace")
-         copy_output = gr.Textbox(label="Copy Status")
-         copy_button.click(
-             fn=copy_embeddings_to_workspace,
-             outputs=[copy_output]
+         download_button.click(
+             lambda name: f"{PERSISTENT_PATH}/{name}.json",
+             inputs=[save_name_input],
+             outputs=[download_output]
+         )
+
+         process_button.click(
+             process_files,
+             inputs=[file_input, focus_input],
+             outputs=[process_output, error_box]
          )

      return demo
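Two things in the second hunk are worth flagging. First, `LOG_DIR` still defaults to "/data/logs" even though every other path moved to /tmp, so on a Space without persistent storage the `os.makedirs` call can fail at startup. Second, replacing `HF_TOKEN = os.getenv("HF_TOKEN")` with a hardcoded placeholder means a real token would have to be committed to source to work. A minimal sketch of restoring the environment-based lookup, assuming the token lives in a Space secret named HF_TOKEN:

import os
from sentence_transformers import SentenceTransformer

EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
CACHE_DIR = os.getenv("CACHE_DIR", "/tmp/cache")

# Read the token from the environment (e.g. a Space secret), never from source.
HF_TOKEN = os.getenv("HF_TOKEN")
model = SentenceTransformer(EMBEDDING_MODEL_NAME, cache_folder=CACHE_DIR, use_auth_token=HF_TOKEN)

Newer sentence-transformers releases accept `token=` in place of the deprecated `use_auth_token=`.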
 
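There is also a wiring caveat: `save_button.click` and `convert_button.click` feed `embedding_output`, a gr.Textbox, into `save_embedding` and `convert_to_json`, so the embedding arrives as a string and `np.array(embedding)` will produce a single string scalar rather than a float vector (the `focus` input is likewise wired through but unused by both handlers). A sketch of one way to harden the save handler, assuming the textbox holds the JSON-compatible list that `generate_embedding` returns:

import json
import os
import numpy as np

PERSISTENT_PATH = os.getenv("PERSISTENT_PATH", "/tmp/data")

def save_embedding(embedding, name):
    # The Textbox delivers a string; parse it back into a list of floats first.
    if isinstance(embedding, str):
        embedding = json.loads(embedding)
    np.save(f"{PERSISTENT_PATH}/{name}.npy", np.array(embedding))
    return f"Embedding saved as {name}.npy"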
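The diff ends at `return demo`, so the call that actually starts the app presumably sits in unchanged lines below the second hunk. For reference, a sketch of the usual Spaces entry point (the real launch arguments are not shown in this diff):

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch()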