Ultronprime committed on
Commit
f57d5e5
·
1 Parent(s): 165d157

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -51
app.py CHANGED
@@ -17,7 +17,7 @@ import requests
17
  from charset_normalizer import from_bytes
18
  import zipfile
19
  import tempfile
20
- import webbrowser
21
 
22
  # Custom Exception Class
23
  class GPUQuotaExceededError(Exception):
@@ -125,15 +125,15 @@ def get_model():
125
  @spaces.GPU
126
  def process_files(files):
127
  if not files:
128
- return "Please upload one or more.txt files.", "", ""
129
 
130
  try:
131
  if not initialize_model():
132
- return "Failed to initialize the model. Please try again.", "", ""
133
 
134
  valid_files = [f for f in files if f.name.lower().endswith('.txt')]
135
  if not valid_files:
136
- return "No.txt files found. Please upload valid.txt files.", "", ""
137
 
138
  all_chunks = []
139
  processed_files = 0
@@ -154,7 +154,7 @@ def process_files(files):
154
  logger.error(f"Error processing file {file.name}: {str(e)}")
155
 
156
  if not all_chunks:
157
- return "No valid content found in the uploaded files.", "", ""
158
 
159
  # Generate embeddings in batches
160
  all_embeddings = []
@@ -164,7 +164,7 @@ def process_files(files):
164
  embeddings = handle_gpu_operation(lambda: model.encode(batch))
165
  all_embeddings.extend(embeddings)
166
  else:
167
- return "Model not initialized. Please check model initialization.", "", ""
168
 
169
  # Save results to OUTPUTS_DIR
170
  embeddings_path = os.path.join(OUTPUTS_DIR, "embeddings.npy")
@@ -176,13 +176,12 @@ def process_files(files):
176
 
177
  return (
178
  f"Successfully processed {processed_files} files. Generated {len(all_embeddings)} embeddings from {len(all_chunks)} chunks.",
179
- "",
180
  ""
181
  )
182
 
183
  except Exception as e:
184
  logger.error(f"Processing failed: {str(e)}")
185
- return f"Error processing files: {str(e)}", "", ""
186
 
187
  @spaces.GPU
188
  def semantic_search(query, top_k=5):
@@ -194,13 +193,6 @@ def semantic_search(query, top_k=5):
194
  # Load saved embeddings and chunks from OUTPUTS_DIR
195
  embeddings_file = os.path.join(OUTPUTS_DIR, "embeddings.npy")
196
  chunks_file = os.path.join(OUTPUTS_DIR, "chunks.txt")
197
-
198
- logger.info(f"Checking for embeddings file: {embeddings_file}")
199
- logger.info(f"Checking for chunks file: {chunks_file}")
200
-
201
- if not os.path.exists(embeddings_file) or not os.path.exists(chunks_file):
202
- return "Embeddings or chunks not found. Please generate embeddings first."
203
-
204
  stored_embeddings = np.load(embeddings_file)
205
  with open(chunks_file, "r", encoding="utf-8") as f:
206
  chunks = f.read().split("\n===CHUNK_SEPARATOR===\n")
@@ -233,31 +225,17 @@ def search_and_format(query, num_results):
233
  return "Please enter a search query"
234
  return semantic_search(query, top_k=num_results)
235
 
236
- def browse_outputs():
237
  try:
238
- # Open the outputs directory in a web browser (may work on some systems)
239
- webbrowser.open(f"file://{OUTPUTS_DIR}")
240
- return "Opened outputs directory."
241
- except Exception as e:
242
- logger.error(f"Error opening file browser: {str(e)}")
243
- return "Error opening file browser."
244
-
245
- def download_results():
246
- required_files = ["embeddings.npy", "chunks.txt"]
247
- missing = [f for f in required_files if not os.path.exists(os.path.join(OUTPUTS_DIR, f))]
248
- if missing:
249
- logger.error(f"Missing files: {missing}")
250
- return None
251
- try:
252
- zip_path = os.path.join(OUTPUTS_DIR, "results.zip")
253
- with zipfile.ZipFile(zip_path, 'w') as zipf:
254
- for file in required_files:
255
- file_path = os.path.join(OUTPUTS_DIR, file)
256
- zipf.write(file_path, file)
257
- return zip_path
258
  except Exception as e:
259
- logger.error(f"Error creating download archive: {str(e)}")
260
- return None
261
 
262
  def create_gradio_interface():
263
  with gr.Blocks() as demo:
@@ -278,7 +256,7 @@ def create_gradio_interface():
278
  process_button.click(
279
  fn=process_files,
280
  inputs=[file_input],
281
- outputs=[output_text, error_box, error_box]
282
  )
283
 
284
  with gr.Tab("Search"):
@@ -305,18 +283,12 @@ def create_gradio_interface():
305
  outputs=results_output
306
  )
307
 
308
- download_button = gr.Button(" Download Results")
309
- download_button.click(
310
- fn=download_results,
311
- outputs=[gr.File(label="Download Results")]
312
- )
313
-
314
- with gr.Tab("Outputs"):
315
- browse_button = gr.Button(" Browse Outputs")
316
- browse_button.click(
317
- fn=browse_outputs,
318
- outputs=[gr.Textbox(label="Browse Status")]
319
- )
320
 
321
  return demo
322
 
 
17
  from charset_normalizer import from_bytes
18
  import zipfile
19
  import tempfile
20
+ import shutil
21
 
22
  # Custom Exception Class
23
  class GPUQuotaExceededError(Exception):
 
125
  @spaces.GPU
126
  def process_files(files):
127
  if not files:
128
+ return "Please upload one or more.txt files.", ""
129
 
130
  try:
131
  if not initialize_model():
132
+ return "Failed to initialize the model. Please try again.", ""
133
 
134
  valid_files = [f for f in files if f.name.lower().endswith('.txt')]
135
  if not valid_files:
136
+ return "No.txt files found. Please upload valid.txt files.", ""
137
 
138
  all_chunks = []
139
  processed_files = 0
 
154
  logger.error(f"Error processing file {file.name}: {str(e)}")
155
 
156
  if not all_chunks:
157
+ return "No valid content found in the uploaded files.", ""
158
 
159
  # Generate embeddings in batches
160
  all_embeddings = []
 
164
  embeddings = handle_gpu_operation(lambda: model.encode(batch))
165
  all_embeddings.extend(embeddings)
166
  else:
167
+ return "Model not initialized. Please check model initialization.", ""
168
 
169
  # Save results to OUTPUTS_DIR
170
  embeddings_path = os.path.join(OUTPUTS_DIR, "embeddings.npy")
 
176
 
177
  return (
178
  f"Successfully processed {processed_files} files. Generated {len(all_embeddings)} embeddings from {len(all_chunks)} chunks.",
 
179
  ""
180
  )
181
 
182
  except Exception as e:
183
  logger.error(f"Processing failed: {str(e)}")
184
+ return f"Error processing files: {str(e)}", ""
185
 
186
  @spaces.GPU
187
  def semantic_search(query, top_k=5):
 
193
  # Load saved embeddings and chunks from OUTPUTS_DIR
194
  embeddings_file = os.path.join(OUTPUTS_DIR, "embeddings.npy")
195
  chunks_file = os.path.join(OUTPUTS_DIR, "chunks.txt")
 
 
 
 
 
 
 
196
  stored_embeddings = np.load(embeddings_file)
197
  with open(chunks_file, "r", encoding="utf-8") as f:
198
  chunks = f.read().split("\n===CHUNK_SEPARATOR===\n")
 
225
  return "Please enter a search query"
226
  return semantic_search(query, top_k=num_results)
227
 
228
+ def copy_embeddings_to_workspace():
229
  try:
230
+ embeddings_path = os.path.join(OUTPUTS_DIR, "embeddings.npy")
231
+ chunks_path = os.path.join(OUTPUTS_DIR, "chunks.txt")
232
+ workspace_dir = os.getcwd()
233
+ shutil.copy(embeddings_path, workspace_dir)
234
+ shutil.copy(chunks_path, workspace_dir)
235
+ return "Embeddings copied to workspace directory."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  except Exception as e:
237
+ logger.error(f"Error copying embeddings: {str(e)}")
238
+ return f"Error copying embeddings: {str(e)}"
239
 
240
  def create_gradio_interface():
241
  with gr.Blocks() as demo:
 
256
  process_button.click(
257
  fn=process_files,
258
  inputs=[file_input],
259
+ outputs=[output_text, error_box]
260
  )
261
 
262
  with gr.Tab("Search"):
 
283
  outputs=results_output
284
  )
285
 
286
+ copy_button = gr.Button("Copy Embeddings to Workspace")
287
+ copy_output = gr.Textbox(label="Copy Status")
288
+ copy_button.click(
289
+ fn=copy_embeddings_to_workspace,
290
+ outputs=[copy_output]
291
+ )
 
 
 
 
 
 
292
 
293
  return demo
294