Shreyas094 committed on
Commit
4c57776
·
verified ·
1 Parent(s): 73a7410

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -12
app.py CHANGED
@@ -7,7 +7,7 @@ from typing import List
7
  from tempfile import NamedTemporaryFile
8
  from huggingface_hub import InferenceClient
9
  from langchain_community.document_loaders import PyPDFLoader
10
- from langchain_community.embeddings import HuggingFaceEmbeddings
11
  from langchain_community.vectorstores import FAISS
12
  from langchain.docstore.document import Document
13
 
@@ -18,9 +18,14 @@ logging.basicConfig(level=logging.INFO)
18
  DOCUMENTS_FILE = "uploaded_documents.json"
19
  DEFAULT_MODEL = "@cf/meta/llama-2-7b-chat"
20
  HF_TOKEN = os.getenv("HF_API_TOKEN") # Make sure to set this environment variable
 
21
 
22
  def get_embeddings():
23
- return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
 
 
 
 
24
 
25
  def load_documents():
26
  if os.path.exists(DOCUMENTS_FILE):
@@ -37,16 +42,17 @@ def load_document(file: NamedTemporaryFile) -> List[Document]:
37
  loader = PyPDFLoader(file.name)
38
  return loader.load_and_split()
39
 
40
- def update_vectors(files):
41
  if not files:
42
  return "Please upload at least one file.", []
43
 
 
44
  embed = get_embeddings()
45
  uploaded_documents = load_documents()
46
  total_chunks = 0
47
 
48
  all_data = []
49
- for file in files:
50
  try:
51
  data = load_document(file)
52
  if not data:
@@ -73,7 +79,7 @@ def update_vectors(files):
73
  database.save_local("faiss_database")
74
 
75
  save_documents(uploaded_documents)
76
- return f"Vector store updated successfully. Processed {total_chunks} chunks.", uploaded_documents
77
 
78
  except Exception as e:
79
  return f"Error updating vector store: {str(e)}", []
@@ -102,7 +108,8 @@ def delete_documents(selected_docs):
102
  uploaded_documents = [doc for doc in uploaded_documents if doc["name"] not in selected_docs]
103
  save_documents(uploaded_documents)
104
 
105
- return f"Deleted documents: {', '.join(selected_docs)}", uploaded_documents
 
106
 
107
  return "No documents to delete.", []
108
 
@@ -164,7 +171,7 @@ def create_interface():
164
  files = gr.File(
165
  label="Upload PDF Documents",
166
  file_types=[".pdf"],
167
- multiple=True
168
  )
169
  upload_button = gr.Button("Upload and Process")
170
 
@@ -179,22 +186,29 @@ def create_interface():
179
 
180
  with gr.Row():
181
  with gr.Column():
182
- question = gr.Textbox(label="Ask a question about the documents")
 
 
 
183
  temperature = gr.Slider(
184
  minimum=0.0,
185
  maximum=1.0,
186
  value=0.2,
187
  step=0.1,
188
- label="Temperature"
189
  )
190
  submit_button = gr.Button("Submit Question")
191
 
192
  with gr.Column():
193
- answer = gr.Textbox(label="Answer", interactive=False)
 
 
 
 
194
 
195
  # Event handlers
196
  upload_button.click(
197
- fn=update_vectors,
198
  inputs=[files],
199
  outputs=[doc_status, doc_list]
200
  )
@@ -210,9 +224,20 @@ def create_interface():
210
  inputs=[question, temperature],
211
  outputs=[answer]
212
  )
 
 
 
 
 
 
 
213
 
214
  return app
215
 
216
  if __name__ == "__main__":
217
  app = create_interface()
218
- app.launch()
 
 
 
 
 
7
  from tempfile import NamedTemporaryFile
8
  from huggingface_hub import InferenceClient
9
  from langchain_community.document_loaders import PyPDFLoader
10
+ from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
11
  from langchain_community.vectorstores import FAISS
12
  from langchain.docstore.document import Document
13
 
 
18
  DOCUMENTS_FILE = "uploaded_documents.json"
19
  DEFAULT_MODEL = "@cf/meta/llama-2-7b-chat"
20
  HF_TOKEN = os.getenv("HF_API_TOKEN") # Make sure to set this environment variable
21
+ EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
22
 
23
  def get_embeddings():
24
+ return HuggingFaceEmbeddings(
25
+ model_name=EMBED_MODEL,
26
+ model_kwargs={'device': 'cpu'},
27
+ encode_kwargs={'normalize_embeddings': True}
28
+ )
29
 
30
  def load_documents():
31
  if os.path.exists(DOCUMENTS_FILE):
 
42
  loader = PyPDFLoader(file.name)
43
  return loader.load_and_split()
44
 
45
+ def process_uploaded_files(files):
46
  if not files:
47
  return "Please upload at least one file.", []
48
 
49
+ files_list = [files] if not isinstance(files, list) else files
50
  embed = get_embeddings()
51
  uploaded_documents = load_documents()
52
  total_chunks = 0
53
 
54
  all_data = []
55
+ for file in files_list:
56
  try:
57
  data = load_document(file)
58
  if not data:
 
79
  database.save_local("faiss_database")
80
 
81
  save_documents(uploaded_documents)
82
+ return f"Vector store updated successfully. Processed {total_chunks} chunks.", [doc["name"] for doc in uploaded_documents]
83
 
84
  except Exception as e:
85
  return f"Error updating vector store: {str(e)}", []
 
108
  uploaded_documents = [doc for doc in uploaded_documents if doc["name"] not in selected_docs]
109
  save_documents(uploaded_documents)
110
 
111
+ remaining_docs = [doc["name"] for doc in uploaded_documents]
112
+ return f"Deleted documents: {', '.join(selected_docs)}", remaining_docs
113
 
114
  return "No documents to delete.", []
115
 
 
171
  files = gr.File(
172
  label="Upload PDF Documents",
173
  file_types=[".pdf"],
174
+ file_count="multiple"
175
  )
176
  upload_button = gr.Button("Upload and Process")
177
 
 
186
 
187
  with gr.Row():
188
  with gr.Column():
189
+ question = gr.Textbox(
190
+ label="Ask a question about the documents",
191
+ placeholder="Enter your question here..."
192
+ )
193
  temperature = gr.Slider(
194
  minimum=0.0,
195
  maximum=1.0,
196
  value=0.2,
197
  step=0.1,
198
+ label="Temperature (Higher values make the output more random)"
199
  )
200
  submit_button = gr.Button("Submit Question")
201
 
202
  with gr.Column():
203
+ answer = gr.Textbox(
204
+ label="Answer",
205
+ interactive=False,
206
+ lines=10
207
+ )
208
 
209
  # Event handlers
210
  upload_button.click(
211
+ fn=process_uploaded_files,
212
  inputs=[files],
213
  outputs=[doc_status, doc_list]
214
  )
 
224
  inputs=[question, temperature],
225
  outputs=[answer]
226
  )
227
+
228
+ # Add keyboard shortcut for submitting questions
229
+ question.submit(
230
+ fn=get_response,
231
+ inputs=[question, temperature],
232
+ outputs=[answer]
233
+ )
234
 
235
  return app
236
 
237
  if __name__ == "__main__":
238
  app = create_interface()
239
+ app.launch(
240
+ server_name="0.0.0.0", # Makes the app accessible from other machines
241
+ server_port=7860, # Specify port
242
+ share=True # Creates a public URL
243
+ )