Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ from typing import List
|
|
7 |
from tempfile import NamedTemporaryFile
|
8 |
from huggingface_hub import InferenceClient
|
9 |
from langchain_community.document_loaders import PyPDFLoader
|
10 |
-
from langchain_community.embeddings import HuggingFaceEmbeddings
|
11 |
from langchain_community.vectorstores import FAISS
|
12 |
from langchain.docstore.document import Document
|
13 |
|
@@ -18,9 +18,14 @@ logging.basicConfig(level=logging.INFO)
|
|
18 |
DOCUMENTS_FILE = "uploaded_documents.json"
|
19 |
DEFAULT_MODEL = "@cf/meta/llama-2-7b-chat"
|
20 |
HF_TOKEN = os.getenv("HF_API_TOKEN") # Make sure to set this environment variable
|
|
|
21 |
|
22 |
def get_embeddings():
|
23 |
-
return HuggingFaceEmbeddings(
|
|
|
|
|
|
|
|
|
24 |
|
25 |
def load_documents():
|
26 |
if os.path.exists(DOCUMENTS_FILE):
|
@@ -37,16 +42,17 @@ def load_document(file: NamedTemporaryFile) -> List[Document]:
|
|
37 |
loader = PyPDFLoader(file.name)
|
38 |
return loader.load_and_split()
|
39 |
|
40 |
-
def
|
41 |
if not files:
|
42 |
return "Please upload at least one file.", []
|
43 |
|
|
|
44 |
embed = get_embeddings()
|
45 |
uploaded_documents = load_documents()
|
46 |
total_chunks = 0
|
47 |
|
48 |
all_data = []
|
49 |
-
for file in
|
50 |
try:
|
51 |
data = load_document(file)
|
52 |
if not data:
|
@@ -73,7 +79,7 @@ def update_vectors(files):
|
|
73 |
database.save_local("faiss_database")
|
74 |
|
75 |
save_documents(uploaded_documents)
|
76 |
-
return f"Vector store updated successfully. Processed {total_chunks} chunks.", uploaded_documents
|
77 |
|
78 |
except Exception as e:
|
79 |
return f"Error updating vector store: {str(e)}", []
|
@@ -102,7 +108,8 @@ def delete_documents(selected_docs):
|
|
102 |
uploaded_documents = [doc for doc in uploaded_documents if doc["name"] not in selected_docs]
|
103 |
save_documents(uploaded_documents)
|
104 |
|
105 |
-
|
|
|
106 |
|
107 |
return "No documents to delete.", []
|
108 |
|
@@ -164,7 +171,7 @@ def create_interface():
|
|
164 |
files = gr.File(
|
165 |
label="Upload PDF Documents",
|
166 |
file_types=[".pdf"],
|
167 |
-
multiple
|
168 |
)
|
169 |
upload_button = gr.Button("Upload and Process")
|
170 |
|
@@ -179,22 +186,29 @@ def create_interface():
|
|
179 |
|
180 |
with gr.Row():
|
181 |
with gr.Column():
|
182 |
-
question = gr.Textbox(
|
|
|
|
|
|
|
183 |
temperature = gr.Slider(
|
184 |
minimum=0.0,
|
185 |
maximum=1.0,
|
186 |
value=0.2,
|
187 |
step=0.1,
|
188 |
-
label="Temperature"
|
189 |
)
|
190 |
submit_button = gr.Button("Submit Question")
|
191 |
|
192 |
with gr.Column():
|
193 |
-
answer = gr.Textbox(
|
|
|
|
|
|
|
|
|
194 |
|
195 |
# Event handlers
|
196 |
upload_button.click(
|
197 |
-
fn=
|
198 |
inputs=[files],
|
199 |
outputs=[doc_status, doc_list]
|
200 |
)
|
@@ -210,9 +224,20 @@ def create_interface():
|
|
210 |
inputs=[question, temperature],
|
211 |
outputs=[answer]
|
212 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
|
214 |
return app
|
215 |
|
216 |
if __name__ == "__main__":
|
217 |
app = create_interface()
|
218 |
-
app.launch(
|
|
|
|
|
|
|
|
|
|
7 |
from tempfile import NamedTemporaryFile
|
8 |
from huggingface_hub import InferenceClient
|
9 |
from langchain_community.document_loaders import PyPDFLoader
|
10 |
+
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
|
11 |
from langchain_community.vectorstores import FAISS
|
12 |
from langchain.docstore.document import Document
|
13 |
|
|
|
18 |
DOCUMENTS_FILE = "uploaded_documents.json"
|
19 |
DEFAULT_MODEL = "@cf/meta/llama-2-7b-chat"
|
20 |
HF_TOKEN = os.getenv("HF_API_TOKEN") # Make sure to set this environment variable
|
21 |
+
EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
|
22 |
|
23 |
def get_embeddings():
|
24 |
+
return HuggingFaceEmbeddings(
|
25 |
+
model_name=EMBED_MODEL,
|
26 |
+
model_kwargs={'device': 'cpu'},
|
27 |
+
encode_kwargs={'normalize_embeddings': True}
|
28 |
+
)
|
29 |
|
30 |
def load_documents():
|
31 |
if os.path.exists(DOCUMENTS_FILE):
|
|
|
42 |
loader = PyPDFLoader(file.name)
|
43 |
return loader.load_and_split()
|
44 |
|
45 |
+
def process_uploaded_files(files):
|
46 |
if not files:
|
47 |
return "Please upload at least one file.", []
|
48 |
|
49 |
+
files_list = [files] if not isinstance(files, list) else files
|
50 |
embed = get_embeddings()
|
51 |
uploaded_documents = load_documents()
|
52 |
total_chunks = 0
|
53 |
|
54 |
all_data = []
|
55 |
+
for file in files_list:
|
56 |
try:
|
57 |
data = load_document(file)
|
58 |
if not data:
|
|
|
79 |
database.save_local("faiss_database")
|
80 |
|
81 |
save_documents(uploaded_documents)
|
82 |
+
return f"Vector store updated successfully. Processed {total_chunks} chunks.", [doc["name"] for doc in uploaded_documents]
|
83 |
|
84 |
except Exception as e:
|
85 |
return f"Error updating vector store: {str(e)}", []
|
|
|
108 |
uploaded_documents = [doc for doc in uploaded_documents if doc["name"] not in selected_docs]
|
109 |
save_documents(uploaded_documents)
|
110 |
|
111 |
+
remaining_docs = [doc["name"] for doc in uploaded_documents]
|
112 |
+
return f"Deleted documents: {', '.join(selected_docs)}", remaining_docs
|
113 |
|
114 |
return "No documents to delete.", []
|
115 |
|
|
|
171 |
files = gr.File(
|
172 |
label="Upload PDF Documents",
|
173 |
file_types=[".pdf"],
|
174 |
+
file_count="multiple"
|
175 |
)
|
176 |
upload_button = gr.Button("Upload and Process")
|
177 |
|
|
|
186 |
|
187 |
with gr.Row():
|
188 |
with gr.Column():
|
189 |
+
question = gr.Textbox(
|
190 |
+
label="Ask a question about the documents",
|
191 |
+
placeholder="Enter your question here..."
|
192 |
+
)
|
193 |
temperature = gr.Slider(
|
194 |
minimum=0.0,
|
195 |
maximum=1.0,
|
196 |
value=0.2,
|
197 |
step=0.1,
|
198 |
+
label="Temperature (Higher values make the output more random)"
|
199 |
)
|
200 |
submit_button = gr.Button("Submit Question")
|
201 |
|
202 |
with gr.Column():
|
203 |
+
answer = gr.Textbox(
|
204 |
+
label="Answer",
|
205 |
+
interactive=False,
|
206 |
+
lines=10
|
207 |
+
)
|
208 |
|
209 |
# Event handlers
|
210 |
upload_button.click(
|
211 |
+
fn=process_uploaded_files,
|
212 |
inputs=[files],
|
213 |
outputs=[doc_status, doc_list]
|
214 |
)
|
|
|
224 |
inputs=[question, temperature],
|
225 |
outputs=[answer]
|
226 |
)
|
227 |
+
|
228 |
+
# Add keyboard shortcut for submitting questions
|
229 |
+
question.submit(
|
230 |
+
fn=get_response,
|
231 |
+
inputs=[question, temperature],
|
232 |
+
outputs=[answer]
|
233 |
+
)
|
234 |
|
235 |
return app
|
236 |
|
237 |
if __name__ == "__main__":
|
238 |
app = create_interface()
|
239 |
+
app.launch(
|
240 |
+
server_name="0.0.0.0", # Makes the app accessible from other machines
|
241 |
+
server_port=7860, # Specify port
|
242 |
+
share=True # Creates a public URL
|
243 |
+
)
|