Commit 6831b20
Parent(s): 09775a5
Update app.py
app.py CHANGED
@@ -82,14 +82,43 @@ def remove_context(text):
 print(f"Finished defining the function that removes redundant Context text")
 
 url="https://www.usinoip.com"
-
-
-
-
-
-
-
-
+#url="https://www.usinoip.com/UpdatesAbroad/290.html"
+
+if "url_loader" not in st.session_state:
+    st.session_state.url_loader = ""
+
+if "raw_text" not in st.session_state:
+    st.session_state.raw_text = ""
+
+if "initial_page_content" not in st.session_state:
+    st.session_state.initial_page_content = ""
+
+if "final_page_content" not in st.session_state:
+    st.session_state.final_page_content = ""
+
+if "texts" not in st.session_state:
+    st.session_state.texts = ""
+
+#if "user_question" not in st.session_state:
+#    st.session_state.user_question = ""
+
+if "initial_embeddings" not in st.session_state:
+    st.session_state.initial_embeddings = ""
+
+if "db_embeddings" not in st.session_state:
+    st.session_state.db_embeddings = ""
+
+#if "i_file_path" not in st.session_state:
+#    st.session_state.i_file_path = ""
+i_file_path = ""
+
+#if "file_path" not in st.session_state:
+#    st.session_state.file_path = ""
+
+#if "random_string" not in st.session_state:
+#    st.session_state.random_string = ""
+random_string = ""
+
 wechat_image= "WeChatCode.jpg"
 
 st.sidebar.markdown(
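Note: the block added above exists because Streamlit re-runs the whole script on every widget interaction, so anything built while "Preparing website materials" would otherwise be recomputed, and the cached embeddings lost, on each rerun; parking the values in st.session_state lets them survive. A minimal sketch of the same init pattern, condensed with a loop (illustration only, not the committed code; st.session_state is dict-like, so setdefault works):

import streamlit as st

_DEFAULTS = {
    "url_loader": "",
    "raw_text": "",
    "initial_page_content": "",
    "final_page_content": "",
    "texts": "",
    "initial_embeddings": "",
    "db_embeddings": "",
}

for key, default in _DEFAULTS.items():
    # setdefault writes the key only if it is missing, so values stored by
    # the loader on an earlier run are left untouched on reruns.
    st.session_state.setdefault(key, default)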
@@ -120,17 +149,10 @@ st.markdown(
 
 user_question = st.text_input("Enter your query here and AI-Chat with your website:")
 
-
-
-# chunk_size = 1000,
-# chunk_overlap = 200,
-# length_function = len,
-#)
-
-text_splitter = RecursiveCharacterTextSplitter(
-    #separator = "\n",
+text_splitter = CharacterTextSplitter(
+    separator = "\n",
     chunk_size = 1000,
-    chunk_overlap = 200,
+    chunk_overlap = 200,
     length_function = len,
 )
 
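Note: this hunk swaps RecursiveCharacterTextSplitter for CharacterTextSplitter with separator = "\n". CharacterTextSplitter cuts only at that single separator, so any run of text without a newline stays in one piece even past chunk_size; that is why the next hunk carries the "Created a chunk of size 3431..." warning as a comment. A small sketch of the difference, assuming the classic langchain.text_splitter module:

from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

sample = "one long sentence with no newline " * 60  # ~2000 chars, no "\n"

char_chunks = CharacterTextSplitter(
    separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len,
).split_text(sample)
rec_chunks = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, length_function=len,
).split_text(sample)

print([len(c) for c in char_chunks])  # one oversized chunk; logs the size warning
print([len(c) for c in rec_chunks])   # several chunks, each <= 1000 chars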
@@ -143,29 +165,31 @@ with st.sidebar:
     st.subheader("Enjoy Chatting!")
     st.sidebar.markdown('<span class="blue-underline">Life Enhancing with AI.</span>', unsafe_allow_html=True)
 try:
-    with st.spinner("Preparing website materials for you..."):
-        url_loader = WebBaseLoader([url])
-        raw_text = url_loader.load()
-        initial_page_content = raw_text[0].page_content
-        final_page_content = str(initial_page_content)
-        temp_texts = text_splitter.split_text(final_page_content)
-        texts = temp_texts
-        initial_embeddings=get_embeddings(texts)
-        db_embeddings = torch.FloatTensor(initial_embeddings)
+    with st.spinner("Preparing website materials for you..."):
+        st.session_state.url_loader = WebBaseLoader([url])
+        st.session_state.raw_text = st.session_state.url_loader.load()
+        st.session_state.initial_page_content = st.session_state.raw_text[0].page_content
+        st.session_state.final_page_content = str(st.session_state.initial_page_content)
+        st.session_state.temp_texts = text_splitter.split_text(st.session_state.final_page_content)
+        #Created a chunk of size 3431, which is longer than the specified 1000
+        st.session_state.texts = st.session_state.temp_texts
+        st.session_state.initial_embeddings=get_embeddings(st.session_state.texts)
+        st.session_state.db_embeddings = torch.FloatTensor(st.session_state.initial_embeddings)
 except Exception as e:
-    st.write("Unknow error.")
-    print("Please enter a valide URL.")
-    st.stop()
+    # st.write("Unknow error.")
+    # print("Please enter a valide URL.")
+    # st.stop()
+    pass
 
 if st.button('Get AI Response'):
     if user_question !="" and not user_question.strip().isspace() and not user_question == "" and not user_question.strip() == "" and not user_question.isspace():
         with st.spinner("AI Thinking...Please wait a while to Cheers!"):
             q_embedding=get_embeddings(user_question)
             final_q_embedding = torch.FloatTensor(q_embedding)
-            hits = semantic_search(final_q_embedding, db_embeddings, top_k=5)
+            hits = semantic_search(final_q_embedding, st.session_state.db_embeddings, top_k=5)
             page_contents = []
             for i in range(len(hits[0])):
-                page_content = texts[hits[0][i]['corpus_id']]
+                page_content = st.session_state.texts[hits[0][i]['corpus_id']]
                 page_contents.append(page_content)
             temp_page_contents=str(page_contents)
             final_page_contents = temp_page_contents.replace('\\n', '')
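Note: with the corpus embeddings now cached in st.session_state.db_embeddings, each button click only embeds the question and runs a top-k similarity lookup. The hits[0][i]['corpus_id'] access matches sentence_transformers' semantic_search, which returns one list of {'corpus_id', 'score'} dicts per query. A self-contained sketch of that retrieval step (the app's own get_embeddings helper is not shown in this diff, so SentenceTransformer.encode stands in for it):

import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import semantic_search

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumption: any sentence encoder works

texts = ["chunk about trademarks", "chunk about patents", "chunk about contact info"]
db_embeddings = torch.FloatTensor(model.encode(texts))  # computed once, then cached
q_embedding = torch.FloatTensor(model.encode(["How do I file a patent?"]))

hits = semantic_search(q_embedding, db_embeddings, top_k=2)
# hits[0] is the result list for the first (and only) query row
context = [texts[hit["corpus_id"]] for hit in hits[0]]
print(context)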