Commit 6831b20
Parent(s): 09775a5
Update app.py
app.py CHANGED
@@ -82,14 +82,43 @@ def remove_context(text):
 print(f"Finished defining the function that removes redundant Context text")
 
 url="https://www.usinoip.com"
-
-
-
-
-
-
-
-
+#url="https://www.usinoip.com/UpdatesAbroad/290.html"
+
+if "url_loader" not in st.session_state:
+    st.session_state.url_loader = ""
+
+if "raw_text" not in st.session_state:
+    st.session_state.raw_text = ""
+
+if "initial_page_content" not in st.session_state:
+    st.session_state.initial_page_content = ""
+
+if "final_page_content" not in st.session_state:
+    st.session_state.final_page_content = ""
+
+if "texts" not in st.session_state:
+    st.session_state.texts = ""
+
+#if "user_question" not in st.session_state:
+#    st.session_state.user_question = ""
+
+if "initial_embeddings" not in st.session_state:
+    st.session_state.initial_embeddings = ""
+
+if "db_embeddings" not in st.session_state:
+    st.session_state.db_embeddings = ""
+
+#if "i_file_path" not in st.session_state:
+#    st.session_state.i_file_path = ""
+i_file_path = ""
+
+#if "file_path" not in st.session_state:
+#    st.session_state.file_path = ""
+
+#if "random_string" not in st.session_state:
+#    st.session_state.random_string = ""
+random_string = ""
+
 wechat_image= "WeChatCode.jpg"
 
 st.sidebar.markdown(
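Note: the block added above exists because Streamlit re-runs the whole script on every widget interaction, so anything built while "Preparing website materials" would otherwise be recomputed, and the cached embeddings lost, on each rerun; parking the values in st.session_state lets them survive. A minimal sketch of the same init pattern, condensed with a loop (illustration only, not the committed code; st.session_state is dict-like, so setdefault works):

import streamlit as st

_DEFAULTS = {
    "url_loader": "",
    "raw_text": "",
    "initial_page_content": "",
    "final_page_content": "",
    "texts": "",
    "initial_embeddings": "",
    "db_embeddings": "",
}

for key, default in _DEFAULTS.items():
    # setdefault writes the key only if it is missing, so values stored by
    # the loader on an earlier run are left untouched on reruns.
    st.session_state.setdefault(key, default)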
@@ -120,17 +149,10 @@ st.markdown(
 
 user_question = st.text_input("Enter your query here and AI-Chat with your website:")
 
-
-
-# chunk_size = 1000,
-# chunk_overlap = 200,
-# length_function = len,
-#)
-
-text_splitter = RecursiveCharacterTextSplitter(
-    #separator = "\n",
+text_splitter = CharacterTextSplitter(
+    separator = "\n",
     chunk_size = 1000,
-    chunk_overlap = 200,
+    chunk_overlap = 200,
     length_function = len,
 )
 
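Note: this hunk swaps RecursiveCharacterTextSplitter for CharacterTextSplitter with separator = "\n". CharacterTextSplitter cuts only at that single separator, so any run of text without a newline stays in one piece even past chunk_size; that is why the next hunk carries the "Created a chunk of size 3431..." warning as a comment. A small sketch of the difference, assuming the classic langchain.text_splitter module:

from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

sample = "one long sentence with no newline " * 60  # ~2000 chars, no "\n"

char_chunks = CharacterTextSplitter(
    separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len,
).split_text(sample)
rec_chunks = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, length_function=len,
).split_text(sample)

print([len(c) for c in char_chunks])  # one oversized chunk; logs the size warning
print([len(c) for c in rec_chunks])   # several chunks, each <= 1000 chars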
@@ -143,29 +165,31 @@ with st.sidebar:
     st.subheader("Enjoy Chatting!")
     st.sidebar.markdown('<span class="blue-underline">Life Enhancing with AI.</span>', unsafe_allow_html=True)
 try:
-    with st.spinner("Preparing website materials for you..."):
-        url_loader = WebBaseLoader([url])
-        raw_text = url_loader.load()
-        initial_page_content = raw_text[0].page_content
-        final_page_content = str(initial_page_content)
-        temp_texts = text_splitter.split_text(final_page_content)
-        texts = temp_texts
-        initial_embeddings=get_embeddings(texts)
-        db_embeddings = torch.FloatTensor(initial_embeddings)
+    with st.spinner("Preparing website materials for you..."):
+        st.session_state.url_loader = WebBaseLoader([url])
+        st.session_state.raw_text = st.session_state.url_loader.load()
+        st.session_state.initial_page_content = st.session_state.raw_text[0].page_content
+        st.session_state.final_page_content = str(st.session_state.initial_page_content)
+        st.session_state.temp_texts = text_splitter.split_text(st.session_state.final_page_content)
+        #Created a chunk of size 3431, which is longer than the specified 1000
+        st.session_state.texts = st.session_state.temp_texts
+        st.session_state.initial_embeddings=get_embeddings(st.session_state.texts)
+        st.session_state.db_embeddings = torch.FloatTensor(st.session_state.initial_embeddings)
 except Exception as e:
-    st.write("Unknow error.")
-    print("Please enter a valide URL.")
-    st.stop()
+    # st.write("Unknow error.")
+    # print("Please enter a valide URL.")
+    # st.stop()
+    pass
 
 if st.button('Get AI Response'):
     if user_question !="" and not user_question.strip().isspace() and not user_question == "" and not user_question.strip() == "" and not user_question.isspace():
         with st.spinner("AI Thinking...Please wait a while to Cheers!"):
             q_embedding=get_embeddings(user_question)
             final_q_embedding = torch.FloatTensor(q_embedding)
-            hits = semantic_search(final_q_embedding, db_embeddings, top_k=5)
+            hits = semantic_search(final_q_embedding, st.session_state.db_embeddings, top_k=5)
             page_contents = []
             for i in range(len(hits[0])):
-                page_content = texts[hits[0][i]['corpus_id']]
+                page_content = st.session_state.texts[hits[0][i]['corpus_id']]
                 page_contents.append(page_content)
             temp_page_contents=str(page_contents)
             final_page_contents = temp_page_contents.replace('\\n', '')
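Note: with the corpus embeddings now cached in st.session_state.db_embeddings, each button click only embeds the question and runs a top-k similarity lookup. The hits[0][i]['corpus_id'] access matches sentence_transformers' semantic_search, which returns one list of {'corpus_id', 'score'} dicts per query. A self-contained sketch of that retrieval step (the app's own get_embeddings helper is not shown in this diff, so SentenceTransformer.encode stands in for it):

import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import semantic_search

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumption: any sentence encoder works

texts = ["chunk about trademarks", "chunk about patents", "chunk about contact info"]
db_embeddings = torch.FloatTensor(model.encode(texts))  # computed once, then cached
q_embedding = torch.FloatTensor(model.encode(["How do I file a patent?"]))

hits = semantic_search(q_embedding, db_embeddings, top_k=2)
# hits[0] is the result list for the first (and only) query row
context = [texts[hit["corpus_id"]] for hit in hits[0]]
print(context)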