File size: 5,701 Bytes
741f393 0c24f35 741f393 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import html
import json
import re

import nltk
import requests
import streamlit as st
from nltk.corpus import stopwords
# Build the English stopword set used to drop uninformative search keywords.
# Try the locally installed corpus first and call the NLTK downloader only
# when the lookup fails, so the downloader is not invoked on every run of
# this script.
try:
    STOPWORDS = set(stopwords.words("english"))
except LookupError:
    nltk.download('stopwords', quiet=True)
    STOPWORDS = set(stopwords.words("english"))
# Document that the "Document" and "View PDF" links in this page point to.
pdf_url = "http://127.0.0.1:8000/TestFolder/2201.01647v4.pdf"

# Stylesheet injected into the page: centres the main area and caps the
# width of the content container.
_PAGE_STYLE = """
<style>
.main {
display: flex;
justify-content: center;
padding-top: 30px;
}
.content-container {
max-width: 1200px; /* Adjust width for centering */
width: 100%;
}
</style>
"""

st.set_page_config(layout="wide")
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

# Opening tag of the content container, followed by the page title.
st.markdown('<div class="content-container">', unsafe_allow_html=True)
st.title('_:blue[Local File Search]_ :sunglasses:')
# Give every session-state key used by this page an initial value, leaving
# keys that are already present untouched.
for _state_key, _initial_value in (
    ("search_result", []),
    ("ai_result", ""),
    ("search_input", ""),
    ("ai_input", ""),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
def _highlight_keywords(text, pattern):
    """HTML-escape *text* and wrap each match of *pattern* in a highlight span."""
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(
            "<span style='background-color: yellow; font-weight: bold;'>"
            f"{html.escape(match.group(0))}</span>"
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)


def format_keywords_as_list(content, keywords, num_words=10):
    """Return HTML snippets showing every keyword hit in *content* with context.

    Empty keywords and keywords found in the module-level ``STOPWORDS`` set
    are ignored. The remaining keywords are matched literally and
    case-insensitively. For each hit, the snippet consists of the
    whitespace-delimited word in which the hit starts plus up to
    ``num_words`` words on either side. Inside a snippet every keyword
    occurrence is wrapped in a yellow, bold ``<span>``; all other text is
    HTML-escaped so the snippet can be rendered as HTML without the document
    text being interpreted as markup.

    Args:
        content: Text to search. ``None`` is treated as empty text.
        keywords: Iterable of keyword strings.
        num_words: Number of context words kept on each side of a hit
            (negative values are treated as 0).

    Returns:
        A list with one ``"... <snippet> ..."`` string per hit, or
        ``["No relevant content found."]`` when no usable keyword remains,
        or ``["No matches found."]`` when no keyword occurs in *content*.
    """
    usable_keywords = [
        kw for kw in keywords if kw and kw.lower() not in STOPWORDS
    ]
    if not usable_keywords:
        return ["No relevant content found."]

    content = content or ""
    pattern = re.compile(
        "|".join(map(re.escape, usable_keywords)), re.IGNORECASE
    )
    hits = list(pattern.finditer(content))
    if not hits:
        return ["No matches found."]

    # Split the text into words once, remembering where each word starts, so
    # the context window of a hit is a plain list slice instead of a fresh
    # split of the whole text per hit.
    tokens = list(re.finditer(r"\S+", content))
    words = [token.group(0) for token in tokens]
    word_starts = [token.start() for token in tokens]
    context = max(num_words, 0)

    snippets = []
    word_index = 0
    for hit in hits:
        # Hits are produced in ascending position, so the index of the word
        # containing the hit only ever moves forward.
        while (
            word_index + 1 < len(word_starts)
            and word_starts[word_index + 1] <= hit.start()
        ):
            word_index += 1

        first = max(word_index - context, 0)
        # Slicing whole words keeps a word intact when the hit starts in the
        # middle of it.
        snippet = " ".join(words[first:word_index + context + 1])
        snippets.append(f"... {_highlight_keywords(snippet, pattern)} ...")
    return snippets
# Two equally wide columns: file search on the left, LocalAI on the right.
# NOTE(review): the nesting below was reconstructed from the statements
# themselves; confirm it against the original layout.
left_col, right_col = st.columns([1, 1])

with left_col:
    st.subheader("Search Files")
    search_input = st.text_input(
        "Enter keywords to search your local files:",
        st.session_state.search_input,
        key="search_input_key",
    )

    if st.button("Search files"):
        st.session_state.search_input = search_input
        # Start from a clean slate so neither results nor an error message
        # from an earlier search survive this one.
        st.session_state.search_result = []
        st.session_state["search_error"] = ""

        url = "http://127.0.0.1:8000/search"
        payload = json.dumps({"query": search_input})
        headers = {
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        }
        try:
            # Without a timeout an unresponsive backend would block this
            # script run indefinitely.
            response = requests.post(
                url, headers=headers, data=payload, timeout=30
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            st.session_state["search_error"] = f"HTTP Request failed: {e}"
        else:
            # Decoding is handled separately from the request: depending on
            # the requests version a decode failure is also a
            # RequestException and would otherwise be reported as an HTTP
            # failure. Every JSON decode error is a ValueError.
            try:
                response_data = response.json()
            except ValueError:
                st.session_state["search_error"] = (
                    "Failed to decode JSON response."
                )
            else:
                if isinstance(response_data, list):
                    st.session_state.search_result = response_data
                else:
                    st.session_state["search_error"] = (
                        "Unexpected data format received."
                    )

    # Errors are shown directly rather than stored as a pseudo search result,
    # where the snippet filter below would hide them.
    if st.session_state.get("search_error"):
        st.write(st.session_state["search_error"])

    if st.session_state.search_result:
        st.write("### Results:")
        keywords = st.session_state.search_input.split()
        for item in st.session_state.search_result:
            if not isinstance(item, dict):
                continue
            snippets = format_keywords_as_list(item.get('content') or "", keywords)
            valid_snippets = [
                snippet for snippet in snippets
                if snippet != "No matches found."
            ]
            if not valid_snippets:
                continue

            # The path comes from the backend and is placed inside HTML, so
            # it is escaped first.
            # NOTE(review): every result links to the same pdf_url regardless
            # of its path; confirm that this is intended.
            shown_path = html.escape(str(item.get('path') or 'Unknown File'))
            st.markdown(
                f"<span style='font-size:20px; font-weight:bold;'>Document: <a href='{pdf_url}' target='_blank' style='text-decoration: none; color: blue;'>{shown_path}</a></span>",
                unsafe_allow_html=True,
            )
            for snippet in valid_snippets:
                st.markdown(f"- {snippet}", unsafe_allow_html=True)
# Right column: send a free-text question to the LocalAI endpoint and show
# the answer (or the error) stored in session state.
# NOTE(review): the nesting below was reconstructed from the statements
# themselves; in particular the "View PDF" link is placed under the
# ai_result check. Confirm against the original layout.
with right_col:
    st.subheader("Ask LocalAI")
    ai_input = st.text_input(
        "Enter your question for LocalAI:",
        st.session_state.ai_input,
        key="ai_input_key",
    )

    if st.button("Ask LocalAI"):
        st.session_state.ai_input = ai_input
        url = "http://127.0.0.1:8000/ask_localai"
        payload = json.dumps({"query": ai_input})
        headers = {
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        }
        try:
            # (connect, read) timeout: fail fast when the backend is down,
            # but leave generous time for the answer itself. Without any
            # timeout an unresponsive backend would block indefinitely.
            response = requests.post(
                url, headers=headers, data=payload, timeout=(5, 300)
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            st.session_state.ai_result = f"HTTP Request failed: {e}"
        else:
            # Decoding is handled separately from the request: depending on
            # the requests version a decode failure is also a
            # RequestException and would otherwise be reported as an HTTP
            # failure. Every JSON decode error is a ValueError.
            try:
                response_data = response.json()
            except ValueError:
                st.session_state.ai_result = "Failed to decode JSON response."
            else:
                # Only a JSON object can carry the expected fields; any other
                # JSON value is treated like a missing answer.
                if isinstance(response_data, dict) and "answer" in response_data:
                    query = response_data.get("question", "No question provided.")
                    answer = response_data["answer"]
                    st.session_state.ai_result = (
                        f"### Question:\n{query}\n\n### Answer:\n{answer}"
                    )
                else:
                    st.session_state.ai_result = (
                        "No 'answer' field found in the response."
                    )

    if st.session_state.ai_result:
        st.write(st.session_state.ai_result)
        st.markdown(
            f"<span style='font-size:16px;'>This AI model is trained from the following document: <a href='{pdf_url}' target='_blank' style='color: blue;'>View PDF</a></span>",
            unsafe_allow_html=True,
        )

# Closing tag matching the content-container <div> emitted near the top of
# the page.
st.markdown('</div>', unsafe_allow_html=True)
|