"""Streamlit front-end for a local file-search + LocalAI Q&A backend.

Left column: keyword search against http://127.0.0.1:8000/search, with the
matched terms highlighted in context snippets.
Right column: question answering via http://127.0.0.1:8000/ask_localai.
"""

import json
import re

import nltk
import requests
import streamlit as st
from nltk.corpus import stopwords

# Fetch the stopword corpus once; quiet no-op if it is already cached.
nltk.download('stopwords', quiet=True)
STOPWORDS = set(stopwords.words("english"))

# PDF served by the same local backend; linked at the bottom of the AI column.
pdf_url = "http://127.0.0.1:8000/TestFolder/2201.01647v4.pdf"

# Shared request plumbing for both backend endpoints.
API_HEADERS = {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
}
REQUEST_TIMEOUT = 30  # seconds; keeps the UI from hanging on a dead backend

st.set_page_config(layout="wide")

# NOTE(review): the HTML/CSS payloads of these markdown wrappers were lost in
# extraction (the original strings were emptied); restore the page styling here.
st.markdown(
    """
    """,
    unsafe_allow_html=True,
)
st.markdown('', unsafe_allow_html=True)

st.title('_:blue[Local File Search]_ :sunglasses:')

# Persist results and inputs across Streamlit reruns.
if "search_result" not in st.session_state:
    st.session_state.search_result = []
if "ai_result" not in st.session_state:
    st.session_state.ai_result = ""
if "search_input" not in st.session_state:
    st.session_state.search_input = ""
if "ai_input" not in st.session_state:
    st.session_state.ai_input = ""


def format_keywords_as_list(content, keywords, num_words=10):
    """Return context snippets of *content* around each keyword match.

    Stopwords are removed from *keywords*; the remaining terms are matched
    case-insensitively. Each snippet keeps up to ``num_words`` words of
    context on either side of the match (the extra word after accounts for
    the matched term itself), with matches wrapped in ``<mark>`` tags for
    rendering via ``unsafe_allow_html``.

    Returns ``["No relevant content found."]`` when every keyword is a
    stopword, and ``["No matches found."]`` when nothing matches — callers
    filter on these exact sentinel strings.
    """
    filtered_keywords = [kw for kw in keywords if kw.lower() not in STOPWORDS]
    if not filtered_keywords:
        return ["No relevant content found."]
    # Compile once: the same pattern is used for finding and highlighting.
    pattern = re.compile("|".join(map(re.escape, filtered_keywords)), re.IGNORECASE)
    matches = list(pattern.finditer(content))
    if not matches:
        return ["No matches found."]
    snippets = []
    for match in matches:
        start_index = match.start()
        # num_words of context before the match; was hard-coded to 10 before,
        # which silently ignored the num_words parameter.
        words_before = content[:start_index].split()[-num_words:]
        words_after = content[start_index:].split()[:num_words + 1]
        snippet = " ".join(words_before + words_after)
        # Highlight every matched term; the original lambda was a no-op
        # (its wrapping markup was stripped), so matches were never marked.
        highlighted_snippet = pattern.sub(
            lambda m: f"<mark>{m.group(0)}</mark>", snippet
        )
        snippets.append(f"... {highlighted_snippet} ...")
    return snippets


left_col, right_col = st.columns([1, 1])

with left_col:
    st.subheader("Search Files")
    search_input = st.text_input(
        "Enter keywords to search your local files:",
        st.session_state.search_input,
        key="search_input_key",
    )
    if st.button("Search files"):
        st.session_state.search_input = search_input
        url = "http://127.0.0.1:8000/search"
        payload = json.dumps({"query": search_input})
        try:
            response = requests.post(
                url,
                headers=API_HEADERS,
                data=payload,
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            response_data = response.json()
            if isinstance(response_data, list):
                st.session_state.search_result = response_data
            else:
                st.session_state.search_result = [
                    {"content": "Unexpected data format received.", "path": ""}
                ]
        except requests.exceptions.RequestException as e:
            st.session_state.search_result = [
                {"content": f"HTTP Request failed: {e}", "path": ""}
            ]
        except json.JSONDecodeError:
            st.session_state.search_result = [
                {"content": "Failed to decode JSON response.", "path": ""}
            ]

    if st.session_state.search_result:
        st.write("### Results:")
        for item in st.session_state.search_result:
            keywords = st.session_state.search_input.split()
            snippets = format_keywords_as_list(item.get('content', ""), keywords)
            # Hide documents whose content produced no snippet at all.
            valid_snippets = [
                snippet for snippet in snippets if snippet != "No matches found."
            ]
            if valid_snippets:
                st.markdown(
                    f"Document: {item.get('path', 'Unknown File')}",
                    unsafe_allow_html=True,
                )
                for snippet in valid_snippets:
                    st.markdown(f"- {snippet}", unsafe_allow_html=True)

with right_col:
    st.subheader("Ask LocalAI")
    ai_input = st.text_input(
        "Enter your question for LocalAI:",
        st.session_state.ai_input,
        key="ai_input_key",
    )
    if st.button("Ask LocalAI"):
        st.session_state.ai_input = ai_input
        url = "http://127.0.0.1:8000/ask_localai"
        payload = json.dumps({"query": ai_input})
        try:
            response = requests.post(
                url,
                headers=API_HEADERS,
                data=payload,
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            response_data = response.json()
            if "answer" in response_data:
                query = response_data.get("question", "No question provided.")
                answer = response_data.get("answer", "No answer provided.")
                st.session_state.ai_result = (
                    f"### Question:\n{query}\n\n### Answer:\n{answer}"
                )
            else:
                st.session_state.ai_result = (
                    "No 'answer' field found in the response."
                )
        except requests.exceptions.RequestException as e:
            st.session_state.ai_result = f"HTTP Request failed: {e}"
        except json.JSONDecodeError:
            # Single period, matching the search column's message style.
            st.session_state.ai_result = "Failed to decode JSON response."

    if st.session_state.ai_result:
        st.write(st.session_state.ai_result)

    # pdf_url was previously defined but never rendered because the anchor
    # markup was stripped; restore the link to the source document.
    st.markdown(
        f"This AI model is trained from the following document: "
        f'<a href="{pdf_url}" target="_blank">View PDF</a>',
        unsafe_allow_html=True,
    )

# NOTE(review): closing wrapper whose HTML was lost in extraction (see above).
st.markdown('', unsafe_allow_html=True)