"""Gradio RAG chatbot over a Chroma vector store of country facts.

Answers strictly from (a) documents retrieved from a local Chroma DB and
(b) factual statements the user typed earlier in the session.  When the
model cannot answer, the UI offers Google / Bing search (via SerpAPI) as
a fallback.
"""
import os

# Must be set BEFORE TensorFlow is (transitively) imported by transformers,
# otherwise the C++ log suppression has no effect.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import logging
import re

import gradio as gr
import requests  # for calling SerpAPI
from groq import Groq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from transformers import pipeline

# Suppress noisy Python-side logs.
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("torch").setLevel(logging.ERROR)

# --- Config -----------------------------------------------------------------
# SECURITY: these secrets were hard-coded in this file.  Prefer the
# environment; the literal fallbacks are kept only so existing deployments
# keep working.  TODO: rotate the exposed keys and delete the fallbacks.
SERPAPI_API_KEY = os.environ.get(
    "SERPAPI_API_KEY",
    "48125364c49a1952f1c8fdadf0d22e0d5bc3d195a5a98ad64d7e935aad503efa",
)
GROQ_API_KEY = os.environ.get(
    "GROQ_API_KEY",
    "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",
)

CHROMA_DIR = "chroma_country_info"
GROQ_MODEL = "llama-3.3-70b-versatile"  # You can also try: mixtral-8x7b or gemma-7b-it

# The canonical refusal the system prompt forces the model to emit,
# lower-cased for comparison.
DONT_KNOW = "i don't know based on the context."

# First words that mark a sentence as a question (hoisted out of
# is_question so the set is built once, not per call).
QUESTION_STARTERS = frozenset({
    "who", "what", "when", "where", "why", "which", "whose", "whom", "how",
    "is", "are", "am", "was", "were", "do", "does", "did",
    "have", "has", "had", "can", "could", "will", "would",
    "shall", "should", "may", "might", "must",
    "didn't", "isn't", "aren't", "wasn't", "weren't", "won't",
    "shouldn't", "couldn't", "wouldn't",
})

# Load zero-shot classification model (used to decide fact vs. non-fact).
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Load Groq client.
client = Groq(api_key=GROQ_API_KEY)

# Load vector DB.
print("🔍 Loading Chroma Vector DB...")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectordb = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)

# Memory for user-provided factual information (in-session only).
user_memory = []


def is_question(text):
    """Heuristically decide whether *text* is a question.

    A string counts as a question when it ends with '?' or starts with a
    common English question word / auxiliary verb.
    """
    text_clean = text.strip().lower()
    # Rule 1: explicit question mark.
    if text_clean.endswith("?"):
        return True
    # Rule 2: starts with a known question word.
    words = text_clean.split()
    return bool(words) and words[0] in QUESTION_STARTERS


def is_fact(text):
    """Return True if *text* should be stored as a user-provided fact.

    Questions are never facts.  Non-questions are run through the
    zero-shot classifier.  NOTE(review): with the current thresholds every
    branch yields a label containing "fact", so all non-questions are
    currently kept — the classification influences only the internal
    label, not the result.  Preserved as-is pending a product decision.
    """
    if is_question(text):
        return False

    result = classifier(text, ["fact", "opinion", "speculation"])
    label = result["labels"][0]
    score = result["scores"][0]

    if label == "fact" and score > 0.75:
        # High-confidence fact => keep as a plain fact.
        final_label = "fact"
    elif label in ("opinion", "speculation") and score < 0.9:
        # Labelled opinion/speculation but sounds assertive => custom fact.
        final_label = "custom fact"
    else:
        # Low-confidence fact, or anything else => custom fact.
        final_label = "custom fact"

    return "fact" in final_label


def clear_user_memory():
    """Forget every user-provided fact and reset the chat UI state."""
    user_memory.clear()
    return [], []  # clear chat history and Gradio state too


def generate_answer(user_input):
    """Answer *user_input* using ONLY retrieved context and session memory.

    Side effect: assertive statements are appended to ``user_memory``.
    Returns the model's reply, "Got it" when a new fact was stored and the
    model had nothing to add, or a canned message for memory commands.
    """
    print("📚 generate_answer:\n", user_input)

    # Handle memory commands first.
    lowered = user_input.lower()
    if "what do you remember" in lowered:
        if not user_memory:
            return "I don't remember anything yet."
        return "Here's what I remember:\n" + "\n".join(
            f"- {fact}" for fact in user_memory
        )
    if "forget everything" in lowered:
        user_memory.clear()
        return "Okay, I’ve forgotten everything you told me."

    if not user_input.strip():
        return "Please enter a question."

    # Store assertive statements as session memory.
    is_query = True
    if is_fact(user_input):
        is_query = False
        if user_input not in user_memory:
            user_memory.append(user_input)
            print("✅ Added to memory:", user_input)

    # Retrieve supporting context from Chroma.
    docs = vectordb.similarity_search(user_input, k=3)
    context = "\n\n".join(doc.page_content for doc in docs) if docs else ""
    memory_context = "\n".join(user_memory)

    # (Three earlier, dead reassignments of this prompt were removed; only
    # this final version was ever sent to the model.)
    system_prompt = (
        "You are a helpful AI assistant.\n"
        "You must NEVER use your own knowledge or make any assumptions.\n"
        "Only respond using the information provided in the CONTEXT and MEMORY sections below.\n"
        "If the answer is not found there, you MUST reply with:\n"
        "'I don't know based on the context.'\n"
        "Do not guess or calculate anything that is not already mentioned.\n"
        "Do not try to verify or correct any user-provided statements.\n\n"
        f"CONTEXT:\n{context if context else 'None'}\n\n"
        f"MEMORY:\n{memory_context if memory_context else 'None'}"
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    chat_completion = client.chat.completions.create(
        model=GROQ_MODEL, messages=messages
    )
    final_answer = chat_completion.choices[0].message.content.strip()
    print("🤖 Model Response:", final_answer)

    # A stored statement the model cannot expand on is simply acknowledged.
    if not is_query and final_answer.lower() == DONT_KNOW:
        return "Got it"
    return final_answer


def search_google(query):
    """Return a short answer/snippet for *query* from Google via SerpAPI.

    Prefers the answer box, then its snippet, then up to three organic
    result snippets.
    """
    # Use `params=` so the query is URL-encoded (the previous f-string URL
    # broke on spaces, '&', '#', ...); add a timeout so the UI can't hang.
    resp = requests.get(
        "https://serpapi.com/search.json",
        params={"q": query, "engine": "google", "api_key": SERPAPI_API_KEY},
        timeout=15,
    )
    data = resp.json()

    answer_box = data.get("answer_box")
    if answer_box:
        if answer_box.get("answer"):
            return answer_box["answer"]
        if answer_box.get("snippet"):
            return answer_box["snippet"]

    snippets = [
        f"{i}. {result['snippet']}"
        for i, result in enumerate(data.get("organic_results", [])[:3], 1)
        if result.get("snippet")
    ]
    if snippets:
        return "\n\n".join(snippets)
    return "No snippet found."


def search_bing(query):
    """Return the first organic-result snippet for *query* from Bing via SerpAPI."""
    resp = requests.get(
        "https://serpapi.com/search.json",
        params={"q": query, "engine": "bing", "api_key": SERPAPI_API_KEY},
        timeout=15,
    )
    data = resp.json()
    try:
        return data["organic_results"][0].get("snippet", "No snippet found.")
    except (KeyError, IndexError, TypeError):
        return "No snippet found."


def chat_interface(message, history):
    """Gradio submit handler: run the RAG pipeline and update the chat.

    Returns updates for (chatbot, state, textbox, google_btn, bing_btn);
    the search buttons become visible only when the model could not answer.
    """
    if not message.strip():
        # Hide the search buttons on empty input.
        return history, history, "", gr.update(visible=False), gr.update(visible=False)

    reply = generate_answer(message)
    history.append({"role": "user", "content": f"You said: {message}"})
    history.append({"role": "assistant", "content": reply})

    # Offer web search only when the model explicitly could not answer.
    show_buttons = reply.lower() == DONT_KNOW
    return (
        history,
        history,
        "",  # clear input box
        gr.update(visible=show_buttons),
        gr.update(visible=show_buttons),
    )


def _last_user_message(history):
    """Most recent raw user query in *history* ("You said: " prefix stripped)."""
    for msg in reversed(history):
        if msg["role"] == "user":
            return msg["content"].replace("You said: ", "", 1)
    return ""


def google_search_button_click(history, state):
    """Append a Google snippet for the last user query; buttons stay visible."""
    snippet = search_google(_last_user_message(history))
    history.append({"role": "assistant", "content": f"Google search snippet:\n{snippet}"})
    return history, history, gr.update(visible=True), gr.update(visible=True)


def bing_search_button_click(history, state):
    """Append a Bing snippet for the last user query; buttons stay visible."""
    snippet = search_bing(_last_user_message(history))
    history.append({"role": "assistant", "content": f"Bing search snippet:\n{snippet}"})
    return history, history, gr.update(visible=True), gr.update(visible=True)


# --- Launch UI --------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Chatbot : countries and their capital city and population")
    chatbot = gr.Chatbot(label="Chat History", type="messages")

    with gr.Row():
        # Web-search fallback buttons, hidden until the bot cannot answer.
        google_btn = gr.Button("Search on Google", visible=False)
        bing_btn = gr.Button("Search on Bing", visible=False)

    msg = gr.Textbox(
        label="Your message",
        placeholder="Ask or tell me anything about countries and capital city",
        lines=2,
    )
    with gr.Row():
        submit_btn = gr.Button("Submit")
        clear = gr.Button("Clear chat")
        clear_memory = gr.Button("Clear My Short Memory")

    state = gr.State([])

    submit_btn.click(chat_interface, [msg, state], [chatbot, state, msg, google_btn, bing_btn])
    clear.click(lambda: ([], []), None, [chatbot, state])
    clear_memory.click(clear_user_memory, None, [chatbot, state])
    google_btn.click(google_search_button_click, [state, state], [chatbot, state, google_btn, bing_btn])
    bing_btn.click(bing_search_button_click, [state, state], [chatbot, state, google_btn, bing_btn])

demo.launch(server_name="0.0.0.0", server_port=7860, share=False)