# Final_Assignment_Template
#
# Runtime error
#
# File size: 6,717 Bytes

# SECURITY: a Groq API key used to be hard-coded on this line. Credentials must
# never be committed to source control; the exposed key should be revoked.
# This name is always reassigned from the sidebar text input further down
# before anything reads it, so an empty placeholder is sufficient here.
api_key = ""

import streamlit as st
from langchain_groq import ChatGroq
from langchain_community.utilities import ArxivAPIWrapper, WikipediaAPIWrapper
from langchain_community.tools import ArxivQueryRun, WikipediaQueryRun, DuckDuckGoSearchRun
from langchain.agents import initialize_agent, AgentType
import os
import requests
import pandas as pd
from dotenv import load_dotenv

# Load environment variables
# Called with no arguments, so python-dotenv's default .env lookup applies.
# The only environment variable this file reads afterwards is SPACE_ID
# (via os.getenv inside run_evaluation).
load_dotenv()

# Constants for Basic Agent Evaluation
# Base URL of the scoring service; run_evaluation() derives the
# "/questions" and "/submit" endpoints from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Initialize search tools (with warm-up)
@st.cache_resource
def load_tools():
    """Build the search tools handed to the agents and warm them up.

    Creates an Arxiv tool and a Wikipedia tool (each configured for one
    result and at most 250 characters of document content) and a DuckDuckGo
    search tool named "Search". The function is wrapped in
    ``st.cache_resource`` so the tool objects are reused rather than rebuilt
    each time the script reruns.

    Returns:
        list: ``[search, arxiv, wiki]``, in that order.
    """
    import logging  # local import: only used by the best-effort warm-up below

    with st.spinner("Initializing tools (first time may take a few seconds)..."):
        api_wrapper_arxiv = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=250)
        arxiv = ArxivQueryRun(api_wrapper=api_wrapper_arxiv)
        api_wrapper_wiki = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=250)
        wiki = WikipediaQueryRun(api_wrapper=api_wrapper_wiki)
        search = DuckDuckGoSearchRun(name="Search")

        # Warm up tools. The warm-up is purely an optimisation, so a failing
        # query (for example when the network is unavailable) must not stop
        # the app from starting; the failure is logged and ignored.
        for tool in (arxiv, wiki):
            try:
                tool.run("machine learning")
            except Exception:
                logging.getLogger(__name__).warning(
                    "Warm-up query failed for tool %r", tool.name, exc_info=True
                )
        return [search, arxiv, wiki]

# Shared tool list; load_tools() is wrapped in st.cache_resource.
tools = load_tools()

# ---- Page header ----
st.title("Langchain - Chat with Search & Evaluation")

# ---- Sidebar: settings ----
st.sidebar.title("Settings")
# Masked input; this value is what the chat handler and run_evaluation() read.
api_key = st.sidebar.text_input("Enter your Groq API Key:", type="password")

# ---- Conversation state ----
# Seed the history with the assistant greeting the first time the key is absent.
if "messages" not in st.session_state:
    greeting = {
        "role": "assistant",
        "content": "Hi, I am a Chatbot who can search the web and evaluate questions. How can I help you?",
    }
    st.session_state["messages"] = [greeting]

# ---- Replay the stored conversation ----
for entry in st.session_state.messages:
    bubble = st.chat_message(entry["role"])
    bubble.write(entry["content"])

# Chat input
# Handles one user turn: record and echo the prompt, require an API key,
# build the LLM and agent, then show either the agent's answer or an error.
if prompt := st.chat_input(placeholder="What is machine learning?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    if not api_key:
        st.error("Please enter your Groq API key in the sidebar.")
        st.stop()

    llm = ChatGroq(groq_api_key=api_key, model_name="llama3-70b-8192")
    search_agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, handle_parsing_errors=True)

    with st.chat_message("assistant"):
        # NOTE(review): the whole message list (not just `prompt`) is passed
        # as the agent input, presumably to give the model the conversation
        # so far — confirm this is intended.
        try:
            response = search_agent.run(st.session_state.messages)
        except Exception as exc:
            # An invalid key, rate limit or network failure should be shown
            # as a readable message instead of an unhandled traceback. The
            # history is left without an assistant entry for this turn.
            st.error(f"❌ Error: {exc}")
        else:
            st.session_state.messages.append({'role': 'assistant', "content": response})
            st.write(response)

# Basic Agent Evaluation Section
# Second sidebar heading, added after the "Settings" heading and API-key input.
st.sidebar.title("Basic Agent Evaluation")

def _truncate(text, limit):
    """Return *text* as-is if it has at most *limit* characters, else its first *limit* characters plus "..."."""
    return text[:limit] + "..." if len(text) > limit else text


def run_evaluation():
    """Run the full evaluation: fetch questions, answer them, submit the answers.

    Reads the module-level ``api_key``, ``tools`` and ``DEFAULT_API_URL``.
    Progress is reported through a sidebar progress bar and status line, and
    a results table in the main area is refreshed as questions are processed.
    Both sidebar placeholders are cleared when the function exits.

    Returns:
        tuple: ``(status, results)`` where ``status`` is a human-readable
        string and ``results`` is a ``pandas.DataFrame`` with the columns
        "Task ID", "Question" and "Submitted Answer" (empty when nothing was
        processed). The status starts with "✅" only after a successful
        submission.
    """
    if not api_key:
        st.error("Please enter your Groq API key in the sidebar.")
        return "API key required", pd.DataFrame()

    # Setup progress tracking
    progress_bar = st.sidebar.progress(0)
    status_text = st.sidebar.empty()
    results_container = st.empty()

    username = "streamlit_user"
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    space_id = os.getenv("SPACE_ID", "local")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id != "local" else "local_execution"

    # Created before the try block so the outer handler can always return
    # whatever has been collected up to the point of failure.
    results_log = []
    answers_payload = []

    try:
        # 1. Fetch Questions
        status_text.text("📡 Fetching questions...")
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        total_questions = len(questions_data)
        status_text.text(f"✅ Found {total_questions} questions")

        if not questions_data:
            return "No questions found", pd.DataFrame()

        # 2. Initialize Agent (reuse tools from cache)
        llm = ChatGroq(groq_api_key=api_key, model_name="llama3-70b-8192")
        agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, handle_parsing_errors=True)

        # 3. Process Questions
        for i, item in enumerate(questions_data):
            progress = (i + 1) / total_questions
            progress_bar.progress(progress)
            status_text.text(f"🔍 Processing question {i+1}/{total_questions}...")

            task_id = item.get("task_id")
            question_text = item.get("question")
            if not task_id or not question_text:
                # Entries without an id or a question cannot be answered or submitted.
                continue

            try:
                submitted_answer = agent.run(question_text)
                answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
                results_log.append({"Task ID": task_id, "Question": _truncate(question_text, 100),
                                    "Submitted Answer": _truncate(submitted_answer, 200)})
            except Exception as e:
                # One failing question must not abort the run: log it and
                # leave it out of the submission payload.
                results_log.append({"Task ID": task_id, "Question": _truncate(question_text, 100),
                                    "Submitted Answer": f"❌ Error: {str(e)}"})

            # Update results table progressively (every 3 questions or at the
            # end), for failed questions as well as answered ones.
            if (i + 1) % 3 == 0 or (i + 1) == total_questions:
                results_container.dataframe(pd.DataFrame(results_log))

        # Nothing to submit (every question failed or was skipped): report
        # that instead of posting an empty answer list.
        if not answers_payload:
            return "❌ Agent did not produce any answers to submit.", pd.DataFrame(results_log)

        # 4. Submit Answers
        status_text.text("📤 Submitting answers...")
        submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        final_status = (
            f"✅ Submission Successful!\n"
            f"📊 Score: {result_data.get('score', 'N/A')}%\n"
            f"📝 Correct: {result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')}\n"
            f"💬 Message: {result_data.get('message', 'No message')}"
        )
        return final_status, pd.DataFrame(results_log)

    except Exception as e:
        # Top-level boundary for the whole run: any fetch, agent-setup or
        # submission failure becomes a status string for the caller.
        return f"❌ Failed: {str(e)}", pd.DataFrame(results_log)

    finally:
        # Always remove the transient sidebar widgets, on success or failure.
        progress_bar.empty()
        status_text.empty()

# Evaluation button in sidebar
# Runs the evaluation on click, then shows the outcome in the sidebar and the
# per-question table (when there is one) in the main area.
if st.sidebar.button("🚀 Run Evaluation & Submit Answers"):
    with st.spinner("Starting evaluation..."):
        status, results = run_evaluation()

    # run_evaluation() prefixes its status with "✅" only after a successful
    # submission; a missing API key, an empty question list or any failure
    # must not be reported with a success banner.
    if status.startswith("✅"):
        st.sidebar.success("Evaluation completed!")
    else:
        st.sidebar.error("Evaluation did not complete successfully.")
    st.sidebar.text_area("Results", value=status, height=150)

    if not results.empty:
        st.subheader("📋 Detailed Results")
        st.dataframe(results, use_container_width=True)