File size: 6,717 Bytes
e252ecc
7c4d1b9
e252ecc
80328ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b4ec32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80328ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200d8cc
80328ae
 
 
4b4ec32
80328ae
 
 
 
 
 
 
4b4ec32
80328ae
 
 
 
4b4ec32
 
 
 
 
 
80328ae
 
 
 
 
 
 
4b4ec32
 
80328ae
 
 
4b4ec32
 
 
80328ae
4b4ec32
80328ae
4b4ec32
200d8cc
4b4ec32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80328ae
 
 
4b4ec32
80328ae
4b4ec32
 
 
 
80328ae
 
4b4ec32
80328ae
4b4ec32
 
 
 
 
80328ae
 
4b4ec32
 
80328ae
4b4ec32
 
 
80328ae
 
4b4ec32
 
38c5157
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# SECURITY: a live Groq API key used to be hard-coded on this line. Secrets
# must never be committed to source control -- revoke that key. The sidebar
# text input further down rebinds ``api_key`` on every script run, so an empty
# placeholder is all that is needed here.
api_key = ""

import logging
import os

import pandas as pd
import requests
import streamlit as st
from dotenv import load_dotenv
from langchain.agents import initialize_agent, AgentType
from langchain_community.tools import ArxivQueryRun, WikipediaQueryRun, DuckDuckGoSearchRun
from langchain_community.utilities import ArxivAPIWrapper, WikipediaAPIWrapper
from langchain_groq import ChatGroq

# Load environment variables
# NOTE(review): presumably this picks up a local .env file via python-dotenv;
# the only environment variable this script reads afterwards is SPACE_ID.
load_dotenv()

# Constants for Basic Agent Evaluation
# Base URL of the scoring service; run_evaluation() appends "/questions" and
# "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Initialize search tools (with warm-up)
@st.cache_resource
def load_tools():
    """Build the agent's search tools and warm them up.

    Creates an Arxiv tool and a Wikipedia tool (each limited to one result of
    at most 250 characters) and a DuckDuckGo tool named "Search". Decorated
    with ``st.cache_resource`` so the construction is not repeated on every
    script rerun.

    The warm-up queries are best-effort: they hit the network, so a failure
    (for example no connectivity at startup) is logged and ignored instead of
    preventing the whole app from loading.

    Returns:
        list: ``[search, arxiv, wiki]`` in that order.
    """
    with st.spinner("Initializing tools (first time may take a few seconds)..."):
        api_wrapper_arxiv = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=250)
        arxiv = ArxivQueryRun(api_wrapper=api_wrapper_arxiv)
        api_wrapper_wiki = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=250)
        wiki = WikipediaQueryRun(api_wrapper=api_wrapper_wiki)
        search = DuckDuckGoSearchRun(name="Search")

        # Warm up tools. Any error here is non-fatal: the tools are still
        # usable later, the first real query will just be slower.
        for tool in (arxiv, wiki):
            try:
                tool.run("machine learning")
            except Exception:
                logging.getLogger(__name__).warning(
                    "Warm-up query failed for tool %r", tool.name, exc_info=True
                )

        return [search, arxiv, wiki]

# Build the agent tools (load_tools is wrapped in st.cache_resource).
tools = load_tools()

# Page header
st.title("Langchain - Chat with Search & Evaluation")

# Sidebar: the Groq key is supplied by the user through a password field
st.sidebar.title("Settings")
api_key = st.sidebar.text_input("Enter your Groq API Key:", type="password")

# Seed the conversation with a greeting when the session has no history yet
if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {
            "role": "assistant",
            "content": "Hi, I am a Chatbot who can search the web and evaluate questions. How can I help you?",
        }
    ]

# Replay the stored conversation
for msg in st.session_state["messages"]:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

# Handle a newly submitted user message
prompt = st.chat_input(placeholder="What is machine learning?")
if prompt:
    st.session_state["messages"].append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    # Without a key the model cannot be built; report it and end this run
    if not api_key:
        st.error("Please enter your Groq API key in the sidebar.")
        st.stop()

    llm = ChatGroq(model_name="llama3-70b-8192", groq_api_key=api_key)
    search_agent = initialize_agent(
        tools,
        llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        handle_parsing_errors=True,
    )

    with st.chat_message("assistant"):
        # NOTE(review): the whole message list (not just ``prompt``) is handed
        # to the agent as its input -- confirm this is intended.
        response = search_agent.run(st.session_state["messages"])
        st.session_state["messages"].append({"role": "assistant", "content": response})
        st.write(response)

# Sidebar heading for the evaluation controls
st.sidebar.title("Basic Agent Evaluation")

def _truncate(text, limit):
    """Return ``text`` cut to ``limit`` characters, with "..." appended if it was cut."""
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def run_evaluation():
    """Fetch the evaluation questions, answer them with the agent, and submit.

    Reads the module-level ``api_key``, ``tools`` and ``DEFAULT_API_URL``.
    Progress is shown through a sidebar progress bar and status line (both
    cleared when the function exits) and a results table in the main area
    that is refreshed every third question and after the last one.

    Failures are reported through the returned status text rather than raised:
    a question whose agent call fails is logged as an error row and left out of
    the submission; any other error aborts the run with a "Failed" status.

    Returns:
        tuple[str, pandas.DataFrame]: a human-readable status message and the
        per-question log with columns "Task ID", "Question" and
        "Submitted Answer" (empty when nothing was processed).
    """
    if not api_key:
        st.error("Please enter your Groq API key in the sidebar.")
        return "API key required", pd.DataFrame()

    # Setup progress tracking
    progress_bar = st.sidebar.progress(0)
    status_text = st.sidebar.empty()
    results_container = st.empty()

    username = "streamlit_user"
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    space_id = os.getenv("SPACE_ID", "local")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id != "local" else "local_execution"

    # Created before the try block so the outer handler can always return
    # whatever was collected up to the point of failure.
    results_log = []
    answers_payload = []

    # NOTE(review): several status strings below start with what looks like
    # mis-encoded emoji (e.g. "βœ…"). They are kept exactly as found; confirm
    # the file's encoding before normalising them.
    try:
        # 1. Fetch Questions
        status_text.text("πŸ“‘ Fetching questions...")
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()

        if not questions_data:
            return "No questions found", pd.DataFrame()

        total_questions = len(questions_data)
        status_text.text(f"βœ… Found {total_questions} questions")

        # 2. Initialize Agent (reuse tools from cache)
        llm = ChatGroq(groq_api_key=api_key, model_name="llama3-70b-8192")
        agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, handle_parsing_errors=True)

        # 3. Process Questions
        for i, item in enumerate(questions_data):
            position = i + 1
            progress_bar.progress(position / total_questions)
            status_text.text(f"πŸ” Processing question {position}/{total_questions}...")

            task_id = item.get("task_id")
            question_text = item.get("question")
            if not task_id or not question_text:
                continue

            try:
                submitted_answer = agent.run(question_text)
            except Exception as e:
                # One failing question must not abort the run: record it in
                # the log and leave it out of the submission.
                logging.getLogger(__name__).warning(
                    "Agent failed on task %s", task_id, exc_info=True
                )
                results_log.append({
                    "Task ID": task_id,
                    "Question": _truncate(question_text, 100),
                    "Submitted Answer": f"❌ Error: {str(e)}",
                })
            else:
                answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
                results_log.append({
                    "Task ID": task_id,
                    "Question": _truncate(question_text, 100),
                    "Submitted Answer": _truncate(submitted_answer, 200),
                })

            # Update results table progressively (every 3 questions or at
            # the end), whether the question succeeded or failed.
            if position % 3 == 0 or position == total_questions:
                results_container.dataframe(pd.DataFrame(results_log))

        # Nothing to grade: do not send an empty submission.
        if not answers_payload:
            return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

        # 4. Submit Answers
        status_text.text("πŸ“€ Submitting answers...")
        submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        final_status = (
            f"βœ… Submission Successful!\n"
            f"πŸ“Š Score: {result_data.get('score', 'N/A')}%\n"
            f"πŸ“ Correct: {result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')}\n"
            f"πŸ’¬ Message: {result_data.get('message', 'No message')}"
        )
        return final_status, pd.DataFrame(results_log)

    except Exception as e:
        # Boundary handler: the caller only displays the status text, so turn
        # any failure into a message plus the partial log.
        logging.getLogger(__name__).exception("Evaluation run failed")
        return f"❌ Failed: {str(e)}", pd.DataFrame(results_log)

    finally:
        progress_bar.empty()
        status_text.empty()

# Evaluation button in sidebar
if st.sidebar.button("πŸš€ Run Evaluation & Submit Answers"):
    with st.spinner("Starting evaluation..."):
        status, results = run_evaluation()

    # run_evaluation() reports problems through its status text instead of
    # raising, so only show the success banner when the submission actually
    # went through; otherwise a missing key or a failed run would still be
    # announced as "completed".
    if "Submission Successful" in status:
        st.sidebar.success("Evaluation completed!")
    else:
        st.sidebar.error("Evaluation did not complete.")
    st.sidebar.text_area("Results", value=status, height=150)

    # Show the per-question log in the main area when there is one
    if not results.empty:
        st.subheader("πŸ“‹ Detailed Results")
        st.dataframe(results, use_container_width=True)