Spaces:
Sleeping
Sleeping
Delete community_contributions
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- community_contributions/1_lab1_DA.ipynb +0 -396
- community_contributions/1_lab1_Hy.ipynb +0 -688
- community_contributions/1_lab1_Mudassar.ipynb +0 -260
- community_contributions/1_lab1_Thanh.ipynb +0 -165
- community_contributions/1_lab1_cm.ipynb +0 -305
- community_contributions/1_lab1_gemini.ipynb +0 -305
- community_contributions/1_lab1_groq.ipynb +0 -262
- community_contributions/1_lab1_groq_llama.ipynb +0 -296
- community_contributions/1_lab1_moneek.ipynb +0 -407
- community_contributions/1_lab1_open_router.ipynb +0 -323
- community_contributions/1_lab2_Kaushik_Parallelization.ipynb +0 -355
- community_contributions/1_lab2_Routing_Workflow.ipynb +0 -514
- community_contributions/2_lab2-Evaluator-AnnpaS18.ipynb +0 -474
- community_contributions/2_lab2-judge-prompt-changed.ipynb +0 -476
- community_contributions/2_lab2-parallelization.ipynb +0 -440
- community_contributions/2_lab2_Execution_measurement.py +0 -401
- community_contributions/2_lab2_ReAct_Pattern.ipynb +0 -289
- community_contributions/2_lab2_akash_parallelization.ipynb +0 -295
- community_contributions/2_lab2_async.ipynb +0 -474
- community_contributions/2_lab2_async_with_reasons.ipynb +0 -490
- community_contributions/2_lab2_doclee99_gpt5_improves_gemini.25flash.ipynb +0 -620
- community_contributions/2_lab2_exercise.ipynb +0 -336
- community_contributions/2_lab2_exercise_BrettSanders_ChainOfThought.ipynb +0 -241
- community_contributions/2_lab2_llm_reviewer.ipynb +0 -627
- community_contributions/2_lab2_moneek.ipynb +0 -173
- community_contributions/2_lab2_multi-evaluation-criteria.ipynb +0 -506
- community_contributions/2_lab2_perplexity_support.ipynb +0 -497
- community_contributions/2_lab2_reflection_pattern.ipynb +0 -311
- community_contributions/2_lab2_reflection_pattern2.ipynb +0 -999
- community_contributions/2_lab2_six-thinking-hats-simulator.ipynb +0 -457
- community_contributions/3_lab3_groq_llama_generator_gemini_evaluator.ipynb +0 -286
- community_contributions/4_lab4_slack.ipynb +0 -469
- community_contributions/4_lab4_spotify.ipynb +0 -829
- community_contributions/4_lab4_with_telegram.ipynb +0 -422
- community_contributions/Business_Idea.ipynb +0 -388
- community_contributions/ChatBot_with_evaluator_and_notifier/README.md +0 -97
- community_contributions/ChatBot_with_evaluator_and_notifier/app.py +0 -30
- community_contributions/ChatBot_with_evaluator_and_notifier/career_db/6aee01b9-a616-491d-a196-c857cd862793/data_level0.bin +0 -3
- community_contributions/ChatBot_with_evaluator_and_notifier/career_db/6aee01b9-a616-491d-a196-c857cd862793/header.bin +0 -3
- community_contributions/ChatBot_with_evaluator_and_notifier/career_db/6aee01b9-a616-491d-a196-c857cd862793/length.bin +0 -3
- community_contributions/ChatBot_with_evaluator_and_notifier/career_db/6aee01b9-a616-491d-a196-c857cd862793/link_lists.bin +0 -0
- community_contributions/ChatBot_with_evaluator_and_notifier/career_db/chroma.sqlite3 +0 -3
- community_contributions/ChatBot_with_evaluator_and_notifier/chat.py +0 -45
- community_contributions/ChatBot_with_evaluator_and_notifier/controller.py +0 -304
- community_contributions/ChatBot_with_evaluator_and_notifier/evaluator.py +0 -108
- community_contributions/ChatBot_with_evaluator_and_notifier/knowledge_base/summary.txt +0 -499
- community_contributions/ChatBot_with_evaluator_and_notifier/rag.py +0 -207
- community_contributions/ChatBot_with_evaluator_and_notifier/requirements.txt +0 -8
- community_contributions/ChatBot_with_evaluator_and_notifier/tools.py +0 -133
- community_contributions/Indira_1_lab1.ipynb +0 -370
community_contributions/1_lab1_DA.ipynb
DELETED
|
@@ -1,396 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
-
" <tr>\n",
|
| 16 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
-
" </td>\n",
|
| 19 |
-
" <td>\n",
|
| 20 |
-
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
-
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
-
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
-
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
-
" Well in that case, you're ready!!\n",
|
| 25 |
-
" </span>\n",
|
| 26 |
-
" </td>\n",
|
| 27 |
-
" </tr>\n",
|
| 28 |
-
"</table>"
|
| 29 |
-
]
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"cell_type": "markdown",
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"source": [
|
| 35 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
-
" <tr>\n",
|
| 37 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
-
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
-
" </td>\n",
|
| 40 |
-
" <td>\n",
|
| 41 |
-
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
-
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
-
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
-
" </span>\n",
|
| 45 |
-
" </td>\n",
|
| 46 |
-
" </tr>\n",
|
| 47 |
-
"</table>"
|
| 48 |
-
]
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"cell_type": "markdown",
|
| 52 |
-
"metadata": {},
|
| 53 |
-
"source": [
|
| 54 |
-
"### And please do remember to contact me if I can help\n",
|
| 55 |
-
"\n",
|
| 56 |
-
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
-
"\n",
|
| 58 |
-
"\n",
|
| 59 |
-
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
-
"\n",
|
| 61 |
-
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 62 |
-
"- Open extensions (View >> extensions)\n",
|
| 63 |
-
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
-
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
-
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
-
"\n",
|
| 67 |
-
"And then:\n",
|
| 68 |
-
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
-
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
-
"3. Enjoy!\n",
|
| 71 |
-
"\n",
|
| 72 |
-
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
-
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
-
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings(NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
-
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
-
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
-
"And then try again.\n",
|
| 78 |
-
"\n",
|
| 79 |
-
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interferring. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
-
"`conda deactivate` \n",
|
| 81 |
-
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
-
"`conda config --set auto_activate_base false` \n",
|
| 83 |
-
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
-
]
|
| 85 |
-
},
|
| 86 |
-
{
|
| 87 |
-
"cell_type": "code",
|
| 88 |
-
"execution_count": null,
|
| 89 |
-
"metadata": {},
|
| 90 |
-
"outputs": [],
|
| 91 |
-
"source": [
|
| 92 |
-
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
-
"\n",
|
| 94 |
-
"from dotenv import load_dotenv\n"
|
| 95 |
-
]
|
| 96 |
-
},
|
| 97 |
-
{
|
| 98 |
-
"cell_type": "code",
|
| 99 |
-
"execution_count": null,
|
| 100 |
-
"metadata": {},
|
| 101 |
-
"outputs": [],
|
| 102 |
-
"source": [
|
| 103 |
-
"# Next it's time to load the API keys into environment variables\n",
|
| 104 |
-
"# If this returns false, see the next cell!\n",
|
| 105 |
-
"\n",
|
| 106 |
-
"load_dotenv(override=True)"
|
| 107 |
-
]
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"cell_type": "markdown",
|
| 111 |
-
"metadata": {},
|
| 112 |
-
"source": [
|
| 113 |
-
"### Wait, did that just output `False`??\n",
|
| 114 |
-
"\n",
|
| 115 |
-
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 116 |
-
"\n",
|
| 117 |
-
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 118 |
-
"\n",
|
| 119 |
-
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 120 |
-
]
|
| 121 |
-
},
|
| 122 |
-
{
|
| 123 |
-
"cell_type": "markdown",
|
| 124 |
-
"metadata": {},
|
| 125 |
-
"source": [
|
| 126 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 127 |
-
" <tr>\n",
|
| 128 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 129 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 130 |
-
" </td>\n",
|
| 131 |
-
" <td>\n",
|
| 132 |
-
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 133 |
-
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 134 |
-
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 135 |
-
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 136 |
-
" </span>\n",
|
| 137 |
-
" </td>\n",
|
| 138 |
-
" </tr>\n",
|
| 139 |
-
"</table>"
|
| 140 |
-
]
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"cell_type": "code",
|
| 144 |
-
"execution_count": null,
|
| 145 |
-
"metadata": {},
|
| 146 |
-
"outputs": [],
|
| 147 |
-
"source": [
|
| 148 |
-
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"import os\n",
|
| 151 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 152 |
-
"\n",
|
| 153 |
-
"if openai_api_key:\n",
|
| 154 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 155 |
-
"else:\n",
|
| 156 |
-
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 157 |
-
" \n"
|
| 158 |
-
]
|
| 159 |
-
},
|
| 160 |
-
{
|
| 161 |
-
"cell_type": "code",
|
| 162 |
-
"execution_count": null,
|
| 163 |
-
"metadata": {},
|
| 164 |
-
"outputs": [],
|
| 165 |
-
"source": [
|
| 166 |
-
"# And now - the all important import statement\n",
|
| 167 |
-
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 168 |
-
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"from openai import OpenAI"
|
| 171 |
-
]
|
| 172 |
-
},
|
| 173 |
-
{
|
| 174 |
-
"cell_type": "code",
|
| 175 |
-
"execution_count": null,
|
| 176 |
-
"metadata": {},
|
| 177 |
-
"outputs": [],
|
| 178 |
-
"source": [
|
| 179 |
-
"# And now we'll create an instance of the OpenAI class\n",
|
| 180 |
-
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 181 |
-
"# If you get a NameError - head over to the guides folder (guide 6)to learn about NameErrors - always instantly fixable\n",
|
| 182 |
-
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 183 |
-
"\n",
|
| 184 |
-
"openai = OpenAI()"
|
| 185 |
-
]
|
| 186 |
-
},
|
| 187 |
-
{
|
| 188 |
-
"cell_type": "code",
|
| 189 |
-
"execution_count": null,
|
| 190 |
-
"metadata": {},
|
| 191 |
-
"outputs": [],
|
| 192 |
-
"source": [
|
| 193 |
-
"# Create a list of messages in the familiar OpenAI format\n",
|
| 194 |
-
"\n",
|
| 195 |
-
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 196 |
-
]
|
| 197 |
-
},
|
| 198 |
-
{
|
| 199 |
-
"cell_type": "code",
|
| 200 |
-
"execution_count": null,
|
| 201 |
-
"metadata": {},
|
| 202 |
-
"outputs": [],
|
| 203 |
-
"source": [
|
| 204 |
-
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 205 |
-
"# This uses GPT 4.1 nano, the incredibly cheap model\n",
|
| 206 |
-
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 207 |
-
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 208 |
-
"\n",
|
| 209 |
-
"response = openai.chat.completions.create(\n",
|
| 210 |
-
" model=\"gpt-4.1-nano\",\n",
|
| 211 |
-
" messages=messages\n",
|
| 212 |
-
")\n",
|
| 213 |
-
"\n",
|
| 214 |
-
"print(response.choices[0].message.content)\n"
|
| 215 |
-
]
|
| 216 |
-
},
|
| 217 |
-
{
|
| 218 |
-
"cell_type": "code",
|
| 219 |
-
"execution_count": null,
|
| 220 |
-
"metadata": {},
|
| 221 |
-
"outputs": [],
|
| 222 |
-
"source": [
|
| 223 |
-
"# And now - let's ask for a question:\n",
|
| 224 |
-
"\n",
|
| 225 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 226 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 227 |
-
]
|
| 228 |
-
},
|
| 229 |
-
{
|
| 230 |
-
"cell_type": "code",
|
| 231 |
-
"execution_count": null,
|
| 232 |
-
"metadata": {},
|
| 233 |
-
"outputs": [],
|
| 234 |
-
"source": [
|
| 235 |
-
"# ask it - this uses GPT 4.1 mini, still cheap but more powerful than nano\n",
|
| 236 |
-
"\n",
|
| 237 |
-
"response = openai.chat.completions.create(\n",
|
| 238 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 239 |
-
" messages=messages\n",
|
| 240 |
-
")\n",
|
| 241 |
-
"\n",
|
| 242 |
-
"question = response.choices[0].message.content\n",
|
| 243 |
-
"\n",
|
| 244 |
-
"print(question)\n"
|
| 245 |
-
]
|
| 246 |
-
},
|
| 247 |
-
{
|
| 248 |
-
"cell_type": "code",
|
| 249 |
-
"execution_count": null,
|
| 250 |
-
"metadata": {},
|
| 251 |
-
"outputs": [],
|
| 252 |
-
"source": [
|
| 253 |
-
"# form a new messages list\n",
|
| 254 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 255 |
-
]
|
| 256 |
-
},
|
| 257 |
-
{
|
| 258 |
-
"cell_type": "code",
|
| 259 |
-
"execution_count": null,
|
| 260 |
-
"metadata": {},
|
| 261 |
-
"outputs": [],
|
| 262 |
-
"source": [
|
| 263 |
-
"# Ask it again\n",
|
| 264 |
-
"\n",
|
| 265 |
-
"response = openai.chat.completions.create(\n",
|
| 266 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 267 |
-
" messages=messages\n",
|
| 268 |
-
")\n",
|
| 269 |
-
"\n",
|
| 270 |
-
"answer = response.choices[0].message.content\n",
|
| 271 |
-
"print(answer)\n"
|
| 272 |
-
]
|
| 273 |
-
},
|
| 274 |
-
{
|
| 275 |
-
"cell_type": "code",
|
| 276 |
-
"execution_count": null,
|
| 277 |
-
"metadata": {},
|
| 278 |
-
"outputs": [],
|
| 279 |
-
"source": [
|
| 280 |
-
"from IPython.display import Markdown, display\n",
|
| 281 |
-
"\n",
|
| 282 |
-
"display(Markdown(answer))\n",
|
| 283 |
-
"\n"
|
| 284 |
-
]
|
| 285 |
-
},
|
| 286 |
-
{
|
| 287 |
-
"cell_type": "markdown",
|
| 288 |
-
"metadata": {},
|
| 289 |
-
"source": [
|
| 290 |
-
"# Congratulations!\n",
|
| 291 |
-
"\n",
|
| 292 |
-
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 293 |
-
"\n",
|
| 294 |
-
"Next time things get more interesting..."
|
| 295 |
-
]
|
| 296 |
-
},
|
| 297 |
-
{
|
| 298 |
-
"cell_type": "markdown",
|
| 299 |
-
"metadata": {},
|
| 300 |
-
"source": [
|
| 301 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 302 |
-
" <tr>\n",
|
| 303 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 304 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 305 |
-
" </td>\n",
|
| 306 |
-
" <td>\n",
|
| 307 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 308 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 309 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 310 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 311 |
-
" Finally have 3 third LLM call propose the Agentic AI solution. <br/>\n",
|
| 312 |
-
" We will cover this at up-coming labs, so don't worry if you're unsure.. just give it a try!\n",
|
| 313 |
-
" </span>\n",
|
| 314 |
-
" </td>\n",
|
| 315 |
-
" </tr>\n",
|
| 316 |
-
"</table>"
|
| 317 |
-
]
|
| 318 |
-
},
|
| 319 |
-
{
|
| 320 |
-
"cell_type": "code",
|
| 321 |
-
"execution_count": null,
|
| 322 |
-
"metadata": {},
|
| 323 |
-
"outputs": [],
|
| 324 |
-
"source": [
|
| 325 |
-
"# And now - let's ask for a question:\n",
|
| 326 |
-
"\n",
|
| 327 |
-
"import os\n",
|
| 328 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 329 |
-
"from openai import OpenAI\n",
|
| 330 |
-
"from IPython.display import Markdown, display\n",
|
| 331 |
-
"\n",
|
| 332 |
-
"# And now we'll create an instance of the OpenAI class\n",
|
| 333 |
-
"\n",
|
| 334 |
-
"openai = OpenAI()\n",
|
| 335 |
-
"\n",
|
| 336 |
-
"question1 = \"Please pick a business area that might be worth exploring for an Agentic AI opportunity.\"\n",
|
| 337 |
-
"messages1 = [{\"role\": \"user\", \"content\": question1}]\n",
|
| 338 |
-
"\n",
|
| 339 |
-
"# Then make the first call:\n",
|
| 340 |
-
"response1 = openai.chat.completions.create(\n",
|
| 341 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 342 |
-
" messages=messages1\n",
|
| 343 |
-
")\n",
|
| 344 |
-
"\n",
|
| 345 |
-
"question2 = \" Please present the pain-point in \"+response1.choices[0].message.content +\" industry - something challenging that might be ripe for an Agentic solution\"\n",
|
| 346 |
-
"messages2 = [{\"role\": \"user\", \"content\": question2}]\n",
|
| 347 |
-
"\n",
|
| 348 |
-
"# Then make the first call:\n",
|
| 349 |
-
"response2 = openai.chat.completions.create(\n",
|
| 350 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 351 |
-
" messages=messages2\n",
|
| 352 |
-
")\n",
|
| 353 |
-
"\n",
|
| 354 |
-
"question3 = \" Please presentpropose and Agentic AI solution for pain-point \"+response2.choices[0].message.content\n",
|
| 355 |
-
"messages3 = [{\"role\": \"user\", \"content\": question3}]\n",
|
| 356 |
-
"\n",
|
| 357 |
-
"# Then make the first call:\n",
|
| 358 |
-
"response3 = openai.chat.completions.create(\n",
|
| 359 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 360 |
-
" messages=messages3\n",
|
| 361 |
-
")\n",
|
| 362 |
-
"\n",
|
| 363 |
-
"Final_Answer = \" Please presentpropose and Agentic AI solution for pain-point \"+response2.choices[0].message.content\n",
|
| 364 |
-
"\n",
|
| 365 |
-
"display(Markdown(Final_Answer))\n",
|
| 366 |
-
"\n"
|
| 367 |
-
]
|
| 368 |
-
},
|
| 369 |
-
{
|
| 370 |
-
"cell_type": "markdown",
|
| 371 |
-
"metadata": {},
|
| 372 |
-
"source": []
|
| 373 |
-
}
|
| 374 |
-
],
|
| 375 |
-
"metadata": {
|
| 376 |
-
"kernelspec": {
|
| 377 |
-
"display_name": ".venv",
|
| 378 |
-
"language": "python",
|
| 379 |
-
"name": "python3"
|
| 380 |
-
},
|
| 381 |
-
"language_info": {
|
| 382 |
-
"codemirror_mode": {
|
| 383 |
-
"name": "ipython",
|
| 384 |
-
"version": 3
|
| 385 |
-
},
|
| 386 |
-
"file_extension": ".py",
|
| 387 |
-
"mimetype": "text/x-python",
|
| 388 |
-
"name": "python",
|
| 389 |
-
"nbconvert_exporter": "python",
|
| 390 |
-
"pygments_lexer": "ipython3",
|
| 391 |
-
"version": "3.12.11"
|
| 392 |
-
}
|
| 393 |
-
},
|
| 394 |
-
"nbformat": 4,
|
| 395 |
-
"nbformat_minor": 2
|
| 396 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab1_Hy.ipynb
DELETED
|
@@ -1,688 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
-
" <tr>\n",
|
| 16 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
-
" </td>\n",
|
| 19 |
-
" <td>\n",
|
| 20 |
-
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
-
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
-
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
-
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
-
" Well in that case, you're ready!!\n",
|
| 25 |
-
" </span>\n",
|
| 26 |
-
" </td>\n",
|
| 27 |
-
" </tr>\n",
|
| 28 |
-
"</table>"
|
| 29 |
-
]
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"cell_type": "markdown",
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"source": [
|
| 35 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
-
" <tr>\n",
|
| 37 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
-
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
-
" </td>\n",
|
| 40 |
-
" <td>\n",
|
| 41 |
-
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
-
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
-
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
-
" </span>\n",
|
| 45 |
-
" </td>\n",
|
| 46 |
-
" </tr>\n",
|
| 47 |
-
"</table>"
|
| 48 |
-
]
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"cell_type": "markdown",
|
| 52 |
-
"metadata": {},
|
| 53 |
-
"source": [
|
| 54 |
-
"### And please do remember to contact me if I can help\n",
|
| 55 |
-
"\n",
|
| 56 |
-
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
-
"\n",
|
| 58 |
-
"\n",
|
| 59 |
-
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
-
"\n",
|
| 61 |
-
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 62 |
-
"- Open extensions (View >> extensions)\n",
|
| 63 |
-
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
-
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
-
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
-
"\n",
|
| 67 |
-
"And then:\n",
|
| 68 |
-
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
-
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
-
"3. Enjoy!\n",
|
| 71 |
-
"\n",
|
| 72 |
-
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
-
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
-
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
-
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
-
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
-
"And then try again.\n",
|
| 78 |
-
"\n",
|
| 79 |
-
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
-
"`conda deactivate` \n",
|
| 81 |
-
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
-
"`conda config --set auto_activate_base false` \n",
|
| 83 |
-
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
-
]
|
| 85 |
-
},
|
| 86 |
-
{
|
| 87 |
-
"cell_type": "code",
|
| 88 |
-
"execution_count": 1,
|
| 89 |
-
"metadata": {},
|
| 90 |
-
"outputs": [],
|
| 91 |
-
"source": [
|
| 92 |
-
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
-
"\n",
|
| 94 |
-
"from dotenv import load_dotenv\n"
|
| 95 |
-
]
|
| 96 |
-
},
|
| 97 |
-
{
|
| 98 |
-
"cell_type": "code",
|
| 99 |
-
"execution_count": 2,
|
| 100 |
-
"metadata": {},
|
| 101 |
-
"outputs": [
|
| 102 |
-
{
|
| 103 |
-
"data": {
|
| 104 |
-
"text/plain": [
|
| 105 |
-
"True"
|
| 106 |
-
]
|
| 107 |
-
},
|
| 108 |
-
"execution_count": 2,
|
| 109 |
-
"metadata": {},
|
| 110 |
-
"output_type": "execute_result"
|
| 111 |
-
}
|
| 112 |
-
],
|
| 113 |
-
"source": [
|
| 114 |
-
"# Next it's time to load the API keys into environment variables\n",
|
| 115 |
-
"# If this returns false, see the next cell!\n",
|
| 116 |
-
"\n",
|
| 117 |
-
"load_dotenv(override=True)"
|
| 118 |
-
]
|
| 119 |
-
},
|
| 120 |
-
{
|
| 121 |
-
"cell_type": "markdown",
|
| 122 |
-
"metadata": {},
|
| 123 |
-
"source": [
|
| 124 |
-
"### Wait, did that just output `False`??\n",
|
| 125 |
-
"\n",
|
| 126 |
-
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 127 |
-
"\n",
|
| 128 |
-
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 129 |
-
"\n",
|
| 130 |
-
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 131 |
-
]
|
| 132 |
-
},
|
| 133 |
-
{
|
| 134 |
-
"cell_type": "markdown",
|
| 135 |
-
"metadata": {},
|
| 136 |
-
"source": [
|
| 137 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 138 |
-
" <tr>\n",
|
| 139 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 140 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 141 |
-
" </td>\n",
|
| 142 |
-
" <td>\n",
|
| 143 |
-
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 144 |
-
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 145 |
-
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 146 |
-
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 147 |
-
" </span>\n",
|
| 148 |
-
" </td>\n",
|
| 149 |
-
" </tr>\n",
|
| 150 |
-
"</table>"
|
| 151 |
-
]
|
| 152 |
-
},
|
| 153 |
-
{
|
| 154 |
-
"cell_type": "code",
|
| 155 |
-
"execution_count": 3,
|
| 156 |
-
"metadata": {},
|
| 157 |
-
"outputs": [
|
| 158 |
-
{
|
| 159 |
-
"name": "stdout",
|
| 160 |
-
"output_type": "stream",
|
| 161 |
-
"text": [
|
| 162 |
-
"OpenAI API Key exists and begins sk-proj-\n"
|
| 163 |
-
]
|
| 164 |
-
}
|
| 165 |
-
],
|
| 166 |
-
"source": [
|
| 167 |
-
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 168 |
-
"\n",
|
| 169 |
-
"import os\n",
|
| 170 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 171 |
-
"\n",
|
| 172 |
-
"if openai_api_key:\n",
|
| 173 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 174 |
-
"else:\n",
|
| 175 |
-
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 176 |
-
" \n"
|
| 177 |
-
]
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"cell_type": "code",
|
| 181 |
-
"execution_count": 4,
|
| 182 |
-
"metadata": {},
|
| 183 |
-
"outputs": [],
|
| 184 |
-
"source": [
|
| 185 |
-
"# And now - the all important import statement\n",
|
| 186 |
-
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 187 |
-
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 188 |
-
"\n",
|
| 189 |
-
"from openai import OpenAI"
|
| 190 |
-
]
|
| 191 |
-
},
|
| 192 |
-
{
|
| 193 |
-
"cell_type": "code",
|
| 194 |
-
"execution_count": 5,
|
| 195 |
-
"metadata": {},
|
| 196 |
-
"outputs": [],
|
| 197 |
-
"source": [
|
| 198 |
-
"# And now we'll create an instance of the OpenAI class\n",
|
| 199 |
-
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 200 |
-
"# If you get a NameError - head over to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 201 |
-
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 202 |
-
"\n",
|
| 203 |
-
"openai = OpenAI()"
|
| 204 |
-
]
|
| 205 |
-
},
|
| 206 |
-
{
|
| 207 |
-
"cell_type": "code",
|
| 208 |
-
"execution_count": 6,
|
| 209 |
-
"metadata": {},
|
| 210 |
-
"outputs": [],
|
| 211 |
-
"source": [
|
| 212 |
-
"# Create a list of messages in the familiar OpenAI format\n",
|
| 213 |
-
"\n",
|
| 214 |
-
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 215 |
-
]
|
| 216 |
-
},
|
| 217 |
-
{
|
| 218 |
-
"cell_type": "code",
|
| 219 |
-
"execution_count": null,
|
| 220 |
-
"metadata": {},
|
| 221 |
-
"outputs": [
|
| 222 |
-
{
|
| 223 |
-
"name": "stdout",
|
| 224 |
-
"output_type": "stream",
|
| 225 |
-
"text": [
|
| 226 |
-
"ChatCompletion(id='chatcmpl-C9oVaLh1gjzKH07zcVLaXQ4o4FDQ7', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='2 + 2 equals 4.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1756455142, model='gpt-4.1-nano-2025-04-14', object='chat.completion', service_tier='default', system_fingerprint='fp_c4c155951e', usage=CompletionUsage(completion_tokens=8, prompt_tokens=14, total_tokens=22, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n",
|
| 227 |
-
"2 + 2 equals 4.\n"
|
| 228 |
-
]
|
| 229 |
-
}
|
| 230 |
-
],
|
| 231 |
-
"source": [
|
| 232 |
-
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 233 |
-
"# This uses GPT 4.1 nano, the incredibly cheap model\n",
|
| 234 |
-
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 235 |
-
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 236 |
-
"\n",
|
| 237 |
-
"response = openai.chat.completions.create(\n",
|
| 238 |
-
" model=\"gpt-4.1-nano\",\n",
|
| 239 |
-
" messages=messages\n",
|
| 240 |
-
")\n",
|
| 241 |
-
"print(response.choices[0].message.content)\n"
|
| 242 |
-
]
|
| 243 |
-
},
|
| 244 |
-
{
|
| 245 |
-
"cell_type": "code",
|
| 246 |
-
"execution_count": 9,
|
| 247 |
-
"metadata": {},
|
| 248 |
-
"outputs": [],
|
| 249 |
-
"source": [
|
| 250 |
-
"# And now - let's ask for a question:\n",
|
| 251 |
-
"\n",
|
| 252 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 253 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 254 |
-
]
|
| 255 |
-
},
|
| 256 |
-
{
|
| 257 |
-
"cell_type": "code",
|
| 258 |
-
"execution_count": 10,
|
| 259 |
-
"metadata": {},
|
| 260 |
-
"outputs": [
|
| 261 |
-
{
|
| 262 |
-
"name": "stdout",
|
| 263 |
-
"output_type": "stream",
|
| 264 |
-
"text": [
|
| 265 |
-
"If three people can paint three walls in three hours, how many people are needed to paint 18 walls in six hours?\n"
|
| 266 |
-
]
|
| 267 |
-
}
|
| 268 |
-
],
|
| 269 |
-
"source": [
|
| 270 |
-
"# ask it - this uses GPT 4.1 mini, still cheap but more powerful than nano\n",
|
| 271 |
-
"\n",
|
| 272 |
-
"response = openai.chat.completions.create(\n",
|
| 273 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 274 |
-
" messages=messages\n",
|
| 275 |
-
")\n",
|
| 276 |
-
"\n",
|
| 277 |
-
"question = response.choices[0].message.content\n",
|
| 278 |
-
"\n",
|
| 279 |
-
"print(question)\n"
|
| 280 |
-
]
|
| 281 |
-
},
|
| 282 |
-
{
|
| 283 |
-
"cell_type": "code",
|
| 284 |
-
"execution_count": 11,
|
| 285 |
-
"metadata": {},
|
| 286 |
-
"outputs": [],
|
| 287 |
-
"source": [
|
| 288 |
-
"# form a new messages list\n",
|
| 289 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 290 |
-
]
|
| 291 |
-
},
|
| 292 |
-
{
|
| 293 |
-
"cell_type": "code",
|
| 294 |
-
"execution_count": 12,
|
| 295 |
-
"metadata": {},
|
| 296 |
-
"outputs": [
|
| 297 |
-
{
|
| 298 |
-
"name": "stdout",
|
| 299 |
-
"output_type": "stream",
|
| 300 |
-
"text": [
|
| 301 |
-
"Let's analyze the problem step-by-step:\n",
|
| 302 |
-
"\n",
|
| 303 |
-
"---\n",
|
| 304 |
-
"\n",
|
| 305 |
-
"**Given:**\n",
|
| 306 |
-
"\n",
|
| 307 |
-
"- 3 people can paint 3 walls in 3 hours.\n",
|
| 308 |
-
"\n",
|
| 309 |
-
"**Question:**\n",
|
| 310 |
-
"\n",
|
| 311 |
-
"- How many people are needed to paint 18 walls in 6 hours?\n",
|
| 312 |
-
"\n",
|
| 313 |
-
"---\n",
|
| 314 |
-
"\n",
|
| 315 |
-
"### Step 1: Find the rate of painting per person\n",
|
| 316 |
-
"\n",
|
| 317 |
-
"- Total walls painted: 3 walls\n",
|
| 318 |
-
"- Total people: 3 people\n",
|
| 319 |
-
"- Total time: 3 hours\n",
|
| 320 |
-
"\n",
|
| 321 |
-
"**Walls per person per hour:**\n",
|
| 322 |
-
"\n",
|
| 323 |
-
"First, find how many walls 3 people paint per hour:\n",
|
| 324 |
-
"\n",
|
| 325 |
-
"\\[\n",
|
| 326 |
-
"\\frac{3 \\text{ walls}}{3 \\text{ hours}} = 1 \\text{ wall per hour by 3 people}\n",
|
| 327 |
-
"\\]\n",
|
| 328 |
-
"\n",
|
| 329 |
-
"So, 3 people paint 1 wall per hour.\n",
|
| 330 |
-
"\n",
|
| 331 |
-
"Then, walls per person per hour:\n",
|
| 332 |
-
"\n",
|
| 333 |
-
"\\[\n",
|
| 334 |
-
"\\frac{1 \\text{ wall per hour}}{3 \\text{ people}} = \\frac{1}{3} \\text{ wall per person per hour}\n",
|
| 335 |
-
"\\]\n",
|
| 336 |
-
"\n",
|
| 337 |
-
"---\n",
|
| 338 |
-
"\n",
|
| 339 |
-
"### Step 2: Calculate total work needed\n",
|
| 340 |
-
"\n",
|
| 341 |
-
"You want to paint 18 walls in 6 hours.\n",
|
| 342 |
-
"\n",
|
| 343 |
-
"This means the rate of painting must be:\n",
|
| 344 |
-
"\n",
|
| 345 |
-
"\\[\n",
|
| 346 |
-
"\\frac{18 \\text{ walls}}{6 \\text{ hours}} = 3 \\text{ walls per hour}\n",
|
| 347 |
-
"\\]\n",
|
| 348 |
-
"\n",
|
| 349 |
-
"---\n",
|
| 350 |
-
"\n",
|
| 351 |
-
"### Step 3: Find how many people are needed for this rate\n",
|
| 352 |
-
"\n",
|
| 353 |
-
"Since each person paints \\(\\frac{1}{3}\\) wall per hour,\n",
|
| 354 |
-
"\n",
|
| 355 |
-
"\\[\n",
|
| 356 |
-
"\\text{Number of people} \\times \\frac{1}{3} = 3 \\text{ walls per hour}\n",
|
| 357 |
-
"\\]\n",
|
| 358 |
-
"\n",
|
| 359 |
-
"Multiply both sides by 3:\n",
|
| 360 |
-
"\n",
|
| 361 |
-
"\\[\n",
|
| 362 |
-
"\\text{Number of people} = 3 \\times 3 = 9\n",
|
| 363 |
-
"\\]\n",
|
| 364 |
-
"\n",
|
| 365 |
-
"---\n",
|
| 366 |
-
"\n",
|
| 367 |
-
"### **Answer:**\n",
|
| 368 |
-
"\n",
|
| 369 |
-
"\\[\n",
|
| 370 |
-
"\\boxed{9}\n",
|
| 371 |
-
"\\]\n",
|
| 372 |
-
"\n",
|
| 373 |
-
"You need **9 people** to paint 18 walls in 6 hours.\n"
|
| 374 |
-
]
|
| 375 |
-
}
|
| 376 |
-
],
|
| 377 |
-
"source": [
|
| 378 |
-
"# Ask it again\n",
|
| 379 |
-
"\n",
|
| 380 |
-
"response = openai.chat.completions.create(\n",
|
| 381 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 382 |
-
" messages=messages\n",
|
| 383 |
-
")\n",
|
| 384 |
-
"\n",
|
| 385 |
-
"answer = response.choices[0].message.content\n",
|
| 386 |
-
"print(answer)\n"
|
| 387 |
-
]
|
| 388 |
-
},
|
| 389 |
-
{
|
| 390 |
-
"cell_type": "code",
|
| 391 |
-
"execution_count": 13,
|
| 392 |
-
"metadata": {},
|
| 393 |
-
"outputs": [
|
| 394 |
-
{
|
| 395 |
-
"data": {
|
| 396 |
-
"text/markdown": [
|
| 397 |
-
"Let's analyze the problem step-by-step:\n",
|
| 398 |
-
"\n",
|
| 399 |
-
"---\n",
|
| 400 |
-
"\n",
|
| 401 |
-
"**Given:**\n",
|
| 402 |
-
"\n",
|
| 403 |
-
"- 3 people can paint 3 walls in 3 hours.\n",
|
| 404 |
-
"\n",
|
| 405 |
-
"**Question:**\n",
|
| 406 |
-
"\n",
|
| 407 |
-
"- How many people are needed to paint 18 walls in 6 hours?\n",
|
| 408 |
-
"\n",
|
| 409 |
-
"---\n",
|
| 410 |
-
"\n",
|
| 411 |
-
"### Step 1: Find the rate of painting per person\n",
|
| 412 |
-
"\n",
|
| 413 |
-
"- Total walls painted: 3 walls\n",
|
| 414 |
-
"- Total people: 3 people\n",
|
| 415 |
-
"- Total time: 3 hours\n",
|
| 416 |
-
"\n",
|
| 417 |
-
"**Walls per person per hour:**\n",
|
| 418 |
-
"\n",
|
| 419 |
-
"First, find how many walls 3 people paint per hour:\n",
|
| 420 |
-
"\n",
|
| 421 |
-
"\\[\n",
|
| 422 |
-
"\\frac{3 \\text{ walls}}{3 \\text{ hours}} = 1 \\text{ wall per hour by 3 people}\n",
|
| 423 |
-
"\\]\n",
|
| 424 |
-
"\n",
|
| 425 |
-
"So, 3 people paint 1 wall per hour.\n",
|
| 426 |
-
"\n",
|
| 427 |
-
"Then, walls per person per hour:\n",
|
| 428 |
-
"\n",
|
| 429 |
-
"\\[\n",
|
| 430 |
-
"\\frac{1 \\text{ wall per hour}}{3 \\text{ people}} = \\frac{1}{3} \\text{ wall per person per hour}\n",
|
| 431 |
-
"\\]\n",
|
| 432 |
-
"\n",
|
| 433 |
-
"---\n",
|
| 434 |
-
"\n",
|
| 435 |
-
"### Step 2: Calculate total work needed\n",
|
| 436 |
-
"\n",
|
| 437 |
-
"You want to paint 18 walls in 6 hours.\n",
|
| 438 |
-
"\n",
|
| 439 |
-
"This means the rate of painting must be:\n",
|
| 440 |
-
"\n",
|
| 441 |
-
"\\[\n",
|
| 442 |
-
"\\frac{18 \\text{ walls}}{6 \\text{ hours}} = 3 \\text{ walls per hour}\n",
|
| 443 |
-
"\\]\n",
|
| 444 |
-
"\n",
|
| 445 |
-
"---\n",
|
| 446 |
-
"\n",
|
| 447 |
-
"### Step 3: Find how many people are needed for this rate\n",
|
| 448 |
-
"\n",
|
| 449 |
-
"Since each person paints \\(\\frac{1}{3}\\) wall per hour,\n",
|
| 450 |
-
"\n",
|
| 451 |
-
"\\[\n",
|
| 452 |
-
"\\text{Number of people} \\times \\frac{1}{3} = 3 \\text{ walls per hour}\n",
|
| 453 |
-
"\\]\n",
|
| 454 |
-
"\n",
|
| 455 |
-
"Multiply both sides by 3:\n",
|
| 456 |
-
"\n",
|
| 457 |
-
"\\[\n",
|
| 458 |
-
"\\text{Number of people} = 3 \\times 3 = 9\n",
|
| 459 |
-
"\\]\n",
|
| 460 |
-
"\n",
|
| 461 |
-
"---\n",
|
| 462 |
-
"\n",
|
| 463 |
-
"### **Answer:**\n",
|
| 464 |
-
"\n",
|
| 465 |
-
"\\[\n",
|
| 466 |
-
"\\boxed{9}\n",
|
| 467 |
-
"\\]\n",
|
| 468 |
-
"\n",
|
| 469 |
-
"You need **9 people** to paint 18 walls in 6 hours."
|
| 470 |
-
],
|
| 471 |
-
"text/plain": [
|
| 472 |
-
"<IPython.core.display.Markdown object>"
|
| 473 |
-
]
|
| 474 |
-
},
|
| 475 |
-
"metadata": {},
|
| 476 |
-
"output_type": "display_data"
|
| 477 |
-
}
|
| 478 |
-
],
|
| 479 |
-
"source": [
|
| 480 |
-
"from IPython.display import Markdown, display\n",
|
| 481 |
-
"\n",
|
| 482 |
-
"display(Markdown(answer))\n",
|
| 483 |
-
"\n"
|
| 484 |
-
]
|
| 485 |
-
},
|
| 486 |
-
{
|
| 487 |
-
"cell_type": "markdown",
|
| 488 |
-
"metadata": {},
|
| 489 |
-
"source": [
|
| 490 |
-
"# Congratulations!\n",
|
| 491 |
-
"\n",
|
| 492 |
-
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 493 |
-
"\n",
|
| 494 |
-
"Next time things get more interesting..."
|
| 495 |
-
]
|
| 496 |
-
},
|
| 497 |
-
{
|
| 498 |
-
"cell_type": "markdown",
|
| 499 |
-
"metadata": {},
|
| 500 |
-
"source": [
|
| 501 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 502 |
-
" <tr>\n",
|
| 503 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 504 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 505 |
-
" </td>\n",
|
| 506 |
-
" <td>\n",
|
| 507 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 508 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 509 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 510 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 511 |
-
" Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 512 |
-
" We will cover this in upcoming labs, so don't worry if you're unsure.. just give it a try!\n",
|
| 513 |
-
" </span>\n",
|
| 514 |
-
" </td>\n",
|
| 515 |
-
" </tr>\n",
|
| 516 |
-
"</table>"
|
| 517 |
-
]
|
| 518 |
-
},
|
| 519 |
-
{
|
| 520 |
-
"cell_type": "code",
|
| 521 |
-
"execution_count": 16,
|
| 522 |
-
"metadata": {},
|
| 523 |
-
"outputs": [
|
| 524 |
-
{
|
| 525 |
-
"data": {
|
| 526 |
-
"text/markdown": [
|
| 527 |
-
"Certainly! Building on your outlined pain-point and the high-level Agentic AI functionalities, here’s a detailed proposal for an **Agentic AI solution** designed to tackle fragmented patient data and enable real-time, holistic health management.\n",
|
| 528 |
-
"\n",
|
| 529 |
-
"---\n",
|
| 530 |
-
"\n",
|
| 531 |
-
"# Agentic AI Solution Proposal: **HealthSynth AI**\n",
|
| 532 |
-
"\n",
|
| 533 |
-
"### Overview \n",
|
| 534 |
-
"**HealthSynth AI** is an autonomous health management agent that continuously synthesizes fragmented patient data from multiple sources to provide a real-time, unified, and actionable health profile for patients and their care teams. It acts as a 24/7 health assistant, proactive coordinator, and personalized medical advisor.\n",
|
| 535 |
-
"\n",
|
| 536 |
-
"---\n",
|
| 537 |
-
"\n",
|
| 538 |
-
"## Key Features & Capabilities\n",
|
| 539 |
-
"\n",
|
| 540 |
-
"### 1. **Autonomous Data Aggregation & Normalization** \n",
|
| 541 |
-
"- Uses API integrations, secure data exchanges (FHIR, HL7 standards), and device SDKs to continuously fetch data from: \n",
|
| 542 |
-
" - EHR systems across different providers \n",
|
| 543 |
-
" - Wearable and home medical devices (heart rate, glucose monitors, BP cuffs) \n",
|
| 544 |
-
" - Pharmacy records and prescription databases \n",
|
| 545 |
-
" - Lab results portals \n",
|
| 546 |
-
" - Insurance claims and coverage data \n",
|
| 547 |
-
"- Applies intelligent data cleaning, deduplication, and semantic normalization to unify heterogeneous data formats into a consistent patient health graph.\n",
|
| 548 |
-
"\n",
|
| 549 |
-
"### 2. **Real-Time Multimodal Health Analytics Engine** \n",
|
| 550 |
-
"- Employs advanced ML and deep learning models to detect: \n",
|
| 551 |
-
" - Emerging risk patterns (e.g., early signs of infection, deterioration of chronic conditions) \n",
|
| 552 |
-
" - Anomalies (missed medications, unusual vital sign changes) \n",
|
| 553 |
-
" - Compliance gaps (lifestyle, medication adherence) \n",
|
| 554 |
-
"- Continuously updates predictive health trajectories personalized to each patient’s condition and history.\n",
|
| 555 |
-
"\n",
|
| 556 |
-
"### 3. **Proactive Action & Recommendation System** \n",
|
| 557 |
-
"- Generates context-aware, evidence-based alerts and recommendations such as: \n",
|
| 558 |
-
" - Medication reminders or dosage adjustments flagged in consultation with prescribing physicians \n",
|
| 559 |
-
" - Suggestions for scheduling lab tests or specialist visits timely before symptoms worsen \n",
|
| 560 |
-
" - Lifestyle coaching tips adapted using patient preferences and progress \n",
|
| 561 |
-
"- Classes recommendations into urgency tiers (info, caution, immediate action) and routes notifications appropriately.\n",
|
| 562 |
-
"\n",
|
| 563 |
-
"### 4. **Automated Care Coordination & Workflow Integration** \n",
|
| 564 |
-
"- Interacts programmatically with provider scheduling systems, telemedicine platforms, pharmacies, and insurance portals to: \n",
|
| 565 |
-
" - Automatically request appointment reschedules or referrals based on patient status \n",
|
| 566 |
-
" - Notify involved healthcare professionals about critical health events or lab results \n",
|
| 567 |
-
" - Facilitate prescription renewals or modifications with minimal human intervention \n",
|
| 568 |
-
"- Maintains secure, auditable communication logs ensuring compliance (HIPAA, GDPR).\n",
|
| 569 |
-
"\n",
|
| 570 |
-
"### 5. **Patient-Centric Digital Health Companion** \n",
|
| 571 |
-
"- Provides patients with an intuitive mobile/web app featuring: \n",
|
| 572 |
-
" - A dynamic health dashboard summarizing key metrics, risks, and recent activities in plain language \n",
|
| 573 |
-
" - Intelligent daily check-ins and symptom trackers powered by conversational AI \n",
|
| 574 |
-
" - Adaptive educational content tailored to health literacy levels and language preferences \n",
|
| 575 |
-
" - Privacy controls empowering patients to manage data sharing settings\n",
|
| 576 |
-
"\n",
|
| 577 |
-
"---\n",
|
| 578 |
-
"\n",
|
| 579 |
-
"## Technical Architecture (High-Level)\n",
|
| 580 |
-
"\n",
|
| 581 |
-
"- **Data Ingestion Layer:** Connectors for EHRs, wearables, pharmacies, labs \n",
|
| 582 |
-
"- **Data Lake & Processing:** Cloud-native secure storage with HIPAA-compliant encryption \n",
|
| 583 |
-
"- **Knowledge Graph:** Patient-centric semantic graph linking clinical concepts, timelines, interventions \n",
|
| 584 |
-
"- **Analytics & ML Models:** Ensemble predictive models incorporating temporal health data, risk scoring, anomaly detection \n",
|
| 585 |
-
"- **Agentic Orchestrator:** Rule-based and reinforcement learning-driven workflow engine enabling autonomous decision-making and stakeholder communications \n",
|
| 586 |
-
"- **Frontend Interfaces:** Responsive patient app, provider portals, API access for system integration\n",
|
| 587 |
-
"\n",
|
| 588 |
-
"---\n",
|
| 589 |
-
"\n",
|
| 590 |
-
"## Potential Challenges & Mitigations\n",
|
| 591 |
-
"\n",
|
| 592 |
-
"| Challenge | Mitigation Strategy |\n",
|
| 593 |
-
"|-----------|---------------------|\n",
|
| 594 |
-
"| Data privacy & regulatory compliance | Built-in privacy-by-design, end-to-end encryption, rigorous consent management, audit trails |\n",
|
| 595 |
-
"| Data interoperability & standardization | Utilize open standards (FHIR, DICOM), NLP for unstructured data extraction |\n",
|
| 596 |
-
"| Model explainability | Implement interpretable ML techniques and transparent reasoning for clinicians |\n",
|
| 597 |
-
"| Patient engagement sustainability | Gamification, behavior science-driven personalized nudges |\n",
|
| 598 |
-
"| Integration complexity across healthcare IT systems | Modular adaptors/plugins, partnerships with major EHR vendors |\n",
|
| 599 |
-
"\n",
|
| 600 |
-
"---\n",
|
| 601 |
-
"\n",
|
| 602 |
-
"## Impact & Benefits\n",
|
| 603 |
-
"\n",
|
| 604 |
-
"- **For Patients:** Reduced health risks, increased empowerment, improved treatment adherence, and personal convenience \n",
|
| 605 |
-
"- **For Providers:** Enhanced clinical decision support, reduced administrative burden, timely interventions \n",
|
| 606 |
-
"- **For Payers:** Lowered costs via preventive care and reduced hospital readmissions\n",
|
| 607 |
-
"\n",
|
| 608 |
-
"---\n",
|
| 609 |
-
"\n",
|
| 610 |
-
"Would you like me to help you design detailed user journeys, develop specific ML model architectures, or draft an implementation roadmap for **HealthSynth AI**?"
|
| 611 |
-
],
|
| 612 |
-
"text/plain": [
|
| 613 |
-
"<IPython.core.display.Markdown object>"
|
| 614 |
-
]
|
| 615 |
-
},
|
| 616 |
-
"metadata": {},
|
| 617 |
-
"output_type": "display_data"
|
| 618 |
-
}
|
| 619 |
-
],
|
| 620 |
-
"source": [
|
| 621 |
-
"# First create the messages:\n",
|
| 622 |
-
"\n",
|
| 623 |
-
"messages = [{\"role\": \"user\", \"content\": \"I want you to pick a business area that might be worth exploring for an Agentic AI opportunity.\"}]\n",
|
| 624 |
-
"\n",
|
| 625 |
-
"# Then make the first call:\n",
|
| 626 |
-
"\n",
|
| 627 |
-
"response = openai.chat.completions.create(\n",
|
| 628 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 629 |
-
" messages=messages\n",
|
| 630 |
-
")\n",
|
| 631 |
-
"\n",
|
| 632 |
-
"# Then read the business idea:\n",
|
| 633 |
-
"\n",
|
| 634 |
-
"business_idea = response.choices[0].message.content\n",
|
| 635 |
-
"\n",
|
| 636 |
-
"# print(business_idea)\n",
|
| 637 |
-
"\n",
|
| 638 |
-
"messages = [{\"role\": \"user\", \"content\": f\"Please propose a pain-point in the {business_idea} industry.\"}]\n",
|
| 639 |
-
"\n",
|
| 640 |
-
"response = openai.chat.completions.create(\n",
|
| 641 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 642 |
-
" messages=messages\n",
|
| 643 |
-
")\n",
|
| 644 |
-
"\n",
|
| 645 |
-
"pain_point = response.choices[0].message.content\n",
|
| 646 |
-
"\n",
|
| 647 |
-
"messages = [{\"role\": \"user\", \"content\": f\"Please propose an Agentic AI solution to the pain-point: {pain_point}.\"}]\n",
|
| 648 |
-
"\n",
|
| 649 |
-
"response = openai.chat.completions.create(\n",
|
| 650 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 651 |
-
" messages=messages\n",
|
| 652 |
-
")\n",
|
| 653 |
-
"\n",
|
| 654 |
-
"agentic_solution = response.choices[0].message.content\n",
|
| 655 |
-
"\n",
|
| 656 |
-
"display(Markdown(agentic_solution))\n",
|
| 657 |
-
"\n",
|
| 658 |
-
"# And repeat! In the next message, include the business idea within the message"
|
| 659 |
-
]
|
| 660 |
-
},
|
| 661 |
-
{
|
| 662 |
-
"cell_type": "markdown",
|
| 663 |
-
"metadata": {},
|
| 664 |
-
"source": []
|
| 665 |
-
}
|
| 666 |
-
],
|
| 667 |
-
"metadata": {
|
| 668 |
-
"kernelspec": {
|
| 669 |
-
"display_name": ".venv",
|
| 670 |
-
"language": "python",
|
| 671 |
-
"name": "python3"
|
| 672 |
-
},
|
| 673 |
-
"language_info": {
|
| 674 |
-
"codemirror_mode": {
|
| 675 |
-
"name": "ipython",
|
| 676 |
-
"version": 3
|
| 677 |
-
},
|
| 678 |
-
"file_extension": ".py",
|
| 679 |
-
"mimetype": "text/x-python",
|
| 680 |
-
"name": "python",
|
| 681 |
-
"nbconvert_exporter": "python",
|
| 682 |
-
"pygments_lexer": "ipython3",
|
| 683 |
-
"version": "3.12.11"
|
| 684 |
-
}
|
| 685 |
-
},
|
| 686 |
-
"nbformat": 4,
|
| 687 |
-
"nbformat_minor": 2
|
| 688 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab1_Mudassar.ipynb
DELETED
|
@@ -1,260 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# First Agentic AI workflow with OPENAI"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"#### And please do remember to contact me if I can help\n",
|
| 15 |
-
"\n",
|
| 16 |
-
"And I love to connect: https://www.linkedin.com/in/muhammad-mudassar-a65645192/"
|
| 17 |
-
]
|
| 18 |
-
},
|
| 19 |
-
{
|
| 20 |
-
"cell_type": "markdown",
|
| 21 |
-
"metadata": {},
|
| 22 |
-
"source": [
|
| 23 |
-
"## Import Libraries"
|
| 24 |
-
]
|
| 25 |
-
},
|
| 26 |
-
{
|
| 27 |
-
"cell_type": "code",
|
| 28 |
-
"execution_count": 59,
|
| 29 |
-
"metadata": {},
|
| 30 |
-
"outputs": [],
|
| 31 |
-
"source": [
|
| 32 |
-
"import os\n",
|
| 33 |
-
"import re\n",
|
| 34 |
-
"from openai import OpenAI\n",
|
| 35 |
-
"from dotenv import load_dotenv\n",
|
| 36 |
-
"from IPython.display import Markdown, display"
|
| 37 |
-
]
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"cell_type": "code",
|
| 41 |
-
"execution_count": null,
|
| 42 |
-
"metadata": {},
|
| 43 |
-
"outputs": [],
|
| 44 |
-
"source": [
|
| 45 |
-
"load_dotenv(override=True)"
|
| 46 |
-
]
|
| 47 |
-
},
|
| 48 |
-
{
|
| 49 |
-
"cell_type": "code",
|
| 50 |
-
"execution_count": null,
|
| 51 |
-
"metadata": {},
|
| 52 |
-
"outputs": [],
|
| 53 |
-
"source": [
|
| 54 |
-
"openai_api_key=os.getenv(\"OPENAI_API_KEY\")\n",
|
| 55 |
-
"if openai_api_key:\n",
|
| 56 |
-
" print(f\"openai api key exists and begins {openai_api_key[:8]}\")\n",
|
| 57 |
-
"else:\n",
|
| 58 |
-
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the gui\")"
|
| 59 |
-
]
|
| 60 |
-
},
|
| 61 |
-
{
|
| 62 |
-
"cell_type": "markdown",
|
| 63 |
-
"metadata": {},
|
| 64 |
-
"source": [
|
| 65 |
-
"## Workflow with OPENAI"
|
| 66 |
-
]
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"cell_type": "code",
|
| 70 |
-
"execution_count": 21,
|
| 71 |
-
"metadata": {},
|
| 72 |
-
"outputs": [],
|
| 73 |
-
"source": [
|
| 74 |
-
"openai=OpenAI()"
|
| 75 |
-
]
|
| 76 |
-
},
|
| 77 |
-
{
|
| 78 |
-
"cell_type": "code",
|
| 79 |
-
"execution_count": 31,
|
| 80 |
-
"metadata": {},
|
| 81 |
-
"outputs": [],
|
| 82 |
-
"source": [
|
| 83 |
-
"message = [{'role':'user','content':\"what is 2+3?\"}]"
|
| 84 |
-
]
|
| 85 |
-
},
|
| 86 |
-
{
|
| 87 |
-
"cell_type": "code",
|
| 88 |
-
"execution_count": null,
|
| 89 |
-
"metadata": {},
|
| 90 |
-
"outputs": [],
|
| 91 |
-
"source": [
|
| 92 |
-
"response = openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 93 |
-
"print(response.choices[0].message.content)"
|
| 94 |
-
]
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"cell_type": "code",
|
| 98 |
-
"execution_count": 33,
|
| 99 |
-
"metadata": {},
|
| 100 |
-
"outputs": [],
|
| 101 |
-
"source": [
|
| 102 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 103 |
-
"message=[{'role':'user','content':question}]"
|
| 104 |
-
]
|
| 105 |
-
},
|
| 106 |
-
{
|
| 107 |
-
"cell_type": "code",
|
| 108 |
-
"execution_count": null,
|
| 109 |
-
"metadata": {},
|
| 110 |
-
"outputs": [],
|
| 111 |
-
"source": [
|
| 112 |
-
"response=openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 113 |
-
"question=response.choices[0].message.content\n",
|
| 114 |
-
"print(f\"Answer: {question}\")"
|
| 115 |
-
]
|
| 116 |
-
},
|
| 117 |
-
{
|
| 118 |
-
"cell_type": "code",
|
| 119 |
-
"execution_count": 35,
|
| 120 |
-
"metadata": {},
|
| 121 |
-
"outputs": [],
|
| 122 |
-
"source": [
|
| 123 |
-
"message=[{'role':'user','content':question}]"
|
| 124 |
-
]
|
| 125 |
-
},
|
| 126 |
-
{
|
| 127 |
-
"cell_type": "code",
|
| 128 |
-
"execution_count": null,
|
| 129 |
-
"metadata": {},
|
| 130 |
-
"outputs": [],
|
| 131 |
-
"source": [
|
| 132 |
-
"response=openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 133 |
-
"answer = response.choices[0].message.content\n",
|
| 134 |
-
"print(f\"Answer: {answer}\")"
|
| 135 |
-
]
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"cell_type": "code",
|
| 139 |
-
"execution_count": null,
|
| 140 |
-
"metadata": {},
|
| 141 |
-
"outputs": [],
|
| 142 |
-
"source": [
|
| 143 |
-
"# convert \\[ ... \\] to $$ ... $$, to properly render Latex\n",
|
| 144 |
-
"converted_answer = re.sub(r'\\\\[\\[\\]]', '$$', answer)\n",
|
| 145 |
-
"display(Markdown(converted_answer))"
|
| 146 |
-
]
|
| 147 |
-
},
|
| 148 |
-
{
|
| 149 |
-
"cell_type": "markdown",
|
| 150 |
-
"metadata": {},
|
| 151 |
-
"source": [
|
| 152 |
-
"## Exercise"
|
| 153 |
-
]
|
| 154 |
-
},
|
| 155 |
-
{
|
| 156 |
-
"cell_type": "markdown",
|
| 157 |
-
"metadata": {},
|
| 158 |
-
"source": [
|
| 159 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 160 |
-
" <tr>\n",
|
| 161 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 162 |
-
" <img src=\"../../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 163 |
-
" </td>\n",
|
| 164 |
-
" <td>\n",
|
| 165 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 166 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 167 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 168 |
-
" Finally have a third LLM call propose the Agentic AI solution.\n",
|
| 169 |
-
" </span>\n",
|
| 170 |
-
" </td>\n",
|
| 171 |
-
" </tr>\n",
|
| 172 |
-
"</table>"
|
| 173 |
-
]
|
| 174 |
-
},
|
| 175 |
-
{
|
| 176 |
-
"cell_type": "code",
|
| 177 |
-
"execution_count": 42,
|
| 178 |
-
"metadata": {},
|
| 179 |
-
"outputs": [],
|
| 180 |
-
"source": [
|
| 181 |
-
"message = [{'role':'user','content':\"give me a business area related to ecommerce that might be worth exploring for a agentic opportunity.\"}]"
|
| 182 |
-
]
|
| 183 |
-
},
|
| 184 |
-
{
|
| 185 |
-
"cell_type": "code",
|
| 186 |
-
"execution_count": null,
|
| 187 |
-
"metadata": {},
|
| 188 |
-
"outputs": [],
|
| 189 |
-
"source": [
|
| 190 |
-
"response = openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 191 |
-
"business_area = response.choices[0].message.content\n",
|
| 192 |
-
"business_area"
|
| 193 |
-
]
|
| 194 |
-
},
|
| 195 |
-
{
|
| 196 |
-
"cell_type": "code",
|
| 197 |
-
"execution_count": null,
|
| 198 |
-
"metadata": {},
|
| 199 |
-
"outputs": [],
|
| 200 |
-
"source": [
|
| 201 |
-
"message = business_area + \"present a pain-point in that industry - something challenging that might be ripe for an agentic solutions.\"\n",
|
| 202 |
-
"message"
|
| 203 |
-
]
|
| 204 |
-
},
|
| 205 |
-
{
|
| 206 |
-
"cell_type": "code",
|
| 207 |
-
"execution_count": null,
|
| 208 |
-
"metadata": {},
|
| 209 |
-
"outputs": [],
|
| 210 |
-
"source": [
|
| 211 |
-
"message = [{'role': 'user', 'content': message}]\n",
|
| 212 |
-
"response = openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 213 |
-
"question=response.choices[0].message.content\n",
|
| 214 |
-
"question"
|
| 215 |
-
]
|
| 216 |
-
},
|
| 217 |
-
{
|
| 218 |
-
"cell_type": "code",
|
| 219 |
-
"execution_count": null,
|
| 220 |
-
"metadata": {},
|
| 221 |
-
"outputs": [],
|
| 222 |
-
"source": [
|
| 223 |
-
"message=[{'role':'user','content':question}]\n",
|
| 224 |
-
"response=openai.chat.completions.create(model=\"gpt-4o-mini\",messages=message)\n",
|
| 225 |
-
"answer=response.choices[0].message.content\n",
|
| 226 |
-
"print(answer)"
|
| 227 |
-
]
|
| 228 |
-
},
|
| 229 |
-
{
|
| 230 |
-
"cell_type": "code",
|
| 231 |
-
"execution_count": null,
|
| 232 |
-
"metadata": {},
|
| 233 |
-
"outputs": [],
|
| 234 |
-
"source": [
|
| 235 |
-
"display(Markdown(answer))"
|
| 236 |
-
]
|
| 237 |
-
}
|
| 238 |
-
],
|
| 239 |
-
"metadata": {
|
| 240 |
-
"kernelspec": {
|
| 241 |
-
"display_name": ".venv",
|
| 242 |
-
"language": "python",
|
| 243 |
-
"name": "python3"
|
| 244 |
-
},
|
| 245 |
-
"language_info": {
|
| 246 |
-
"codemirror_mode": {
|
| 247 |
-
"name": "ipython",
|
| 248 |
-
"version": 3
|
| 249 |
-
},
|
| 250 |
-
"file_extension": ".py",
|
| 251 |
-
"mimetype": "text/x-python",
|
| 252 |
-
"name": "python",
|
| 253 |
-
"nbconvert_exporter": "python",
|
| 254 |
-
"pygments_lexer": "ipython3",
|
| 255 |
-
"version": "3.12.5"
|
| 256 |
-
}
|
| 257 |
-
},
|
| 258 |
-
"nbformat": 4,
|
| 259 |
-
"nbformat_minor": 2
|
| 260 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab1_Thanh.ipynb
DELETED
|
@@ -1,165 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"### And please do remember to contact me if I can help\n",
|
| 15 |
-
"\n",
|
| 16 |
-
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 17 |
-
"\n",
|
| 18 |
-
"\n",
|
| 19 |
-
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 20 |
-
"\n",
|
| 21 |
-
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 22 |
-
"- Open extensions (View >> extensions)\n",
|
| 23 |
-
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 24 |
-
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 25 |
-
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 26 |
-
"\n",
|
| 27 |
-
"And then:\n",
|
| 28 |
-
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 29 |
-
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 30 |
-
"3. Enjoy!\n",
|
| 31 |
-
"\n",
|
| 32 |
-
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 33 |
-
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 34 |
-
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 35 |
-
"2. In the Settings search bar, type \"venv\" \n",
|
| 36 |
-
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 37 |
-
"And then try again.\n",
|
| 38 |
-
"\n",
|
| 39 |
-
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 40 |
-
"`conda deactivate` \n",
|
| 41 |
-
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 42 |
-
"`conda config --set auto_activate_base false` \n",
|
| 43 |
-
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 44 |
-
]
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"cell_type": "code",
|
| 48 |
-
"execution_count": null,
|
| 49 |
-
"metadata": {},
|
| 50 |
-
"outputs": [],
|
| 51 |
-
"source": [
|
| 52 |
-
"from dotenv import load_dotenv\n",
|
| 53 |
-
"load_dotenv()"
|
| 54 |
-
]
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"cell_type": "code",
|
| 58 |
-
"execution_count": null,
|
| 59 |
-
"metadata": {},
|
| 60 |
-
"outputs": [],
|
| 61 |
-
"source": [
|
| 62 |
-
"# Check the keys\n",
|
| 63 |
-
"import google.generativeai as genai\n",
|
| 64 |
-
"import os\n",
|
| 65 |
-
"genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))\n",
|
| 66 |
-
"model = genai.GenerativeModel(model_name=\"gemini-1.5-flash\")\n"
|
| 67 |
-
]
|
| 68 |
-
},
|
| 69 |
-
{
|
| 70 |
-
"cell_type": "code",
|
| 71 |
-
"execution_count": null,
|
| 72 |
-
"metadata": {},
|
| 73 |
-
"outputs": [],
|
| 74 |
-
"source": [
|
| 75 |
-
"# Create a list of messages in the familiar Gemini GenAI format\n",
|
| 76 |
-
"\n",
|
| 77 |
-
"response = model.generate_content([\"2+2=?\"])\n",
|
| 78 |
-
"response.text"
|
| 79 |
-
]
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"cell_type": "code",
|
| 83 |
-
"execution_count": null,
|
| 84 |
-
"metadata": {},
|
| 85 |
-
"outputs": [],
|
| 86 |
-
"source": [
|
| 87 |
-
"# And now - let's ask for a question:\n",
|
| 88 |
-
"\n",
|
| 89 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 90 |
-
"\n",
|
| 91 |
-
"response = model.generate_content([question])\n",
|
| 92 |
-
"print(response.text)"
|
| 93 |
-
]
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"cell_type": "code",
|
| 97 |
-
"execution_count": null,
|
| 98 |
-
"metadata": {},
|
| 99 |
-
"outputs": [],
|
| 100 |
-
"source": [
|
| 101 |
-
"from IPython.display import Markdown, display\n",
|
| 102 |
-
"\n",
|
| 103 |
-
"display(Markdown(response.text))"
|
| 104 |
-
]
|
| 105 |
-
},
|
| 106 |
-
{
|
| 107 |
-
"cell_type": "markdown",
|
| 108 |
-
"metadata": {},
|
| 109 |
-
"source": [
|
| 110 |
-
"# Congratulations!\n",
|
| 111 |
-
"\n",
|
| 112 |
-
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 113 |
-
"\n",
|
| 114 |
-
"Next time things get more interesting..."
|
| 115 |
-
]
|
| 116 |
-
},
|
| 117 |
-
{
|
| 118 |
-
"cell_type": "code",
|
| 119 |
-
"execution_count": null,
|
| 120 |
-
"metadata": {},
|
| 121 |
-
"outputs": [],
|
| 122 |
-
"source": [
|
| 123 |
-
"# First create the messages:\n",
|
| 124 |
-
"\n",
|
| 125 |
-
"messages = [{\"role\": \"user\", \"content\": \"Something here\"}]\n",
|
| 126 |
-
"\n",
|
| 127 |
-
"# Then make the first call:\n",
|
| 128 |
-
"\n",
|
| 129 |
-
"response =\n",
|
| 130 |
-
"\n",
|
| 131 |
-
"# Then read the business idea:\n",
|
| 132 |
-
"\n",
|
| 133 |
-
"business_idea = response.\n",
|
| 134 |
-
"\n",
|
| 135 |
-
"# And repeat!"
|
| 136 |
-
]
|
| 137 |
-
},
|
| 138 |
-
{
|
| 139 |
-
"cell_type": "markdown",
|
| 140 |
-
"metadata": {},
|
| 141 |
-
"source": []
|
| 142 |
-
}
|
| 143 |
-
],
|
| 144 |
-
"metadata": {
|
| 145 |
-
"kernelspec": {
|
| 146 |
-
"display_name": "llm_projects",
|
| 147 |
-
"language": "python",
|
| 148 |
-
"name": "python3"
|
| 149 |
-
},
|
| 150 |
-
"language_info": {
|
| 151 |
-
"codemirror_mode": {
|
| 152 |
-
"name": "ipython",
|
| 153 |
-
"version": 3
|
| 154 |
-
},
|
| 155 |
-
"file_extension": ".py",
|
| 156 |
-
"mimetype": "text/x-python",
|
| 157 |
-
"name": "python",
|
| 158 |
-
"nbconvert_exporter": "python",
|
| 159 |
-
"pygments_lexer": "ipython3",
|
| 160 |
-
"version": "3.10.15"
|
| 161 |
-
}
|
| 162 |
-
},
|
| 163 |
-
"nbformat": 4,
|
| 164 |
-
"nbformat_minor": 2
|
| 165 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab1_cm.ipynb
DELETED
|
@@ -1,305 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
-
" <tr>\n",
|
| 16 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
-
" <img src=\"../../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
-
" </td>\n",
|
| 19 |
-
" <td>\n",
|
| 20 |
-
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
-
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
-
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 23 |
-
" Well in that case, you're ready!!\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "markdown",
|
| 32 |
-
"metadata": {},
|
| 33 |
-
"source": [
|
| 34 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 35 |
-
" <tr>\n",
|
| 36 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 37 |
-
" <img src=\"../../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 38 |
-
" </td>\n",
|
| 39 |
-
" <td>\n",
|
| 40 |
-
" <h2 style=\"color:#00bfff;\">Treat these labs as a resource</h2>\n",
|
| 41 |
-
" <span style=\"color:#00bfff;\">I push updates to the code regularly. When people ask questions or have problems, I incorporate it in the code, adding more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but in addition, I've added more steps and better explanations. Consider this like an interactive book that accompanies the lectures.\n",
|
| 42 |
-
" </span>\n",
|
| 43 |
-
" </td>\n",
|
| 44 |
-
" </tr>\n",
|
| 45 |
-
"</table>"
|
| 46 |
-
]
|
| 47 |
-
},
|
| 48 |
-
{
|
| 49 |
-
"cell_type": "markdown",
|
| 50 |
-
"metadata": {},
|
| 51 |
-
"source": [
|
| 52 |
-
"### And please do remember to contact me if I can help\n",
|
| 53 |
-
"\n",
|
| 54 |
-
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 55 |
-
"\n",
|
| 56 |
-
"\n",
|
| 57 |
-
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 58 |
-
"\n",
|
| 59 |
-
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 60 |
-
"- Open extensions (View >> extensions)\n",
|
| 61 |
-
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 62 |
-
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 63 |
-
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 64 |
-
"\n",
|
| 65 |
-
"And then:\n",
|
| 66 |
-
"1. Run `uv add google-genai` to install the Google Gemini library. (If you had started your environment before running this command, you will need to restart your environment in the Jupyter notebook.)\n",
|
| 67 |
-
"2. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 68 |
-
"3. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 69 |
-
"4. Enjoy!\n",
|
| 70 |
-
"\n",
|
| 71 |
-
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 72 |
-
"1. From the Cursor menu, choose Settings >> VSCode Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 73 |
-
"2. In the Settings search bar, type \"venv\" \n",
|
| 74 |
-
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 75 |
-
"And then try again.\n",
|
| 76 |
-
"\n",
|
| 77 |
-
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 78 |
-
"`conda deactivate` \n",
|
| 79 |
-
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 80 |
-
"`conda config --set auto_activate_base false` \n",
|
| 81 |
-
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 82 |
-
]
|
| 83 |
-
},
|
| 84 |
-
{
|
| 85 |
-
"cell_type": "code",
|
| 86 |
-
"execution_count": null,
|
| 87 |
-
"metadata": {},
|
| 88 |
-
"outputs": [],
|
| 89 |
-
"source": [
|
| 90 |
-
"from dotenv import load_dotenv\n"
|
| 91 |
-
]
|
| 92 |
-
},
|
| 93 |
-
{
|
| 94 |
-
"cell_type": "code",
|
| 95 |
-
"execution_count": null,
|
| 96 |
-
"metadata": {},
|
| 97 |
-
"outputs": [],
|
| 98 |
-
"source": [
|
| 99 |
-
"# Next it's time to load the API keys into environment variables\n",
|
| 100 |
-
"\n",
|
| 101 |
-
"load_dotenv(override=True)"
|
| 102 |
-
]
|
| 103 |
-
},
|
| 104 |
-
{
|
| 105 |
-
"cell_type": "code",
|
| 106 |
-
"execution_count": null,
|
| 107 |
-
"metadata": {},
|
| 108 |
-
"outputs": [],
|
| 109 |
-
"source": [
|
| 110 |
-
"# Check the keys\n",
|
| 111 |
-
"\n",
|
| 112 |
-
"import os\n",
|
| 113 |
-
"gemini_api_key = os.getenv('GEMINI_API_KEY')\n",
|
| 114 |
-
"\n",
|
| 115 |
-
"if gemini_api_key:\n",
|
| 116 |
-
" print(f\"Gemini API Key exists and begins {gemini_api_key[:8]}\")\n",
|
| 117 |
-
"else:\n",
|
| 118 |
-
" print(\"Gemini API Key not set - please head to the troubleshooting guide in the guides folder\")\n",
|
| 119 |
-
" \n"
|
| 120 |
-
]
|
| 121 |
-
},
|
| 122 |
-
{
|
| 123 |
-
"cell_type": "code",
|
| 124 |
-
"execution_count": null,
|
| 125 |
-
"metadata": {},
|
| 126 |
-
"outputs": [],
|
| 127 |
-
"source": [
|
| 128 |
-
"# And now - the all important import statement\n",
|
| 129 |
-
"# If you get an import error - head over to troubleshooting guide\n",
|
| 130 |
-
"\n",
|
| 131 |
-
"from google import genai"
|
| 132 |
-
]
|
| 133 |
-
},
|
| 134 |
-
{
|
| 135 |
-
"cell_type": "code",
|
| 136 |
-
"execution_count": null,
|
| 137 |
-
"metadata": {},
|
| 138 |
-
"outputs": [],
|
| 139 |
-
"source": [
|
| 140 |
-
"# And now we'll create an instance of the Gemini GenAI class\n",
|
| 141 |
-
"# If you're not sure what it means to create an instance of a class - head over to the guides folder!\n",
|
| 142 |
-
"# If you get a NameError - head over to the guides folder to learn about NameErrors\n",
|
| 143 |
-
"\n",
|
| 144 |
-
"client = genai.Client(api_key=gemini_api_key)"
|
| 145 |
-
]
|
| 146 |
-
},
|
| 147 |
-
{
|
| 148 |
-
"cell_type": "code",
|
| 149 |
-
"execution_count": null,
|
| 150 |
-
"metadata": {},
|
| 151 |
-
"outputs": [],
|
| 152 |
-
"source": [
|
| 153 |
-
"# Create a list of messages in the familiar Gemini GenAI format\n",
|
| 154 |
-
"\n",
|
| 155 |
-
"messages = [\"What is 2+2?\"]"
|
| 156 |
-
]
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"cell_type": "code",
|
| 160 |
-
"execution_count": null,
|
| 161 |
-
"metadata": {},
|
| 162 |
-
"outputs": [],
|
| 163 |
-
"source": [
|
| 164 |
-
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 165 |
-
"\n",
|
| 166 |
-
"response = client.models.generate_content(\n",
|
| 167 |
-
" model=\"gemini-2.0-flash\", contents=messages\n",
|
| 168 |
-
")\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"print(response.text)\n"
|
| 171 |
-
]
|
| 172 |
-
},
|
| 173 |
-
{
|
| 174 |
-
"cell_type": "code",
|
| 175 |
-
"execution_count": null,
|
| 176 |
-
"metadata": {},
|
| 177 |
-
"outputs": [],
|
| 178 |
-
"source": [
|
| 179 |
-
"\n",
|
| 180 |
-
"# Let's now create a challenging question\n",
|
| 181 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 182 |
-
"\n",
|
| 183 |
-
"# Ask the model\n",
|
| 184 |
-
"response = client.models.generate_content(\n",
|
| 185 |
-
" model=\"gemini-2.0-flash\", contents=question\n",
|
| 186 |
-
")\n",
|
| 187 |
-
"\n",
|
| 188 |
-
"question = response.text\n",
|
| 189 |
-
"\n",
|
| 190 |
-
"print(question)\n"
|
| 191 |
-
]
|
| 192 |
-
},
|
| 193 |
-
{
|
| 194 |
-
"cell_type": "code",
|
| 195 |
-
"execution_count": null,
|
| 196 |
-
"metadata": {},
|
| 197 |
-
"outputs": [],
|
| 198 |
-
"source": [
|
| 199 |
-
"# Ask the model's generated question to the model\n",
|
| 200 |
-
"response = client.models.generate_content(\n",
|
| 201 |
-
" model=\"gemini-2.0-flash\", contents=question\n",
|
| 202 |
-
")\n",
|
| 203 |
-
"\n",
|
| 204 |
-
"# Extract the answer from the response\n",
|
| 205 |
-
"answer = response.text\n",
|
| 206 |
-
"\n",
|
| 207 |
-
"# Debug log the answer\n",
|
| 208 |
-
"print(answer)\n"
|
| 209 |
-
]
|
| 210 |
-
},
|
| 211 |
-
{
|
| 212 |
-
"cell_type": "code",
|
| 213 |
-
"execution_count": null,
|
| 214 |
-
"metadata": {},
|
| 215 |
-
"outputs": [],
|
| 216 |
-
"source": [
|
| 217 |
-
"from IPython.display import Markdown, display\n",
|
| 218 |
-
"\n",
|
| 219 |
-
"# Nicely format the answer using Markdown\n",
|
| 220 |
-
"display(Markdown(answer))\n",
|
| 221 |
-
"\n"
|
| 222 |
-
]
|
| 223 |
-
},
|
| 224 |
-
{
|
| 225 |
-
"cell_type": "markdown",
|
| 226 |
-
"metadata": {},
|
| 227 |
-
"source": [
|
| 228 |
-
"# Congratulations!\n",
|
| 229 |
-
"\n",
|
| 230 |
-
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 231 |
-
"\n",
|
| 232 |
-
"Next time things get more interesting..."
|
| 233 |
-
]
|
| 234 |
-
},
|
| 235 |
-
{
|
| 236 |
-
"cell_type": "markdown",
|
| 237 |
-
"metadata": {},
|
| 238 |
-
"source": [
|
| 239 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 240 |
-
" <tr>\n",
|
| 241 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 242 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 243 |
-
" </td>\n",
|
| 244 |
-
" <td>\n",
|
| 245 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 246 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 247 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 248 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 249 |
-
" Finally have a third LLM call propose the Agentic AI solution.\n",
|
| 250 |
-
" </span>\n",
|
| 251 |
-
" </td>\n",
|
| 252 |
-
" </tr>\n",
|
| 253 |
-
"</table>"
|
| 254 |
-
]
|
| 255 |
-
},
|
| 256 |
-
{
|
| 257 |
-
"cell_type": "code",
|
| 258 |
-
"execution_count": null,
|
| 259 |
-
"metadata": {},
|
| 260 |
-
"outputs": [],
|
| 261 |
-
"source": [
|
| 262 |
-
"# First create the messages:\n",
|
| 263 |
-
"\n",
|
| 264 |
-
"\n",
|
| 265 |
-
"messages = [\"Something here\"]\n",
|
| 266 |
-
"\n",
|
| 267 |
-
"# Then make the first call:\n",
|
| 268 |
-
"\n",
|
| 269 |
-
"response =\n",
|
| 270 |
-
"\n",
|
| 271 |
-
"# Then read the business idea:\n",
|
| 272 |
-
"\n",
|
| 273 |
-
"business_idea = response.\n",
|
| 274 |
-
"\n",
|
| 275 |
-
"# And repeat!"
|
| 276 |
-
]
|
| 277 |
-
},
|
| 278 |
-
{
|
| 279 |
-
"cell_type": "markdown",
|
| 280 |
-
"metadata": {},
|
| 281 |
-
"source": []
|
| 282 |
-
}
|
| 283 |
-
],
|
| 284 |
-
"metadata": {
|
| 285 |
-
"kernelspec": {
|
| 286 |
-
"display_name": ".venv",
|
| 287 |
-
"language": "python",
|
| 288 |
-
"name": "python3"
|
| 289 |
-
},
|
| 290 |
-
"language_info": {
|
| 291 |
-
"codemirror_mode": {
|
| 292 |
-
"name": "ipython",
|
| 293 |
-
"version": 3
|
| 294 |
-
},
|
| 295 |
-
"file_extension": ".py",
|
| 296 |
-
"mimetype": "text/x-python",
|
| 297 |
-
"name": "python",
|
| 298 |
-
"nbconvert_exporter": "python",
|
| 299 |
-
"pygments_lexer": "ipython3",
|
| 300 |
-
"version": "3.12.10"
|
| 301 |
-
}
|
| 302 |
-
},
|
| 303 |
-
"nbformat": 4,
|
| 304 |
-
"nbformat_minor": 2
|
| 305 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab1_gemini.ipynb
DELETED
|
@@ -1,305 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
-
" <tr>\n",
|
| 16 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
-
" <img src=\"../../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
-
" </td>\n",
|
| 19 |
-
" <td>\n",
|
| 20 |
-
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
-
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
-
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 23 |
-
" Well in that case, you're ready!!\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "markdown",
|
| 32 |
-
"metadata": {},
|
| 33 |
-
"source": [
|
| 34 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 35 |
-
" <tr>\n",
|
| 36 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 37 |
-
" <img src=\"../../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 38 |
-
" </td>\n",
|
| 39 |
-
" <td>\n",
|
| 40 |
-
" <h2 style=\"color:#00bfff;\">Treat these labs as a resource</h2>\n",
|
| 41 |
-
" <span style=\"color:#00bfff;\">I push updates to the code regularly. When people ask questions or have problems, I incorporate it in the code, adding more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but in addition, I've added more steps and better explanations. Consider this like an interactive book that accompanies the lectures.\n",
|
| 42 |
-
" </span>\n",
|
| 43 |
-
" </td>\n",
|
| 44 |
-
" </tr>\n",
|
| 45 |
-
"</table>"
|
| 46 |
-
]
|
| 47 |
-
},
|
| 48 |
-
{
|
| 49 |
-
"cell_type": "markdown",
|
| 50 |
-
"metadata": {},
|
| 51 |
-
"source": [
|
| 52 |
-
"### And please do remember to contact me if I can help\n",
|
| 53 |
-
"\n",
|
| 54 |
-
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 55 |
-
"\n",
|
| 56 |
-
"\n",
|
| 57 |
-
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 58 |
-
"\n",
|
| 59 |
-
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 60 |
-
"- Open extensions (View >> extensions)\n",
|
| 61 |
-
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 62 |
-
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 63 |
-
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 64 |
-
"\n",
|
| 65 |
-
"And then:\n",
|
| 66 |
-
"1. Run `uv add google-genai` to install the Google Gemini library. (If you had started your environment before running this command, you will need to restart your environment in the Jupyter notebook.)\n",
|
| 67 |
-
"2. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 68 |
-
"3. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 69 |
-
"4. Enjoy!\n",
|
| 70 |
-
"\n",
|
| 71 |
-
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 72 |
-
"1. From the Cursor menu, choose Settings >> VSCode Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 73 |
-
"2. In the Settings search bar, type \"venv\" \n",
|
| 74 |
-
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 75 |
-
"And then try again.\n",
|
| 76 |
-
"\n",
|
| 77 |
-
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 78 |
-
"`conda deactivate` \n",
|
| 79 |
-
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 80 |
-
"`conda config --set auto_activate_base false` \n",
|
| 81 |
-
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 82 |
-
]
|
| 83 |
-
},
|
| 84 |
-
{
|
| 85 |
-
"cell_type": "code",
|
| 86 |
-
"execution_count": null,
|
| 87 |
-
"metadata": {},
|
| 88 |
-
"outputs": [],
|
| 89 |
-
"source": [
|
| 90 |
-
"from dotenv import load_dotenv\n"
|
| 91 |
-
]
|
| 92 |
-
},
|
| 93 |
-
{
|
| 94 |
-
"cell_type": "code",
|
| 95 |
-
"execution_count": null,
|
| 96 |
-
"metadata": {},
|
| 97 |
-
"outputs": [],
|
| 98 |
-
"source": [
|
| 99 |
-
"# Next it's time to load the API keys into environment variables\n",
|
| 100 |
-
"\n",
|
| 101 |
-
"load_dotenv(override=True)"
|
| 102 |
-
]
|
| 103 |
-
},
|
| 104 |
-
{
|
| 105 |
-
"cell_type": "code",
|
| 106 |
-
"execution_count": null,
|
| 107 |
-
"metadata": {},
|
| 108 |
-
"outputs": [],
|
| 109 |
-
"source": [
|
| 110 |
-
"# Check the keys\n",
|
| 111 |
-
"\n",
|
| 112 |
-
"import os\n",
|
| 113 |
-
"gemini_api_key = os.getenv('GEMINI_API_KEY')\n",
|
| 114 |
-
"\n",
|
| 115 |
-
"if gemini_api_key:\n",
|
| 116 |
-
" print(f\"Gemini API Key exists and begins {gemini_api_key[:8]}\")\n",
|
| 117 |
-
"else:\n",
|
| 118 |
-
" print(\"Gemini API Key not set - please head to the troubleshooting guide in the guides folder\")\n",
|
| 119 |
-
" \n"
|
| 120 |
-
]
|
| 121 |
-
},
|
| 122 |
-
{
|
| 123 |
-
"cell_type": "code",
|
| 124 |
-
"execution_count": null,
|
| 125 |
-
"metadata": {},
|
| 126 |
-
"outputs": [],
|
| 127 |
-
"source": [
|
| 128 |
-
"# And now - the all important import statement\n",
|
| 129 |
-
"# If you get an import error - head over to troubleshooting guide\n",
|
| 130 |
-
"\n",
|
| 131 |
-
"from google import genai"
|
| 132 |
-
]
|
| 133 |
-
},
|
| 134 |
-
{
|
| 135 |
-
"cell_type": "code",
|
| 136 |
-
"execution_count": null,
|
| 137 |
-
"metadata": {},
|
| 138 |
-
"outputs": [],
|
| 139 |
-
"source": [
|
| 140 |
-
"# And now we'll create an instance of the Gemini GenAI class\n",
|
| 141 |
-
"# If you're not sure what it means to create an instance of a class - head over to the guides folder!\n",
|
| 142 |
-
"# If you get a NameError - head over to the guides folder to learn about NameErrors\n",
|
| 143 |
-
"\n",
|
| 144 |
-
"client = genai.Client(api_key=gemini_api_key)"
|
| 145 |
-
]
|
| 146 |
-
},
|
| 147 |
-
{
|
| 148 |
-
"cell_type": "code",
|
| 149 |
-
"execution_count": null,
|
| 150 |
-
"metadata": {},
|
| 151 |
-
"outputs": [],
|
| 152 |
-
"source": [
|
| 153 |
-
"# Create a list of messages in the familiar Gemini GenAI format\n",
|
| 154 |
-
"\n",
|
| 155 |
-
"messages = [\"What is 2+2?\"]"
|
| 156 |
-
]
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"cell_type": "code",
|
| 160 |
-
"execution_count": null,
|
| 161 |
-
"metadata": {},
|
| 162 |
-
"outputs": [],
|
| 163 |
-
"source": [
|
| 164 |
-
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 165 |
-
"\n",
|
| 166 |
-
"response = client.models.generate_content(\n",
|
| 167 |
-
" model=\"gemini-2.0-flash\", contents=messages\n",
|
| 168 |
-
")\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"print(response.text)\n"
|
| 171 |
-
]
|
| 172 |
-
},
|
| 173 |
-
{
|
| 174 |
-
"cell_type": "code",
|
| 175 |
-
"execution_count": null,
|
| 176 |
-
"metadata": {},
|
| 177 |
-
"outputs": [],
|
| 178 |
-
"source": [
|
| 179 |
-
"\n",
|
| 180 |
-
"# Let's now create a challenging question\n",
|
| 181 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 182 |
-
"\n",
|
| 183 |
-
"# Ask the model\n",
|
| 184 |
-
"response = client.models.generate_content(\n",
|
| 185 |
-
" model=\"gemini-2.0-flash\", contents=question\n",
|
| 186 |
-
")\n",
|
| 187 |
-
"\n",
|
| 188 |
-
"question = response.text\n",
|
| 189 |
-
"\n",
|
| 190 |
-
"print(question)\n"
|
| 191 |
-
]
|
| 192 |
-
},
|
| 193 |
-
{
|
| 194 |
-
"cell_type": "code",
|
| 195 |
-
"execution_count": null,
|
| 196 |
-
"metadata": {},
|
| 197 |
-
"outputs": [],
|
| 198 |
-
"source": [
|
| 199 |
-
"# Ask the model's generated question to the model\n",
|
| 200 |
-
"response = client.models.generate_content(\n",
|
| 201 |
-
" model=\"gemini-2.0-flash\", contents=question\n",
|
| 202 |
-
")\n",
|
| 203 |
-
"\n",
|
| 204 |
-
"# Extract the answer from the response\n",
|
| 205 |
-
"answer = response.text\n",
|
| 206 |
-
"\n",
|
| 207 |
-
"# Debug log the answer\n",
|
| 208 |
-
"print(answer)\n"
|
| 209 |
-
]
|
| 210 |
-
},
|
| 211 |
-
{
|
| 212 |
-
"cell_type": "code",
|
| 213 |
-
"execution_count": null,
|
| 214 |
-
"metadata": {},
|
| 215 |
-
"outputs": [],
|
| 216 |
-
"source": [
|
| 217 |
-
"from IPython.display import Markdown, display\n",
|
| 218 |
-
"\n",
|
| 219 |
-
"# Nicely format the answer using Markdown\n",
|
| 220 |
-
"display(Markdown(answer))\n",
|
| 221 |
-
"\n"
|
| 222 |
-
]
|
| 223 |
-
},
|
| 224 |
-
{
|
| 225 |
-
"cell_type": "markdown",
|
| 226 |
-
"metadata": {},
|
| 227 |
-
"source": [
|
| 228 |
-
"# Congratulations!\n",
|
| 229 |
-
"\n",
|
| 230 |
-
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 231 |
-
"\n",
|
| 232 |
-
"Next time things get more interesting..."
|
| 233 |
-
]
|
| 234 |
-
},
|
| 235 |
-
{
|
| 236 |
-
"cell_type": "markdown",
|
| 237 |
-
"metadata": {},
|
| 238 |
-
"source": [
|
| 239 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 240 |
-
" <tr>\n",
|
| 241 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 242 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 243 |
-
" </td>\n",
|
| 244 |
-
" <td>\n",
|
| 245 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 246 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 247 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 248 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 249 |
-
" Finally have a third LLM call propose the Agentic AI solution.\n",
|
| 250 |
-
" </span>\n",
|
| 251 |
-
" </td>\n",
|
| 252 |
-
" </tr>\n",
|
| 253 |
-
"</table>"
|
| 254 |
-
]
|
| 255 |
-
},
|
| 256 |
-
{
|
| 257 |
-
"cell_type": "code",
|
| 258 |
-
"execution_count": null,
|
| 259 |
-
"metadata": {},
|
| 260 |
-
"outputs": [],
|
| 261 |
-
"source": [
|
| 262 |
-
"# First create the messages:\n",
|
| 263 |
-
"\n",
|
| 264 |
-
"\n",
|
| 265 |
-
"messages = [\"Something here\"]\n",
|
| 266 |
-
"\n",
|
| 267 |
-
"# Then make the first call:\n",
|
| 268 |
-
"\n",
|
| 269 |
-
"response =\n",
|
| 270 |
-
"\n",
|
| 271 |
-
"# Then read the business idea:\n",
|
| 272 |
-
"\n",
|
| 273 |
-
"business_idea = response.\n",
|
| 274 |
-
"\n",
|
| 275 |
-
"# And repeat!"
|
| 276 |
-
]
|
| 277 |
-
},
|
| 278 |
-
{
|
| 279 |
-
"cell_type": "markdown",
|
| 280 |
-
"metadata": {},
|
| 281 |
-
"source": []
|
| 282 |
-
}
|
| 283 |
-
],
|
| 284 |
-
"metadata": {
|
| 285 |
-
"kernelspec": {
|
| 286 |
-
"display_name": ".venv",
|
| 287 |
-
"language": "python",
|
| 288 |
-
"name": "python3"
|
| 289 |
-
},
|
| 290 |
-
"language_info": {
|
| 291 |
-
"codemirror_mode": {
|
| 292 |
-
"name": "ipython",
|
| 293 |
-
"version": 3
|
| 294 |
-
},
|
| 295 |
-
"file_extension": ".py",
|
| 296 |
-
"mimetype": "text/x-python",
|
| 297 |
-
"name": "python",
|
| 298 |
-
"nbconvert_exporter": "python",
|
| 299 |
-
"pygments_lexer": "ipython3",
|
| 300 |
-
"version": "3.12.10"
|
| 301 |
-
}
|
| 302 |
-
},
|
| 303 |
-
"nbformat": 4,
|
| 304 |
-
"nbformat_minor": 2
|
| 305 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab1_groq.ipynb
DELETED
|
@@ -1,262 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"### Implementing Notebook 1 using various LLMs via Groq"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "code",
|
| 12 |
-
"execution_count": null,
|
| 13 |
-
"metadata": {},
|
| 14 |
-
"outputs": [],
|
| 15 |
-
"source": [
|
| 16 |
-
"from dotenv import load_dotenv"
|
| 17 |
-
]
|
| 18 |
-
},
|
| 19 |
-
{
|
| 20 |
-
"cell_type": "code",
|
| 21 |
-
"execution_count": null,
|
| 22 |
-
"metadata": {},
|
| 23 |
-
"outputs": [],
|
| 24 |
-
"source": [
|
| 25 |
-
"load_dotenv(override=True)"
|
| 26 |
-
]
|
| 27 |
-
},
|
| 28 |
-
{
|
| 29 |
-
"cell_type": "code",
|
| 30 |
-
"execution_count": null,
|
| 31 |
-
"metadata": {},
|
| 32 |
-
"outputs": [],
|
| 33 |
-
"source": [
|
| 34 |
-
"import os\n",
|
| 35 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 36 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 37 |
-
"\n",
|
| 38 |
-
"if openai_api_key:\n",
|
| 39 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 40 |
-
"else:\n",
|
| 41 |
-
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 42 |
-
"\n",
|
| 43 |
-
"if groq_api_key:\n",
|
| 44 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:2]}\")\n",
|
| 45 |
-
"else:\n",
|
| 46 |
-
" print(\"Groq API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 47 |
-
" \n"
|
| 48 |
-
]
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"cell_type": "code",
|
| 52 |
-
"execution_count": null,
|
| 53 |
-
"metadata": {},
|
| 54 |
-
"outputs": [],
|
| 55 |
-
"source": [
|
| 56 |
-
"from openai import OpenAI"
|
| 57 |
-
]
|
| 58 |
-
},
|
| 59 |
-
{
|
| 60 |
-
"cell_type": "code",
|
| 61 |
-
"execution_count": null,
|
| 62 |
-
"metadata": {},
|
| 63 |
-
"outputs": [],
|
| 64 |
-
"source": [
|
| 65 |
-
"openai = OpenAI(\n",
|
| 66 |
-
" base_url=\"https://api.groq.com/openai/v1\",\n",
|
| 67 |
-
" api_key=groq_api_key\n",
|
| 68 |
-
")"
|
| 69 |
-
]
|
| 70 |
-
},
|
| 71 |
-
{
|
| 72 |
-
"cell_type": "code",
|
| 73 |
-
"execution_count": null,
|
| 74 |
-
"metadata": {},
|
| 75 |
-
"outputs": [],
|
| 76 |
-
"source": [
|
| 77 |
-
"# And now - let's ask for a question:\n",
|
| 78 |
-
"\n",
|
| 79 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 80 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 81 |
-
]
|
| 82 |
-
},
|
| 83 |
-
{
|
| 84 |
-
"cell_type": "code",
|
| 85 |
-
"execution_count": null,
|
| 86 |
-
"metadata": {},
|
| 87 |
-
"outputs": [],
|
| 88 |
-
"source": [
|
| 89 |
-
"# openai/gpt-oss-120b\n",
|
| 90 |
-
"\n",
|
| 91 |
-
"response = openai.chat.completions.create(\n",
|
| 92 |
-
" model=\"openai/gpt-oss-120b\",\n",
|
| 93 |
-
" messages=messages\n",
|
| 94 |
-
")\n",
|
| 95 |
-
"\n",
|
| 96 |
-
"print(response.choices[0].message.content)\n",
|
| 97 |
-
"\n"
|
| 98 |
-
]
|
| 99 |
-
},
|
| 100 |
-
{
|
| 101 |
-
"cell_type": "code",
|
| 102 |
-
"execution_count": null,
|
| 103 |
-
"metadata": {},
|
| 104 |
-
"outputs": [],
|
| 105 |
-
"source": [
|
| 106 |
-
"# moonshotai/kimi-k2-instruct\n",
|
| 107 |
-
"\n",
|
| 108 |
-
"response = openai.chat.completions.create(\n",
|
| 109 |
-
" model=\"moonshotai/kimi-k2-instruct\",\n",
|
| 110 |
-
" messages=messages\n",
|
| 111 |
-
")\n",
|
| 112 |
-
"\n",
|
| 113 |
-
"question = response.choices[0].message.content\n",
|
| 114 |
-
"\n",
|
| 115 |
-
"print(question)\n"
|
| 116 |
-
]
|
| 117 |
-
},
|
| 118 |
-
{
|
| 119 |
-
"cell_type": "code",
|
| 120 |
-
"execution_count": null,
|
| 121 |
-
"metadata": {},
|
| 122 |
-
"outputs": [],
|
| 123 |
-
"source": [
|
| 124 |
-
"# form a new messages list\n",
|
| 125 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 126 |
-
]
|
| 127 |
-
},
|
| 128 |
-
{
|
| 129 |
-
"cell_type": "code",
|
| 130 |
-
"execution_count": null,
|
| 131 |
-
"metadata": {},
|
| 132 |
-
"outputs": [],
|
| 133 |
-
"source": [
|
| 134 |
-
"# Ask llama-3.1-8b-instant\n",
|
| 135 |
-
"\n",
|
| 136 |
-
"response = openai.chat.completions.create(\n",
|
| 137 |
-
" model=\"llama-3.1-8b-instant\",\n",
|
| 138 |
-
" messages=messages\n",
|
| 139 |
-
")\n",
|
| 140 |
-
"\n",
|
| 141 |
-
"answer = response.choices[0].message.content\n",
|
| 142 |
-
"print(answer)\n"
|
| 143 |
-
]
|
| 144 |
-
},
|
| 145 |
-
{
|
| 146 |
-
"cell_type": "code",
|
| 147 |
-
"execution_count": null,
|
| 148 |
-
"metadata": {},
|
| 149 |
-
"outputs": [],
|
| 150 |
-
"source": [
|
| 151 |
-
"from IPython.display import Markdown, display\n",
|
| 152 |
-
"\n",
|
| 153 |
-
"display(Markdown(question))\n",
|
| 154 |
-
"display(Markdown(answer))"
|
| 155 |
-
]
|
| 156 |
-
},
|
| 157 |
-
{
|
| 158 |
-
"cell_type": "markdown",
|
| 159 |
-
"metadata": {},
|
| 160 |
-
"source": [
|
| 161 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 162 |
-
" <tr>\n",
|
| 163 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 164 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 165 |
-
" </td>\n",
|
| 166 |
-
" <td>\n",
|
| 167 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 168 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 169 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 170 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 171 |
-
" Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 172 |
-
" We will cover this in upcoming labs, so don't worry if you're unsure... just give it a try!\n",
|
| 173 |
-
" </span>\n",
|
| 174 |
-
" </td>\n",
|
| 175 |
-
" </tr>\n",
|
| 176 |
-
"</table>"
|
| 177 |
-
]
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"cell_type": "code",
|
| 181 |
-
"execution_count": null,
|
| 182 |
-
"metadata": {},
|
| 183 |
-
"outputs": [],
|
| 184 |
-
"source": [
|
| 185 |
-
"# First create the messages:\n",
|
| 186 |
-
"\n",
|
| 187 |
-
"messages = [{\"role\": \"user\", \"content\": \"Pick a business area that is worth exploring for a Gen-Z audience, that can be an agentic-ai opportunity. \\\n",
|
| 188 |
-
" Somehwere where the concept of agentisation can be applied commerically. Respond only with the business idea.\"}]\n",
|
| 189 |
-
"\n",
|
| 190 |
-
"# Then make the first call: \n",
|
| 191 |
-
"\n",
|
| 192 |
-
"response = openai.chat.completions.create(\n",
|
| 193 |
-
" model = \"qwen/qwen3-32b\",\n",
|
| 194 |
-
" messages = messages\n",
|
| 195 |
-
")\n",
|
| 196 |
-
"\n",
|
| 197 |
-
"# Then read the business idea:\n",
|
| 198 |
-
"\n",
|
| 199 |
-
"business_idea = response.choices[0].message.content\n",
|
| 200 |
-
"print(business_idea)\n",
|
| 201 |
-
"\n",
|
| 202 |
-
"# And repeat! In the next message, include the business idea within the message\n",
|
| 203 |
-
"\n",
|
| 204 |
-
"user_prompt_pain_point = f\"What is the pain point of the Gen-Z audience in the business area of {business_idea}?, that can be solved by an agentic-ai solution? Give a brief answer\"\n",
|
| 205 |
-
"\n",
|
| 206 |
-
"response = openai.chat.completions.create(\n",
|
| 207 |
-
" model = \"gemma2-9b-it\",\n",
|
| 208 |
-
" messages = [{\"role\": \"user\", \"content\": user_prompt_pain_point}]\n",
|
| 209 |
-
")\n",
|
| 210 |
-
"\n",
|
| 211 |
-
"pain_point = response.choices[0].message.content\n",
|
| 212 |
-
"print(pain_point)\n",
|
| 213 |
-
"\n",
|
| 214 |
-
"user_prompt_solution = f\"What is the solution to the pain point {pain_point} of the Gen-Z audience in the business area of {business_idea}?, that can be solved by an agentic-ai solution? Provide a step-by-step breakdown\"\n",
|
| 215 |
-
"\n",
|
| 216 |
-
"response = openai.chat.completions.create(\n",
|
| 217 |
-
" model = \"deepseek-r1-distill-llama-70b\",\n",
|
| 218 |
-
" messages = [{\"role\": \"user\", \"content\": user_prompt_solution}]\n",
|
| 219 |
-
")\n",
|
| 220 |
-
"\n",
|
| 221 |
-
"business_solution = response.choices[0].message.content"
|
| 222 |
-
]
|
| 223 |
-
},
|
| 224 |
-
{
|
| 225 |
-
"cell_type": "code",
|
| 226 |
-
"execution_count": null,
|
| 227 |
-
"metadata": {},
|
| 228 |
-
"outputs": [],
|
| 229 |
-
"source": [
|
| 230 |
-
"display(Markdown(business_solution))"
|
| 231 |
-
]
|
| 232 |
-
},
|
| 233 |
-
{
|
| 234 |
-
"cell_type": "code",
|
| 235 |
-
"execution_count": null,
|
| 236 |
-
"metadata": {},
|
| 237 |
-
"outputs": [],
|
| 238 |
-
"source": []
|
| 239 |
-
}
|
| 240 |
-
],
|
| 241 |
-
"metadata": {
|
| 242 |
-
"kernelspec": {
|
| 243 |
-
"display_name": ".venv",
|
| 244 |
-
"language": "python",
|
| 245 |
-
"name": "python3"
|
| 246 |
-
},
|
| 247 |
-
"language_info": {
|
| 248 |
-
"codemirror_mode": {
|
| 249 |
-
"name": "ipython",
|
| 250 |
-
"version": 3
|
| 251 |
-
},
|
| 252 |
-
"file_extension": ".py",
|
| 253 |
-
"mimetype": "text/x-python",
|
| 254 |
-
"name": "python",
|
| 255 |
-
"nbconvert_exporter": "python",
|
| 256 |
-
"pygments_lexer": "ipython3",
|
| 257 |
-
"version": "3.12.2"
|
| 258 |
-
}
|
| 259 |
-
},
|
| 260 |
-
"nbformat": 4,
|
| 261 |
-
"nbformat_minor": 2
|
| 262 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab1_groq_llama.ipynb
DELETED
|
@@ -1,296 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# First Agentic AI workflow with Groq and Llama-3.3 LLM (free of cost)"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "code",
|
| 12 |
-
"execution_count": 1,
|
| 13 |
-
"metadata": {},
|
| 14 |
-
"outputs": [],
|
| 15 |
-
"source": [
|
| 16 |
-
"# First let's do an import\n",
|
| 17 |
-
"from dotenv import load_dotenv"
|
| 18 |
-
]
|
| 19 |
-
},
|
| 20 |
-
{
|
| 21 |
-
"cell_type": "code",
|
| 22 |
-
"execution_count": null,
|
| 23 |
-
"metadata": {},
|
| 24 |
-
"outputs": [],
|
| 25 |
-
"source": [
|
| 26 |
-
"# Next it's time to load the API keys into environment variables\n",
|
| 27 |
-
"\n",
|
| 28 |
-
"load_dotenv(override=True)"
|
| 29 |
-
]
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"cell_type": "code",
|
| 33 |
-
"execution_count": null,
|
| 34 |
-
"metadata": {},
|
| 35 |
-
"outputs": [],
|
| 36 |
-
"source": [
|
| 37 |
-
"# Check the Groq API key\n",
|
| 38 |
-
"\n",
|
| 39 |
-
"import os\n",
|
| 40 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 41 |
-
"\n",
|
| 42 |
-
"if groq_api_key:\n",
|
| 43 |
-
" print(f\"GROQ API Key exists and begins {groq_api_key[:8]}\")\n",
|
| 44 |
-
"else:\n",
|
| 45 |
-
" print(\"GROQ API Key not set\")\n",
|
| 46 |
-
" \n"
|
| 47 |
-
]
|
| 48 |
-
},
|
| 49 |
-
{
|
| 50 |
-
"cell_type": "code",
|
| 51 |
-
"execution_count": 4,
|
| 52 |
-
"metadata": {},
|
| 53 |
-
"outputs": [],
|
| 54 |
-
"source": [
|
| 55 |
-
"# And now - the all important import statement\n",
|
| 56 |
-
"# If you get an import error - head over to troubleshooting guide\n",
|
| 57 |
-
"\n",
|
| 58 |
-
"from groq import Groq"
|
| 59 |
-
]
|
| 60 |
-
},
|
| 61 |
-
{
|
| 62 |
-
"cell_type": "code",
|
| 63 |
-
"execution_count": 5,
|
| 64 |
-
"metadata": {},
|
| 65 |
-
"outputs": [],
|
| 66 |
-
"source": [
|
| 67 |
-
"# Create a Groq instance\n",
|
| 68 |
-
"groq = Groq()"
|
| 69 |
-
]
|
| 70 |
-
},
|
| 71 |
-
{
|
| 72 |
-
"cell_type": "code",
|
| 73 |
-
"execution_count": 6,
|
| 74 |
-
"metadata": {},
|
| 75 |
-
"outputs": [],
|
| 76 |
-
"source": [
|
| 77 |
-
"# Create a list of messages in the familiar Groq format\n",
|
| 78 |
-
"\n",
|
| 79 |
-
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 80 |
-
]
|
| 81 |
-
},
|
| 82 |
-
{
|
| 83 |
-
"cell_type": "code",
|
| 84 |
-
"execution_count": null,
|
| 85 |
-
"metadata": {},
|
| 86 |
-
"outputs": [],
|
| 87 |
-
"source": [
|
| 88 |
-
"# And now call it!\n",
|
| 89 |
-
"\n",
|
| 90 |
-
"response = groq.chat.completions.create(model='llama-3.3-70b-versatile', messages=messages)\n",
|
| 91 |
-
"print(response.choices[0].message.content)\n"
|
| 92 |
-
]
|
| 93 |
-
},
|
| 94 |
-
{
|
| 95 |
-
"cell_type": "code",
|
| 96 |
-
"execution_count": null,
|
| 97 |
-
"metadata": {},
|
| 98 |
-
"outputs": [],
|
| 99 |
-
"source": []
|
| 100 |
-
},
|
| 101 |
-
{
|
| 102 |
-
"cell_type": "code",
|
| 103 |
-
"execution_count": 8,
|
| 104 |
-
"metadata": {},
|
| 105 |
-
"outputs": [],
|
| 106 |
-
"source": [
|
| 107 |
-
"# And now - let's ask for a question:\n",
|
| 108 |
-
"\n",
|
| 109 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 110 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 111 |
-
]
|
| 112 |
-
},
|
| 113 |
-
{
|
| 114 |
-
"cell_type": "code",
|
| 115 |
-
"execution_count": null,
|
| 116 |
-
"metadata": {},
|
| 117 |
-
"outputs": [],
|
| 118 |
-
"source": [
|
| 119 |
-
"# ask it\n",
|
| 120 |
-
"response = groq.chat.completions.create(\n",
|
| 121 |
-
" model=\"llama-3.3-70b-versatile\",\n",
|
| 122 |
-
" messages=messages\n",
|
| 123 |
-
")\n",
|
| 124 |
-
"\n",
|
| 125 |
-
"question = response.choices[0].message.content\n",
|
| 126 |
-
"\n",
|
| 127 |
-
"print(question)\n"
|
| 128 |
-
]
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"cell_type": "code",
|
| 132 |
-
"execution_count": 10,
|
| 133 |
-
"metadata": {},
|
| 134 |
-
"outputs": [],
|
| 135 |
-
"source": [
|
| 136 |
-
"# form a new messages list\n",
|
| 137 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 138 |
-
]
|
| 139 |
-
},
|
| 140 |
-
{
|
| 141 |
-
"cell_type": "code",
|
| 142 |
-
"execution_count": null,
|
| 143 |
-
"metadata": {},
|
| 144 |
-
"outputs": [],
|
| 145 |
-
"source": [
|
| 146 |
-
"# Ask it again\n",
|
| 147 |
-
"\n",
|
| 148 |
-
"response = groq.chat.completions.create(\n",
|
| 149 |
-
" model=\"llama-3.3-70b-versatile\",\n",
|
| 150 |
-
" messages=messages\n",
|
| 151 |
-
")\n",
|
| 152 |
-
"\n",
|
| 153 |
-
"answer = response.choices[0].message.content\n",
|
| 154 |
-
"print(answer)\n"
|
| 155 |
-
]
|
| 156 |
-
},
|
| 157 |
-
{
|
| 158 |
-
"cell_type": "code",
|
| 159 |
-
"execution_count": null,
|
| 160 |
-
"metadata": {},
|
| 161 |
-
"outputs": [],
|
| 162 |
-
"source": [
|
| 163 |
-
"from IPython.display import Markdown, display\n",
|
| 164 |
-
"\n",
|
| 165 |
-
"display(Markdown(answer))\n",
|
| 166 |
-
"\n"
|
| 167 |
-
]
|
| 168 |
-
},
|
| 169 |
-
{
|
| 170 |
-
"cell_type": "markdown",
|
| 171 |
-
"metadata": {},
|
| 172 |
-
"source": [
|
| 173 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 174 |
-
" <tr>\n",
|
| 175 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 176 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 177 |
-
" </td>\n",
|
| 178 |
-
" <td>\n",
|
| 179 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 180 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 181 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 182 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 183 |
-
" Finally have a third LLM call propose the Agentic AI solution.\n",
|
| 184 |
-
" </span>\n",
|
| 185 |
-
" </td>\n",
|
| 186 |
-
" </tr>\n",
|
| 187 |
-
"</table>"
|
| 188 |
-
]
|
| 189 |
-
},
|
| 190 |
-
{
|
| 191 |
-
"cell_type": "code",
|
| 192 |
-
"execution_count": 17,
|
| 193 |
-
"metadata": {},
|
| 194 |
-
"outputs": [],
|
| 195 |
-
"source": [
|
| 196 |
-
"# First create the messages:\n",
|
| 197 |
-
"\n",
|
| 198 |
-
"messages = [{\"role\": \"user\", \"content\": \"Give me a business area that might be ripe for an Agentic AI solution.\"}]\n",
|
| 199 |
-
"\n",
|
| 200 |
-
"# Then make the first call:\n",
|
| 201 |
-
"\n",
|
| 202 |
-
"response = groq.chat.completions.create(model='llama-3.3-70b-versatile', messages=messages)\n",
|
| 203 |
-
"\n",
|
| 204 |
-
"# Then read the business idea:\n",
|
| 205 |
-
"\n",
|
| 206 |
-
"business_idea = response.choices[0].message.content\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"\n",
|
| 209 |
-
"# And repeat!"
|
| 210 |
-
]
|
| 211 |
-
},
|
| 212 |
-
{
|
| 213 |
-
"cell_type": "code",
|
| 214 |
-
"execution_count": null,
|
| 215 |
-
"metadata": {},
|
| 216 |
-
"outputs": [],
|
| 217 |
-
"source": [
|
| 218 |
-
"\n",
|
| 219 |
-
"display(Markdown(business_idea))"
|
| 220 |
-
]
|
| 221 |
-
},
|
| 222 |
-
{
|
| 223 |
-
"cell_type": "code",
|
| 224 |
-
"execution_count": 19,
|
| 225 |
-
"metadata": {},
|
| 226 |
-
"outputs": [],
|
| 227 |
-
"source": [
|
| 228 |
-
"# Update the message with the business idea from previous step\n",
|
| 229 |
-
"messages = [{\"role\": \"user\", \"content\": \"What is the pain point in the business area of \" + business_idea + \"?\"}]"
|
| 230 |
-
]
|
| 231 |
-
},
|
| 232 |
-
{
|
| 233 |
-
"cell_type": "code",
|
| 234 |
-
"execution_count": 20,
|
| 235 |
-
"metadata": {},
|
| 236 |
-
"outputs": [],
|
| 237 |
-
"source": [
|
| 238 |
-
"# Make the second call\n",
|
| 239 |
-
"response = groq.chat.completions.create(model='llama-3.3-70b-versatile', messages=messages)\n",
|
| 240 |
-
"# Read the pain point\n",
|
| 241 |
-
"pain_point = response.choices[0].message.content\n"
|
| 242 |
-
]
|
| 243 |
-
},
|
| 244 |
-
{
|
| 245 |
-
"cell_type": "code",
|
| 246 |
-
"execution_count": null,
|
| 247 |
-
"metadata": {},
|
| 248 |
-
"outputs": [],
|
| 249 |
-
"source": [
|
| 250 |
-
"display(Markdown(pain_point))\n"
|
| 251 |
-
]
|
| 252 |
-
},
|
| 253 |
-
{
|
| 254 |
-
"cell_type": "code",
|
| 255 |
-
"execution_count": null,
|
| 256 |
-
"metadata": {},
|
| 257 |
-
"outputs": [],
|
| 258 |
-
"source": [
|
| 259 |
-
"# Make the third call\n",
|
| 260 |
-
"messages = [{\"role\": \"user\", \"content\": \"What is the Agentic AI solution for the pain point of \" + pain_point + \"?\"}]\n",
|
| 261 |
-
"response = groq.chat.completions.create(model='llama-3.3-70b-versatile', messages=messages)\n",
|
| 262 |
-
"# Read the agentic solution\n",
|
| 263 |
-
"agentic_solution = response.choices[0].message.content\n",
|
| 264 |
-
"display(Markdown(agentic_solution))"
|
| 265 |
-
]
|
| 266 |
-
},
|
| 267 |
-
{
|
| 268 |
-
"cell_type": "code",
|
| 269 |
-
"execution_count": null,
|
| 270 |
-
"metadata": {},
|
| 271 |
-
"outputs": [],
|
| 272 |
-
"source": []
|
| 273 |
-
}
|
| 274 |
-
],
|
| 275 |
-
"metadata": {
|
| 276 |
-
"kernelspec": {
|
| 277 |
-
"display_name": ".venv",
|
| 278 |
-
"language": "python",
|
| 279 |
-
"name": "python3"
|
| 280 |
-
},
|
| 281 |
-
"language_info": {
|
| 282 |
-
"codemirror_mode": {
|
| 283 |
-
"name": "ipython",
|
| 284 |
-
"version": 3
|
| 285 |
-
},
|
| 286 |
-
"file_extension": ".py",
|
| 287 |
-
"mimetype": "text/x-python",
|
| 288 |
-
"name": "python",
|
| 289 |
-
"nbconvert_exporter": "python",
|
| 290 |
-
"pygments_lexer": "ipython3",
|
| 291 |
-
"version": "3.12.10"
|
| 292 |
-
}
|
| 293 |
-
},
|
| 294 |
-
"nbformat": 4,
|
| 295 |
-
"nbformat_minor": 2
|
| 296 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab1_moneek.ipynb
DELETED
|
@@ -1,407 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
-
" <tr>\n",
|
| 16 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
-
" </td>\n",
|
| 19 |
-
" <td>\n",
|
| 20 |
-
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
-
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
-
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
-
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
-
" Well in that case, you're ready!!\n",
|
| 25 |
-
" </span>\n",
|
| 26 |
-
" </td>\n",
|
| 27 |
-
" </tr>\n",
|
| 28 |
-
"</table>"
|
| 29 |
-
]
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"cell_type": "markdown",
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"source": [
|
| 35 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
-
" <tr>\n",
|
| 37 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
-
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
-
" </td>\n",
|
| 40 |
-
" <td>\n",
|
| 41 |
-
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
-
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
-
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
-
" </span>\n",
|
| 45 |
-
" </td>\n",
|
| 46 |
-
" </tr>\n",
|
| 47 |
-
"</table>"
|
| 48 |
-
]
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"cell_type": "markdown",
|
| 52 |
-
"metadata": {},
|
| 53 |
-
"source": [
|
| 54 |
-
"### And please do remember to contact me if I can help\n",
|
| 55 |
-
"\n",
|
| 56 |
-
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
-
"\n",
|
| 58 |
-
"\n",
|
| 59 |
-
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
-
"\n",
|
| 61 |
-
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 62 |
-
"- Open extensions (View >> extensions)\n",
|
| 63 |
-
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
-
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
-
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
-
"\n",
|
| 67 |
-
"And then:\n",
|
| 68 |
-
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
-
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
-
"3. Enjoy!\n",
|
| 71 |
-
"\n",
|
| 72 |
-
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
-
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
-
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings(NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
-
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
-
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
-
"And then try again.\n",
|
| 78 |
-
"\n",
|
| 79 |
-
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
-
"`conda deactivate` \n",
|
| 81 |
-
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
-
"`conda config --set auto_activate_base false` \n",
|
| 83 |
-
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
-
]
|
| 85 |
-
},
|
| 86 |
-
{
|
| 87 |
-
"cell_type": "code",
|
| 88 |
-
"execution_count": null,
|
| 89 |
-
"metadata": {},
|
| 90 |
-
"outputs": [],
|
| 91 |
-
"source": [
|
| 92 |
-
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
-
"\n",
|
| 94 |
-
"from dotenv import load_dotenv\n"
|
| 95 |
-
]
|
| 96 |
-
},
|
| 97 |
-
{
|
| 98 |
-
"cell_type": "code",
|
| 99 |
-
"execution_count": null,
|
| 100 |
-
"metadata": {},
|
| 101 |
-
"outputs": [],
|
| 102 |
-
"source": [
|
| 103 |
-
"# Next it's time to load the API keys into environment variables\n",
|
| 104 |
-
"# If this returns false, see the next cell!\n",
|
| 105 |
-
"\n",
|
| 106 |
-
"load_dotenv(override=True)"
|
| 107 |
-
]
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"cell_type": "markdown",
|
| 111 |
-
"metadata": {},
|
| 112 |
-
"source": [
|
| 113 |
-
"### Wait, did that just output `False`??\n",
|
| 114 |
-
"\n",
|
| 115 |
-
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 116 |
-
"\n",
|
| 117 |
-
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 118 |
-
"\n",
|
| 119 |
-
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 120 |
-
]
|
| 121 |
-
},
|
| 122 |
-
{
|
| 123 |
-
"cell_type": "markdown",
|
| 124 |
-
"metadata": {},
|
| 125 |
-
"source": [
|
| 126 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 127 |
-
" <tr>\n",
|
| 128 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 129 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 130 |
-
" </td>\n",
|
| 131 |
-
" <td>\n",
|
| 132 |
-
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 133 |
-
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 134 |
-
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 135 |
-
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 136 |
-
" </span>\n",
|
| 137 |
-
" </td>\n",
|
| 138 |
-
" </tr>\n",
|
| 139 |
-
"</table>"
|
| 140 |
-
]
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"cell_type": "code",
|
| 144 |
-
"execution_count": null,
|
| 145 |
-
"metadata": {},
|
| 146 |
-
"outputs": [],
|
| 147 |
-
"source": [
|
| 148 |
-
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"import os\n",
|
| 151 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 152 |
-
"\n",
|
| 153 |
-
"if openai_api_key:\n",
|
| 154 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 155 |
-
"else:\n",
|
| 156 |
-
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 157 |
-
" \n"
|
| 158 |
-
]
|
| 159 |
-
},
|
| 160 |
-
{
|
| 161 |
-
"cell_type": "code",
|
| 162 |
-
"execution_count": null,
|
| 163 |
-
"metadata": {},
|
| 164 |
-
"outputs": [],
|
| 165 |
-
"source": [
|
| 166 |
-
"# And now - the all important import statement\n",
|
| 167 |
-
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 168 |
-
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"from openai import OpenAI"
|
| 171 |
-
]
|
| 172 |
-
},
|
| 173 |
-
{
|
| 174 |
-
"cell_type": "code",
|
| 175 |
-
"execution_count": null,
|
| 176 |
-
"metadata": {},
|
| 177 |
-
"outputs": [],
|
| 178 |
-
"source": [
|
| 179 |
-
"# And now we'll create an instance of the OpenAI class\n",
|
| 180 |
-
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 181 |
-
"# If you get a NameError - head over to the guides folder (guide 6)to learn about NameErrors - always instantly fixable\n",
|
| 182 |
-
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 183 |
-
"\n",
|
| 184 |
-
"openai = OpenAI()"
|
| 185 |
-
]
|
| 186 |
-
},
|
| 187 |
-
{
|
| 188 |
-
"cell_type": "code",
|
| 189 |
-
"execution_count": null,
|
| 190 |
-
"metadata": {},
|
| 191 |
-
"outputs": [],
|
| 192 |
-
"source": [
|
| 193 |
-
"# Create a list of messages in the familiar OpenAI format\n",
|
| 194 |
-
"\n",
|
| 195 |
-
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 196 |
-
]
|
| 197 |
-
},
|
| 198 |
-
{
|
| 199 |
-
"cell_type": "code",
|
| 200 |
-
"execution_count": null,
|
| 201 |
-
"metadata": {},
|
| 202 |
-
"outputs": [],
|
| 203 |
-
"source": [
|
| 204 |
-
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 205 |
-
"# This uses GPT 4.1 nano, the incredibly cheap model\n",
|
| 206 |
-
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 207 |
-
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 208 |
-
"\n",
|
| 209 |
-
"response = openai.chat.completions.create(\n",
|
| 210 |
-
" model=\"gpt-4.1-nano\",\n",
|
| 211 |
-
" messages=messages\n",
|
| 212 |
-
")\n",
|
| 213 |
-
"\n",
|
| 214 |
-
"print(response.choices[0].message.content)\n"
|
| 215 |
-
]
|
| 216 |
-
},
|
| 217 |
-
{
|
| 218 |
-
"cell_type": "code",
|
| 219 |
-
"execution_count": null,
|
| 220 |
-
"metadata": {},
|
| 221 |
-
"outputs": [],
|
| 222 |
-
"source": [
|
| 223 |
-
"# And now - let's ask for a question:\n",
|
| 224 |
-
"\n",
|
| 225 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 226 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 227 |
-
]
|
| 228 |
-
},
|
| 229 |
-
{
|
| 230 |
-
"cell_type": "code",
|
| 231 |
-
"execution_count": null,
|
| 232 |
-
"metadata": {},
|
| 233 |
-
"outputs": [],
|
| 234 |
-
"source": [
|
| 235 |
-
"# ask it - this uses GPT 4.1 mini, still cheap but more powerful than nano\n",
|
| 236 |
-
"\n",
|
| 237 |
-
"response = openai.chat.completions.create(\n",
|
| 238 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 239 |
-
" messages=messages\n",
|
| 240 |
-
")\n",
|
| 241 |
-
"\n",
|
| 242 |
-
"question = response.choices[0].message.content\n",
|
| 243 |
-
"\n",
|
| 244 |
-
"print(question)\n"
|
| 245 |
-
]
|
| 246 |
-
},
|
| 247 |
-
{
|
| 248 |
-
"cell_type": "code",
|
| 249 |
-
"execution_count": null,
|
| 250 |
-
"metadata": {},
|
| 251 |
-
"outputs": [],
|
| 252 |
-
"source": [
|
| 253 |
-
"# form a new messages list\n",
|
| 254 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 255 |
-
]
|
| 256 |
-
},
|
| 257 |
-
{
|
| 258 |
-
"cell_type": "code",
|
| 259 |
-
"execution_count": null,
|
| 260 |
-
"metadata": {},
|
| 261 |
-
"outputs": [],
|
| 262 |
-
"source": [
|
| 263 |
-
"# Ask it again\n",
|
| 264 |
-
"\n",
|
| 265 |
-
"response = openai.chat.completions.create(\n",
|
| 266 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 267 |
-
" messages=messages\n",
|
| 268 |
-
")\n",
|
| 269 |
-
"\n",
|
| 270 |
-
"answer = response.choices[0].message.content\n",
|
| 271 |
-
"print(answer)\n"
|
| 272 |
-
]
|
| 273 |
-
},
|
| 274 |
-
{
|
| 275 |
-
"cell_type": "code",
|
| 276 |
-
"execution_count": null,
|
| 277 |
-
"metadata": {},
|
| 278 |
-
"outputs": [],
|
| 279 |
-
"source": [
|
| 280 |
-
"from IPython.display import Markdown, display\n",
|
| 281 |
-
"\n",
|
| 282 |
-
"display(Markdown(answer))\n",
|
| 283 |
-
"\n"
|
| 284 |
-
]
|
| 285 |
-
},
|
| 286 |
-
{
|
| 287 |
-
"cell_type": "markdown",
|
| 288 |
-
"metadata": {},
|
| 289 |
-
"source": [
|
| 290 |
-
"# Congratulations!\n",
|
| 291 |
-
"\n",
|
| 292 |
-
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 293 |
-
"\n",
|
| 294 |
-
"Next time things get more interesting..."
|
| 295 |
-
]
|
| 296 |
-
},
|
| 297 |
-
{
|
| 298 |
-
"cell_type": "markdown",
|
| 299 |
-
"metadata": {},
|
| 300 |
-
"source": [
|
| 301 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 302 |
-
" <tr>\n",
|
| 303 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 304 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 305 |
-
" </td>\n",
|
| 306 |
-
" <td>\n",
|
| 307 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 308 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 309 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 310 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 311 |
-
" Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 312 |
-
" We will cover this in upcoming labs, so don't worry if you're unsure... just give it a try!\n",
|
| 313 |
-
" </span>\n",
|
| 314 |
-
" </td>\n",
|
| 315 |
-
" </tr>\n",
|
| 316 |
-
"</table>"
|
| 317 |
-
]
|
| 318 |
-
},
|
| 319 |
-
{
|
| 320 |
-
"cell_type": "code",
|
| 321 |
-
"execution_count": null,
|
| 322 |
-
"metadata": {},
|
| 323 |
-
"outputs": [],
|
| 324 |
-
"source": [
|
| 325 |
-
"# First create the messages:\n",
|
| 326 |
-
"question = \"Pick a business area that may have agentic AI opportunities\"\n",
|
| 327 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n",
|
| 328 |
-
"\n",
|
| 329 |
-
"# Then make the first call:\n",
|
| 330 |
-
"\n",
|
| 331 |
-
"response = openai.chat.completions.create(\n",
|
| 332 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 333 |
-
" messages=messages\n",
|
| 334 |
-
")\n",
|
| 335 |
-
"\n",
|
| 336 |
-
"# Then read the business idea:\n",
|
| 337 |
-
"\n",
|
| 338 |
-
"business_idea = response.choices[0].message.content\n",
|
| 339 |
-
"print(business_idea)\n",
|
| 340 |
-
"\n",
|
| 341 |
-
"# And repeat! In the next message, include the business idea within the message"
|
| 342 |
-
]
|
| 343 |
-
},
|
| 344 |
-
{
|
| 345 |
-
"cell_type": "code",
|
| 346 |
-
"execution_count": null,
|
| 347 |
-
"metadata": {},
|
| 348 |
-
"outputs": [],
|
| 349 |
-
"source": [
|
| 350 |
-
"messages = [{\"role\": \"user\", \"content\": question + \"\\n\\n\" + business_idea},\n",
|
| 351 |
-
" {\"role\": \"assistant\", \"content\": \"What is the pain point in this industry?\" }]\n",
|
| 352 |
-
"\n",
|
| 353 |
-
"response = openai.chat.completions.create(\n",
|
| 354 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 355 |
-
" messages=messages\n",
|
| 356 |
-
")\n",
|
| 357 |
-
"\n",
|
| 358 |
-
"pain_point = response.choices[0].message.content\n",
|
| 359 |
-
"print(pain_point)"
|
| 360 |
-
]
|
| 361 |
-
},
|
| 362 |
-
{
|
| 363 |
-
"cell_type": "code",
|
| 364 |
-
"execution_count": null,
|
| 365 |
-
"metadata": {},
|
| 366 |
-
"outputs": [],
|
| 367 |
-
"source": [
|
| 368 |
-
"messages = [{\"role\": \"user\", \"content\": question + \"\\n\\n\" + business_idea + \"\\n\\n\" + pain_point}, \n",
|
| 369 |
-
" {\"role\": \"assistant\", \"content\": \"What is the Agentic AI solution?\"}]\n",
|
| 370 |
-
"\n",
|
| 371 |
-
"response = openai.chat.completions.create(\n",
|
| 372 |
-
" model=\"gpt-4.1-mini\",\n",
|
| 373 |
-
" messages=messages\n",
|
| 374 |
-
")\n",
|
| 375 |
-
"\n",
|
| 376 |
-
"agentic_solution = response.choices[0].message.content\n",
|
| 377 |
-
"print(agentic_solution)\n"
|
| 378 |
-
]
|
| 379 |
-
},
|
| 380 |
-
{
|
| 381 |
-
"cell_type": "markdown",
|
| 382 |
-
"metadata": {},
|
| 383 |
-
"source": []
|
| 384 |
-
}
|
| 385 |
-
],
|
| 386 |
-
"metadata": {
|
| 387 |
-
"kernelspec": {
|
| 388 |
-
"display_name": ".venv",
|
| 389 |
-
"language": "python",
|
| 390 |
-
"name": "python3"
|
| 391 |
-
},
|
| 392 |
-
"language_info": {
|
| 393 |
-
"codemirror_mode": {
|
| 394 |
-
"name": "ipython",
|
| 395 |
-
"version": 3
|
| 396 |
-
},
|
| 397 |
-
"file_extension": ".py",
|
| 398 |
-
"mimetype": "text/x-python",
|
| 399 |
-
"name": "python",
|
| 400 |
-
"nbconvert_exporter": "python",
|
| 401 |
-
"pygments_lexer": "ipython3",
|
| 402 |
-
"version": "3.12.11"
|
| 403 |
-
}
|
| 404 |
-
},
|
| 405 |
-
"nbformat": 4,
|
| 406 |
-
"nbformat_minor": 2
|
| 407 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab1_open_router.ipynb
DELETED
|
@@ -1,323 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
-
" <tr>\n",
|
| 16 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
-
" </td>\n",
|
| 19 |
-
" <td>\n",
|
| 20 |
-
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
-
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
-
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 23 |
-
" Well in that case, you're ready!!\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "markdown",
|
| 32 |
-
"metadata": {},
|
| 33 |
-
"source": [
|
| 34 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 35 |
-
" <tr>\n",
|
| 36 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 37 |
-
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 38 |
-
" </td>\n",
|
| 39 |
-
" <td>\n",
|
| 40 |
-
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 41 |
-
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 42 |
-
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 43 |
-
" </span>\n",
|
| 44 |
-
" </td>\n",
|
| 45 |
-
" </tr>\n",
|
| 46 |
-
"</table>"
|
| 47 |
-
]
|
| 48 |
-
},
|
| 49 |
-
{
|
| 50 |
-
"cell_type": "markdown",
|
| 51 |
-
"metadata": {},
|
| 52 |
-
"source": [
|
| 53 |
-
"### And please do remember to contact me if I can help\n",
|
| 54 |
-
"\n",
|
| 55 |
-
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 56 |
-
"\n",
|
| 57 |
-
"\n",
|
| 58 |
-
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 59 |
-
"\n",
|
| 60 |
-
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 61 |
-
"- Open extensions (View >> extensions)\n",
|
| 62 |
-
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 63 |
-
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 64 |
-
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 65 |
-
"\n",
|
| 66 |
-
"And then:\n",
|
| 67 |
-
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 68 |
-
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 69 |
-
"3. Enjoy!\n",
|
| 70 |
-
"\n",
|
| 71 |
-
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 72 |
-
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 73 |
-
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 74 |
-
"2. In the Settings search bar, type \"venv\" \n",
|
| 75 |
-
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 76 |
-
"And then try again.\n",
|
| 77 |
-
"\n",
|
| 78 |
-
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 79 |
-
"`conda deactivate` \n",
|
| 80 |
-
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 81 |
-
"`conda config --set auto_activate_base false` \n",
|
| 82 |
-
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 83 |
-
]
|
| 84 |
-
},
|
| 85 |
-
{
|
| 86 |
-
"cell_type": "code",
|
| 87 |
-
"execution_count": 76,
|
| 88 |
-
"metadata": {},
|
| 89 |
-
"outputs": [],
|
| 90 |
-
"source": [
|
| 91 |
-
"# First let's do an import\n",
|
| 92 |
-
"from dotenv import load_dotenv\n"
|
| 93 |
-
]
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"cell_type": "code",
|
| 97 |
-
"execution_count": null,
|
| 98 |
-
"metadata": {},
|
| 99 |
-
"outputs": [],
|
| 100 |
-
"source": [
|
| 101 |
-
"# Next it's time to load the API keys into environment variables\n",
|
| 102 |
-
"\n",
|
| 103 |
-
"load_dotenv(override=True)"
|
| 104 |
-
]
|
| 105 |
-
},
|
| 106 |
-
{
|
| 107 |
-
"cell_type": "code",
|
| 108 |
-
"execution_count": null,
|
| 109 |
-
"metadata": {},
|
| 110 |
-
"outputs": [],
|
| 111 |
-
"source": [
|
| 112 |
-
"# Check the keys\n",
|
| 113 |
-
"\n",
|
| 114 |
-
"import os\n",
|
| 115 |
-
"open_router_api_key = os.getenv('OPEN_ROUTER_API_KEY')\n",
|
| 116 |
-
"\n",
|
| 117 |
-
"if open_router_api_key:\n",
|
| 118 |
-
" print(f\"Open router API Key exists and begins {open_router_api_key[:8]}\")\n",
|
| 119 |
-
"else:\n",
|
| 120 |
-
" print(\"Open router API Key not set - please head to the troubleshooting guide in the setup folder\")\n"
|
| 121 |
-
]
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"cell_type": "code",
|
| 125 |
-
"execution_count": 79,
|
| 126 |
-
"metadata": {},
|
| 127 |
-
"outputs": [],
|
| 128 |
-
"source": [
|
| 129 |
-
"from openai import OpenAI"
|
| 130 |
-
]
|
| 131 |
-
},
|
| 132 |
-
{
|
| 133 |
-
"cell_type": "code",
|
| 134 |
-
"execution_count": 80,
|
| 135 |
-
"metadata": {},
|
| 136 |
-
"outputs": [],
|
| 137 |
-
"source": [
|
| 138 |
-
"# Initialize the client to point at OpenRouter instead of OpenAI\n",
|
| 139 |
-
"# You can use the exact same OpenAI Python package—just swap the base_url!\n",
|
| 140 |
-
"client = OpenAI(\n",
|
| 141 |
-
" base_url=\"https://openrouter.ai/api/v1\",\n",
|
| 142 |
-
" api_key=open_router_api_key\n",
|
| 143 |
-
")"
|
| 144 |
-
]
|
| 145 |
-
},
|
| 146 |
-
{
|
| 147 |
-
"cell_type": "code",
|
| 148 |
-
"execution_count": 81,
|
| 149 |
-
"metadata": {},
|
| 150 |
-
"outputs": [],
|
| 151 |
-
"source": [
|
| 152 |
-
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 153 |
-
]
|
| 154 |
-
},
|
| 155 |
-
{
|
| 156 |
-
"cell_type": "code",
|
| 157 |
-
"execution_count": null,
|
| 158 |
-
"metadata": {},
|
| 159 |
-
"outputs": [],
|
| 160 |
-
"source": [
|
| 161 |
-
"client = OpenAI(\n",
|
| 162 |
-
" base_url=\"https://openrouter.ai/api/v1\",\n",
|
| 163 |
-
" api_key=open_router_api_key\n",
|
| 164 |
-
")\n",
|
| 165 |
-
"\n",
|
| 166 |
-
"resp = client.chat.completions.create(\n",
|
| 167 |
-
" # Select a model from https://openrouter.ai/models and provide the model name here\n",
|
| 168 |
-
" model=\"meta-llama/llama-3.3-8b-instruct:free\",\n",
|
| 169 |
-
" messages=messages\n",
|
| 170 |
-
")\n",
|
| 171 |
-
"print(resp.choices[0].message.content)"
|
| 172 |
-
]
|
| 173 |
-
},
|
| 174 |
-
{
|
| 175 |
-
"cell_type": "code",
|
| 176 |
-
"execution_count": 83,
|
| 177 |
-
"metadata": {},
|
| 178 |
-
"outputs": [],
|
| 179 |
-
"source": [
|
| 180 |
-
"# And now - let's ask for a question:\n",
|
| 181 |
-
"\n",
|
| 182 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 183 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 184 |
-
]
|
| 185 |
-
},
|
| 186 |
-
{
|
| 187 |
-
"cell_type": "code",
|
| 188 |
-
"execution_count": null,
|
| 189 |
-
"metadata": {},
|
| 190 |
-
"outputs": [],
|
| 191 |
-
"source": [
|
| 192 |
-
"response = client.chat.completions.create(\n",
|
| 193 |
-
" model=\"meta-llama/llama-3.3-8b-instruct:free\",\n",
|
| 194 |
-
" messages=messages\n",
|
| 195 |
-
")\n",
|
| 196 |
-
"\n",
|
| 197 |
-
"question = response.choices[0].message.content\n",
|
| 198 |
-
"\n",
|
| 199 |
-
"print(question)"
|
| 200 |
-
]
|
| 201 |
-
},
|
| 202 |
-
{
|
| 203 |
-
"cell_type": "code",
|
| 204 |
-
"execution_count": 85,
|
| 205 |
-
"metadata": {},
|
| 206 |
-
"outputs": [],
|
| 207 |
-
"source": [
|
| 208 |
-
"# form a new messages list\n",
|
| 209 |
-
"\n",
|
| 210 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 211 |
-
]
|
| 212 |
-
},
|
| 213 |
-
{
|
| 214 |
-
"cell_type": "code",
|
| 215 |
-
"execution_count": null,
|
| 216 |
-
"metadata": {},
|
| 217 |
-
"outputs": [],
|
| 218 |
-
"source": [
|
| 219 |
-
"# Ask it again\n",
|
| 220 |
-
"\n",
|
| 221 |
-
"response = client.chat.completions.create(\n",
|
| 222 |
-
" model=\"meta-llama/llama-3.3-8b-instruct:free\",\n",
|
| 223 |
-
" messages=messages\n",
|
| 224 |
-
")\n",
|
| 225 |
-
"\n",
|
| 226 |
-
"answer = response.choices[0].message.content\n",
|
| 227 |
-
"print(answer)"
|
| 228 |
-
]
|
| 229 |
-
},
|
| 230 |
-
{
|
| 231 |
-
"cell_type": "code",
|
| 232 |
-
"execution_count": null,
|
| 233 |
-
"metadata": {},
|
| 234 |
-
"outputs": [],
|
| 235 |
-
"source": [
|
| 236 |
-
"from IPython.display import Markdown, display\n",
|
| 237 |
-
"\n",
|
| 238 |
-
"display(Markdown(answer))\n",
|
| 239 |
-
"\n"
|
| 240 |
-
]
|
| 241 |
-
},
|
| 242 |
-
{
|
| 243 |
-
"cell_type": "markdown",
|
| 244 |
-
"metadata": {},
|
| 245 |
-
"source": [
|
| 246 |
-
"# Congratulations!\n",
|
| 247 |
-
"\n",
|
| 248 |
-
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 249 |
-
"\n",
|
| 250 |
-
"Next time things get more interesting..."
|
| 251 |
-
]
|
| 252 |
-
},
|
| 253 |
-
{
|
| 254 |
-
"cell_type": "markdown",
|
| 255 |
-
"metadata": {},
|
| 256 |
-
"source": [
|
| 257 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 258 |
-
" <tr>\n",
|
| 259 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 260 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 261 |
-
" </td>\n",
|
| 262 |
-
" <td>\n",
|
| 263 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 264 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 265 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 266 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 267 |
-
" Finally have a third LLM call propose the Agentic AI solution.\n",
|
| 268 |
-
" </span>\n",
|
| 269 |
-
" </td>\n",
|
| 270 |
-
" </tr>\n",
|
| 271 |
-
"</table>"
|
| 272 |
-
]
|
| 273 |
-
},
|
| 274 |
-
{
|
| 275 |
-
"cell_type": "code",
|
| 276 |
-
"execution_count": null,
|
| 277 |
-
"metadata": {},
|
| 278 |
-
"outputs": [],
|
| 279 |
-
"source": [
|
| 280 |
-
"# First create the messages:\n",
|
| 281 |
-
"\n",
|
| 282 |
-
"\n",
|
| 283 |
-
"messages = [\"Something here\"]\n",
|
| 284 |
-
"\n",
|
| 285 |
-
"# Then make the first call:\n",
|
| 286 |
-
"\n",
|
| 287 |
-
"response =\n",
|
| 288 |
-
"\n",
|
| 289 |
-
"# Then read the business idea:\n",
|
| 290 |
-
"\n",
|
| 291 |
-
"business_idea = response.\n",
|
| 292 |
-
"\n",
|
| 293 |
-
"# And repeat!"
|
| 294 |
-
]
|
| 295 |
-
},
|
| 296 |
-
{
|
| 297 |
-
"cell_type": "markdown",
|
| 298 |
-
"metadata": {},
|
| 299 |
-
"source": []
|
| 300 |
-
}
|
| 301 |
-
],
|
| 302 |
-
"metadata": {
|
| 303 |
-
"kernelspec": {
|
| 304 |
-
"display_name": ".venv",
|
| 305 |
-
"language": "python",
|
| 306 |
-
"name": "python3"
|
| 307 |
-
},
|
| 308 |
-
"language_info": {
|
| 309 |
-
"codemirror_mode": {
|
| 310 |
-
"name": "ipython",
|
| 311 |
-
"version": 3
|
| 312 |
-
},
|
| 313 |
-
"file_extension": ".py",
|
| 314 |
-
"mimetype": "text/x-python",
|
| 315 |
-
"name": "python",
|
| 316 |
-
"nbconvert_exporter": "python",
|
| 317 |
-
"pygments_lexer": "ipython3",
|
| 318 |
-
"version": "3.12.7"
|
| 319 |
-
}
|
| 320 |
-
},
|
| 321 |
-
"nbformat": 4,
|
| 322 |
-
"nbformat_minor": 2
|
| 323 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab2_Kaushik_Parallelization.ipynb
DELETED
|
@@ -1,355 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": 1,
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"outputs": [],
|
| 8 |
-
"source": [
|
| 9 |
-
"import os\n",
|
| 10 |
-
"import json\n",
|
| 11 |
-
"from dotenv import load_dotenv\n",
|
| 12 |
-
"from openai import OpenAI\n",
|
| 13 |
-
"from IPython.display import Markdown"
|
| 14 |
-
]
|
| 15 |
-
},
|
| 16 |
-
{
|
| 17 |
-
"cell_type": "markdown",
|
| 18 |
-
"metadata": {},
|
| 19 |
-
"source": [
|
| 20 |
-
"### Refresh dot env"
|
| 21 |
-
]
|
| 22 |
-
},
|
| 23 |
-
{
|
| 24 |
-
"cell_type": "code",
|
| 25 |
-
"execution_count": null,
|
| 26 |
-
"metadata": {},
|
| 27 |
-
"outputs": [],
|
| 28 |
-
"source": [
|
| 29 |
-
"load_dotenv(override=True)"
|
| 30 |
-
]
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"cell_type": "code",
|
| 34 |
-
"execution_count": 3,
|
| 35 |
-
"metadata": {},
|
| 36 |
-
"outputs": [],
|
| 37 |
-
"source": [
|
| 38 |
-
"open_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
|
| 39 |
-
"google_api_key = os.getenv(\"GOOGLE_API_KEY\")"
|
| 40 |
-
]
|
| 41 |
-
},
|
| 42 |
-
{
|
| 43 |
-
"cell_type": "markdown",
|
| 44 |
-
"metadata": {},
|
| 45 |
-
"source": [
|
| 46 |
-
"### Create initial query to get challenge recommendation"
|
| 47 |
-
]
|
| 48 |
-
},
|
| 49 |
-
{
|
| 50 |
-
"cell_type": "code",
|
| 51 |
-
"execution_count": 4,
|
| 52 |
-
"metadata": {},
|
| 53 |
-
"outputs": [],
|
| 54 |
-
"source": [
|
| 55 |
-
"query = 'Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. '\n",
|
| 56 |
-
"query += 'Answer only with the question, no explanation.'\n",
|
| 57 |
-
"\n",
|
| 58 |
-
"messages = [{'role':'user', 'content':query}]"
|
| 59 |
-
]
|
| 60 |
-
},
|
| 61 |
-
{
|
| 62 |
-
"cell_type": "code",
|
| 63 |
-
"execution_count": null,
|
| 64 |
-
"metadata": {},
|
| 65 |
-
"outputs": [],
|
| 66 |
-
"source": [
|
| 67 |
-
"print(messages)"
|
| 68 |
-
]
|
| 69 |
-
},
|
| 70 |
-
{
|
| 71 |
-
"cell_type": "markdown",
|
| 72 |
-
"metadata": {},
|
| 73 |
-
"source": [
|
| 74 |
-
"### Call openai gpt-4o-mini "
|
| 75 |
-
]
|
| 76 |
-
},
|
| 77 |
-
{
|
| 78 |
-
"cell_type": "code",
|
| 79 |
-
"execution_count": 6,
|
| 80 |
-
"metadata": {},
|
| 81 |
-
"outputs": [],
|
| 82 |
-
"source": [
|
| 83 |
-
"openai = OpenAI()\n",
|
| 84 |
-
"\n",
|
| 85 |
-
"response = openai.chat.completions.create(\n",
|
| 86 |
-
" messages=messages,\n",
|
| 87 |
-
" model='gpt-4o-mini'\n",
|
| 88 |
-
")\n",
|
| 89 |
-
"\n",
|
| 90 |
-
"challange = response.choices[0].message.content\n"
|
| 91 |
-
]
|
| 92 |
-
},
|
| 93 |
-
{
|
| 94 |
-
"cell_type": "code",
|
| 95 |
-
"execution_count": null,
|
| 96 |
-
"metadata": {},
|
| 97 |
-
"outputs": [],
|
| 98 |
-
"source": [
|
| 99 |
-
"print(challange)"
|
| 100 |
-
]
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"cell_type": "code",
|
| 104 |
-
"execution_count": 8,
|
| 105 |
-
"metadata": {},
|
| 106 |
-
"outputs": [],
|
| 107 |
-
"source": [
|
| 108 |
-
"competitors = []\n",
|
| 109 |
-
"answers = []"
|
| 110 |
-
]
|
| 111 |
-
},
|
| 112 |
-
{
|
| 113 |
-
"cell_type": "markdown",
|
| 114 |
-
"metadata": {},
|
| 115 |
-
"source": [
|
| 116 |
-
"### Create messages with the challenge query"
|
| 117 |
-
]
|
| 118 |
-
},
|
| 119 |
-
{
|
| 120 |
-
"cell_type": "code",
|
| 121 |
-
"execution_count": 9,
|
| 122 |
-
"metadata": {},
|
| 123 |
-
"outputs": [],
|
| 124 |
-
"source": [
|
| 125 |
-
"messages = [{'role':'user', 'content':challange}]"
|
| 126 |
-
]
|
| 127 |
-
},
|
| 128 |
-
{
|
| 129 |
-
"cell_type": "code",
|
| 130 |
-
"execution_count": null,
|
| 131 |
-
"metadata": {},
|
| 132 |
-
"outputs": [],
|
| 133 |
-
"source": [
|
| 134 |
-
"print(messages)"
|
| 135 |
-
]
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"cell_type": "code",
|
| 139 |
-
"execution_count": null,
|
| 140 |
-
"metadata": {},
|
| 141 |
-
"outputs": [],
|
| 142 |
-
"source": [
|
| 143 |
-
"!ollama pull llama3.2"
|
| 144 |
-
]
|
| 145 |
-
},
|
| 146 |
-
{
|
| 147 |
-
"cell_type": "code",
|
| 148 |
-
"execution_count": 12,
|
| 149 |
-
"metadata": {},
|
| 150 |
-
"outputs": [],
|
| 151 |
-
"source": [
|
| 152 |
-
"from threading import Thread"
|
| 153 |
-
]
|
| 154 |
-
},
|
| 155 |
-
{
|
| 156 |
-
"cell_type": "code",
|
| 157 |
-
"execution_count": 13,
|
| 158 |
-
"metadata": {},
|
| 159 |
-
"outputs": [],
|
| 160 |
-
"source": [
|
| 161 |
-
"def gpt_mini_processor():\n",
|
| 162 |
-
" modleName = 'gpt-4o-mini'\n",
|
| 163 |
-
" competitors.append(modleName)\n",
|
| 164 |
-
" response_gpt = openai.chat.completions.create(\n",
|
| 165 |
-
" messages=messages,\n",
|
| 166 |
-
" model=modleName\n",
|
| 167 |
-
" )\n",
|
| 168 |
-
" answers.append(response_gpt.choices[0].message.content)\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"def gemini_processor():\n",
|
| 171 |
-
" gemini = OpenAI(api_key=google_api_key, base_url='https://generativelanguage.googleapis.com/v1beta/openai/')\n",
|
| 172 |
-
" modleName = 'gemini-2.0-flash'\n",
|
| 173 |
-
" competitors.append(modleName)\n",
|
| 174 |
-
" response_gemini = gemini.chat.completions.create(\n",
|
| 175 |
-
" messages=messages,\n",
|
| 176 |
-
" model=modleName\n",
|
| 177 |
-
" )\n",
|
| 178 |
-
" answers.append(response_gemini.choices[0].message.content)\n",
|
| 179 |
-
"\n",
|
| 180 |
-
"def llama_processor():\n",
|
| 181 |
-
" ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 182 |
-
" modleName = 'llama3.2'\n",
|
| 183 |
-
" competitors.append(modleName)\n",
|
| 184 |
-
" response_llama = ollama.chat.completions.create(\n",
|
| 185 |
-
" messages=messages,\n",
|
| 186 |
-
" model=modleName\n",
|
| 187 |
-
" )\n",
|
| 188 |
-
" answers.append(response_llama.choices[0].message.content)"
|
| 189 |
-
]
|
| 190 |
-
},
|
| 191 |
-
{
|
| 192 |
-
"cell_type": "markdown",
|
| 193 |
-
"metadata": {},
|
| 194 |
-
"source": [
|
| 195 |
-
"### Parallel execution of LLM calls"
|
| 196 |
-
]
|
| 197 |
-
},
|
| 198 |
-
{
|
| 199 |
-
"cell_type": "code",
|
| 200 |
-
"execution_count": 14,
|
| 201 |
-
"metadata": {},
|
| 202 |
-
"outputs": [],
|
| 203 |
-
"source": [
|
| 204 |
-
"thread1 = Thread(target=gpt_mini_processor)\n",
|
| 205 |
-
"thread2 = Thread(target=gemini_processor)\n",
|
| 206 |
-
"thread3 = Thread(target=llama_processor)\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"thread1.start()\n",
|
| 209 |
-
"thread2.start()\n",
|
| 210 |
-
"thread3.start()\n",
|
| 211 |
-
"\n",
|
| 212 |
-
"thread1.join()\n",
|
| 213 |
-
"thread2.join()\n",
|
| 214 |
-
"thread3.join()"
|
| 215 |
-
]
|
| 216 |
-
},
|
| 217 |
-
{
|
| 218 |
-
"cell_type": "code",
|
| 219 |
-
"execution_count": null,
|
| 220 |
-
"metadata": {},
|
| 221 |
-
"outputs": [],
|
| 222 |
-
"source": [
|
| 223 |
-
"print(competitors)\n",
|
| 224 |
-
"print(answers)"
|
| 225 |
-
]
|
| 226 |
-
},
|
| 227 |
-
{
|
| 228 |
-
"cell_type": "code",
|
| 229 |
-
"execution_count": null,
|
| 230 |
-
"metadata": {},
|
| 231 |
-
"outputs": [],
|
| 232 |
-
"source": [
|
| 233 |
-
"for competitor, answer in zip(competitors, answers):\n",
|
| 234 |
-
" print(f'Competitor:{competitor}\\n\\n{answer}')"
|
| 235 |
-
]
|
| 236 |
-
},
|
| 237 |
-
{
|
| 238 |
-
"cell_type": "code",
|
| 239 |
-
"execution_count": 17,
|
| 240 |
-
"metadata": {},
|
| 241 |
-
"outputs": [],
|
| 242 |
-
"source": [
|
| 243 |
-
"together = ''\n",
|
| 244 |
-
"for index, answer in enumerate(answers):\n",
|
| 245 |
-
" together += f'# Response from competitor {index + 1}\\n\\n'\n",
|
| 246 |
-
" together += answer + '\\n\\n'"
|
| 247 |
-
]
|
| 248 |
-
},
|
| 249 |
-
{
|
| 250 |
-
"cell_type": "code",
|
| 251 |
-
"execution_count": null,
|
| 252 |
-
"metadata": {},
|
| 253 |
-
"outputs": [],
|
| 254 |
-
"source": [
|
| 255 |
-
"print(together)"
|
| 256 |
-
]
|
| 257 |
-
},
|
| 258 |
-
{
|
| 259 |
-
"cell_type": "markdown",
|
| 260 |
-
"metadata": {},
|
| 261 |
-
"source": [
|
| 262 |
-
"### Prompt to judge the LLM results"
|
| 263 |
-
]
|
| 264 |
-
},
|
| 265 |
-
{
|
| 266 |
-
"cell_type": "code",
|
| 267 |
-
"execution_count": 19,
|
| 268 |
-
"metadata": {},
|
| 269 |
-
"outputs": [],
|
| 270 |
-
"source": [
|
| 271 |
-
"to_judge = f'''You are judging a competition between {len(competitors)} competitors.\n",
|
| 272 |
-
"Each model has been given this question:\n",
|
| 273 |
-
"\n",
|
| 274 |
-
"{challange}\n",
|
| 275 |
-
"\n",
|
| 276 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 277 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 278 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 279 |
-
"\n",
|
| 280 |
-
"Here are the responses from each competitor:\n",
|
| 281 |
-
"\n",
|
| 282 |
-
"{together}\n",
|
| 283 |
-
"\n",
|
| 284 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n",
|
| 285 |
-
"\n",
|
| 286 |
-
"'''"
|
| 287 |
-
]
|
| 288 |
-
},
|
| 289 |
-
{
|
| 290 |
-
"cell_type": "code",
|
| 291 |
-
"execution_count": 20,
|
| 292 |
-
"metadata": {},
|
| 293 |
-
"outputs": [],
|
| 294 |
-
"source": [
|
| 295 |
-
"to_judge_message = [{'role':'user', 'content':to_judge}]"
|
| 296 |
-
]
|
| 297 |
-
},
|
| 298 |
-
{
|
| 299 |
-
"cell_type": "markdown",
|
| 300 |
-
"metadata": {},
|
| 301 |
-
"source": [
|
| 302 |
-
"### Execute o3-mini to analyze the LLM results"
|
| 303 |
-
]
|
| 304 |
-
},
|
| 305 |
-
{
|
| 306 |
-
"cell_type": "code",
|
| 307 |
-
"execution_count": null,
|
| 308 |
-
"metadata": {},
|
| 309 |
-
"outputs": [],
|
| 310 |
-
"source": [
|
| 311 |
-
"openai = OpenAI()\n",
|
| 312 |
-
"response = openai.chat.completions.create(\n",
|
| 313 |
-
" messages=to_judge_message,\n",
|
| 314 |
-
" model='o3-mini'\n",
|
| 315 |
-
")\n",
|
| 316 |
-
"result = response.choices[0].message.content\n",
|
| 317 |
-
"print(result)"
|
| 318 |
-
]
|
| 319 |
-
},
|
| 320 |
-
{
|
| 321 |
-
"cell_type": "code",
|
| 322 |
-
"execution_count": null,
|
| 323 |
-
"metadata": {},
|
| 324 |
-
"outputs": [],
|
| 325 |
-
"source": [
|
| 326 |
-
"results_dict = json.loads(result)\n",
|
| 327 |
-
"ranks = results_dict[\"results\"]\n",
|
| 328 |
-
"for index, result in enumerate(ranks):\n",
|
| 329 |
-
" competitor = competitors[int(result)-1]\n",
|
| 330 |
-
" print(f\"Rank {index+1}: {competitor}\")"
|
| 331 |
-
]
|
| 332 |
-
}
|
| 333 |
-
],
|
| 334 |
-
"metadata": {
|
| 335 |
-
"kernelspec": {
|
| 336 |
-
"display_name": ".venv",
|
| 337 |
-
"language": "python",
|
| 338 |
-
"name": "python3"
|
| 339 |
-
},
|
| 340 |
-
"language_info": {
|
| 341 |
-
"codemirror_mode": {
|
| 342 |
-
"name": "ipython",
|
| 343 |
-
"version": 3
|
| 344 |
-
},
|
| 345 |
-
"file_extension": ".py",
|
| 346 |
-
"mimetype": "text/x-python",
|
| 347 |
-
"name": "python",
|
| 348 |
-
"nbconvert_exporter": "python",
|
| 349 |
-
"pygments_lexer": "ipython3",
|
| 350 |
-
"version": "3.12.10"
|
| 351 |
-
}
|
| 352 |
-
},
|
| 353 |
-
"nbformat": 4,
|
| 354 |
-
"nbformat_minor": 2
|
| 355 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/1_lab2_Routing_Workflow.ipynb
DELETED
|
@@ -1,514 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Judging and Routing — Optimizing Resource Usage by Evaluating Problem Complexity"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"In the original Lab 2, we explored the **Orchestrator–Worker pattern**, where a planner sent the same question to multiple agents, and a judge assessed their responses to evaluate agent intelligence.\n",
|
| 15 |
-
"\n",
|
| 16 |
-
"In this notebook, we extend that design by adding multiple judges and a routing component to optimize model usage based on task complexity. "
|
| 17 |
-
]
|
| 18 |
-
},
|
| 19 |
-
{
|
| 20 |
-
"cell_type": "markdown",
|
| 21 |
-
"metadata": {},
|
| 22 |
-
"source": [
|
| 23 |
-
"## Imports and Environment Setup"
|
| 24 |
-
]
|
| 25 |
-
},
|
| 26 |
-
{
|
| 27 |
-
"cell_type": "code",
|
| 28 |
-
"execution_count": 1,
|
| 29 |
-
"metadata": {},
|
| 30 |
-
"outputs": [],
|
| 31 |
-
"source": [
|
| 32 |
-
"import os\n",
|
| 33 |
-
"import json\n",
|
| 34 |
-
"from dotenv import load_dotenv\n",
|
| 35 |
-
"from openai import OpenAI\n",
|
| 36 |
-
"from anthropic import Anthropic\n",
|
| 37 |
-
"from IPython.display import Markdown, display"
|
| 38 |
-
]
|
| 39 |
-
},
|
| 40 |
-
{
|
| 41 |
-
"cell_type": "code",
|
| 42 |
-
"execution_count": null,
|
| 43 |
-
"metadata": {},
|
| 44 |
-
"outputs": [],
|
| 45 |
-
"source": [
|
| 46 |
-
"load_dotenv(override=True)\n",
|
| 47 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 48 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 49 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 50 |
-
"if openai_api_key and google_api_key and deepseek_api_key:\n",
|
| 51 |
-
" print(\"All keys were loaded successfully\")"
|
| 52 |
-
]
|
| 53 |
-
},
|
| 54 |
-
{
|
| 55 |
-
"cell_type": "code",
|
| 56 |
-
"execution_count": null,
|
| 57 |
-
"metadata": {},
|
| 58 |
-
"outputs": [],
|
| 59 |
-
"source": [
|
| 60 |
-
"!ollama pull llama3.2\n",
|
| 61 |
-
"!ollama pull mistral"
|
| 62 |
-
]
|
| 63 |
-
},
|
| 64 |
-
{
|
| 65 |
-
"cell_type": "markdown",
|
| 66 |
-
"metadata": {},
|
| 67 |
-
"source": [
|
| 68 |
-
"## Creating Models"
|
| 69 |
-
]
|
| 70 |
-
},
|
| 71 |
-
{
|
| 72 |
-
"cell_type": "markdown",
|
| 73 |
-
"metadata": {},
|
| 74 |
-
"source": [
|
| 75 |
-
"The notebook uses instances of GPT, Gemini and DeepSeek APIs, along with two local models served via Ollama: ```llama3.2``` and ```mistral```."
|
| 76 |
-
]
|
| 77 |
-
},
|
| 78 |
-
{
|
| 79 |
-
"cell_type": "code",
|
| 80 |
-
"execution_count": 4,
|
| 81 |
-
"metadata": {},
|
| 82 |
-
"outputs": [],
|
| 83 |
-
"source": [
|
| 84 |
-
"model_specs = {\n",
|
| 85 |
-
" \"gpt-4o-mini\" : None,\n",
|
| 86 |
-
" \"gemini-2.0-flash\": {\n",
|
| 87 |
-
" \"api_key\" : google_api_key,\n",
|
| 88 |
-
" \"url\" : \"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
|
| 89 |
-
" },\n",
|
| 90 |
-
" \"deepseek-chat\" : {\n",
|
| 91 |
-
" \"api_key\" : deepseek_api_key,\n",
|
| 92 |
-
" \"url\" : \"https://api.deepseek.com/v1\"\n",
|
| 93 |
-
" },\n",
|
| 94 |
-
" \"llama3.2\" : {\n",
|
| 95 |
-
" \"api_key\" : \"ollama\",\n",
|
| 96 |
-
" \"url\" : \"http://localhost:11434/v1\"\n",
|
| 97 |
-
" },\n",
|
| 98 |
-
" \"mistral\" : {\n",
|
| 99 |
-
" \"api_key\" : \"ollama\",\n",
|
| 100 |
-
" \"url\" : \"http://localhost:11434/v1\"\n",
|
| 101 |
-
" }\n",
|
| 102 |
-
"}\n",
|
| 103 |
-
"\n",
|
| 104 |
-
"def create_model(model_name):\n",
|
| 105 |
-
" spec = model_specs[model_name]\n",
|
| 106 |
-
" if spec is None:\n",
|
| 107 |
-
" return OpenAI()\n",
|
| 108 |
-
" \n",
|
| 109 |
-
" return OpenAI(api_key=spec[\"api_key\"], base_url=spec[\"url\"])"
|
| 110 |
-
]
|
| 111 |
-
},
|
| 112 |
-
{
|
| 113 |
-
"cell_type": "code",
|
| 114 |
-
"execution_count": 5,
|
| 115 |
-
"metadata": {},
|
| 116 |
-
"outputs": [],
|
| 117 |
-
"source": [
|
| 118 |
-
"orchestrator_model = \"gemini-2.0-flash\"\n",
|
| 119 |
-
"generator = create_model(orchestrator_model)\n",
|
| 120 |
-
"router = create_model(orchestrator_model)\n",
|
| 121 |
-
"\n",
|
| 122 |
-
"qa_models = {\n",
|
| 123 |
-
" model_name : create_model(model_name) \n",
|
| 124 |
-
" for model_name in model_specs.keys()\n",
|
| 125 |
-
"}\n",
|
| 126 |
-
"\n",
|
| 127 |
-
"judges = {\n",
|
| 128 |
-
" model_name : create_model(model_name) \n",
|
| 129 |
-
" for model_name, specs in model_specs.items() \n",
|
| 130 |
-
" if not(specs) or specs[\"api_key\"] != \"ollama\"\n",
|
| 131 |
-
"}"
|
| 132 |
-
]
|
| 133 |
-
},
|
| 134 |
-
{
|
| 135 |
-
"cell_type": "markdown",
|
| 136 |
-
"metadata": {},
|
| 137 |
-
"source": [
|
| 138 |
-
"## Orchestrator-Worker Workflow"
|
| 139 |
-
]
|
| 140 |
-
},
|
| 141 |
-
{
|
| 142 |
-
"cell_type": "markdown",
|
| 143 |
-
"metadata": {},
|
| 144 |
-
"source": [
|
| 145 |
-
"First, we generate a question to evaluate the intelligence of each LLM."
|
| 146 |
-
]
|
| 147 |
-
},
|
| 148 |
-
{
|
| 149 |
-
"cell_type": "code",
|
| 150 |
-
"execution_count": null,
|
| 151 |
-
"metadata": {},
|
| 152 |
-
"outputs": [],
|
| 153 |
-
"source": [
|
| 154 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs \"\n",
|
| 155 |
-
"request += \"to evaluate and rank them based on their intelligence. \" \n",
|
| 156 |
-
"request += \"Answer **only** with the question, no explanation or preamble.\"\n",
|
| 157 |
-
"\n",
|
| 158 |
-
"messages = [{\"role\": \"user\", \"content\": request}]\n",
|
| 159 |
-
"messages"
|
| 160 |
-
]
|
| 161 |
-
},
|
| 162 |
-
{
|
| 163 |
-
"cell_type": "code",
|
| 164 |
-
"execution_count": 7,
|
| 165 |
-
"metadata": {},
|
| 166 |
-
"outputs": [],
|
| 167 |
-
"source": [
|
| 168 |
-
"response = generator.chat.completions.create(\n",
|
| 169 |
-
" model=orchestrator_model,\n",
|
| 170 |
-
" messages=messages,\n",
|
| 171 |
-
")\n",
|
| 172 |
-
"eval_question = response.choices[0].message.content"
|
| 173 |
-
]
|
| 174 |
-
},
|
| 175 |
-
{
|
| 176 |
-
"cell_type": "code",
|
| 177 |
-
"execution_count": null,
|
| 178 |
-
"metadata": {},
|
| 179 |
-
"outputs": [],
|
| 180 |
-
"source": [
|
| 181 |
-
"display(Markdown(eval_question))"
|
| 182 |
-
]
|
| 183 |
-
},
|
| 184 |
-
{
|
| 185 |
-
"cell_type": "markdown",
|
| 186 |
-
"metadata": {},
|
| 187 |
-
"source": [
|
| 188 |
-
"### Task Parallelization"
|
| 189 |
-
]
|
| 190 |
-
},
|
| 191 |
-
{
|
| 192 |
-
"cell_type": "markdown",
|
| 193 |
-
"metadata": {},
|
| 194 |
-
"source": [
|
| 195 |
-
"Now that we have the question and all the models instantiated, it's time to see what each model has to say about the complex task it was given."
|
| 196 |
-
]
|
| 197 |
-
},
|
| 198 |
-
{
|
| 199 |
-
"cell_type": "code",
|
| 200 |
-
"execution_count": null,
|
| 201 |
-
"metadata": {},
|
| 202 |
-
"outputs": [],
|
| 203 |
-
"source": [
|
| 204 |
-
"question = [{\"role\": \"user\", \"content\": eval_question}]\n",
|
| 205 |
-
"answers = []\n",
|
| 206 |
-
"competitors = []\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"for name, model in qa_models.items():\n",
|
| 209 |
-
" response = model.chat.completions.create(model=name, messages=question)\n",
|
| 210 |
-
" answer = response.choices[0].message.content\n",
|
| 211 |
-
" competitors.append(name)\n",
|
| 212 |
-
" answers.append(answer)\n",
|
| 213 |
-
"\n",
|
| 214 |
-
"answers"
|
| 215 |
-
]
|
| 216 |
-
},
|
| 217 |
-
{
|
| 218 |
-
"cell_type": "code",
|
| 219 |
-
"execution_count": null,
|
| 220 |
-
"metadata": {},
|
| 221 |
-
"outputs": [],
|
| 222 |
-
"source": [
|
| 223 |
-
"report = \"# Answer report for each of the 5 models\\n\\n\"\n",
|
| 224 |
-
"report += \"\\n\\n\".join([f\"## **Model: {model}**\\n\\n{answer}\" for model, answer in zip(competitors, answers)])\n",
|
| 225 |
-
"display(Markdown(report))"
|
| 226 |
-
]
|
| 227 |
-
},
|
| 228 |
-
{
|
| 229 |
-
"cell_type": "markdown",
|
| 230 |
-
"metadata": {},
|
| 231 |
-
"source": [
|
| 232 |
-
"### Synthesizer/Judge"
|
| 233 |
-
]
|
| 234 |
-
},
|
| 235 |
-
{
|
| 236 |
-
"cell_type": "markdown",
|
| 237 |
-
"metadata": {},
|
| 238 |
-
"source": [
|
| 239 |
-
"The judge agents rank the LLM responses based on clarity and strength of argument, as instructed in the judge prompt. Each judge votes, and the final LLM ranking is based on the aggregated rankings of all three judges."
|
| 240 |
-
]
|
| 241 |
-
},
|
| 242 |
-
{
|
| 243 |
-
"cell_type": "code",
|
| 244 |
-
"execution_count": null,
|
| 245 |
-
"metadata": {},
|
| 246 |
-
"outputs": [],
|
| 247 |
-
"source": [
|
| 248 |
-
"together = \"\"\n",
|
| 249 |
-
"for index, answer in enumerate(answers):\n",
|
| 250 |
-
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 251 |
-
" together += answer + \"\\n\\n\"\n",
|
| 252 |
-
"\n",
|
| 253 |
-
"together"
|
| 254 |
-
]
|
| 255 |
-
},
|
| 256 |
-
{
|
| 257 |
-
"cell_type": "code",
|
| 258 |
-
"execution_count": 12,
|
| 259 |
-
"metadata": {},
|
| 260 |
-
"outputs": [],
|
| 261 |
-
"source": [
|
| 262 |
-
"judge_prompt = f\"\"\"\n",
|
| 263 |
-
" You are judging a competition between {len(competitors)} LLM competitors.\n",
|
| 264 |
-
" Each model has been given this nuanced question to evaluate their intelligence:\n",
|
| 265 |
-
"\n",
|
| 266 |
-
" {eval_question}\n",
|
| 267 |
-
"\n",
|
| 268 |
-
" Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 269 |
-
" Respond with JSON, and only JSON, with the following format:\n",
|
| 270 |
-
" {{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 271 |
-
" With 'best competitor number being ONLY the number', for instance:\n",
|
| 272 |
-
" {{\"results\": [\"5\", \"2\", \"4\", ...]}}\n",
|
| 273 |
-
" Here are the responses from each competitor:\n",
|
| 274 |
-
"\n",
|
| 275 |
-
" {together}\n",
|
| 276 |
-
"\n",
|
| 277 |
-
" Now respond with the JSON with the ranked order of the competitors, nothing else. Do NOT include MARKDOWN FORMATTING or CODE BLOCKS. ONLY the JSON\n",
|
| 278 |
-
" \"\"\"\n",
|
| 279 |
-
"\n",
|
| 280 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge_prompt}]"
|
| 281 |
-
]
|
| 282 |
-
},
|
| 283 |
-
{
|
| 284 |
-
"cell_type": "code",
|
| 285 |
-
"execution_count": null,
|
| 286 |
-
"metadata": {},
|
| 287 |
-
"outputs": [],
|
| 288 |
-
"source": [
|
| 289 |
-
"from collections import defaultdict\n",
|
| 290 |
-
"import re\n",
|
| 291 |
-
"\n",
|
| 292 |
-
"N = len(competitors)\n",
|
| 293 |
-
"scores = defaultdict(int)\n",
|
| 294 |
-
"for judge_name, judge in judges.items():\n",
|
| 295 |
-
" response = judge.chat.completions.create(\n",
|
| 296 |
-
" model=judge_name,\n",
|
| 297 |
-
" messages=judge_messages,\n",
|
| 298 |
-
" )\n",
|
| 299 |
-
" response = response.choices[0].message.content\n",
|
| 300 |
-
" response_json = re.findall(r'\\{.*?\\}', response)[0]\n",
|
| 301 |
-
" results = json.loads(response_json)[\"results\"]\n",
|
| 302 |
-
" ranks = [int(result) for result in results]\n",
|
| 303 |
-
" print(f\"Judge {judge_name} ranking:\")\n",
|
| 304 |
-
" for i, c in enumerate(ranks):\n",
|
| 305 |
-
" model_name = competitors[c - 1]\n",
|
| 306 |
-
" print(f\"#{i+1} : {model_name}\")\n",
|
| 307 |
-
" scores[c - 1] += (N - i)\n",
|
| 308 |
-
" print()"
|
| 309 |
-
]
|
| 310 |
-
},
|
| 311 |
-
{
|
| 312 |
-
"cell_type": "code",
|
| 313 |
-
"execution_count": null,
|
| 314 |
-
"metadata": {},
|
| 315 |
-
"outputs": [],
|
| 316 |
-
"source": [
|
| 317 |
-
"sorted_indices = sorted(scores, key=scores.get)\n",
|
| 318 |
-
"\n",
|
| 319 |
-
"# Convert to model names\n",
|
| 320 |
-
"ranked_model_names = [competitors[i] for i in sorted_indices]\n",
|
| 321 |
-
"\n",
|
| 322 |
-
"print(\"Final ranking from best to worst:\")\n",
|
| 323 |
-
"for i, name in enumerate(ranked_model_names[::-1], 1):\n",
|
| 324 |
-
" print(f\"#{i}: {name}\")"
|
| 325 |
-
]
|
| 326 |
-
},
|
| 327 |
-
{
|
| 328 |
-
"cell_type": "markdown",
|
| 329 |
-
"metadata": {},
|
| 330 |
-
"source": [
|
| 331 |
-
"## Routing Workflow"
|
| 332 |
-
]
|
| 333 |
-
},
|
| 334 |
-
{
|
| 335 |
-
"cell_type": "markdown",
|
| 336 |
-
"metadata": {},
|
| 337 |
-
"source": [
|
| 338 |
-
"We now define a routing agent responsible for classifying task complexity and delegating the prompt to the most appropriate model."
|
| 339 |
-
]
|
| 340 |
-
},
|
| 341 |
-
{
|
| 342 |
-
"cell_type": "code",
|
| 343 |
-
"execution_count": 15,
|
| 344 |
-
"metadata": {},
|
| 345 |
-
"outputs": [],
|
| 346 |
-
"source": [
|
| 347 |
-
"def classify_question_complexity(question: str, routing_agent, routing_model) -> int:\n",
|
| 348 |
-
" \"\"\"\n",
|
| 349 |
-
" Ask an LLM to classify the question complexity from 1 (easy) to 5 (very hard).\n",
|
| 350 |
-
" \"\"\"\n",
|
| 351 |
-
" prompt = f\"\"\"\n",
|
| 352 |
-
" You are a classifier responsible for assigning a complexity level to user questions, based on how difficult they would be for a language model to answer.\n",
|
| 353 |
-
"\n",
|
| 354 |
-
" Please read the question below and assign a complexity score from 1 to 5:\n",
|
| 355 |
-
"\n",
|
| 356 |
-
" - Level 1: Very simple factual or definitional question (e.g., “What is the capital of France?”)\n",
|
| 357 |
-
" - Level 2: Slightly more involved, requiring basic reasoning or comparison\n",
|
| 358 |
-
" - Level 3: Moderate complexity, requiring synthesis, context understanding, or multi-part answers\n",
|
| 359 |
-
" - Level 4: High complexity, requiring abstract thinking, ethical judgment, or creative generation\n",
|
| 360 |
-
" - Level 5: Extremely challenging, requiring deep reasoning, philosophical reflection, or long-term multi-step inference\n",
|
| 361 |
-
"\n",
|
| 362 |
-
" Respond ONLY with a single integer between 1 and 5 that best reflects the complexity of the question.\n",
|
| 363 |
-
"\n",
|
| 364 |
-
" Question:\n",
|
| 365 |
-
" {question}\n",
|
| 366 |
-
" \"\"\"\n",
|
| 367 |
-
"\n",
|
| 368 |
-
" response = routing_agent.chat.completions.create(\n",
|
| 369 |
-
" model=routing_model,\n",
|
| 370 |
-
" messages=[{\"role\": \"user\", \"content\": prompt}]\n",
|
| 371 |
-
" )\n",
|
| 372 |
-
" try:\n",
|
| 373 |
-
" return int(response.choices[0].message.content.strip())\n",
|
| 374 |
-
" except Exception:\n",
|
| 375 |
-
" return 3 # default to medium complexity on error\n",
|
| 376 |
-
" \n",
|
| 377 |
-
"def route_question_to_model(question: str, models_by_rank, classifier_model=router, model_name=orchestrator_model):\n",
|
| 378 |
-
" level = classify_question_complexity(question, classifier_model, model_name)\n",
|
| 379 |
-
" selected_model_name = models_by_rank[level - 1]\n",
|
| 380 |
-
" return selected_model_name"
|
| 381 |
-
]
|
| 382 |
-
},
|
| 383 |
-
{
|
| 384 |
-
"cell_type": "code",
|
| 385 |
-
"execution_count": 16,
|
| 386 |
-
"metadata": {},
|
| 387 |
-
"outputs": [],
|
| 388 |
-
"source": [
|
| 389 |
-
"difficulty_prompts = [\n",
|
| 390 |
-
" \"Generate a very basic, factual question that a small or entry-level language model could answer easily. It should require no reasoning, just direct knowledge lookup.\",\n",
|
| 391 |
-
" \"Generate a slightly involved question that requires basic reasoning, comparison, or combining two known facts. Still within the grasp of small models but not purely factual.\",\n",
|
| 392 |
-
" \"Generate a moderately challenging question that requires some synthesis of ideas, multi-step reasoning, or contextual understanding. A mid-tier model should be able to answer it with effort.\",\n",
|
| 393 |
-
" \"Generate a difficult question involving abstract thinking, open-ended reasoning, or ethical tradeoffs. The question should challenge large models to produce thoughtful and coherent responses.\",\n",
|
| 394 |
-
" \"Generate an extremely complex and nuanced question that tests the limits of current language models. It should require deep reasoning, long-term planning, philosophy, or advanced multi-domain knowledge.\"\n",
|
| 395 |
-
"]\n",
|
| 396 |
-
"def generate_question(level, generator=generator, generator_model=orchestrator_model):\n",
|
| 397 |
-
" prompt = (\n",
|
| 398 |
-
" f\"{difficulty_prompts[level - 1]}\\n\"\n",
|
| 399 |
-
" \"Answer only with the question, no explanation.\"\n",
|
| 400 |
-
" )\n",
|
| 401 |
-
" messages = [{\"role\": \"user\", \"content\": prompt}]\n",
|
| 402 |
-
" response = generator.chat.completions.create(\n",
|
| 403 |
-
" model=generator_model, # or your planner model\n",
|
| 404 |
-
" messages=messages\n",
|
| 405 |
-
" )\n",
|
| 406 |
-
" \n",
|
| 407 |
-
" return response.choices[0].message.content\n",
|
| 408 |
-
"\n"
|
| 409 |
-
]
|
| 410 |
-
},
|
| 411 |
-
{
|
| 412 |
-
"cell_type": "markdown",
|
| 413 |
-
"metadata": {},
|
| 414 |
-
"source": [
|
| 415 |
-
"### Testing Routing Workflow"
|
| 416 |
-
]
|
| 417 |
-
},
|
| 418 |
-
{
|
| 419 |
-
"cell_type": "markdown",
|
| 420 |
-
"metadata": {},
|
| 421 |
-
"source": [
|
| 422 |
-
"Finally, to test the routing workflow, we create a function that accepts a task complexity level and triggers the full routing process.\n",
|
| 423 |
-
"\n",
|
| 424 |
-
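"*Note: The ranked list is ordered from least to most capable, so a level-N classification is routed to the Nth model counting from the weakest. A prompt generated at level N isn't always routed to that model, because the router classifies the generated question independently and may assign it a different level.*"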
|
| 425 |
-
]
|
| 426 |
-
},
|
| 427 |
-
{
|
| 428 |
-
"cell_type": "code",
|
| 429 |
-
"execution_count": 17,
|
| 430 |
-
"metadata": {},
|
| 431 |
-
"outputs": [],
|
| 432 |
-
"source": [
|
| 433 |
-
"def test_generation_routing(level):\n",
|
| 434 |
-
" question = generate_question(level=level)\n",
|
| 435 |
-
" answer_model = route_question_to_model(question, ranked_model_names)\n",
|
| 436 |
-
" messages = [{\"role\": \"user\", \"content\": question}]\n",
|
| 437 |
-
"\n",
|
| 438 |
-
" response =qa_models[answer_model].chat.completions.create(\n",
|
| 439 |
-
" model=answer_model, # or your planner model\n",
|
| 440 |
-
" messages=messages\n",
|
| 441 |
-
" )\n",
|
| 442 |
-
" print(f\"Question : {question}\")\n",
|
| 443 |
-
" print(f\"Routed to {answer_model}\")\n",
|
| 444 |
-
" display(Markdown(response.choices[0].message.content))"
|
| 445 |
-
]
|
| 446 |
-
},
|
| 447 |
-
{
|
| 448 |
-
"cell_type": "code",
|
| 449 |
-
"execution_count": null,
|
| 450 |
-
"metadata": {},
|
| 451 |
-
"outputs": [],
|
| 452 |
-
"source": [
|
| 453 |
-
"test_generation_routing(level=1)"
|
| 454 |
-
]
|
| 455 |
-
},
|
| 456 |
-
{
|
| 457 |
-
"cell_type": "code",
|
| 458 |
-
"execution_count": null,
|
| 459 |
-
"metadata": {},
|
| 460 |
-
"outputs": [],
|
| 461 |
-
"source": [
|
| 462 |
-
"test_generation_routing(level=2)"
|
| 463 |
-
]
|
| 464 |
-
},
|
| 465 |
-
{
|
| 466 |
-
"cell_type": "code",
|
| 467 |
-
"execution_count": null,
|
| 468 |
-
"metadata": {},
|
| 469 |
-
"outputs": [],
|
| 470 |
-
"source": [
|
| 471 |
-
"test_generation_routing(level=3)"
|
| 472 |
-
]
|
| 473 |
-
},
|
| 474 |
-
{
|
| 475 |
-
"cell_type": "code",
|
| 476 |
-
"execution_count": null,
|
| 477 |
-
"metadata": {},
|
| 478 |
-
"outputs": [],
|
| 479 |
-
"source": [
|
| 480 |
-
"test_generation_routing(level=4)"
|
| 481 |
-
]
|
| 482 |
-
},
|
| 483 |
-
{
|
| 484 |
-
"cell_type": "code",
|
| 485 |
-
"execution_count": null,
|
| 486 |
-
"metadata": {},
|
| 487 |
-
"outputs": [],
|
| 488 |
-
"source": [
|
| 489 |
-
"test_generation_routing(level=5)"
|
| 490 |
-
]
|
| 491 |
-
}
|
| 492 |
-
],
|
| 493 |
-
"metadata": {
|
| 494 |
-
"kernelspec": {
|
| 495 |
-
"display_name": ".venv",
|
| 496 |
-
"language": "python",
|
| 497 |
-
"name": "python3"
|
| 498 |
-
},
|
| 499 |
-
"language_info": {
|
| 500 |
-
"codemirror_mode": {
|
| 501 |
-
"name": "ipython",
|
| 502 |
-
"version": 3
|
| 503 |
-
},
|
| 504 |
-
"file_extension": ".py",
|
| 505 |
-
"mimetype": "text/x-python",
|
| 506 |
-
"name": "python",
|
| 507 |
-
"nbconvert_exporter": "python",
|
| 508 |
-
"pygments_lexer": "ipython3",
|
| 509 |
-
"version": "3.12.11"
|
| 510 |
-
}
|
| 511 |
-
},
|
| 512 |
-
"nbformat": 4,
|
| 513 |
-
"nbformat_minor": 2
|
| 514 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2-Evaluator-AnnpaS18.ipynb
DELETED
|
@@ -1,474 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
-
" <tr>\n",
|
| 18 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
-
" </td>\n",
|
| 21 |
-
" <td>\n",
|
| 22 |
-
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
-
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "code",
|
| 32 |
-
"execution_count": 1,
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"outputs": [],
|
| 35 |
-
"source": [
|
| 36 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
-
"\n",
|
| 38 |
-
"import os\n",
|
| 39 |
-
"import json\n",
|
| 40 |
-
"from dotenv import load_dotenv\n",
|
| 41 |
-
"from openai import OpenAI\n",
|
| 42 |
-
"from anthropic import Anthropic\n",
|
| 43 |
-
"from IPython.display import Markdown, display"
|
| 44 |
-
]
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"cell_type": "code",
|
| 48 |
-
"execution_count": null,
|
| 49 |
-
"metadata": {},
|
| 50 |
-
"outputs": [],
|
| 51 |
-
"source": [
|
| 52 |
-
"# Always remember to do this!\n",
|
| 53 |
-
"load_dotenv(override=True)"
|
| 54 |
-
]
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"cell_type": "code",
|
| 58 |
-
"execution_count": null,
|
| 59 |
-
"metadata": {},
|
| 60 |
-
"outputs": [],
|
| 61 |
-
"source": [
|
| 62 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
-
"\n",
|
| 64 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
-
"\n",
|
| 70 |
-
"if openai_api_key:\n",
|
| 71 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
-
"else:\n",
|
| 73 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
-
" \n",
|
| 75 |
-
"if anthropic_api_key:\n",
|
| 76 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
-
"else:\n",
|
| 78 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
-
"\n",
|
| 80 |
-
"if google_api_key:\n",
|
| 81 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
-
"else:\n",
|
| 83 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
-
"\n",
|
| 85 |
-
"if deepseek_api_key:\n",
|
| 86 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
-
"else:\n",
|
| 88 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
-
"\n",
|
| 90 |
-
"if groq_api_key:\n",
|
| 91 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
-
"else:\n",
|
| 93 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
-
]
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"cell_type": "code",
|
| 98 |
-
"execution_count": 4,
|
| 99 |
-
"metadata": {},
|
| 100 |
-
"outputs": [],
|
| 101 |
-
"source": [
|
| 102 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
-
]
|
| 106 |
-
},
|
| 107 |
-
{
|
| 108 |
-
"cell_type": "code",
|
| 109 |
-
"execution_count": null,
|
| 110 |
-
"metadata": {},
|
| 111 |
-
"outputs": [],
|
| 112 |
-
"source": [
|
| 113 |
-
"messages"
|
| 114 |
-
]
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"cell_type": "code",
|
| 118 |
-
"execution_count": null,
|
| 119 |
-
"metadata": {},
|
| 120 |
-
"outputs": [],
|
| 121 |
-
"source": [
|
| 122 |
-
"openai = OpenAI()\n",
|
| 123 |
-
"response = openai.chat.completions.create(\n",
|
| 124 |
-
" model=\"gpt-4o-mini\",\n",
|
| 125 |
-
" messages=messages,\n",
|
| 126 |
-
")\n",
|
| 127 |
-
"question = response.choices[0].message.content\n",
|
| 128 |
-
"print(question)\n"
|
| 129 |
-
]
|
| 130 |
-
},
|
| 131 |
-
{
|
| 132 |
-
"cell_type": "code",
|
| 133 |
-
"execution_count": 7,
|
| 134 |
-
"metadata": {},
|
| 135 |
-
"outputs": [],
|
| 136 |
-
"source": [
|
| 137 |
-
"competitors = []\n",
|
| 138 |
-
"answers = []\n",
|
| 139 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
-
]
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"cell_type": "code",
|
| 144 |
-
"execution_count": null,
|
| 145 |
-
"metadata": {},
|
| 146 |
-
"outputs": [],
|
| 147 |
-
"source": [
|
| 148 |
-
"# The API we know well\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"model_name = \"gpt-4o-mini\"\n",
|
| 151 |
-
"\n",
|
| 152 |
-
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
-
"answer = response.choices[0].message.content\n",
|
| 154 |
-
"\n",
|
| 155 |
-
"display(Markdown(answer))\n",
|
| 156 |
-
"competitors.append(model_name)\n",
|
| 157 |
-
"answers.append(answer)"
|
| 158 |
-
]
|
| 159 |
-
},
|
| 160 |
-
{
|
| 161 |
-
"cell_type": "code",
|
| 162 |
-
"execution_count": null,
|
| 163 |
-
"metadata": {},
|
| 164 |
-
"outputs": [],
|
| 165 |
-
"source": [
|
| 166 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 167 |
-
"\n",
|
| 168 |
-
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"claude = Anthropic()\n",
|
| 171 |
-
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 172 |
-
"answer = response.content[0].text\n",
|
| 173 |
-
"\n",
|
| 174 |
-
"display(Markdown(answer))\n",
|
| 175 |
-
"competitors.append(model_name)\n",
|
| 176 |
-
"answers.append(answer)"
|
| 177 |
-
]
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"cell_type": "code",
|
| 181 |
-
"execution_count": null,
|
| 182 |
-
"metadata": {},
|
| 183 |
-
"outputs": [],
|
| 184 |
-
"source": [
|
| 185 |
-
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 186 |
-
"model_name = \"gemini-2.0-flash\"\n",
|
| 187 |
-
"\n",
|
| 188 |
-
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
-
"answer = response.choices[0].message.content\n",
|
| 190 |
-
"\n",
|
| 191 |
-
"display(Markdown(answer))\n",
|
| 192 |
-
"competitors.append(model_name)\n",
|
| 193 |
-
"answers.append(answer)"
|
| 194 |
-
]
|
| 195 |
-
},
|
| 196 |
-
{
|
| 197 |
-
"cell_type": "code",
|
| 198 |
-
"execution_count": null,
|
| 199 |
-
"metadata": {},
|
| 200 |
-
"outputs": [],
|
| 201 |
-
"source": [
|
| 202 |
-
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 203 |
-
"model_name = \"deepseek-chat\"\n",
|
| 204 |
-
"\n",
|
| 205 |
-
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
-
"answer = response.choices[0].message.content\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"display(Markdown(answer))\n",
|
| 209 |
-
"competitors.append(model_name)\n",
|
| 210 |
-
"answers.append(answer)"
|
| 211 |
-
]
|
| 212 |
-
},
|
| 213 |
-
{
|
| 214 |
-
"cell_type": "code",
|
| 215 |
-
"execution_count": null,
|
| 216 |
-
"metadata": {},
|
| 217 |
-
"outputs": [],
|
| 218 |
-
"source": [
|
| 219 |
-
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 220 |
-
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 221 |
-
"\n",
|
| 222 |
-
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 223 |
-
"answer = response.choices[0].message.content\n",
|
| 224 |
-
"\n",
|
| 225 |
-
"display(Markdown(answer))\n",
|
| 226 |
-
"competitors.append(model_name)\n",
|
| 227 |
-
"answers.append(answer)\n"
|
| 228 |
-
]
|
| 229 |
-
},
|
| 230 |
-
{
|
| 231 |
-
"cell_type": "markdown",
|
| 232 |
-
"metadata": {},
|
| 233 |
-
"source": [
|
| 234 |
-
"## For the next cell, we will use Ollama\n",
|
| 235 |
-
"\n",
|
| 236 |
-
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 237 |
-
"and runs models locally using high performance C++ code.\n",
|
| 238 |
-
"\n",
|
| 239 |
-
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 240 |
-
"\n",
|
| 241 |
-
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 242 |
-
"\n",
|
| 243 |
-
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 244 |
-
"\n",
|
| 245 |
-
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 246 |
-
"\n",
|
| 247 |
-
"`ollama pull <model_name>` downloads a model locally \n",
|
| 248 |
-
"`ollama ls` lists all the models you've downloaded \n",
|
| 249 |
-
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 250 |
-
]
|
| 251 |
-
},
|
| 252 |
-
{
|
| 253 |
-
"cell_type": "markdown",
|
| 254 |
-
"metadata": {},
|
| 255 |
-
"source": [
|
| 256 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 257 |
-
" <tr>\n",
|
| 258 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 259 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 260 |
-
" </td>\n",
|
| 261 |
-
" <td>\n",
|
| 262 |
-
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 263 |
-
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 264 |
-
" </span>\n",
|
| 265 |
-
" </td>\n",
|
| 266 |
-
" </tr>\n",
|
| 267 |
-
"</table>"
|
| 268 |
-
]
|
| 269 |
-
},
|
| 270 |
-
{
|
| 271 |
-
"cell_type": "code",
|
| 272 |
-
"execution_count": null,
|
| 273 |
-
"metadata": {},
|
| 274 |
-
"outputs": [],
|
| 275 |
-
"source": [
|
| 276 |
-
"!ollama pull llama3.2"
|
| 277 |
-
]
|
| 278 |
-
},
|
| 279 |
-
{
|
| 280 |
-
"cell_type": "code",
|
| 281 |
-
"execution_count": null,
|
| 282 |
-
"metadata": {},
|
| 283 |
-
"outputs": [],
|
| 284 |
-
"source": [
|
| 285 |
-
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 286 |
-
"model_name = \"llama3.2\"\n",
|
| 287 |
-
"\n",
|
| 288 |
-
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 289 |
-
"answer = response.choices[0].message.content\n",
|
| 290 |
-
"\n",
|
| 291 |
-
"display(Markdown(answer))\n",
|
| 292 |
-
"competitors.append(model_name)\n",
|
| 293 |
-
"answers.append(answer)"
|
| 294 |
-
]
|
| 295 |
-
},
|
| 296 |
-
{
|
| 297 |
-
"cell_type": "code",
|
| 298 |
-
"execution_count": null,
|
| 299 |
-
"metadata": {},
|
| 300 |
-
"outputs": [],
|
| 301 |
-
"source": [
|
| 302 |
-
"# So where are we?\n",
|
| 303 |
-
"\n",
|
| 304 |
-
"print(competitors)\n",
|
| 305 |
-
"print(answers)\n"
|
| 306 |
-
]
|
| 307 |
-
},
|
| 308 |
-
{
|
| 309 |
-
"cell_type": "code",
|
| 310 |
-
"execution_count": null,
|
| 311 |
-
"metadata": {},
|
| 312 |
-
"outputs": [],
|
| 313 |
-
"source": [
|
| 314 |
-
"# It's nice to know how to use \"zip\"\n",
|
| 315 |
-
"for competitor, answer in zip(competitors, answers):\n",
|
| 316 |
-
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 317 |
-
]
|
| 318 |
-
},
|
| 319 |
-
{
|
| 320 |
-
"cell_type": "code",
|
| 321 |
-
"execution_count": 20,
|
| 322 |
-
"metadata": {},
|
| 323 |
-
"outputs": [],
|
| 324 |
-
"source": [
|
| 325 |
-
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 326 |
-
"\n",
|
| 327 |
-
"together = \"\"\n",
|
| 328 |
-
"for index, answer in enumerate(answers):\n",
|
| 329 |
-
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 330 |
-
" together += answer + \"\\n\\n\""
|
| 331 |
-
]
|
| 332 |
-
},
|
| 333 |
-
{
|
| 334 |
-
"cell_type": "code",
|
| 335 |
-
"execution_count": null,
|
| 336 |
-
"metadata": {},
|
| 337 |
-
"outputs": [],
|
| 338 |
-
"source": [
|
| 339 |
-
"print(together)"
|
| 340 |
-
]
|
| 341 |
-
},
|
| 342 |
-
{
|
| 343 |
-
"cell_type": "code",
|
| 344 |
-
"execution_count": 22,
|
| 345 |
-
"metadata": {},
|
| 346 |
-
"outputs": [],
|
| 347 |
-
"source": [
|
| 348 |
-
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 349 |
-
"Each model has been given this question:\n",
|
| 350 |
-
"\n",
|
| 351 |
-
"{question}\n",
|
| 352 |
-
"\n",
|
| 353 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 354 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 355 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 356 |
-
"\n",
|
| 357 |
-
"Here are the responses from each competitor:\n",
|
| 358 |
-
"\n",
|
| 359 |
-
"{together}\n",
|
| 360 |
-
"\n",
|
| 361 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 362 |
-
]
|
| 363 |
-
},
|
| 364 |
-
{
|
| 365 |
-
"cell_type": "code",
|
| 366 |
-
"execution_count": null,
|
| 367 |
-
"metadata": {},
|
| 368 |
-
"outputs": [],
|
| 369 |
-
"source": [
|
| 370 |
-
"print(judge)"
|
| 371 |
-
]
|
| 372 |
-
},
|
| 373 |
-
{
|
| 374 |
-
"cell_type": "code",
|
| 375 |
-
"execution_count": 29,
|
| 376 |
-
"metadata": {},
|
| 377 |
-
"outputs": [],
|
| 378 |
-
"source": [
|
| 379 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 380 |
-
]
|
| 381 |
-
},
|
| 382 |
-
{
|
| 383 |
-
"cell_type": "code",
|
| 384 |
-
"execution_count": null,
|
| 385 |
-
"metadata": {},
|
| 386 |
-
"outputs": [],
|
| 387 |
-
"source": [
|
| 388 |
-
"# Judgement time!\n",
|
| 389 |
-
"\n",
|
| 390 |
-
"openai = OpenAI()\n",
|
| 391 |
-
"response = openai.chat.completions.create(\n",
|
| 392 |
-
" model=\"o3-mini\",\n",
|
| 393 |
-
" messages=judge_messages,\n",
|
| 394 |
-
")\n",
|
| 395 |
-
"results = response.choices[0].message.content\n",
|
| 396 |
-
"print(results)\n"
|
| 397 |
-
]
|
| 398 |
-
},
|
| 399 |
-
{
|
| 400 |
-
"cell_type": "code",
|
| 401 |
-
"execution_count": null,
|
| 402 |
-
"metadata": {},
|
| 403 |
-
"outputs": [],
|
| 404 |
-
"source": [
|
| 405 |
-
"# OK let's turn this into results!\n",
|
| 406 |
-
"\n",
|
| 407 |
-
"results_dict = json.loads(results)\n",
|
| 408 |
-
"ranks = results_dict[\"results\"]\n",
|
| 409 |
-
"for index, result in enumerate(ranks):\n",
|
| 410 |
-
" competitor = competitors[int(result)-1]\n",
|
| 411 |
-
" print(f\"Rank {index+1}: {competitor}\")"
|
| 412 |
-
]
|
| 413 |
-
},
|
| 414 |
-
{
|
| 415 |
-
"cell_type": "markdown",
|
| 416 |
-
"metadata": {},
|
| 417 |
-
"source": [
|
| 418 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 419 |
-
" <tr>\n",
|
| 420 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 421 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 422 |
-
" </td>\n",
|
| 423 |
-
" <td>\n",
|
| 424 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 425 |
-
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 426 |
-
" </span>\n",
|
| 427 |
-
" </td>\n",
|
| 428 |
-
" </tr>\n",
|
| 429 |
-
"</table>"
|
| 430 |
-
]
|
| 431 |
-
},
|
| 432 |
-
{
|
| 433 |
-
"cell_type": "markdown",
|
| 434 |
-
"metadata": {},
|
| 435 |
-
"source": [
|
| 436 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 437 |
-
" <tr>\n",
|
| 438 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 439 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 440 |
-
" </td>\n",
|
| 441 |
-
" <td>\n",
|
| 442 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 443 |
-
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 444 |
-
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 445 |
-
" to business projects where accuracy is critical.\n",
|
| 446 |
-
" </span>\n",
|
| 447 |
-
" </td>\n",
|
| 448 |
-
" </tr>\n",
|
| 449 |
-
"</table>"
|
| 450 |
-
]
|
| 451 |
-
}
|
| 452 |
-
],
|
| 453 |
-
"metadata": {
|
| 454 |
-
"kernelspec": {
|
| 455 |
-
"display_name": ".venv",
|
| 456 |
-
"language": "python",
|
| 457 |
-
"name": "python3"
|
| 458 |
-
},
|
| 459 |
-
"language_info": {
|
| 460 |
-
"codemirror_mode": {
|
| 461 |
-
"name": "ipython",
|
| 462 |
-
"version": 3
|
| 463 |
-
},
|
| 464 |
-
"file_extension": ".py",
|
| 465 |
-
"mimetype": "text/x-python",
|
| 466 |
-
"name": "python",
|
| 467 |
-
"nbconvert_exporter": "python",
|
| 468 |
-
"pygments_lexer": "ipython3",
|
| 469 |
-
"version": "3.12.9"
|
| 470 |
-
}
|
| 471 |
-
},
|
| 472 |
-
"nbformat": 4,
|
| 473 |
-
"nbformat_minor": 2
|
| 474 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2-judge-prompt-changed.ipynb
DELETED
|
@@ -1,476 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
-
" <tr>\n",
|
| 18 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
-
" </td>\n",
|
| 21 |
-
" <td>\n",
|
| 22 |
-
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
-
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "code",
|
| 32 |
-
"execution_count": 1,
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"outputs": [],
|
| 35 |
-
"source": [
|
| 36 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
-
"\n",
|
| 38 |
-
"import os\n",
|
| 39 |
-
"import json\n",
|
| 40 |
-
"from dotenv import load_dotenv\n",
|
| 41 |
-
"from openai import OpenAI\n",
|
| 42 |
-
"from anthropic import Anthropic\n",
|
| 43 |
-
"from IPython.display import Markdown, display"
|
| 44 |
-
]
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"cell_type": "code",
|
| 48 |
-
"execution_count": null,
|
| 49 |
-
"metadata": {},
|
| 50 |
-
"outputs": [],
|
| 51 |
-
"source": [
|
| 52 |
-
"# Always remember to do this!\n",
|
| 53 |
-
"load_dotenv(override=True)"
|
| 54 |
-
]
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"cell_type": "code",
|
| 58 |
-
"execution_count": null,
|
| 59 |
-
"metadata": {},
|
| 60 |
-
"outputs": [],
|
| 61 |
-
"source": [
|
| 62 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
-
"\n",
|
| 64 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
-
"\n",
|
| 70 |
-
"if openai_api_key:\n",
|
| 71 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
-
"else:\n",
|
| 73 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
-
" \n",
|
| 75 |
-
"if anthropic_api_key:\n",
|
| 76 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
-
"else:\n",
|
| 78 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
-
"\n",
|
| 80 |
-
"if google_api_key:\n",
|
| 81 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
-
"else:\n",
|
| 83 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
-
"\n",
|
| 85 |
-
"if deepseek_api_key:\n",
|
| 86 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
-
"else:\n",
|
| 88 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
-
"\n",
|
| 90 |
-
"if groq_api_key:\n",
|
| 91 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
-
"else:\n",
|
| 93 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
-
]
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"cell_type": "code",
|
| 98 |
-
"execution_count": 4,
|
| 99 |
-
"metadata": {},
|
| 100 |
-
"outputs": [],
|
| 101 |
-
"source": [
|
| 102 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
-
]
|
| 106 |
-
},
|
| 107 |
-
{
|
| 108 |
-
"cell_type": "code",
|
| 109 |
-
"execution_count": null,
|
| 110 |
-
"metadata": {},
|
| 111 |
-
"outputs": [],
|
| 112 |
-
"source": [
|
| 113 |
-
"messages"
|
| 114 |
-
]
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"cell_type": "code",
|
| 118 |
-
"execution_count": null,
|
| 119 |
-
"metadata": {},
|
| 120 |
-
"outputs": [],
|
| 121 |
-
"source": [
|
| 122 |
-
"openai = OpenAI()\n",
|
| 123 |
-
"response = openai.chat.completions.create(\n",
|
| 124 |
-
" model=\"gpt-4o-mini\",\n",
|
| 125 |
-
" messages=messages,\n",
|
| 126 |
-
")\n",
|
| 127 |
-
"question = response.choices[0].message.content\n",
|
| 128 |
-
"print(question)\n"
|
| 129 |
-
]
|
| 130 |
-
},
|
| 131 |
-
{
|
| 132 |
-
"cell_type": "code",
|
| 133 |
-
"execution_count": 7,
|
| 134 |
-
"metadata": {},
|
| 135 |
-
"outputs": [],
|
| 136 |
-
"source": [
|
| 137 |
-
"competitors = []\n",
|
| 138 |
-
"answers = []\n",
|
| 139 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
-
]
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"cell_type": "code",
|
| 144 |
-
"execution_count": null,
|
| 145 |
-
"metadata": {},
|
| 146 |
-
"outputs": [],
|
| 147 |
-
"source": [
|
| 148 |
-
"# The API we know well\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"model_name = \"gpt-4o-mini\"\n",
|
| 151 |
-
"\n",
|
| 152 |
-
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
-
"answer = response.choices[0].message.content\n",
|
| 154 |
-
"\n",
|
| 155 |
-
"display(Markdown(answer))\n",
|
| 156 |
-
"competitors.append(model_name)\n",
|
| 157 |
-
"answers.append(answer)"
|
| 158 |
-
]
|
| 159 |
-
},
|
| 160 |
-
{
|
| 161 |
-
"cell_type": "code",
|
| 162 |
-
"execution_count": null,
|
| 163 |
-
"metadata": {},
|
| 164 |
-
"outputs": [],
|
| 165 |
-
"source": [
|
| 166 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 167 |
-
"\n",
|
| 168 |
-
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"claude = Anthropic()\n",
|
| 171 |
-
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 172 |
-
"answer = response.content[0].text\n",
|
| 173 |
-
"\n",
|
| 174 |
-
"display(Markdown(answer))\n",
|
| 175 |
-
"competitors.append(model_name)\n",
|
| 176 |
-
"answers.append(answer)"
|
| 177 |
-
]
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"cell_type": "code",
|
| 181 |
-
"execution_count": null,
|
| 182 |
-
"metadata": {},
|
| 183 |
-
"outputs": [],
|
| 184 |
-
"source": [
|
| 185 |
-
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 186 |
-
"model_name = \"gemini-2.0-flash\"\n",
|
| 187 |
-
"\n",
|
| 188 |
-
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
-
"answer = response.choices[0].message.content\n",
|
| 190 |
-
"\n",
|
| 191 |
-
"display(Markdown(answer))\n",
|
| 192 |
-
"competitors.append(model_name)\n",
|
| 193 |
-
"answers.append(answer)"
|
| 194 |
-
]
|
| 195 |
-
},
|
| 196 |
-
{
|
| 197 |
-
"cell_type": "code",
|
| 198 |
-
"execution_count": null,
|
| 199 |
-
"metadata": {},
|
| 200 |
-
"outputs": [],
|
| 201 |
-
"source": [
|
| 202 |
-
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 203 |
-
"model_name = \"deepseek-chat\"\n",
|
| 204 |
-
"\n",
|
| 205 |
-
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
-
"answer = response.choices[0].message.content\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"display(Markdown(answer))\n",
|
| 209 |
-
"competitors.append(model_name)\n",
|
| 210 |
-
"answers.append(answer)"
|
| 211 |
-
]
|
| 212 |
-
},
|
| 213 |
-
{
|
| 214 |
-
"cell_type": "code",
|
| 215 |
-
"execution_count": null,
|
| 216 |
-
"metadata": {},
|
| 217 |
-
"outputs": [],
|
| 218 |
-
"source": [
|
| 219 |
-
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 220 |
-
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 221 |
-
"\n",
|
| 222 |
-
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 223 |
-
"answer = response.choices[0].message.content\n",
|
| 224 |
-
"\n",
|
| 225 |
-
"display(Markdown(answer))\n",
|
| 226 |
-
"competitors.append(model_name)\n",
|
| 227 |
-
"answers.append(answer)\n"
|
| 228 |
-
]
|
| 229 |
-
},
|
| 230 |
-
{
|
| 231 |
-
"cell_type": "markdown",
|
| 232 |
-
"metadata": {},
|
| 233 |
-
"source": [
|
| 234 |
-
"## For the next cell, we will use Ollama\n",
|
| 235 |
-
"\n",
|
| 236 |
-
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 237 |
-
"and runs models locally using high performance C++ code.\n",
|
| 238 |
-
"\n",
|
| 239 |
-
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 240 |
-
"\n",
|
| 241 |
-
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 242 |
-
"\n",
|
| 243 |
-
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 244 |
-
"\n",
|
| 245 |
-
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 246 |
-
"\n",
|
| 247 |
-
"`ollama pull <model_name>` downloads a model locally \n",
|
| 248 |
-
"`ollama ls` lists all the models you've downloaded \n",
|
| 249 |
-
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 250 |
-
]
|
| 251 |
-
},
|
| 252 |
-
{
|
| 253 |
-
"cell_type": "markdown",
|
| 254 |
-
"metadata": {},
|
| 255 |
-
"source": [
|
| 256 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 257 |
-
" <tr>\n",
|
| 258 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 259 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 260 |
-
" </td>\n",
|
| 261 |
-
" <td>\n",
|
| 262 |
-
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 263 |
-
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 264 |
-
" </span>\n",
|
| 265 |
-
" </td>\n",
|
| 266 |
-
" </tr>\n",
|
| 267 |
-
"</table>"
|
| 268 |
-
]
|
| 269 |
-
},
|
| 270 |
-
{
|
| 271 |
-
"cell_type": "code",
|
| 272 |
-
"execution_count": null,
|
| 273 |
-
"metadata": {},
|
| 274 |
-
"outputs": [],
|
| 275 |
-
"source": [
|
| 276 |
-
"!ollama pull llama3.2"
|
| 277 |
-
]
|
| 278 |
-
},
|
| 279 |
-
{
|
| 280 |
-
"cell_type": "code",
|
| 281 |
-
"execution_count": null,
|
| 282 |
-
"metadata": {},
|
| 283 |
-
"outputs": [],
|
| 284 |
-
"source": [
|
| 285 |
-
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 286 |
-
"model_name = \"llama3.2\"\n",
|
| 287 |
-
"\n",
|
| 288 |
-
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 289 |
-
"answer = response.choices[0].message.content\n",
|
| 290 |
-
"\n",
|
| 291 |
-
"display(Markdown(answer))\n",
|
| 292 |
-
"competitors.append(model_name)\n",
|
| 293 |
-
"answers.append(answer)"
|
| 294 |
-
]
|
| 295 |
-
},
|
| 296 |
-
{
|
| 297 |
-
"cell_type": "code",
|
| 298 |
-
"execution_count": null,
|
| 299 |
-
"metadata": {},
|
| 300 |
-
"outputs": [],
|
| 301 |
-
"source": [
|
| 302 |
-
"# So where are we?\n",
|
| 303 |
-
"\n",
|
| 304 |
-
"print(competitors)\n",
|
| 305 |
-
"print(answers)\n"
|
| 306 |
-
]
|
| 307 |
-
},
|
| 308 |
-
{
|
| 309 |
-
"cell_type": "code",
|
| 310 |
-
"execution_count": null,
|
| 311 |
-
"metadata": {},
|
| 312 |
-
"outputs": [],
|
| 313 |
-
"source": [
|
| 314 |
-
"# It's nice to know how to use \"zip\"\n",
|
| 315 |
-
"for competitor, answer in zip(competitors, answers):\n",
|
| 316 |
-
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 317 |
-
]
|
| 318 |
-
},
|
| 319 |
-
{
|
| 320 |
-
"cell_type": "code",
|
| 321 |
-
"execution_count": 20,
|
| 322 |
-
"metadata": {},
|
| 323 |
-
"outputs": [],
|
| 324 |
-
"source": [
|
| 325 |
-
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 326 |
-
"\n",
|
| 327 |
-
"together = \"\"\n",
|
| 328 |
-
"for index, answer in enumerate(answers):\n",
|
| 329 |
-
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 330 |
-
" together += answer + \"\\n\\n\""
|
| 331 |
-
]
|
| 332 |
-
},
|
| 333 |
-
{
|
| 334 |
-
"cell_type": "code",
|
| 335 |
-
"execution_count": null,
|
| 336 |
-
"metadata": {},
|
| 337 |
-
"outputs": [],
|
| 338 |
-
"source": [
|
| 339 |
-
"print(together)"
|
| 340 |
-
]
|
| 341 |
-
},
|
| 342 |
-
{
|
| 343 |
-
"cell_type": "code",
|
| 344 |
-
"execution_count": null,
|
| 345 |
-
"metadata": {},
|
| 346 |
-
"outputs": [],
|
| 347 |
-
"source": [
|
| 348 |
-
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 349 |
-
"Each model has been given this question:\n",
|
| 350 |
-
"\n",
|
| 351 |
-
"{question}\n",
|
| 352 |
-
"\n",
|
| 353 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 354 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 355 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 356 |
-
"Answer only the number for example\n",
|
| 357 |
-
"{{\"results\": [\"1\", \"2\", \"3\", ...]}}\n",
|
| 358 |
-
"\n",
|
| 359 |
-
"Here are the responses from each competitor:\n",
|
| 360 |
-
"\n",
|
| 361 |
-
"{together}\n",
|
| 362 |
-
"\n",
|
| 363 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 364 |
-
]
|
| 365 |
-
},
|
| 366 |
-
{
|
| 367 |
-
"cell_type": "code",
|
| 368 |
-
"execution_count": null,
|
| 369 |
-
"metadata": {},
|
| 370 |
-
"outputs": [],
|
| 371 |
-
"source": [
|
| 372 |
-
"print(judge)"
|
| 373 |
-
]
|
| 374 |
-
},
|
| 375 |
-
{
|
| 376 |
-
"cell_type": "code",
|
| 377 |
-
"execution_count": 29,
|
| 378 |
-
"metadata": {},
|
| 379 |
-
"outputs": [],
|
| 380 |
-
"source": [
|
| 381 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 382 |
-
]
|
| 383 |
-
},
|
| 384 |
-
{
|
| 385 |
-
"cell_type": "code",
|
| 386 |
-
"execution_count": null,
|
| 387 |
-
"metadata": {},
|
| 388 |
-
"outputs": [],
|
| 389 |
-
"source": [
|
| 390 |
-
"# Judgement time!\n",
|
| 391 |
-
"\n",
|
| 392 |
-
"openai = OpenAI()\n",
|
| 393 |
-
"response = openai.chat.completions.create(\n",
|
| 394 |
-
" model=\"o3-mini\",\n",
|
| 395 |
-
" messages=judge_messages,\n",
|
| 396 |
-
")\n",
|
| 397 |
-
"results = response.choices[0].message.content\n",
|
| 398 |
-
"print(results)\n"
|
| 399 |
-
]
|
| 400 |
-
},
|
| 401 |
-
{
|
| 402 |
-
"cell_type": "code",
|
| 403 |
-
"execution_count": null,
|
| 404 |
-
"metadata": {},
|
| 405 |
-
"outputs": [],
|
| 406 |
-
"source": [
|
| 407 |
-
"# OK let's turn this into results!\n",
|
| 408 |
-
"\n",
|
| 409 |
-
"results_dict = json.loads(results)\n",
|
| 410 |
-
"ranks = results_dict[\"results\"]\n",
|
| 411 |
-
"for index, result in enumerate(ranks):\n",
|
| 412 |
-
" competitor = competitors[int(result)-1]\n",
|
| 413 |
-
" print(f\"Rank {index+1}: {competitor}\")"
|
| 414 |
-
]
|
| 415 |
-
},
|
| 416 |
-
{
|
| 417 |
-
"cell_type": "markdown",
|
| 418 |
-
"metadata": {},
|
| 419 |
-
"source": [
|
| 420 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 421 |
-
" <tr>\n",
|
| 422 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 423 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 424 |
-
" </td>\n",
|
| 425 |
-
" <td>\n",
|
| 426 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 427 |
-
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 428 |
-
" </span>\n",
|
| 429 |
-
" </td>\n",
|
| 430 |
-
" </tr>\n",
|
| 431 |
-
"</table>"
|
| 432 |
-
]
|
| 433 |
-
},
|
| 434 |
-
{
|
| 435 |
-
"cell_type": "markdown",
|
| 436 |
-
"metadata": {},
|
| 437 |
-
"source": [
|
| 438 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 439 |
-
" <tr>\n",
|
| 440 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 441 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 442 |
-
" </td>\n",
|
| 443 |
-
" <td>\n",
|
| 444 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 445 |
-
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 446 |
-
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 447 |
-
" to business projects where accuracy is critical.\n",
|
| 448 |
-
" </span>\n",
|
| 449 |
-
" </td>\n",
|
| 450 |
-
" </tr>\n",
|
| 451 |
-
"</table>"
|
| 452 |
-
]
|
| 453 |
-
}
|
| 454 |
-
],
|
| 455 |
-
"metadata": {
|
| 456 |
-
"kernelspec": {
|
| 457 |
-
"display_name": ".venv",
|
| 458 |
-
"language": "python",
|
| 459 |
-
"name": "python3"
|
| 460 |
-
},
|
| 461 |
-
"language_info": {
|
| 462 |
-
"codemirror_mode": {
|
| 463 |
-
"name": "ipython",
|
| 464 |
-
"version": 3
|
| 465 |
-
},
|
| 466 |
-
"file_extension": ".py",
|
| 467 |
-
"mimetype": "text/x-python",
|
| 468 |
-
"name": "python",
|
| 469 |
-
"nbconvert_exporter": "python",
|
| 470 |
-
"pygments_lexer": "ipython3",
|
| 471 |
-
"version": "3.12.9"
|
| 472 |
-
}
|
| 473 |
-
},
|
| 474 |
-
"nbformat": 4,
|
| 475 |
-
"nbformat_minor": 2
|
| 476 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2-parallelization.ipynb
DELETED
|
@@ -1,440 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Changes I've made with this lab.\n",
|
| 10 |
-
"1) Modified the original question to instead generate a range of questions, 12 of them. These questions will be used to evaluate each LLM's reasoning, knowledge, creativity, and ability to handle nuanced scenarios.\n",
|
| 11 |
-
"2) I've changed this lab to run the queries in parallel. Thanks GPT for helping with the code to do that. :)\n",
|
| 12 |
-
"3) Instead of having one LLM rate all the responses, I have all of the LLM's rate each others work and then use a Borda Count to asign points to determine the winner."
|
| 13 |
-
]
|
| 14 |
-
},
|
| 15 |
-
{
|
| 16 |
-
"cell_type": "code",
|
| 17 |
-
"execution_count": null,
|
| 18 |
-
"metadata": {},
|
| 19 |
-
"outputs": [],
|
| 20 |
-
"source": [
|
| 21 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 22 |
-
"\n",
|
| 23 |
-
"import os\n",
|
| 24 |
-
"import json\n",
|
| 25 |
-
"from dotenv import load_dotenv\n",
|
| 26 |
-
"from openai import OpenAI\n",
|
| 27 |
-
"from anthropic import Anthropic\n",
|
| 28 |
-
"from IPython.display import Markdown, display"
|
| 29 |
-
]
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"cell_type": "code",
|
| 33 |
-
"execution_count": null,
|
| 34 |
-
"metadata": {},
|
| 35 |
-
"outputs": [],
|
| 36 |
-
"source": [
|
| 37 |
-
"# Always remember to do this!\n",
|
| 38 |
-
"load_dotenv(override=True)"
|
| 39 |
-
]
|
| 40 |
-
},
|
| 41 |
-
{
|
| 42 |
-
"cell_type": "code",
|
| 43 |
-
"execution_count": null,
|
| 44 |
-
"metadata": {},
|
| 45 |
-
"outputs": [],
|
| 46 |
-
"source": [
|
| 47 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 48 |
-
"\n",
|
| 49 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 50 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 51 |
-
"gemini_api_key = os.getenv('GEMINI_API_KEY')\n",
|
| 52 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 53 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 54 |
-
"\n",
|
| 55 |
-
"if openai_api_key:\n",
|
| 56 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 57 |
-
"else:\n",
|
| 58 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 59 |
-
" \n",
|
| 60 |
-
"if anthropic_api_key:\n",
|
| 61 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 62 |
-
"else:\n",
|
| 63 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 64 |
-
"\n",
|
| 65 |
-
"if gemini_api_key:\n",
|
| 66 |
-
" print(f\"Gemini API Key exists and begins {gemini_api_key[:2]}\")\n",
|
| 67 |
-
"else:\n",
|
| 68 |
-
" print(\"Gemini API Key not set (and this is optional)\")\n",
|
| 69 |
-
"\n",
|
| 70 |
-
"if deepseek_api_key:\n",
|
| 71 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 72 |
-
"else:\n",
|
| 73 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 74 |
-
"\n",
|
| 75 |
-
"if groq_api_key:\n",
|
| 76 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 77 |
-
"else:\n",
|
| 78 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 79 |
-
]
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"cell_type": "code",
|
| 83 |
-
"execution_count": null,
|
| 84 |
-
"metadata": {},
|
| 85 |
-
"outputs": [],
|
| 86 |
-
"source": [
|
| 87 |
-
"request = \"\"\"You are being evaluated for your reasoning, knowledge, creativity, and ability to handle nuanced scenarios. \n",
|
| 88 |
-
"Generate 12 questions that cover the following categories:\n",
|
| 89 |
-
"- Logical reasoning and problem solving\n",
|
| 90 |
-
"- Creative writing and storytelling\n",
|
| 91 |
-
"- Factual accuracy and knowledge recall\n",
|
| 92 |
-
"- Following instructions with strict constraints\n",
|
| 93 |
-
"- Multi-step planning and organization\n",
|
| 94 |
-
"- Ethical dilemmas and debatable issues\n",
|
| 95 |
-
"- Philosophical or abstract reasoning\n",
|
| 96 |
-
"- Summarization and explanation at different levels\n",
|
| 97 |
-
"- Translation and multilingual ability\n",
|
| 98 |
-
"- Roleplay or adaptive communication style\n",
|
| 99 |
-
"\n",
|
| 100 |
-
"Number each question from 1 to 12. \n",
|
| 101 |
-
"The result should be a balanced benchmark question set that fully tests an LLM’s capabilities.\n",
|
| 102 |
-
"\n",
|
| 103 |
-
"Important: Output only clean plain text. \n",
|
| 104 |
-
"Do not use any markup, formatting symbols, quotation marks, brackets, lists, or special characters \n",
|
| 105 |
-
"that could cause misinterpretation. Only provide plain text questions, one per line, numbered 1 to 20.\n",
|
| 106 |
-
"\"\"\"\n",
|
| 107 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 108 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 109 |
-
]
|
| 110 |
-
},
|
| 111 |
-
{
|
| 112 |
-
"cell_type": "code",
|
| 113 |
-
"execution_count": null,
|
| 114 |
-
"metadata": {},
|
| 115 |
-
"outputs": [],
|
| 116 |
-
"source": [
|
| 117 |
-
"# Generate the questions.\n",
|
| 118 |
-
"openai = OpenAI()\n",
|
| 119 |
-
"response = openai.chat.completions.create(\n",
|
| 120 |
-
" model=\"gpt-4o-mini\",\n",
|
| 121 |
-
" messages=messages,\n",
|
| 122 |
-
")\n",
|
| 123 |
-
"question = response.choices[0].message.content\n",
|
| 124 |
-
"\n",
|
| 125 |
-
"display(Markdown(question))"
|
| 126 |
-
]
|
| 127 |
-
},
|
| 128 |
-
{
|
| 129 |
-
"cell_type": "code",
|
| 130 |
-
"execution_count": null,
|
| 131 |
-
"metadata": {},
|
| 132 |
-
"outputs": [],
|
| 133 |
-
"source": [
|
| 134 |
-
"competitors = []\n",
|
| 135 |
-
"answers = []\n",
|
| 136 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 137 |
-
]
|
| 138 |
-
},
|
| 139 |
-
{
|
| 140 |
-
"cell_type": "code",
|
| 141 |
-
"execution_count": null,
|
| 142 |
-
"metadata": {},
|
| 143 |
-
"outputs": [],
|
| 144 |
-
"source": [
|
| 145 |
-
"# Ask the LLM's in Parallel\n",
|
| 146 |
-
"\n",
|
| 147 |
-
"import asyncio\n",
|
| 148 |
-
"\n",
|
| 149 |
-
"clients = {\n",
|
| 150 |
-
" \"openai\": OpenAI(),\n",
|
| 151 |
-
" \"claude\": Anthropic(),\n",
|
| 152 |
-
" \"gemini\": OpenAI(api_key=gemini_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"),\n",
|
| 153 |
-
" \"deepseek\": OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\"),\n",
|
| 154 |
-
" \"groq\": OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\"),\n",
|
| 155 |
-
"}\n",
|
| 156 |
-
"\n",
|
| 157 |
-
"# Get the answers from the LLM\n",
|
| 158 |
-
"async def call_llm(model_name, messages):\n",
|
| 159 |
-
" try:\n",
|
| 160 |
-
" if \"claude\" in model_name:\n",
|
| 161 |
-
" response = await asyncio.to_thread(\n",
|
| 162 |
-
" clients[\"claude\"].messages.create,\n",
|
| 163 |
-
" model=model_name,\n",
|
| 164 |
-
" messages=messages,\n",
|
| 165 |
-
" max_tokens=3000,\n",
|
| 166 |
-
" )\n",
|
| 167 |
-
" answer = \"\".join([c.text for c in response.content if c.type == \"text\"])\n",
|
| 168 |
-
" \n",
|
| 169 |
-
" elif \"gpt-4o-mini\" in model_name:\n",
|
| 170 |
-
" response = await asyncio.to_thread(\n",
|
| 171 |
-
" clients[\"openai\"].chat.completions.create,\n",
|
| 172 |
-
" model=model_name,\n",
|
| 173 |
-
" messages=messages,\n",
|
| 174 |
-
" )\n",
|
| 175 |
-
" answer = response.choices[0].message.content\n",
|
| 176 |
-
"\n",
|
| 177 |
-
" elif \"gemini\" in model_name:\n",
|
| 178 |
-
" response = await asyncio.to_thread(\n",
|
| 179 |
-
" clients[\"gemini\"].chat.completions.create,\n",
|
| 180 |
-
" model=model_name,\n",
|
| 181 |
-
" messages=messages,\n",
|
| 182 |
-
" )\n",
|
| 183 |
-
" answer = response.choices[0].message.content\n",
|
| 184 |
-
"\n",
|
| 185 |
-
" elif \"deepseek\" in model_name:\n",
|
| 186 |
-
" response = await asyncio.to_thread(\n",
|
| 187 |
-
" clients[\"deepseek\"].chat.completions.create,\n",
|
| 188 |
-
" model=model_name,\n",
|
| 189 |
-
" messages=messages,\n",
|
| 190 |
-
" )\n",
|
| 191 |
-
" answer = response.choices[0].message.content\n",
|
| 192 |
-
"\n",
|
| 193 |
-
" elif \"llama\" in model_name:\n",
|
| 194 |
-
" response = await asyncio.to_thread(\n",
|
| 195 |
-
" clients[\"groq\"].chat.completions.create,\n",
|
| 196 |
-
" model=model_name,\n",
|
| 197 |
-
" messages=messages,\n",
|
| 198 |
-
" )\n",
|
| 199 |
-
" answer = response.choices[0].message.content\n",
|
| 200 |
-
"\n",
|
| 201 |
-
" return model_name, answer \n",
|
| 202 |
-
"\n",
|
| 203 |
-
" except Exception as e:\n",
|
| 204 |
-
" print (f\"❌ Error: {str(e)}\")\n",
|
| 205 |
-
" return model_name, \"I was not able to generate answers for any of the questions.\"\n",
|
| 206 |
-
"\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"# send out the calls to the LLM to ask teh questions.\n",
|
| 209 |
-
"async def ask_questions_in_parallel(messages):\n",
|
| 210 |
-
" competitor_models = [\n",
|
| 211 |
-
" \"gpt-4o-mini\",\n",
|
| 212 |
-
" \"claude-3-7-sonnet-latest\",\n",
|
| 213 |
-
" \"gemini-2.0-flash\",\n",
|
| 214 |
-
" \"deepseek-chat\",\n",
|
| 215 |
-
" \"llama-3.3-70b-versatile\"\n",
|
| 216 |
-
" ]\n",
|
| 217 |
-
"\n",
|
| 218 |
-
" # create tasks to call the LLM's in parallel\n",
|
| 219 |
-
" tasks = [call_llm(model, messages) for model in competitor_models]\n",
|
| 220 |
-
"\n",
|
| 221 |
-
" answers = []\n",
|
| 222 |
-
" competitors = []\n",
|
| 223 |
-
"\n",
|
| 224 |
-
" # When we have an answer, we can process it. No waiting.\n",
|
| 225 |
-
" for task in asyncio.as_completed(tasks):\n",
|
| 226 |
-
" model_name, answer = await task\n",
|
| 227 |
-
" competitors.append(model_name)\n",
|
| 228 |
-
" answers.append(answer)\n",
|
| 229 |
-
" print(f\"\\n✅ Got response from {model_name}\")\n",
|
| 230 |
-
"\n",
|
| 231 |
-
" return competitors, answers"
|
| 232 |
-
]
|
| 233 |
-
},
|
| 234 |
-
{
|
| 235 |
-
"cell_type": "code",
|
| 236 |
-
"execution_count": null,
|
| 237 |
-
"metadata": {},
|
| 238 |
-
"outputs": [],
|
| 239 |
-
"source": [
|
| 240 |
-
"# Fire off the ask to all the LLM's at once. Parallelization...\n",
|
| 241 |
-
"competitors, answers = await ask_questions_in_parallel(messages)"
|
| 242 |
-
]
|
| 243 |
-
},
|
| 244 |
-
{
|
| 245 |
-
"cell_type": "code",
|
| 246 |
-
"execution_count": null,
|
| 247 |
-
"metadata": {},
|
| 248 |
-
"outputs": [],
|
| 249 |
-
"source": [
|
| 250 |
-
"#Look at the results\n",
|
| 251 |
-
"print (len(answers))\n",
|
| 252 |
-
"print (len(competitors))\n",
|
| 253 |
-
"print (competitors)"
|
| 254 |
-
]
|
| 255 |
-
},
|
| 256 |
-
{
|
| 257 |
-
"cell_type": "code",
|
| 258 |
-
"execution_count": null,
|
| 259 |
-
"metadata": {},
|
| 260 |
-
"outputs": [],
|
| 261 |
-
"source": [
|
| 262 |
-
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 263 |
-
"\n",
|
| 264 |
-
"together = \"\"\n",
|
| 265 |
-
"for index, answer in enumerate(answers):\n",
|
| 266 |
-
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 267 |
-
" together += answer + \"\\n\\n\""
|
| 268 |
-
]
|
| 269 |
-
},
|
| 270 |
-
{
|
| 271 |
-
"cell_type": "code",
|
| 272 |
-
"execution_count": null,
|
| 273 |
-
"metadata": {},
|
| 274 |
-
"outputs": [],
|
| 275 |
-
"source": [
|
| 276 |
-
"print(together)"
|
| 277 |
-
]
|
| 278 |
-
},
|
| 279 |
-
{
|
| 280 |
-
"cell_type": "code",
|
| 281 |
-
"execution_count": null,
|
| 282 |
-
"metadata": {},
|
| 283 |
-
"outputs": [],
|
| 284 |
-
"source": [
|
| 285 |
-
"\n",
|
| 286 |
-
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 287 |
-
"Each model has been given the folowing questions:\n",
|
| 288 |
-
"\n",
|
| 289 |
-
"{question}\n",
|
| 290 |
-
"\n",
|
| 291 |
-
"Your task is to evaluate the overall strength of the arguments presented by each competitor. \n",
|
| 292 |
-
"Consider the following factors:\n",
|
| 293 |
-
"- Clarity: how clearly the ideas are communicated\n",
|
| 294 |
-
"- Relevance: how directly the response addresses the question\n",
|
| 295 |
-
"- Depth: the level of reasoning, insight, or supporting evidence provided\n",
|
| 296 |
-
"- Persuasiveness: how compelling or convincing the response is overall\n",
|
| 297 |
-
"Respond with JSON, and only JSON.\n",
|
| 298 |
-
"The output must be a single JSON array of competitor names, ordered from best to worst.\n",
|
| 299 |
-
"Do not include any keys, labels, or extra text.\n",
|
| 300 |
-
"\n",
|
| 301 |
-
"Example format:\n",
|
| 302 |
-
"[\"1\", \"3\", \"5\", \"2\", \"4\"]\n",
|
| 303 |
-
"\n",
|
| 304 |
-
"Here are the responses from each competitor:\n",
|
| 305 |
-
"\n",
|
| 306 |
-
"{together}\n",
|
| 307 |
-
"\n",
|
| 308 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\n",
|
| 309 |
-
"Do not deviate from the json format as described above. Do not include the term ranking in the final json\"\"\"\n"
|
| 310 |
-
]
|
| 311 |
-
},
|
| 312 |
-
{
|
| 313 |
-
"cell_type": "code",
|
| 314 |
-
"execution_count": null,
|
| 315 |
-
"metadata": {},
|
| 316 |
-
"outputs": [],
|
| 317 |
-
"source": [
|
| 318 |
-
"print(judge)"
|
| 319 |
-
]
|
| 320 |
-
},
|
| 321 |
-
{
|
| 322 |
-
"cell_type": "code",
|
| 323 |
-
"execution_count": null,
|
| 324 |
-
"metadata": {},
|
| 325 |
-
"outputs": [],
|
| 326 |
-
"source": [
|
| 327 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 328 |
-
]
|
| 329 |
-
},
|
| 330 |
-
{
|
| 331 |
-
"cell_type": "code",
|
| 332 |
-
"execution_count": null,
|
| 333 |
-
"metadata": {},
|
| 334 |
-
"outputs": [],
|
| 335 |
-
"source": [
|
| 336 |
-
"# Have each LLM rate all of the results.\n",
|
| 337 |
-
"results = dict()\n",
|
| 338 |
-
"LLM_result = ''\n",
|
| 339 |
-
"\n",
|
| 340 |
-
"competitors, answers = await ask_questions_in_parallel(judge_messages)\n",
|
| 341 |
-
"\n",
|
| 342 |
-
"results = dict()\n",
|
| 343 |
-
"for index, each_competitor in enumerate(competitors):\n",
|
| 344 |
-
" results[each_competitor] = answers[index].strip()"
|
| 345 |
-
]
|
| 346 |
-
},
|
| 347 |
-
{
|
| 348 |
-
"cell_type": "code",
|
| 349 |
-
"execution_count": null,
|
| 350 |
-
"metadata": {},
|
| 351 |
-
"outputs": [],
|
| 352 |
-
"source": [
|
| 353 |
-
"# See the results\n",
|
| 354 |
-
"print (len(answers))\n",
|
| 355 |
-
"results = dict()\n",
|
| 356 |
-
"for index, each_competitor in enumerate(competitors):\n",
|
| 357 |
-
" results[each_competitor] = answers[index]\n",
|
| 358 |
-
"\n",
|
| 359 |
-
"print (results)"
|
| 360 |
-
]
|
| 361 |
-
},
|
| 362 |
-
{
|
| 363 |
-
"cell_type": "code",
|
| 364 |
-
"execution_count": null,
|
| 365 |
-
"metadata": {},
|
| 366 |
-
"outputs": [],
|
| 367 |
-
"source": [
|
| 368 |
-
"# Lets convert these rankings into scores. Borda Count - (1st gets 4, 2nd gets 3, etc.).\n",
|
| 369 |
-
"number_of_competitors = len(competitors)\n",
|
| 370 |
-
"scores = {}\n",
|
| 371 |
-
"\n",
|
| 372 |
-
"for rankings in results.values():\n",
|
| 373 |
-
" print(rankings)"
|
| 374 |
-
]
|
| 375 |
-
},
|
| 376 |
-
{
|
| 377 |
-
"cell_type": "code",
|
| 378 |
-
"execution_count": null,
|
| 379 |
-
"metadata": {},
|
| 380 |
-
"outputs": [],
|
| 381 |
-
"source": [
|
| 382 |
-
"# # Borda count points (1st gets n-1, 2nd gets n-2, etc.)\n",
|
| 383 |
-
"num_competitors = len(competitors)\n",
|
| 384 |
-
"\n",
|
| 385 |
-
"competitor_dict = dict()\n",
|
| 386 |
-
"for index, each_competitor in enumerate(competitors):\n",
|
| 387 |
-
" competitor_dict[each_competitor] = index + 1\n",
|
| 388 |
-
"\n",
|
| 389 |
-
"borda_scores_dict = dict()\n",
|
| 390 |
-
"for each_competitor in competitors:\n",
|
| 391 |
-
" if each_competitor not in borda_scores_dict:\n",
|
| 392 |
-
" borda_scores_dict[each_competitor] = 0\n",
|
| 393 |
-
"\n",
|
| 394 |
-
"for voter_llm, ranking_str in results.items():\n",
|
| 395 |
-
" ranking_indices = json.loads(ranking_str)\n",
|
| 396 |
-
" ranking_indices = [int(x) for x in ranking_indices]\n",
|
| 397 |
-
"\n",
|
| 398 |
-
" # For each position in the ranking, award points\n",
|
| 399 |
-
" for position, competitor_index in enumerate(ranking_indices):\n",
|
| 400 |
-
" competitor_name = competitors[competitor_index - 1]\n",
|
| 401 |
-
"\n",
|
| 402 |
-
" # Borda count points (1st gets n-1, 2nd gets n-2, etc.)\n",
|
| 403 |
-
" points = num_competitors - 1 - position \n",
|
| 404 |
-
" borda_scores_dict[competitor_name] += points\n",
|
| 405 |
-
" \n",
|
| 406 |
-
"sorted_results = sorted(borda_scores_dict.items(), key=lambda x: x[1], reverse=True)\n",
|
| 407 |
-
"\n",
|
| 408 |
-
"print(f\"{'Rank':<4} {'LLM':<30} {'Points':<3}\")\n",
|
| 409 |
-
"print(\"-\" * 50)\n",
|
| 410 |
-
"\n",
|
| 411 |
-
"for rank, (llm, points) in enumerate(sorted_results, 1):\n",
|
| 412 |
-
" print(f\"{rank:<4} {llm:<30} {points:<8}\")\n",
|
| 413 |
-
"\n",
|
| 414 |
-
"print(\"\\nQuestions asked:\")\n",
|
| 415 |
-
"print(question)"
|
| 416 |
-
]
|
| 417 |
-
}
|
| 418 |
-
],
|
| 419 |
-
"metadata": {
|
| 420 |
-
"kernelspec": {
|
| 421 |
-
"display_name": ".venv",
|
| 422 |
-
"language": "python",
|
| 423 |
-
"name": "python3"
|
| 424 |
-
},
|
| 425 |
-
"language_info": {
|
| 426 |
-
"codemirror_mode": {
|
| 427 |
-
"name": "ipython",
|
| 428 |
-
"version": 3
|
| 429 |
-
},
|
| 430 |
-
"file_extension": ".py",
|
| 431 |
-
"mimetype": "text/x-python",
|
| 432 |
-
"name": "python",
|
| 433 |
-
"nbconvert_exporter": "python",
|
| 434 |
-
"pygments_lexer": "ipython3",
|
| 435 |
-
"version": "3.12.2"
|
| 436 |
-
}
|
| 437 |
-
},
|
| 438 |
-
"nbformat": 4,
|
| 439 |
-
"nbformat_minor": 2
|
| 440 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_Execution_measurement.py
DELETED
|
@@ -1,401 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import json
|
| 3 |
-
import asyncio
|
| 4 |
-
import concurrent.futures
|
| 5 |
-
import time
|
| 6 |
-
from typing import Dict, List, Tuple, Optional
|
| 7 |
-
from dotenv import load_dotenv
|
| 8 |
-
from openai import OpenAI
|
| 9 |
-
|
| 10 |
-
# Load variables from a local .env file; override=True lets them replace
# values that are already present in the process environment.
load_dotenv(override=True)

# Default client used by llm_caller(); it picks up its settings from the environment.
openai = OpenAI()

# Shared result stores: competitors[i] is the provider that produced answers[i].
competitors = []
answers = []
together = ""

# Provider API keys; each is None when the variable is not defined.
openai_api_key = os.environ.get('OPENAI_API_KEY')
anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')
google_api_key = os.environ.get('GOOGLE_API_KEY')
deepseek_api_key = os.environ.get('DEEPSEEK_API_KEY')
groq_api_key = os.environ.get('GROQ_API_KEY')

# One entry per competing provider. A base_url of None means the client is
# created without an explicit endpoint.
models_dict = {
    'openai': {
        'model': 'gpt-4o-mini',
        'api_key': openai_api_key,
        'base_url': None,
    },
    'gemini': {
        'model': 'gemini-2.0-flash',
        'api_key': google_api_key,
        'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai/',
    },
    'groq': {
        'model': 'llama-3.3-70b-versatile',
        'api_key': groq_api_key,
        'base_url': 'https://api.groq.com/openai/v1',
    },
    'ollama': {
        'model': 'llama3.2',
        'api_key': 'ollama',
        'base_url': 'http://localhost:11434/v1',
    },
}
|
| 44 |
-
|
| 45 |
-
def key_checker():
    """Report on stdout which provider API keys are configured.

    For every key that is set, only a short prefix is printed. Nothing is
    returned.
    """
    optional_note = " (and this is optional)"
    # (label, key value, number of leading characters to show, "not set" suffix)
    key_report = (
        ("OpenAI", openai_api_key, 8, ""),
        ("Anthropic", anthropic_api_key, 7, optional_note),
        ("Google", google_api_key, 2, optional_note),
        ("DeepSeek", deepseek_api_key, 3, optional_note),
        ("Groq", groq_api_key, 4, optional_note),
    )

    for label, key, shown, suffix in key_report:
        if not key:
            print(f"{label} API Key not set{suffix}")
            continue
        print(f"{label} API Key exists and begins {key[:shown]}")
|
| 71 |
-
|
| 72 |
-
def question_prompt_generator():
    """Build the chat messages that ask an LLM to invent the competition question.

    Returns:
        A one-element list containing a single user message.
    """
    prompt_parts = (
        "Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. ",
        "Answer only with the question, no explanation.",
    )
    return [{"role": "user", "content": "".join(prompt_parts)}]
|
| 77 |
-
|
| 78 |
-
def generate_competition_question():
    """
    Ask the default model for a challenging competition question.

    Returns:
        A (question, question_messages) pair: the question text and a
        one-element user-message list wrapping it, ready to send to each
        competing model.
    """
    print("Generating competition question...")
    question = llm_caller(question_prompt_generator())
    print(f"Question: \n{question}")
    return question, [{"role": "user", "content": question}]
|
| 89 |
-
|
| 90 |
-
def llm_caller(messages):
    """Send *messages* to gpt-4o-mini via the module-level client and return the reply text."""
    completion = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content
|
| 96 |
-
|
| 97 |
-
def llm_caller_with_model(messages, model_name, api_key, base_url):
    """Call *model_name* through an OpenAI-style client and return the reply text.

    Args:
        messages: Chat messages to send.
        model_name: Model identifier passed to the completion request.
        api_key: Key used to construct the client.
        base_url: Endpoint for the client; when falsy the client is built
            without an explicit endpoint.

    Returns:
        The content of the first choice, or None when the client could not
        be constructed (the error is printed).

    Errors raised by the completion request itself are not caught here.
    """
    client_options = {"api_key": api_key}
    if base_url:
        client_options["base_url"] = base_url

    try:
        client = OpenAI(**client_options)
    except Exception as e:
        print(f"Error creating OpenAI client: {e}")
        return None

    reply = client.chat.completions.create(model=model_name, messages=messages)
    return reply.choices[0].message.content
|
| 115 |
-
|
| 116 |
-
def get_single_model_answer(provider: str, details: Dict, question_messages: List[Dict]) -> Tuple[str, Optional[str]]:
    """
    Query one provider and report the outcome as a (provider, answer) pair.

    Any exception raised by the call is printed and turned into
    (provider, None), so this function is safe to submit to an executor
    without further error handling.
    """
    print(f"Calling model {provider}...")
    try:
        reply = llm_caller_with_model(
            question_messages,
            details['model'],
            details['api_key'],
            details['base_url'],
        )
        print(f"Model {provider} was successfully called!")
        outcome = (provider, reply)
    except Exception as e:
        print(f"Model {provider} failed to call: {e}")
        outcome = (provider, None)
    return outcome
|
| 129 |
-
|
| 130 |
-
def get_models_answers(question_messages):
    """
    Sequential version - kept for backward compatibility.

    Calls every provider in models_dict one after another and appends each
    successful result to the module-level competitors / answers lists.
    The lists are NOT cleared first; callers that need a fresh run must
    clear them beforehand.

    A provider is skipped when the call raises, and also when no answer
    comes back (llm_caller_with_model returns None if its client cannot be
    created). The parallel and async variants filter None the same way;
    without this check a None answer would later break together_maker.
    """
    for provider, details in models_dict.items():
        print(f"Calling model {provider}...")
        try:
            answer = llm_caller_with_model(question_messages, details['model'], details['api_key'], details['base_url'])
        except Exception as e:
            print(f"Model {provider} failed to call: {e}")
            continue

        if answer is None:
            # The helper already printed why the client could not be built.
            print(f"Model {provider} returned no answer; skipping")
            continue

        print(f"Model {provider} was successful called!")
        competitors.append(provider)
        answers.append(answer)
|
| 144 |
-
|
| 145 |
-
def get_models_answers_parallel(question_messages, max_workers: int = 4):
    """
    Thread-pool version: submits one call per provider and gathers the answers.

    The module-level competitors / answers lists are emptied first and then
    filled in completion order, so their order can differ between runs.
    Providers whose answer is None are left out.
    """
    print("Starting parallel execution of all models...")

    # Start from a clean slate
    competitors.clear()
    answers.clear()

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Map each submitted job back to the provider it belongs to
        pending = {}
        for provider, details in models_dict.items():
            job = pool.submit(get_single_model_answer, provider, details, question_messages)
            pending[job] = provider

        # Harvest results in the order the jobs finish
        for finished in concurrent.futures.as_completed(pending):
            name, reply = finished.result()
            if reply is None:
                continue
            competitors.append(name)
            answers.append(reply)

    print(f"Parallel execution completed. {len(competitors)} models responded successfully.")
|
| 171 |
-
|
| 172 |
-
async def get_single_model_answer_async(provider: str, details: Dict, question_messages: List[Dict]) -> Tuple[str, Optional[str]]:
    """
    Async wrapper around a single blocking model call.

    The synchronous llm_caller_with_model is run in the event loop's default
    executor so that several providers can be awaited concurrently.

    Returns:
        (provider, answer), or (provider, None) if the call raised; the
        error is printed rather than propagated.
    """
    print(f"Calling model {provider} (async)...")
    try:
        # We are inside a coroutine, so a loop is guaranteed to be running;
        # get_running_loop() is the documented way to obtain it here.
        loop = asyncio.get_running_loop()
        # None selects the loop's default executor.
        answer = await loop.run_in_executor(
            None,
            llm_caller_with_model,
            question_messages,
            details['model'],
            details['api_key'],
            details['base_url'],
        )
        print(f"Model {provider} was successfully called!")
        return provider, answer
    except Exception as e:
        print(f"Model {provider} failed to call: {e}")
        return provider, None
|
| 193 |
-
|
| 194 |
-
async def get_models_answers_async(question_messages):
    """
    asyncio version: awaits one coroutine per provider concurrently.

    The module-level competitors / answers lists are emptied first.
    Results are processed in models_dict order; entries that came back as
    exceptions are printed and skipped, and None answers are left out.
    """
    print("Starting async execution of all models...")

    # Start from a clean slate
    competitors.clear()
    answers.clear()

    # One coroutine per configured provider
    pending = []
    for provider, details in models_dict.items():
        pending.append(get_single_model_answer_async(provider, details, question_messages))

    # return_exceptions=True hands failures back as values instead of raising
    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    for outcome in outcomes:
        if isinstance(outcome, Exception):
            print(f"Task failed with exception: {outcome}")
        else:
            name, reply = outcome
            if reply is not None:
                competitors.append(name)
                answers.append(reply)

    print(f"Async execution completed. {len(competitors)} models responded successfully.")
|
| 224 |
-
|
| 225 |
-
def together_maker(answers):
    """Concatenate all answers into one text, each under a numbered heading.

    Headings are numbered from 1 in the order of *answers*; an empty input
    yields an empty string.
    """
    return "".join(
        f"# Response from competitor {number}\n\n" + text + "\n\n"
        for number, text in enumerate(answers, start=1)
    )
|
| 231 |
-
|
| 232 |
-
def judge_prompt_generator(competitors, question, together):
    """Assemble the prompt that asks the judge model to rank the answers.

    Args:
        competitors: Sequence of competitors; only its length is used.
        question: The question every competitor was asked.
        together: All answers combined into one text.

    Returns:
        The complete judge prompt as a single string.
    """
    prompt_lines = (
        f"You are judging a competition between {len(competitors)} competitors.",
        "Each model has been given this question:",
        "",
        f"{question}",
        "",
        "Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.",
        "Respond with JSON, and only JSON, with the following format:",
        '{"results": ["best competitor number", "second best competitor number", "third best competitor number", ...]}',
        "",
        "Here are the responses from each competitor:",
        "",
        f"{together}",
        "",
        "Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.",
    )
    return "\n".join(prompt_lines)
|
| 248 |
-
|
| 249 |
-
def judge_caller(judge_prompt, competitors):
    """Ask the judge model (o3-mini) to rank the competitors and print the ranking.

    Args:
        judge_prompt: Full prompt text for the judge.
        competitors: Provider names; each rank entry from the judge is a
            1-based position in this list.

    Returns:
        The list stored under "results" in the judge's JSON reply.

    Raises:
        ValueError: If the judge produced no reply, or a reply that is not
            valid JSON (json.JSONDecodeError is a ValueError subclass).
        KeyError: If the JSON has no "results" entry.
    """
    print("Calling judge...")
    judge_messages = [{"role": "user", "content": judge_prompt}]
    results = llm_caller_with_model(judge_messages, "o3-mini", openai_api_key, None)
    if results is None:
        # llm_caller_with_model returns None when it cannot build a client.
        raise ValueError("Judge returned no response")

    # Despite the prompt, a model may wrap its JSON in a markdown code fence;
    # unwrap it so json.loads sees only the payload.
    payload = results.strip()
    if payload.startswith("```"):
        payload = payload.strip("`").strip()
        if payload[:4].lower() == "json":
            payload = payload[4:].strip()

    results_dict = json.loads(payload)
    ranks = results_dict["results"]

    for index, result in enumerate(ranks):
        try:
            position = int(result)
        except (TypeError, ValueError):
            position = 0
        if not 1 <= position <= len(competitors):
            # Indexing with position-1 would either fail or, for 0 and
            # negatives, silently name the wrong competitor.
            print(f"Rank {index+1}: unknown competitor {result!r}")
            continue
        print(f"Rank {index+1}: {competitors[position - 1]}")
    return ranks
|
| 259 |
-
|
| 260 |
-
def compare_execution_methods(question_messages, runs_per_method=1):
    """
    Time the sequential, parallel and async collectors against each other.

    Each method is run runs_per_method times with the shared result lists
    cleared before every run. Per-method timings and a summary with the
    speedup relative to the sequential method are printed.

    Returns:
        A dict keyed by method name; each value holds 'times' (list of run
        durations in seconds), 'avg_time' and 'successful_models' (the
        number of competitors recorded after that method's last run).
    """
    # Method name -> zero-argument callable that performs one collection run
    runners = {
        'sequential': lambda: get_models_answers(question_messages),
        'parallel': lambda: get_models_answers_parallel(question_messages, max_workers=4),
        'async': lambda: asyncio.run(get_models_answers_async(question_messages)),
    }
    results = {}
    banner = '=' * 50

    for method, run_once in runners.items():
        print(f"\n{banner}")
        print(f"Testing {method} execution method")
        print(banner)

        method_times = []
        for run_number in range(1, runs_per_method + 1):
            print(f"\nRun {run_number}/{runs_per_method}")

            # Every run starts with empty result lists
            competitors.clear()
            answers.clear()

            started = time.time()
            run_once()
            elapsed = time.time() - started

            method_times.append(elapsed)
            print(f"Run {run_number} completed in {elapsed:.2f} seconds")

        avg_time = sum(method_times) / len(method_times)
        results[method] = {
            'times': method_times,
            'avg_time': avg_time,
            'successful_models': len(competitors),
        }

        print(f"\n{method.upper()} Results:")
        print(f" Average time: {avg_time:.2f} seconds")
        print(f" Successful models: {len(competitors)}")
        print(f" All times: {[f'{t:.2f}s' for t in method_times]}")

    # Side-by-side summary
    wide_banner = '=' * 60
    print(f"\n{wide_banner}")
    print("PERFORMANCE COMPARISON SUMMARY")
    print(wide_banner)

    for method, data in results.items():
        print(f"{method.upper():>12}: {data['avg_time']:>6.2f}s avg, {data['successful_models']} models")

    # Speedup of every other method relative to the sequential baseline
    if 'sequential' in results:
        baseline = results['sequential']['avg_time']
        print("\nSpeedup vs Sequential:")
        for method, data in results.items():
            if method == 'sequential':
                continue
            speedup = baseline / data['avg_time']
            print(f" {method.upper()}: {speedup:.2f}x faster")

    return results
|
| 324 |
-
|
| 325 |
-
def run_llm_competition(question_messages, execution_method, question):
    """
    Run the LLM competition with the specified execution method.

    Args:
        question_messages: Chat messages carrying the competition question.
        execution_method: One of 'sequential', 'parallel' or 'async'.
        question: The question text, embedded in the judge prompt.

    Returns:
        Seconds spent collecting the answers (judging time is not included).

    Raises:
        ValueError: If execution_method is not one of the known names.
    """
    print(f"\nUsing {execution_method} execution method...")
    start_time = time.time()

    if execution_method == 'sequential':
        # The sequential collector appends without clearing (the parallel
        # and async collectors clear for themselves), so reset here to keep
        # results from an earlier run out of this one.
        competitors.clear()
        answers.clear()
        get_models_answers(question_messages)
    elif execution_method == 'parallel':
        get_models_answers_parallel(question_messages, max_workers=4)
    elif execution_method == 'async':
        asyncio.run(get_models_answers_async(question_messages))
    else:
        raise ValueError(f"Unknown execution method: {execution_method}")

    execution_time = time.time() - start_time
    print(f"Execution completed in {execution_time:.2f} seconds")

    if not competitors:
        # With no answers there is nothing to rank; skip the judge call.
        print("No models responded successfully; skipping judging.")
        return execution_time

    together = together_maker(answers)
    judge_prompt = judge_prompt_generator(competitors, question, together)
    judge_caller(judge_prompt, competitors)

    return execution_time
|
| 349 |
-
|
| 350 |
-
# Interactive execution method selection
|
| 351 |
-
def get_execution_method():
    """
    Prompt the user to select an execution method.

    The menu is printed once; the prompt repeats until a valid choice is
    entered.

    Returns:
        'sequential', 'parallel', 'async' or 'compare'.

    Raises:
        SystemExit: With code 0 when the user presses Ctrl-C or input ends.
    """
    choices = {
        '1': 'sequential',
        '2': 'parallel',
        '3': 'async',
        '4': 'compare',
    }

    print("\n" + "="*60)
    print("EXECUTION METHOD SELECTION")
    print("="*60)
    print("Choose how to execute the LLM calls:")
    print("1. Sequential - Call models one after another (original method)")
    print("2. Parallel - Call all models simultaneously (recommended)")
    print("3. Async - Use async/await for maximum performance")
    print("4. Compare - Run all methods and compare performance")
    print("="*60)

    while True:
        try:
            choice = input("Enter your choice (1-4): ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nExiting...")
            # Raise SystemExit directly: the bare `exit` name is injected by
            # the site module for interactive use and is not guaranteed to
            # exist in every interpreter configuration.
            raise SystemExit(0) from None

        if choice in choices:
            return choices[choice]
        print("Invalid choice. Please enter 1, 2, 3, or 4.")
|
| 386 |
-
|
| 387 |
-
def main():
    """Entry point: report API keys, pick an execution method, run the competition.

    The question is generated once; depending on the user's choice it is
    either used for a timing comparison of all methods or for a single
    judged competition run.
    """
    key_checker()

    # Get user's execution method choice
    execution_method = get_execution_method()
    # Generate the competition question and get the question messages
    question, question_messages = generate_competition_question()

    if execution_method == 'compare':
        print("\nRunning performance comparison...")
        compare_execution_methods(question_messages, runs_per_method=1)
    else:
        run_llm_competition(question_messages, execution_method, question)


# Only run when executed as a script, so importing this module does not
# trigger interactive prompts and API calls.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_ReAct_Pattern.ipynb
DELETED
|
@@ -1,289 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
-
" <tr>\n",
|
| 18 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
-
" </td>\n",
|
| 21 |
-
" <td>\n",
|
| 22 |
-
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
-
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "markdown",
|
| 32 |
-
"metadata": {},
|
| 33 |
-
"source": [
|
| 34 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 35 |
-
" <tr>\n",
|
| 36 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 37 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 38 |
-
" </td>\n",
|
| 39 |
-
" <td>\n",
|
| 40 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 41 |
-
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 42 |
-
" </span>\n",
|
| 43 |
-
" </td>\n",
|
| 44 |
-
" </tr>\n",
|
| 45 |
-
"</table>"
|
| 46 |
-
]
|
| 47 |
-
},
|
| 48 |
-
{
|
| 49 |
-
"cell_type": "markdown",
|
| 50 |
-
"metadata": {},
|
| 51 |
-
"source": [
|
| 52 |
-
"# ReAct Pattern"
|
| 53 |
-
]
|
| 54 |
-
},
|
| 55 |
-
{
|
| 56 |
-
"cell_type": "code",
|
| 57 |
-
"execution_count": 26,
|
| 58 |
-
"metadata": {},
|
| 59 |
-
"outputs": [],
|
| 60 |
-
"source": [
|
| 61 |
-
"import openai\n",
|
| 62 |
-
"import os\n",
|
| 63 |
-
"from dotenv import load_dotenv\n",
|
| 64 |
-
"import io\n",
|
| 65 |
-
"from anthropic import Anthropic\n",
|
| 66 |
-
"from IPython.display import Markdown, display"
|
| 67 |
-
]
|
| 68 |
-
},
|
| 69 |
-
{
|
| 70 |
-
"cell_type": "code",
|
| 71 |
-
"execution_count": null,
|
| 72 |
-
"metadata": {},
|
| 73 |
-
"outputs": [],
|
| 74 |
-
"source": [
|
| 75 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 76 |
-
"\n",
|
| 77 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 78 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 79 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 80 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 81 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 82 |
-
"\n",
|
| 83 |
-
"if openai_api_key:\n",
|
| 84 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 85 |
-
"else:\n",
|
| 86 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 87 |
-
" \n",
|
| 88 |
-
"if anthropic_api_key:\n",
|
| 89 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 90 |
-
"else:\n",
|
| 91 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 92 |
-
"\n",
|
| 93 |
-
"if google_api_key:\n",
|
| 94 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 95 |
-
"else:\n",
|
| 96 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 97 |
-
"\n",
|
| 98 |
-
"if deepseek_api_key:\n",
|
| 99 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 100 |
-
"else:\n",
|
| 101 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 102 |
-
"\n",
|
| 103 |
-
"if groq_api_key:\n",
|
| 104 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 105 |
-
"else:\n",
|
| 106 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 107 |
-
]
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"cell_type": "code",
|
| 111 |
-
"execution_count": 50,
|
| 112 |
-
"metadata": {},
|
| 113 |
-
"outputs": [],
|
| 114 |
-
"source": [
|
| 115 |
-
"\n",
|
| 116 |
-
"from openai import OpenAI\n",
|
| 117 |
-
"\n",
|
| 118 |
-
"openai = OpenAI()\n",
|
| 119 |
-
"\n",
|
| 120 |
-
"# Request prompt\n",
|
| 121 |
-
"request = (\n",
|
| 122 |
-
" \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 123 |
-
" \"Answer only with the question, no explanation.\"\n",
|
| 124 |
-
")\n",
|
| 125 |
-
"\n",
|
| 126 |
-
"\n",
|
| 127 |
-
"\n",
|
| 128 |
-
"def generate_question(prompt: str) -> str:\n",
|
| 129 |
-
" response = openai.chat.completions.create(\n",
|
| 130 |
-
" model='gpt-4o-mini',\n",
|
| 131 |
-
" messages=[{'role': 'user', 'content': prompt}]\n",
|
| 132 |
-
" )\n",
|
| 133 |
-
" question = response.choices[0].message.content\n",
|
| 134 |
-
" return question\n",
|
| 135 |
-
"\n",
|
| 136 |
-
"def react_agent_decide_model(question: str) -> str:\n",
|
| 137 |
-
" prompt = f\"\"\"\n",
|
| 138 |
-
" You are an intelligent AI assistant tasked with evaluating which language model is most suitable to answer a given question.\n",
|
| 139 |
-
"\n",
|
| 140 |
-
" Available models:\n",
|
| 141 |
-
" - OpenAI: excels at reasoning and factual answers.\n",
|
| 142 |
-
" - Claude: better for philosophical, nuanced, and ethical topics.\n",
|
| 143 |
-
" - Gemini: good for concise and structured summaries.\n",
|
| 144 |
-
" - Groq: good for creative or exploratory tasks.\n",
|
| 145 |
-
" - DeepSeek: strong at coding, technical reasoning, and multilingual responses.\n",
|
| 146 |
-
"\n",
|
| 147 |
-
" Here is the question to answer:\n",
|
| 148 |
-
" \"{question}\"\n",
|
| 149 |
-
"\n",
|
| 150 |
-
" ### Thought:\n",
|
| 151 |
-
" Which model is best suited to answer this question, and why?\n",
|
| 152 |
-
"\n",
|
| 153 |
-
" ### Action:\n",
|
| 154 |
-
" Respond with only the model name you choose (e.g., \"Claude\").\n",
|
| 155 |
-
" \"\"\"\n",
|
| 156 |
-
"\n",
|
| 157 |
-
" response = openai.chat.completions.create(\n",
|
| 158 |
-
" model=\"o3-mini\",\n",
|
| 159 |
-
" messages=[{\"role\": \"user\", \"content\": prompt}]\n",
|
| 160 |
-
" )\n",
|
| 161 |
-
" model = response.choices[0].message.content.strip()\n",
|
| 162 |
-
" return model\n",
|
| 163 |
-
"\n",
|
| 164 |
-
"def generate_answer_openai(prompt):\n",
|
| 165 |
-
" answer = openai.chat.completions.create(\n",
|
| 166 |
-
" model='gpt-4o-mini',\n",
|
| 167 |
-
" messages=[{'role': 'user', 'content': prompt}]\n",
|
| 168 |
-
" ).choices[0].message.content\n",
|
| 169 |
-
" return answer\n",
|
| 170 |
-
"\n",
|
| 171 |
-
"def generate_answer_anthropic(prompt):\n",
|
| 172 |
-
" anthropic = Anthropic(api_key=anthropic_api_key)\n",
|
| 173 |
-
" model_name = \"claude-3-5-sonnet-20240620\"\n",
|
| 174 |
-
" answer = anthropic.messages.create(\n",
|
| 175 |
-
" model=model_name,\n",
|
| 176 |
-
" messages=[{'role': 'user', 'content': prompt}],\n",
|
| 177 |
-
" max_tokens=1000\n",
|
| 178 |
-
" ).content[0].text\n",
|
| 179 |
-
" return answer\n",
|
| 180 |
-
"\n",
|
| 181 |
-
"def generate_answer_deepseek(prompt):\n",
|
| 182 |
-
" deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 183 |
-
" model_name = \"deepseek-chat\" \n",
|
| 184 |
-
" answer = deepseek.chat.completions.create(\n",
|
| 185 |
-
" model=model_name,\n",
|
| 186 |
-
" messages=[{'role': 'user', 'content': prompt}],\n",
|
| 187 |
-
" base_url='https://api.deepseek.com/v1'\n",
|
| 188 |
-
" ).choices[0].message.content\n",
|
| 189 |
-
" return answer\n",
|
| 190 |
-
"\n",
|
| 191 |
-
"def generate_answer_gemini(prompt):\n",
|
| 192 |
-
" gemini=OpenAI(base_url='https://generativelanguage.googleapis.com/v1beta/openai/',api_key=google_api_key)\n",
|
| 193 |
-
" model_name = \"gemini-2.0-flash\"\n",
|
| 194 |
-
" answer = gemini.chat.completions.create(\n",
|
| 195 |
-
" model=model_name,\n",
|
| 196 |
-
" messages=[{'role': 'user', 'content': prompt}],\n",
|
| 197 |
-
" ).choices[0].message.content\n",
|
| 198 |
-
" return answer\n",
|
| 199 |
-
"\n",
|
| 200 |
-
"def generate_answer_groq(prompt):\n",
|
| 201 |
-
" groq=OpenAI(base_url='https://api.groq.com/openai/v1',api_key=groq_api_key)\n",
|
| 202 |
-
" model_name=\"llama3-70b-8192\"\n",
|
| 203 |
-
" answer = groq.chat.completions.create(\n",
|
| 204 |
-
" model=model_name,\n",
|
| 205 |
-
" messages=[{'role': 'user', 'content': prompt}],\n",
|
| 206 |
-
" base_url=\"https://api.groq.com/openai/v1\"\n",
|
| 207 |
-
" ).choices[0].message.content\n",
|
| 208 |
-
" return answer\n",
|
| 209 |
-
"\n",
|
| 210 |
-
"def main():\n",
|
| 211 |
-
" print(\"Generating question...\")\n",
|
| 212 |
-
" question = generate_question(request)\n",
|
| 213 |
-
" print(f\"\\n🧠 Question: {question}\\n\")\n",
|
| 214 |
-
" selected_model = react_agent_decide_model(question)\n",
|
| 215 |
-
" print(f\"\\n🔹 {selected_model}:\\n\")\n",
|
| 216 |
-
" \n",
|
| 217 |
-
" if selected_model.lower() == \"openai\":\n",
|
| 218 |
-
" answer = generate_answer_openai(question)\n",
|
| 219 |
-
" elif selected_model.lower() == \"deepseek\":\n",
|
| 220 |
-
" answer = generate_answer_deepseek(question)\n",
|
| 221 |
-
" elif selected_model.lower() == \"gemini\":\n",
|
| 222 |
-
" answer = generate_answer_gemini(question)\n",
|
| 223 |
-
" elif selected_model.lower() == \"groq\":\n",
|
| 224 |
-
" answer = generate_answer_groq(question)\n",
|
| 225 |
-
" elif selected_model.lower() == \"claude\":\n",
|
| 226 |
-
" answer = generate_answer_anthropic(question)\n",
|
| 227 |
-
" print(f\"\\n🔹 {selected_model}:\\n{answer}\\n\")\n",
|
| 228 |
-
" \n"
|
| 229 |
-
]
|
| 230 |
-
},
|
| 231 |
-
{
|
| 232 |
-
"cell_type": "code",
|
| 233 |
-
"execution_count": null,
|
| 234 |
-
"metadata": {},
|
| 235 |
-
"outputs": [],
|
| 236 |
-
"source": [
|
| 237 |
-
"main()"
|
| 238 |
-
]
|
| 239 |
-
},
|
| 240 |
-
{
|
| 241 |
-
"cell_type": "code",
|
| 242 |
-
"execution_count": null,
|
| 243 |
-
"metadata": {},
|
| 244 |
-
"outputs": [],
|
| 245 |
-
"source": []
|
| 246 |
-
},
|
| 247 |
-
{
|
| 248 |
-
"cell_type": "markdown",
|
| 249 |
-
"metadata": {},
|
| 250 |
-
"source": [
|
| 251 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 252 |
-
" <tr>\n",
|
| 253 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 254 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 255 |
-
" </td>\n",
|
| 256 |
-
" <td>\n",
|
| 257 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 258 |
-
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 259 |
-
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 260 |
-
" to business projects where accuracy is critical.\n",
|
| 261 |
-
" </span>\n",
|
| 262 |
-
" </td>\n",
|
| 263 |
-
" </tr>\n",
|
| 264 |
-
"</table>"
|
| 265 |
-
]
|
| 266 |
-
}
|
| 267 |
-
],
|
| 268 |
-
"metadata": {
|
| 269 |
-
"kernelspec": {
|
| 270 |
-
"display_name": ".venv",
|
| 271 |
-
"language": "python",
|
| 272 |
-
"name": "python3"
|
| 273 |
-
},
|
| 274 |
-
"language_info": {
|
| 275 |
-
"codemirror_mode": {
|
| 276 |
-
"name": "ipython",
|
| 277 |
-
"version": 3
|
| 278 |
-
},
|
| 279 |
-
"file_extension": ".py",
|
| 280 |
-
"mimetype": "text/x-python",
|
| 281 |
-
"name": "python",
|
| 282 |
-
"nbconvert_exporter": "python",
|
| 283 |
-
"pygments_lexer": "ipython3",
|
| 284 |
-
"version": "3.12.4"
|
| 285 |
-
}
|
| 286 |
-
},
|
| 287 |
-
"nbformat": 4,
|
| 288 |
-
"nbformat_minor": 2
|
| 289 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_akash_parallelization.ipynb
DELETED
|
@@ -1,295 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "code",
|
| 14 |
-
"execution_count": null,
|
| 15 |
-
"metadata": {},
|
| 16 |
-
"outputs": [],
|
| 17 |
-
"source": [
|
| 18 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 19 |
-
"\n",
|
| 20 |
-
"import os\n",
|
| 21 |
-
"import json\n",
|
| 22 |
-
"from dotenv import load_dotenv\n",
|
| 23 |
-
"from openai import OpenAI, AsyncOpenAI\n",
|
| 24 |
-
"from IPython.display import Markdown, display\n",
|
| 25 |
-
"import asyncio\n",
|
| 26 |
-
"from functools import partial"
|
| 27 |
-
]
|
| 28 |
-
},
|
| 29 |
-
{
|
| 30 |
-
"cell_type": "code",
|
| 31 |
-
"execution_count": null,
|
| 32 |
-
"metadata": {},
|
| 33 |
-
"outputs": [],
|
| 34 |
-
"source": [
|
| 35 |
-
"# Always remember to do this!\n",
|
| 36 |
-
"load_dotenv(override=True)"
|
| 37 |
-
]
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"cell_type": "code",
|
| 41 |
-
"execution_count": null,
|
| 42 |
-
"metadata": {},
|
| 43 |
-
"outputs": [],
|
| 44 |
-
"source": [
|
| 45 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 46 |
-
"\n",
|
| 47 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 48 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 49 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 50 |
-
"\n",
|
| 51 |
-
"if openai_api_key:\n",
|
| 52 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 53 |
-
"else:\n",
|
| 54 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 55 |
-
"\n",
|
| 56 |
-
"\n",
|
| 57 |
-
"if google_api_key:\n",
|
| 58 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 59 |
-
"else:\n",
|
| 60 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 61 |
-
"\n",
|
| 62 |
-
"if groq_api_key:\n",
|
| 63 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 64 |
-
"else:\n",
|
| 65 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 66 |
-
]
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"cell_type": "code",
|
| 70 |
-
"execution_count": null,
|
| 71 |
-
"metadata": {},
|
| 72 |
-
"outputs": [],
|
| 73 |
-
"source": [
|
| 74 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 75 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 76 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 77 |
-
]
|
| 78 |
-
},
|
| 79 |
-
{
|
| 80 |
-
"cell_type": "code",
|
| 81 |
-
"execution_count": null,
|
| 82 |
-
"metadata": {},
|
| 83 |
-
"outputs": [],
|
| 84 |
-
"source": [
|
| 85 |
-
"openai = AsyncOpenAI()\n",
|
| 86 |
-
"response = await openai.chat.completions.create(\n",
|
| 87 |
-
" model=\"gpt-4o-mini\",\n",
|
| 88 |
-
" messages=messages,\n",
|
| 89 |
-
")\n",
|
| 90 |
-
"question = response.choices[0].message.content\n",
|
| 91 |
-
"print(question)\n"
|
| 92 |
-
]
|
| 93 |
-
},
|
| 94 |
-
{
|
| 95 |
-
"cell_type": "code",
|
| 96 |
-
"execution_count": null,
|
| 97 |
-
"metadata": {},
|
| 98 |
-
"outputs": [],
|
| 99 |
-
"source": [
|
| 100 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 101 |
-
]
|
| 102 |
-
},
|
| 103 |
-
{
|
| 104 |
-
"cell_type": "code",
|
| 105 |
-
"execution_count": null,
|
| 106 |
-
"metadata": {},
|
| 107 |
-
"outputs": [],
|
| 108 |
-
"source": [
|
| 109 |
-
"from dataclasses import dataclass\n",
|
| 110 |
-
"\n",
|
| 111 |
-
"@dataclass\n",
|
| 112 |
-
"class LLMResource:\n",
|
| 113 |
-
" api_key: str\n",
|
| 114 |
-
" model: str\n",
|
| 115 |
-
" url: str = None # optional otherwise NOone\n",
|
| 116 |
-
"\n",
|
| 117 |
-
"llm_resources = [\n",
|
| 118 |
-
" LLMResource(api_key=openai_api_key, model=\"gpt-4o-mini\"),\n",
|
| 119 |
-
" LLMResource(api_key=google_api_key, model=\"gemini-2.5-flash\", url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"),\n",
|
| 120 |
-
" LLMResource(api_key=groq_api_key, model=\"qwen/qwen3-32b\", url=\"https://api.groq.com/openai/v1\"),\n",
|
| 121 |
-
" LLMResource(api_key=\"ollama\", model=\"deepseek-r1:1.5b\", url=\"http://localhost:11434/v1\" )\n",
|
| 122 |
-
"]\n"
|
| 123 |
-
]
|
| 124 |
-
},
|
| 125 |
-
{
|
| 126 |
-
"cell_type": "code",
|
| 127 |
-
"execution_count": null,
|
| 128 |
-
"metadata": {},
|
| 129 |
-
"outputs": [],
|
| 130 |
-
"source": [
|
| 131 |
-
"\n",
|
| 132 |
-
"\n",
|
| 133 |
-
"async def llm_call(key, model_name, url, messages) -> tuple:\n",
|
| 134 |
-
" if url is None:\n",
|
| 135 |
-
" llm = AsyncOpenAI(api_key=key)\n",
|
| 136 |
-
" else: \n",
|
| 137 |
-
" llm = AsyncOpenAI(base_url=url,api_key=key)\n",
|
| 138 |
-
" \n",
|
| 139 |
-
" response = await llm.chat.completions.create(\n",
|
| 140 |
-
" model=model_name, messages=messages)\n",
|
| 141 |
-
" \n",
|
| 142 |
-
" answer = (model_name, response.choices[0].message.content)\n",
|
| 143 |
-
"\n",
|
| 144 |
-
" return answer #returns tuple of modle and response from LLM\n",
|
| 145 |
-
"\n",
|
| 146 |
-
"llm_callable = partial(llm_call, messages=messages) #prefill with messages\n",
|
| 147 |
-
"# Always remember to do this!"
|
| 148 |
-
]
|
| 149 |
-
},
|
| 150 |
-
{
|
| 151 |
-
"cell_type": "code",
|
| 152 |
-
"execution_count": null,
|
| 153 |
-
"metadata": {},
|
| 154 |
-
"outputs": [],
|
| 155 |
-
"source": [
|
| 156 |
-
"#gather all responses concurrently\n",
|
| 157 |
-
"tasks = [llm_callable(res.api_key,res.model,res.url) for res in llm_resources]\n",
|
| 158 |
-
"results = await asyncio.gather(*tasks)\n",
|
| 159 |
-
"together = [f'Response from competitor {model}:{answer}' for model,answer in results]#gather results once all model finish running\n"
|
| 160 |
-
]
|
| 161 |
-
},
|
| 162 |
-
{
|
| 163 |
-
"cell_type": "code",
|
| 164 |
-
"execution_count": null,
|
| 165 |
-
"metadata": {},
|
| 166 |
-
"outputs": [],
|
| 167 |
-
"source": [
|
| 168 |
-
"judge = f\"\"\"You are judging a competition between {len(llm_resources)} competitors.\n",
|
| 169 |
-
"Each model has been given this question:\n",
|
| 170 |
-
"\n",
|
| 171 |
-
"{request}\n",
|
| 172 |
-
"\n",
|
| 173 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 174 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 175 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 176 |
-
"\n",
|
| 177 |
-
"Here are the responses from each competitor:\n",
|
| 178 |
-
"\n",
|
| 179 |
-
"{together} # all responses\n",
|
| 180 |
-
"\n",
|
| 181 |
-
"Now respond with the JSON with the ranked order of the competitors name, nothing else. Do not include markdown formatting or code blocks.\"\"\""
|
| 182 |
-
]
|
| 183 |
-
},
|
| 184 |
-
{
|
| 185 |
-
"cell_type": "code",
|
| 186 |
-
"execution_count": null,
|
| 187 |
-
"metadata": {},
|
| 188 |
-
"outputs": [],
|
| 189 |
-
"source": [
|
| 190 |
-
"print(judge)"
|
| 191 |
-
]
|
| 192 |
-
},
|
| 193 |
-
{
|
| 194 |
-
"cell_type": "code",
|
| 195 |
-
"execution_count": null,
|
| 196 |
-
"metadata": {},
|
| 197 |
-
"outputs": [],
|
| 198 |
-
"source": [
|
| 199 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 200 |
-
]
|
| 201 |
-
},
|
| 202 |
-
{
|
| 203 |
-
"cell_type": "code",
|
| 204 |
-
"execution_count": null,
|
| 205 |
-
"metadata": {},
|
| 206 |
-
"outputs": [],
|
| 207 |
-
"source": [
|
| 208 |
-
"# Judgement time!\n",
|
| 209 |
-
"\n",
|
| 210 |
-
"openai = OpenAI()\n",
|
| 211 |
-
"response = openai.chat.completions.create(\n",
|
| 212 |
-
" model=\"o3-mini\",\n",
|
| 213 |
-
" messages=judge_messages,\n",
|
| 214 |
-
")\n",
|
| 215 |
-
"results = response.choices[0].message.content\n",
|
| 216 |
-
"print(results)\n"
|
| 217 |
-
]
|
| 218 |
-
},
|
| 219 |
-
{
|
| 220 |
-
"cell_type": "code",
|
| 221 |
-
"execution_count": null,
|
| 222 |
-
"metadata": {},
|
| 223 |
-
"outputs": [],
|
| 224 |
-
"source": [
|
| 225 |
-
"# OK let's turn this into results!\n",
|
| 226 |
-
"\n",
|
| 227 |
-
"results_dict = json.loads(results)\n",
|
| 228 |
-
"\n",
|
| 229 |
-
"ranks = results_dict[\"results\"]\n",
|
| 230 |
-
"\n",
|
| 231 |
-
"for index, result in enumerate(ranks):\n",
|
| 232 |
-
" print(f\"Rank {index+1}: {result}\")"
|
| 233 |
-
]
|
| 234 |
-
},
|
| 235 |
-
{
|
| 236 |
-
"cell_type": "markdown",
|
| 237 |
-
"metadata": {},
|
| 238 |
-
"source": [
|
| 239 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 240 |
-
" <tr>\n",
|
| 241 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 242 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 243 |
-
" </td>\n",
|
| 244 |
-
" <td>\n",
|
| 245 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 246 |
-
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 247 |
-
" </span>\n",
|
| 248 |
-
" </td>\n",
|
| 249 |
-
" </tr>\n",
|
| 250 |
-
"</table>"
|
| 251 |
-
]
|
| 252 |
-
},
|
| 253 |
-
{
|
| 254 |
-
"cell_type": "markdown",
|
| 255 |
-
"metadata": {},
|
| 256 |
-
"source": [
|
| 257 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 258 |
-
" <tr>\n",
|
| 259 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 260 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 261 |
-
" </td>\n",
|
| 262 |
-
" <td>\n",
|
| 263 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 264 |
-
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 265 |
-
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 266 |
-
" to business projects where accuracy is critical.\n",
|
| 267 |
-
" </span>\n",
|
| 268 |
-
" </td>\n",
|
| 269 |
-
" </tr>\n",
|
| 270 |
-
"</table>"
|
| 271 |
-
]
|
| 272 |
-
}
|
| 273 |
-
],
|
| 274 |
-
"metadata": {
|
| 275 |
-
"kernelspec": {
|
| 276 |
-
"display_name": ".venv",
|
| 277 |
-
"language": "python",
|
| 278 |
-
"name": "python3"
|
| 279 |
-
},
|
| 280 |
-
"language_info": {
|
| 281 |
-
"codemirror_mode": {
|
| 282 |
-
"name": "ipython",
|
| 283 |
-
"version": 3
|
| 284 |
-
},
|
| 285 |
-
"file_extension": ".py",
|
| 286 |
-
"mimetype": "text/x-python",
|
| 287 |
-
"name": "python",
|
| 288 |
-
"nbconvert_exporter": "python",
|
| 289 |
-
"pygments_lexer": "ipython3",
|
| 290 |
-
"version": "3.12.3"
|
| 291 |
-
}
|
| 292 |
-
},
|
| 293 |
-
"nbformat": 4,
|
| 294 |
-
"nbformat_minor": 2
|
| 295 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_async.ipynb
DELETED
|
@@ -1,474 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "code",
|
| 14 |
-
"execution_count": 1,
|
| 15 |
-
"metadata": {},
|
| 16 |
-
"outputs": [],
|
| 17 |
-
"source": [
|
| 18 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 19 |
-
"\n",
|
| 20 |
-
"import os\n",
|
| 21 |
-
"import json\n",
|
| 22 |
-
"import asyncio\n",
|
| 23 |
-
"from dotenv import load_dotenv\n",
|
| 24 |
-
"from openai import OpenAI, AsyncOpenAI\n",
|
| 25 |
-
"from anthropic import AsyncAnthropic\n",
|
| 26 |
-
"from pydantic import BaseModel"
|
| 27 |
-
]
|
| 28 |
-
},
|
| 29 |
-
{
|
| 30 |
-
"cell_type": "code",
|
| 31 |
-
"execution_count": null,
|
| 32 |
-
"metadata": {},
|
| 33 |
-
"outputs": [],
|
| 34 |
-
"source": [
|
| 35 |
-
"# Always remember to do this!\n",
|
| 36 |
-
"load_dotenv(override=True)"
|
| 37 |
-
]
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"cell_type": "code",
|
| 41 |
-
"execution_count": null,
|
| 42 |
-
"metadata": {},
|
| 43 |
-
"outputs": [],
|
| 44 |
-
"source": [
|
| 45 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 46 |
-
"\n",
|
| 47 |
-
"OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')\n",
|
| 48 |
-
"ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 49 |
-
"GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')\n",
|
| 50 |
-
"DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 51 |
-
"GROQ_API_KEY = os.getenv('GROQ_API_KEY')\n",
|
| 52 |
-
"\n",
|
| 53 |
-
"if OPENAI_API_KEY:\n",
|
| 54 |
-
" print(f\"OpenAI API Key exists and begins {OPENAI_API_KEY[:8]}\")\n",
|
| 55 |
-
"else:\n",
|
| 56 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 57 |
-
" \n",
|
| 58 |
-
"if ANTHROPIC_API_KEY:\n",
|
| 59 |
-
" print(f\"Anthropic API Key exists and begins {ANTHROPIC_API_KEY[:7]}\")\n",
|
| 60 |
-
"else:\n",
|
| 61 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 62 |
-
"\n",
|
| 63 |
-
"if GOOGLE_API_KEY:\n",
|
| 64 |
-
" print(f\"Google API Key exists and begins {GOOGLE_API_KEY[:2]}\")\n",
|
| 65 |
-
"else:\n",
|
| 66 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 67 |
-
"\n",
|
| 68 |
-
"if DEEPSEEK_API_KEY:\n",
|
| 69 |
-
" print(f\"DeepSeek API Key exists and begins {DEEPSEEK_API_KEY[:3]}\")\n",
|
| 70 |
-
"else:\n",
|
| 71 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 72 |
-
"\n",
|
| 73 |
-
"if GROQ_API_KEY:\n",
|
| 74 |
-
" print(f\"Groq API Key exists and begins {GROQ_API_KEY[:4]}\")\n",
|
| 75 |
-
"else:\n",
|
| 76 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 77 |
-
]
|
| 78 |
-
},
|
| 79 |
-
{
|
| 80 |
-
"cell_type": "code",
|
| 81 |
-
"execution_count": 4,
|
| 82 |
-
"metadata": {},
|
| 83 |
-
"outputs": [],
|
| 84 |
-
"source": [
|
| 85 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 86 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 87 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 88 |
-
]
|
| 89 |
-
},
|
| 90 |
-
{
|
| 91 |
-
"cell_type": "code",
|
| 92 |
-
"execution_count": null,
|
| 93 |
-
"metadata": {},
|
| 94 |
-
"outputs": [],
|
| 95 |
-
"source": [
|
| 96 |
-
"print(messages)"
|
| 97 |
-
]
|
| 98 |
-
},
|
| 99 |
-
{
|
| 100 |
-
"cell_type": "code",
|
| 101 |
-
"execution_count": null,
|
| 102 |
-
"metadata": {},
|
| 103 |
-
"outputs": [],
|
| 104 |
-
"source": [
|
| 105 |
-
"openai = AsyncOpenAI()\n",
|
| 106 |
-
"response = await openai.chat.completions.create(\n",
|
| 107 |
-
" model=\"gpt-4o-mini\",\n",
|
| 108 |
-
" messages=messages,\n",
|
| 109 |
-
")\n",
|
| 110 |
-
"question = response.choices[0].message.content\n",
|
| 111 |
-
"print(question)\n"
|
| 112 |
-
]
|
| 113 |
-
},
|
| 114 |
-
{
|
| 115 |
-
"cell_type": "code",
|
| 116 |
-
"execution_count": 7,
|
| 117 |
-
"metadata": {},
|
| 118 |
-
"outputs": [],
|
| 119 |
-
"source": [
|
| 120 |
-
"# Define Pydantic model for storing LLM results\n",
|
| 121 |
-
"class LLMResult(BaseModel):\n",
|
| 122 |
-
" model: str\n",
|
| 123 |
-
" answer: str\n"
|
| 124 |
-
]
|
| 125 |
-
},
|
| 126 |
-
{
|
| 127 |
-
"cell_type": "code",
|
| 128 |
-
"execution_count": 8,
|
| 129 |
-
"metadata": {},
|
| 130 |
-
"outputs": [],
|
| 131 |
-
"source": [
|
| 132 |
-
"results: list[LLMResult] = []\n",
|
| 133 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 134 |
-
]
|
| 135 |
-
},
|
| 136 |
-
{
|
| 137 |
-
"cell_type": "code",
|
| 138 |
-
"execution_count": 9,
|
| 139 |
-
"metadata": {},
|
| 140 |
-
"outputs": [],
|
| 141 |
-
"source": [
|
| 142 |
-
"# The API we know well\n",
|
| 143 |
-
"async def openai_answer() -> None:\n",
|
| 144 |
-
"\n",
|
| 145 |
-
" if OPENAI_API_KEY is None:\n",
|
| 146 |
-
" return None\n",
|
| 147 |
-
" \n",
|
| 148 |
-
" print(\"OpenAI starting!\")\n",
|
| 149 |
-
" model_name = \"gpt-4o-mini\"\n",
|
| 150 |
-
"\n",
|
| 151 |
-
" try:\n",
|
| 152 |
-
" response = await openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
-
" answer = response.choices[0].message.content\n",
|
| 154 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 155 |
-
" except Exception as e:\n",
|
| 156 |
-
" print(f\"Error with OpenAI: {e}\")\n",
|
| 157 |
-
" return None\n",
|
| 158 |
-
"\n",
|
| 159 |
-
" print(\"OpenAI done!\")"
|
| 160 |
-
]
|
| 161 |
-
},
|
| 162 |
-
{
|
| 163 |
-
"cell_type": "code",
|
| 164 |
-
"execution_count": 10,
|
| 165 |
-
"metadata": {},
|
| 166 |
-
"outputs": [],
|
| 167 |
-
"source": [
|
| 168 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"async def anthropic_answer() -> None:\n",
|
| 171 |
-
"\n",
|
| 172 |
-
" if ANTHROPIC_API_KEY is None:\n",
|
| 173 |
-
" return None\n",
|
| 174 |
-
" \n",
|
| 175 |
-
" print(\"Anthropic starting!\")\n",
|
| 176 |
-
" model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 177 |
-
"\n",
|
| 178 |
-
" claude = AsyncAnthropic()\n",
|
| 179 |
-
" try:\n",
|
| 180 |
-
" response = await claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 181 |
-
" answer = response.content[0].text\n",
|
| 182 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 183 |
-
" except Exception as e:\n",
|
| 184 |
-
" print(f\"Error with Anthropic: {e}\")\n",
|
| 185 |
-
" return None\n",
|
| 186 |
-
"\n",
|
| 187 |
-
" print(\"Anthropic done!\")"
|
| 188 |
-
]
|
| 189 |
-
},
|
| 190 |
-
{
|
| 191 |
-
"cell_type": "code",
|
| 192 |
-
"execution_count": 11,
|
| 193 |
-
"metadata": {},
|
| 194 |
-
"outputs": [],
|
| 195 |
-
"source": [
|
| 196 |
-
"async def google_answer() -> None:\n",
|
| 197 |
-
"\n",
|
| 198 |
-
" if GOOGLE_API_KEY is None:\n",
|
| 199 |
-
" return None\n",
|
| 200 |
-
" \n",
|
| 201 |
-
" print(\"Google starting!\")\n",
|
| 202 |
-
" model_name = \"gemini-2.0-flash\"\n",
|
| 203 |
-
"\n",
|
| 204 |
-
" gemini = AsyncOpenAI(api_key=GOOGLE_API_KEY, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 205 |
-
" try:\n",
|
| 206 |
-
" response = await gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 207 |
-
" answer = response.choices[0].message.content\n",
|
| 208 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 209 |
-
" except Exception as e:\n",
|
| 210 |
-
" print(f\"Error with Google: {e}\")\n",
|
| 211 |
-
" return None\n",
|
| 212 |
-
"\n",
|
| 213 |
-
" print(\"Google done!\")"
|
| 214 |
-
]
|
| 215 |
-
},
|
| 216 |
-
{
|
| 217 |
-
"cell_type": "code",
|
| 218 |
-
"execution_count": 12,
|
| 219 |
-
"metadata": {},
|
| 220 |
-
"outputs": [],
|
| 221 |
-
"source": [
|
| 222 |
-
"async def deepseek_answer() -> None:\n",
|
| 223 |
-
"\n",
|
| 224 |
-
" if DEEPSEEK_API_KEY is None:\n",
|
| 225 |
-
" return None\n",
|
| 226 |
-
" \n",
|
| 227 |
-
" print(\"DeepSeek starting!\")\n",
|
| 228 |
-
" model_name = \"deepseek-chat\"\n",
|
| 229 |
-
"\n",
|
| 230 |
-
" deepseek = AsyncOpenAI(api_key=DEEPSEEK_API_KEY, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 231 |
-
" try:\n",
|
| 232 |
-
" response = await deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 233 |
-
" answer = response.choices[0].message.content\n",
|
| 234 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 235 |
-
" except Exception as e:\n",
|
| 236 |
-
" print(f\"Error with DeepSeek: {e}\")\n",
|
| 237 |
-
" return None\n",
|
| 238 |
-
"\n",
|
| 239 |
-
" print(\"DeepSeek done!\")"
|
| 240 |
-
]
|
| 241 |
-
},
|
| 242 |
-
{
|
| 243 |
-
"cell_type": "code",
|
| 244 |
-
"execution_count": 13,
|
| 245 |
-
"metadata": {},
|
| 246 |
-
"outputs": [],
|
| 247 |
-
"source": [
|
| 248 |
-
"async def groq_answer() -> None:\n",
|
| 249 |
-
"\n",
|
| 250 |
-
" if GROQ_API_KEY is None:\n",
|
| 251 |
-
" return None\n",
|
| 252 |
-
" \n",
|
| 253 |
-
" print(\"Groq starting!\")\n",
|
| 254 |
-
" model_name = \"llama-3.3-70b-versatile\"\n",
|
| 255 |
-
"\n",
|
| 256 |
-
" groq = AsyncOpenAI(api_key=GROQ_API_KEY, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 257 |
-
" try:\n",
|
| 258 |
-
" response = await groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 259 |
-
" answer = response.choices[0].message.content\n",
|
| 260 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 261 |
-
" except Exception as e:\n",
|
| 262 |
-
" print(f\"Error with Groq: {e}\")\n",
|
| 263 |
-
" return None\n",
|
| 264 |
-
"\n",
|
| 265 |
-
" print(\"Groq done!\")\n"
|
| 266 |
-
]
|
| 267 |
-
},
|
| 268 |
-
{
|
| 269 |
-
"cell_type": "markdown",
|
| 270 |
-
"metadata": {},
|
| 271 |
-
"source": [
|
| 272 |
-
"## For the next cell, we will use Ollama\n",
|
| 273 |
-
"\n",
|
| 274 |
-
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 275 |
-
"and runs models locally using high performance C++ code.\n",
|
| 276 |
-
"\n",
|
| 277 |
-
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 278 |
-
"\n",
|
| 279 |
-
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 280 |
-
"\n",
|
| 281 |
-
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 282 |
-
"\n",
|
| 283 |
-
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 284 |
-
"\n",
|
| 285 |
-
"`ollama pull <model_name>` downloads a model locally \n",
|
| 286 |
-
"`ollama ls` lists all the models you've downloaded \n",
|
| 287 |
-
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 288 |
-
]
|
| 289 |
-
},
|
| 290 |
-
{
|
| 291 |
-
"cell_type": "markdown",
|
| 292 |
-
"metadata": {},
|
| 293 |
-
"source": [
|
| 294 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 295 |
-
" <tr>\n",
|
| 296 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 297 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 298 |
-
" </td>\n",
|
| 299 |
-
" <td>\n",
|
| 300 |
-
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 301 |
-
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 302 |
-
" </span>\n",
|
| 303 |
-
" </td>\n",
|
| 304 |
-
" </tr>\n",
|
| 305 |
-
"</table>"
|
| 306 |
-
]
|
| 307 |
-
},
|
| 308 |
-
{
|
| 309 |
-
"cell_type": "code",
|
| 310 |
-
"execution_count": null,
|
| 311 |
-
"metadata": {},
|
| 312 |
-
"outputs": [],
|
| 313 |
-
"source": [
|
| 314 |
-
"!ollama pull llama3.2"
|
| 315 |
-
]
|
| 316 |
-
},
|
| 317 |
-
{
|
| 318 |
-
"cell_type": "code",
|
| 319 |
-
"execution_count": 15,
|
| 320 |
-
"metadata": {},
|
| 321 |
-
"outputs": [],
|
| 322 |
-
"source": [
|
| 323 |
-
"async def ollama_answer() -> None:\n",
|
| 324 |
-
" model_name = \"llama3.2\"\n",
|
| 325 |
-
"\n",
|
| 326 |
-
" print(\"Ollama starting!\")\n",
|
| 327 |
-
" ollama = AsyncOpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 328 |
-
" try:\n",
|
| 329 |
-
" response = await ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 330 |
-
" answer = response.choices[0].message.content\n",
|
| 331 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 332 |
-
" except Exception as e:\n",
|
| 333 |
-
" print(f\"Error with Ollama: {e}\")\n",
|
| 334 |
-
" return None\n",
|
| 335 |
-
"\n",
|
| 336 |
-
" print(\"Ollama done!\") "
|
| 337 |
-
]
|
| 338 |
-
},
|
| 339 |
-
{
|
| 340 |
-
"cell_type": "code",
|
| 341 |
-
"execution_count": null,
|
| 342 |
-
"metadata": {},
|
| 343 |
-
"outputs": [],
|
| 344 |
-
"source": [
|
| 345 |
-
"async def gather_answers():\n",
|
| 346 |
-
" tasks = [\n",
|
| 347 |
-
" openai_answer(),\n",
|
| 348 |
-
" anthropic_answer(),\n",
|
| 349 |
-
" google_answer(),\n",
|
| 350 |
-
" deepseek_answer(),\n",
|
| 351 |
-
" groq_answer(),\n",
|
| 352 |
-
" ollama_answer()\n",
|
| 353 |
-
" ]\n",
|
| 354 |
-
" await asyncio.gather(*tasks)\n",
|
| 355 |
-
"\n",
|
| 356 |
-
"await gather_answers()"
|
| 357 |
-
]
|
| 358 |
-
},
|
| 359 |
-
{
|
| 360 |
-
"cell_type": "code",
|
| 361 |
-
"execution_count": null,
|
| 362 |
-
"metadata": {},
|
| 363 |
-
"outputs": [],
|
| 364 |
-
"source": [
|
| 365 |
-
"together = \"\"\n",
|
| 366 |
-
"competitors = []\n",
|
| 367 |
-
"answers = []\n",
|
| 368 |
-
"\n",
|
| 369 |
-
"for res in results:\n",
|
| 370 |
-
" competitor = res.model\n",
|
| 371 |
-
" answer = res.answer\n",
|
| 372 |
-
" competitors.append(competitor)\n",
|
| 373 |
-
" answers.append(answer)\n",
|
| 374 |
-
" together += f\"# Response from competitor {competitor}\\n\\n\"\n",
|
| 375 |
-
" together += answer + \"\\n\\n\"\n",
|
| 376 |
-
"\n",
|
| 377 |
-
"print(f\"Number of competitors: {len(results)}\")\n",
|
| 378 |
-
"print(together)\n"
|
| 379 |
-
]
|
| 380 |
-
},
|
| 381 |
-
{
|
| 382 |
-
"cell_type": "code",
|
| 383 |
-
"execution_count": 18,
|
| 384 |
-
"metadata": {},
|
| 385 |
-
"outputs": [],
|
| 386 |
-
"source": [
|
| 387 |
-
"judge = f\"\"\"You are judging a competition between {len(results)} competitors.\n",
|
| 388 |
-
"Each model has been given this question:\n",
|
| 389 |
-
"\n",
|
| 390 |
-
"{question}\n",
|
| 391 |
-
"\n",
|
| 392 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 393 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 394 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 395 |
-
"\n",
|
| 396 |
-
"Here are the responses from each competitor:\n",
|
| 397 |
-
"\n",
|
| 398 |
-
"{together}\n",
|
| 399 |
-
"\n",
|
| 400 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 401 |
-
]
|
| 402 |
-
},
|
| 403 |
-
{
|
| 404 |
-
"cell_type": "code",
|
| 405 |
-
"execution_count": null,
|
| 406 |
-
"metadata": {},
|
| 407 |
-
"outputs": [],
|
| 408 |
-
"source": [
|
| 409 |
-
"print(judge)"
|
| 410 |
-
]
|
| 411 |
-
},
|
| 412 |
-
{
|
| 413 |
-
"cell_type": "code",
|
| 414 |
-
"execution_count": 20,
|
| 415 |
-
"metadata": {},
|
| 416 |
-
"outputs": [],
|
| 417 |
-
"source": [
|
| 418 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 419 |
-
]
|
| 420 |
-
},
|
| 421 |
-
{
|
| 422 |
-
"cell_type": "code",
|
| 423 |
-
"execution_count": null,
|
| 424 |
-
"metadata": {},
|
| 425 |
-
"outputs": [],
|
| 426 |
-
"source": [
|
| 427 |
-
"# Judgement time!\n",
|
| 428 |
-
"\n",
|
| 429 |
-
"openai = OpenAI()\n",
|
| 430 |
-
"response = openai.chat.completions.create(\n",
|
| 431 |
-
" model=\"o3-mini\",\n",
|
| 432 |
-
" messages=judge_messages,\n",
|
| 433 |
-
")\n",
|
| 434 |
-
"judgement = response.choices[0].message.content\n",
|
| 435 |
-
"print(judgement)\n"
|
| 436 |
-
]
|
| 437 |
-
},
|
| 438 |
-
{
|
| 439 |
-
"cell_type": "code",
|
| 440 |
-
"execution_count": null,
|
| 441 |
-
"metadata": {},
|
| 442 |
-
"outputs": [],
|
| 443 |
-
"source": [
|
| 444 |
-
"# OK let's turn this into results!\n",
|
| 445 |
-
"\n",
|
| 446 |
-
"results_dict = json.loads(judgement)\n",
|
| 447 |
-
"ranks = results_dict[\"results\"]\n",
|
| 448 |
-
"for index, comp in enumerate(ranks):\n",
|
| 449 |
-
" print(f\"Rank {index+1}: {comp}\")"
|
| 450 |
-
]
|
| 451 |
-
}
|
| 452 |
-
],
|
| 453 |
-
"metadata": {
|
| 454 |
-
"kernelspec": {
|
| 455 |
-
"display_name": ".venv",
|
| 456 |
-
"language": "python",
|
| 457 |
-
"name": "python3"
|
| 458 |
-
},
|
| 459 |
-
"language_info": {
|
| 460 |
-
"codemirror_mode": {
|
| 461 |
-
"name": "ipython",
|
| 462 |
-
"version": 3
|
| 463 |
-
},
|
| 464 |
-
"file_extension": ".py",
|
| 465 |
-
"mimetype": "text/x-python",
|
| 466 |
-
"name": "python",
|
| 467 |
-
"nbconvert_exporter": "python",
|
| 468 |
-
"pygments_lexer": "ipython3",
|
| 469 |
-
"version": "3.12.11"
|
| 470 |
-
}
|
| 471 |
-
},
|
| 472 |
-
"nbformat": 4,
|
| 473 |
-
"nbformat_minor": 2
|
| 474 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_async_with_reasons.ipynb
DELETED
|
@@ -1,490 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"This was derived from 2_lab2_async. "
|
| 17 |
-
]
|
| 18 |
-
},
|
| 19 |
-
{
|
| 20 |
-
"cell_type": "code",
|
| 21 |
-
"execution_count": null,
|
| 22 |
-
"metadata": {},
|
| 23 |
-
"outputs": [],
|
| 24 |
-
"source": [
|
| 25 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 26 |
-
"\n",
|
| 27 |
-
"import os\n",
|
| 28 |
-
"import json\n",
|
| 29 |
-
"import asyncio\n",
|
| 30 |
-
"from dotenv import load_dotenv\n",
|
| 31 |
-
"from openai import OpenAI, AsyncOpenAI\n",
|
| 32 |
-
"from anthropic import AsyncAnthropic\n",
|
| 33 |
-
"from pydantic import BaseModel"
|
| 34 |
-
]
|
| 35 |
-
},
|
| 36 |
-
{
|
| 37 |
-
"cell_type": "code",
|
| 38 |
-
"execution_count": null,
|
| 39 |
-
"metadata": {},
|
| 40 |
-
"outputs": [],
|
| 41 |
-
"source": [
|
| 42 |
-
"# Always remember to do this!\n",
|
| 43 |
-
"load_dotenv(override=True)"
|
| 44 |
-
]
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"cell_type": "code",
|
| 48 |
-
"execution_count": null,
|
| 49 |
-
"metadata": {},
|
| 50 |
-
"outputs": [],
|
| 51 |
-
"source": [
|
| 52 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 53 |
-
"\n",
|
| 54 |
-
"OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')\n",
|
| 55 |
-
"ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 56 |
-
"GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')\n",
|
| 57 |
-
"DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 58 |
-
"GROQ_API_KEY = os.getenv('GROQ_API_KEY')\n",
|
| 59 |
-
"\n",
|
| 60 |
-
"if OPENAI_API_KEY:\n",
|
| 61 |
-
" print(f\"OpenAI API Key exists and begins {OPENAI_API_KEY[:8]}\")\n",
|
| 62 |
-
"else:\n",
|
| 63 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 64 |
-
" \n",
|
| 65 |
-
"if ANTHROPIC_API_KEY:\n",
|
| 66 |
-
" print(f\"Anthropic API Key exists and begins {ANTHROPIC_API_KEY[:7]}\")\n",
|
| 67 |
-
"else:\n",
|
| 68 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 69 |
-
"\n",
|
| 70 |
-
"if GOOGLE_API_KEY:\n",
|
| 71 |
-
" print(f\"Google API Key exists and begins {GOOGLE_API_KEY[:2]}\")\n",
|
| 72 |
-
"else:\n",
|
| 73 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 74 |
-
"\n",
|
| 75 |
-
"if DEEPSEEK_API_KEY:\n",
|
| 76 |
-
" print(f\"DeepSeek API Key exists and begins {DEEPSEEK_API_KEY[:3]}\")\n",
|
| 77 |
-
"else:\n",
|
| 78 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 79 |
-
"\n",
|
| 80 |
-
"if GROQ_API_KEY:\n",
|
| 81 |
-
" print(f\"Groq API Key exists and begins {GROQ_API_KEY[:4]}\")\n",
|
| 82 |
-
"else:\n",
|
| 83 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 84 |
-
]
|
| 85 |
-
},
|
| 86 |
-
{
|
| 87 |
-
"cell_type": "code",
|
| 88 |
-
"execution_count": null,
|
| 89 |
-
"metadata": {},
|
| 90 |
-
"outputs": [],
|
| 91 |
-
"source": [
|
| 92 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 93 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 94 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 95 |
-
]
|
| 96 |
-
},
|
| 97 |
-
{
|
| 98 |
-
"cell_type": "code",
|
| 99 |
-
"execution_count": null,
|
| 100 |
-
"metadata": {},
|
| 101 |
-
"outputs": [],
|
| 102 |
-
"source": [
|
| 103 |
-
"print(messages)"
|
| 104 |
-
]
|
| 105 |
-
},
|
| 106 |
-
{
|
| 107 |
-
"cell_type": "code",
|
| 108 |
-
"execution_count": null,
|
| 109 |
-
"metadata": {},
|
| 110 |
-
"outputs": [],
|
| 111 |
-
"source": [
|
| 112 |
-
"openai = AsyncOpenAI()\n",
|
| 113 |
-
"response = await openai.chat.completions.create(\n",
|
| 114 |
-
" model=\"gpt-4o-mini\",\n",
|
| 115 |
-
" messages=messages,\n",
|
| 116 |
-
")\n",
|
| 117 |
-
"question = response.choices[0].message.content\n",
|
| 118 |
-
"print(question)\n"
|
| 119 |
-
]
|
| 120 |
-
},
|
| 121 |
-
{
|
| 122 |
-
"cell_type": "code",
|
| 123 |
-
"execution_count": null,
|
| 124 |
-
"metadata": {},
|
| 125 |
-
"outputs": [],
|
| 126 |
-
"source": [
|
| 127 |
-
"# Define Pydantic model for storing LLM results\n",
|
| 128 |
-
"class LLMResult(BaseModel):\n",
|
| 129 |
-
" model: str\n",
|
| 130 |
-
" answer: str\n"
|
| 131 |
-
]
|
| 132 |
-
},
|
| 133 |
-
{
|
| 134 |
-
"cell_type": "code",
|
| 135 |
-
"execution_count": null,
|
| 136 |
-
"metadata": {},
|
| 137 |
-
"outputs": [],
|
| 138 |
-
"source": [
|
| 139 |
-
"results: list[LLMResult] = []\n",
|
| 140 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 141 |
-
]
|
| 142 |
-
},
|
| 143 |
-
{
|
| 144 |
-
"cell_type": "code",
|
| 145 |
-
"execution_count": null,
|
| 146 |
-
"metadata": {},
|
| 147 |
-
"outputs": [],
|
| 148 |
-
"source": [
|
| 149 |
-
"# The API we know well\n",
|
| 150 |
-
"async def openai_answer() -> None:\n",
|
| 151 |
-
"\n",
|
| 152 |
-
" if OPENAI_API_KEY is None:\n",
|
| 153 |
-
" return None\n",
|
| 154 |
-
" \n",
|
| 155 |
-
" print(\"OpenAI starting!\")\n",
|
| 156 |
-
" model_name = \"gpt-4o-mini\"\n",
|
| 157 |
-
"\n",
|
| 158 |
-
" try:\n",
|
| 159 |
-
" response = await openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 160 |
-
" answer = response.choices[0].message.content\n",
|
| 161 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 162 |
-
" except Exception as e:\n",
|
| 163 |
-
" print(f\"Error with OpenAI: {e}\")\n",
|
| 164 |
-
" return None\n",
|
| 165 |
-
"\n",
|
| 166 |
-
" print(\"OpenAI done!\")"
|
| 167 |
-
]
|
| 168 |
-
},
|
| 169 |
-
{
|
| 170 |
-
"cell_type": "code",
|
| 171 |
-
"execution_count": null,
|
| 172 |
-
"metadata": {},
|
| 173 |
-
"outputs": [],
|
| 174 |
-
"source": [
|
| 175 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 176 |
-
"\n",
|
| 177 |
-
"async def anthropic_answer() -> None:\n",
|
| 178 |
-
"\n",
|
| 179 |
-
" if ANTHROPIC_API_KEY is None:\n",
|
| 180 |
-
" return None\n",
|
| 181 |
-
" \n",
|
| 182 |
-
" print(\"Anthropic starting!\")\n",
|
| 183 |
-
" model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 184 |
-
"\n",
|
| 185 |
-
" claude = AsyncAnthropic()\n",
|
| 186 |
-
" try:\n",
|
| 187 |
-
" response = await claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 188 |
-
" answer = response.content[0].text\n",
|
| 189 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 190 |
-
" except Exception as e:\n",
|
| 191 |
-
" print(f\"Error with Anthropic: {e}\")\n",
|
| 192 |
-
" return None\n",
|
| 193 |
-
"\n",
|
| 194 |
-
" print(\"Anthropic done!\")"
|
| 195 |
-
]
|
| 196 |
-
},
|
| 197 |
-
{
|
| 198 |
-
"cell_type": "code",
|
| 199 |
-
"execution_count": null,
|
| 200 |
-
"metadata": {},
|
| 201 |
-
"outputs": [],
|
| 202 |
-
"source": [
|
| 203 |
-
"async def google_answer() -> None:\n",
|
| 204 |
-
"\n",
|
| 205 |
-
" if GOOGLE_API_KEY is None:\n",
|
| 206 |
-
" return None\n",
|
| 207 |
-
" \n",
|
| 208 |
-
" print(\"Google starting!\")\n",
|
| 209 |
-
" model_name = \"gemini-2.0-flash\"\n",
|
| 210 |
-
"\n",
|
| 211 |
-
" gemini = AsyncOpenAI(api_key=GOOGLE_API_KEY, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 212 |
-
" try:\n",
|
| 213 |
-
" response = await gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 214 |
-
" answer = response.choices[0].message.content\n",
|
| 215 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 216 |
-
" except Exception as e:\n",
|
| 217 |
-
" print(f\"Error with Google: {e}\")\n",
|
| 218 |
-
" return None\n",
|
| 219 |
-
"\n",
|
| 220 |
-
" print(\"Google done!\")"
|
| 221 |
-
]
|
| 222 |
-
},
|
| 223 |
-
{
|
| 224 |
-
"cell_type": "code",
|
| 225 |
-
"execution_count": null,
|
| 226 |
-
"metadata": {},
|
| 227 |
-
"outputs": [],
|
| 228 |
-
"source": [
|
| 229 |
-
"async def deepseek_answer() -> None:\n",
|
| 230 |
-
"\n",
|
| 231 |
-
" if DEEPSEEK_API_KEY is None:\n",
|
| 232 |
-
" return None\n",
|
| 233 |
-
" \n",
|
| 234 |
-
" print(\"DeepSeek starting!\")\n",
|
| 235 |
-
" model_name = \"deepseek-chat\"\n",
|
| 236 |
-
"\n",
|
| 237 |
-
" deepseek = AsyncOpenAI(api_key=DEEPSEEK_API_KEY, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 238 |
-
" try:\n",
|
| 239 |
-
" response = await deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 240 |
-
" answer = response.choices[0].message.content\n",
|
| 241 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 242 |
-
" except Exception as e:\n",
|
| 243 |
-
" print(f\"Error with DeepSeek: {e}\")\n",
|
| 244 |
-
" return None\n",
|
| 245 |
-
"\n",
|
| 246 |
-
" print(\"DeepSeek done!\")"
|
| 247 |
-
]
|
| 248 |
-
},
|
| 249 |
-
{
|
| 250 |
-
"cell_type": "code",
|
| 251 |
-
"execution_count": null,
|
| 252 |
-
"metadata": {},
|
| 253 |
-
"outputs": [],
|
| 254 |
-
"source": [
|
| 255 |
-
"async def groq_answer() -> None:\n",
|
| 256 |
-
"\n",
|
| 257 |
-
" if GROQ_API_KEY is None:\n",
|
| 258 |
-
" return None\n",
|
| 259 |
-
" \n",
|
| 260 |
-
" print(\"Groq starting!\")\n",
|
| 261 |
-
" model_name = \"llama-3.3-70b-versatile\"\n",
|
| 262 |
-
"\n",
|
| 263 |
-
" groq = AsyncOpenAI(api_key=GROQ_API_KEY, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 264 |
-
" try:\n",
|
| 265 |
-
" response = await groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 266 |
-
" answer = response.choices[0].message.content\n",
|
| 267 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 268 |
-
" except Exception as e:\n",
|
| 269 |
-
" print(f\"Error with Groq: {e}\")\n",
|
| 270 |
-
" return None\n",
|
| 271 |
-
"\n",
|
| 272 |
-
" print(\"Groq done!\")\n"
|
| 273 |
-
]
|
| 274 |
-
},
|
| 275 |
-
{
|
| 276 |
-
"cell_type": "markdown",
|
| 277 |
-
"metadata": {},
|
| 278 |
-
"source": [
|
| 279 |
-
"## For the next cell, we will use Ollama\n",
|
| 280 |
-
"\n",
|
| 281 |
-
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 282 |
-
"and runs models locally using high performance C++ code.\n",
|
| 283 |
-
"\n",
|
| 284 |
-
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 285 |
-
"\n",
|
| 286 |
-
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 287 |
-
"\n",
|
| 288 |
-
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 289 |
-
"\n",
|
| 290 |
-
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 291 |
-
"\n",
|
| 292 |
-
"`ollama pull <model_name>` downloads a model locally \n",
|
| 293 |
-
"`ollama ls` lists all the models you've downloaded \n",
|
| 294 |
-
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 295 |
-
]
|
| 296 |
-
},
|
| 297 |
-
{
|
| 298 |
-
"cell_type": "markdown",
|
| 299 |
-
"metadata": {},
|
| 300 |
-
"source": [
|
| 301 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 302 |
-
" <tr>\n",
|
| 303 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 304 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 305 |
-
" </td>\n",
|
| 306 |
-
" <td>\n",
|
| 307 |
-
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 308 |
-
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 309 |
-
" </span>\n",
|
| 310 |
-
" </td>\n",
|
| 311 |
-
" </tr>\n",
|
| 312 |
-
"</table>"
|
| 313 |
-
]
|
| 314 |
-
},
|
| 315 |
-
{
|
| 316 |
-
"cell_type": "code",
|
| 317 |
-
"execution_count": null,
|
| 318 |
-
"metadata": {},
|
| 319 |
-
"outputs": [],
|
| 320 |
-
"source": [
|
| 321 |
-
"!ollama pull llama3.2"
|
| 322 |
-
]
|
| 323 |
-
},
|
| 324 |
-
{
|
| 325 |
-
"cell_type": "code",
|
| 326 |
-
"execution_count": null,
|
| 327 |
-
"metadata": {},
|
| 328 |
-
"outputs": [],
|
| 329 |
-
"source": [
|
| 330 |
-
"async def ollama_answer() -> None:\n",
|
| 331 |
-
" model_name = \"llama3.2\"\n",
|
| 332 |
-
"\n",
|
| 333 |
-
" print(\"Ollama starting!\")\n",
|
| 334 |
-
" ollama = AsyncOpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 335 |
-
" try:\n",
|
| 336 |
-
" response = await ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 337 |
-
" answer = response.choices[0].message.content\n",
|
| 338 |
-
" results.append(LLMResult(model=model_name, answer=answer))\n",
|
| 339 |
-
" except Exception as e:\n",
|
| 340 |
-
" print(f\"Error with Ollama: {e}\")\n",
|
| 341 |
-
" return None\n",
|
| 342 |
-
"\n",
|
| 343 |
-
" print(\"Ollama done!\") "
|
| 344 |
-
]
|
| 345 |
-
},
|
| 346 |
-
{
|
| 347 |
-
"cell_type": "code",
|
| 348 |
-
"execution_count": null,
|
| 349 |
-
"metadata": {},
|
| 350 |
-
"outputs": [],
|
| 351 |
-
"source": [
|
| 352 |
-
"async def gather_answers():\n",
|
| 353 |
-
" tasks = [\n",
|
| 354 |
-
" openai_answer(),\n",
|
| 355 |
-
" anthropic_answer(),\n",
|
| 356 |
-
" google_answer(),\n",
|
| 357 |
-
" deepseek_answer(),\n",
|
| 358 |
-
" groq_answer(),\n",
|
| 359 |
-
" ollama_answer()\n",
|
| 360 |
-
" ]\n",
|
| 361 |
-
" await asyncio.gather(*tasks)\n",
|
| 362 |
-
"\n",
|
| 363 |
-
"await gather_answers()"
|
| 364 |
-
]
|
| 365 |
-
},
|
| 366 |
-
{
|
| 367 |
-
"cell_type": "code",
|
| 368 |
-
"execution_count": null,
|
| 369 |
-
"metadata": {},
|
| 370 |
-
"outputs": [],
|
| 371 |
-
"source": [
|
| 372 |
-
"together = \"\"\n",
|
| 373 |
-
"competitors = []\n",
|
| 374 |
-
"answers = []\n",
|
| 375 |
-
"\n",
|
| 376 |
-
"for res in results:\n",
|
| 377 |
-
" competitor = res.model\n",
|
| 378 |
-
" answer = res.answer\n",
|
| 379 |
-
" competitors.append(competitor)\n",
|
| 380 |
-
" answers.append(answer)\n",
|
| 381 |
-
" together += f\"# Response from competitor {competitor}\\n\\n\"\n",
|
| 382 |
-
" together += answer + \"\\n\\n\"\n",
|
| 383 |
-
"\n",
|
| 384 |
-
"print(f\"Number of competitors: {len(results)}\")\n",
|
| 385 |
-
"print(together)\n"
|
| 386 |
-
]
|
| 387 |
-
},
|
| 388 |
-
{
|
| 389 |
-
"cell_type": "code",
|
| 390 |
-
"execution_count": null,
|
| 391 |
-
"metadata": {},
|
| 392 |
-
"outputs": [],
|
| 393 |
-
"source": [
|
| 394 |
-
"judge = f\"\"\"You are judging a competition between {len(results)} competitors.\n",
|
| 395 |
-
"Each model has been given this question:\n",
|
| 396 |
-
"\n",
|
| 397 |
-
"{question}\n",
|
| 398 |
-
"\n",
|
| 399 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 400 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 401 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...],\n",
|
| 402 |
-
"\"explanations\": [\"explanation for each rank\", \"explanation for each rank\", \"explanation for each rank\", ...]}}\n",
|
| 403 |
-
"\n",
|
| 404 |
-
"Here are the responses from each competitor:\n",
|
| 405 |
-
"\n",
|
| 406 |
-
"{together}\n",
|
| 407 |
-
"\n",
|
| 408 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 409 |
-
]
|
| 410 |
-
},
|
| 411 |
-
{
|
| 412 |
-
"cell_type": "code",
|
| 413 |
-
"execution_count": null,
|
| 414 |
-
"metadata": {},
|
| 415 |
-
"outputs": [],
|
| 416 |
-
"source": [
|
| 417 |
-
"print(judge)"
|
| 418 |
-
]
|
| 419 |
-
},
|
| 420 |
-
{
|
| 421 |
-
"cell_type": "code",
|
| 422 |
-
"execution_count": null,
|
| 423 |
-
"metadata": {},
|
| 424 |
-
"outputs": [],
|
| 425 |
-
"source": [
|
| 426 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 427 |
-
]
|
| 428 |
-
},
|
| 429 |
-
{
|
| 430 |
-
"cell_type": "code",
|
| 431 |
-
"execution_count": null,
|
| 432 |
-
"metadata": {},
|
| 433 |
-
"outputs": [],
|
| 434 |
-
"source": [
|
| 435 |
-
"# Judgement time!\n",
|
| 436 |
-
"\n",
|
| 437 |
-
"openai = OpenAI()\n",
|
| 438 |
-
"response = openai.chat.completions.create(\n",
|
| 439 |
-
" model=\"o3-mini\",\n",
|
| 440 |
-
" messages=judge_messages,\n",
|
| 441 |
-
")\n",
|
| 442 |
-
"judgement = response.choices[0].message.content\n",
|
| 443 |
-
"print(judgement)\n"
|
| 444 |
-
]
|
| 445 |
-
},
|
| 446 |
-
{
|
| 447 |
-
"cell_type": "code",
|
| 448 |
-
"execution_count": null,
|
| 449 |
-
"metadata": {},
|
| 450 |
-
"outputs": [],
|
| 451 |
-
"source": [
|
| 452 |
-
"# OK let's turn this into results!\n",
|
| 453 |
-
"\n",
|
| 454 |
-
"results_dict = json.loads(judgement)\n",
|
| 455 |
-
"ranks = results_dict[\"results\"]\n",
|
| 456 |
-
"explanations = results_dict[\"explanations\"]\n",
|
| 457 |
-
"for index, comp in enumerate(ranks):\n",
|
| 458 |
-
" print(f\"Rank {index+1}: {comp} \\n\\t{explanations[index]}\")"
|
| 459 |
-
]
|
| 460 |
-
},
|
| 461 |
-
{
|
| 462 |
-
"cell_type": "code",
|
| 463 |
-
"execution_count": null,
|
| 464 |
-
"metadata": {},
|
| 465 |
-
"outputs": [],
|
| 466 |
-
"source": []
|
| 467 |
-
}
|
| 468 |
-
],
|
| 469 |
-
"metadata": {
|
| 470 |
-
"kernelspec": {
|
| 471 |
-
"display_name": ".venv",
|
| 472 |
-
"language": "python",
|
| 473 |
-
"name": "python3"
|
| 474 |
-
},
|
| 475 |
-
"language_info": {
|
| 476 |
-
"codemirror_mode": {
|
| 477 |
-
"name": "ipython",
|
| 478 |
-
"version": 3
|
| 479 |
-
},
|
| 480 |
-
"file_extension": ".py",
|
| 481 |
-
"mimetype": "text/x-python",
|
| 482 |
-
"name": "python",
|
| 483 |
-
"nbconvert_exporter": "python",
|
| 484 |
-
"pygments_lexer": "ipython3",
|
| 485 |
-
"version": "3.12.2"
|
| 486 |
-
}
|
| 487 |
-
},
|
| 488 |
-
"nbformat": 4,
|
| 489 |
-
"nbformat_minor": 2
|
| 490 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_doclee99_gpt5_improves_gemini.25flash.ipynb
DELETED
|
@@ -1,620 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
-
" <tr>\n",
|
| 18 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
-
" </td>\n",
|
| 21 |
-
" <td>\n",
|
| 22 |
-
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
-
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "code",
|
| 32 |
-
"execution_count": null,
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"outputs": [],
|
| 35 |
-
"source": [
|
| 36 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
-
"\n",
|
| 38 |
-
"import os\n",
|
| 39 |
-
"import json\n",
|
| 40 |
-
"from dotenv import load_dotenv\n",
|
| 41 |
-
"from openai import OpenAI\n",
|
| 42 |
-
"from anthropic import Anthropic\n",
|
| 43 |
-
"from IPython.display import Markdown, display"
|
| 44 |
-
]
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"cell_type": "code",
|
| 48 |
-
"execution_count": null,
|
| 49 |
-
"metadata": {},
|
| 50 |
-
"outputs": [],
|
| 51 |
-
"source": [
|
| 52 |
-
"# Always remember to do this!\n",
|
| 53 |
-
"load_dotenv(override=True)"
|
| 54 |
-
]
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"cell_type": "code",
|
| 58 |
-
"execution_count": null,
|
| 59 |
-
"metadata": {},
|
| 60 |
-
"outputs": [],
|
| 61 |
-
"source": [
|
| 62 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
-
"\n",
|
| 64 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
-
"\n",
|
| 70 |
-
"if openai_api_key:\n",
|
| 71 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
-
"else:\n",
|
| 73 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
-
" \n",
|
| 75 |
-
"if anthropic_api_key:\n",
|
| 76 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
-
"else:\n",
|
| 78 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
-
"\n",
|
| 80 |
-
"if google_api_key:\n",
|
| 81 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
-
"else:\n",
|
| 83 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
-
"\n",
|
| 85 |
-
"if deepseek_api_key:\n",
|
| 86 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
-
"else:\n",
|
| 88 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
-
"\n",
|
| 90 |
-
"if groq_api_key:\n",
|
| 91 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
-
"else:\n",
|
| 93 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
-
]
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"cell_type": "code",
|
| 98 |
-
"execution_count": null,
|
| 99 |
-
"metadata": {},
|
| 100 |
-
"outputs": [],
|
| 101 |
-
"source": [
|
| 102 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
-
]
|
| 106 |
-
},
|
| 107 |
-
{
|
| 108 |
-
"cell_type": "code",
|
| 109 |
-
"execution_count": null,
|
| 110 |
-
"metadata": {},
|
| 111 |
-
"outputs": [],
|
| 112 |
-
"source": [
|
| 113 |
-
"messages"
|
| 114 |
-
]
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"cell_type": "code",
|
| 118 |
-
"execution_count": null,
|
| 119 |
-
"metadata": {},
|
| 120 |
-
"outputs": [],
|
| 121 |
-
"source": [
|
| 122 |
-
"openai = OpenAI()\n",
|
| 123 |
-
"response = openai.chat.completions.create(\n",
|
| 124 |
-
" model=\"gpt-4o-mini\",\n",
|
| 125 |
-
" messages=messages,\n",
|
| 126 |
-
")\n",
|
| 127 |
-
"question = response.choices[0].message.content\n",
|
| 128 |
-
"print(question)\n"
|
| 129 |
-
]
|
| 130 |
-
},
|
| 131 |
-
{
|
| 132 |
-
"cell_type": "code",
|
| 133 |
-
"execution_count": null,
|
| 134 |
-
"metadata": {},
|
| 135 |
-
"outputs": [],
|
| 136 |
-
"source": [
|
| 137 |
-
"competitors = []\n",
|
| 138 |
-
"answers = []\n",
|
| 139 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
-
]
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"cell_type": "code",
|
| 144 |
-
"execution_count": null,
|
| 145 |
-
"metadata": {},
|
| 146 |
-
"outputs": [],
|
| 147 |
-
"source": [
|
| 148 |
-
"# The API we know well\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"model_name = \"gpt-4o-mini\"\n",
|
| 151 |
-
"\n",
|
| 152 |
-
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
-
"answer = response.choices[0].message.content\n",
|
| 154 |
-
"\n",
|
| 155 |
-
"display(Markdown(answer))\n",
|
| 156 |
-
"competitors.append(model_name)\n",
|
| 157 |
-
"answers.append(answer)"
|
| 158 |
-
]
|
| 159 |
-
},
|
| 160 |
-
{
|
| 161 |
-
"cell_type": "code",
|
| 162 |
-
"execution_count": null,
|
| 163 |
-
"metadata": {},
|
| 164 |
-
"outputs": [],
|
| 165 |
-
"source": [
|
| 166 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 167 |
-
"\n",
|
| 168 |
-
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"claude = Anthropic()\n",
|
| 171 |
-
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 172 |
-
"answer = response.content[0].text\n",
|
| 173 |
-
"\n",
|
| 174 |
-
"display(Markdown(answer))\n",
|
| 175 |
-
"competitors.append(model_name)\n",
|
| 176 |
-
"answers.append(answer)"
|
| 177 |
-
]
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"cell_type": "code",
|
| 181 |
-
"execution_count": null,
|
| 182 |
-
"metadata": {},
|
| 183 |
-
"outputs": [],
|
| 184 |
-
"source": [
|
| 185 |
-
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 186 |
-
"model_name = \"gemini-2.0-flash\"\n",
|
| 187 |
-
"\n",
|
| 188 |
-
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
-
"answer = response.choices[0].message.content\n",
|
| 190 |
-
"\n",
|
| 191 |
-
"display(Markdown(answer))\n",
|
| 192 |
-
"competitors.append(model_name)\n",
|
| 193 |
-
"answers.append(answer)"
|
| 194 |
-
]
|
| 195 |
-
},
|
| 196 |
-
{
|
| 197 |
-
"cell_type": "code",
|
| 198 |
-
"execution_count": null,
|
| 199 |
-
"metadata": {},
|
| 200 |
-
"outputs": [],
|
| 201 |
-
"source": [
|
| 202 |
-
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 203 |
-
"model_name = \"deepseek-chat\"\n",
|
| 204 |
-
"\n",
|
| 205 |
-
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
-
"answer = response.choices[0].message.content\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"display(Markdown(answer))\n",
|
| 209 |
-
"competitors.append(model_name)\n",
|
| 210 |
-
"answers.append(answer)"
|
| 211 |
-
]
|
| 212 |
-
},
|
| 213 |
-
{
|
| 214 |
-
"cell_type": "code",
|
| 215 |
-
"execution_count": null,
|
| 216 |
-
"metadata": {},
|
| 217 |
-
"outputs": [],
|
| 218 |
-
"source": [
|
| 219 |
-
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 220 |
-
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 221 |
-
"\n",
|
| 222 |
-
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 223 |
-
"answer = response.choices[0].message.content\n",
|
| 224 |
-
"\n",
|
| 225 |
-
"display(Markdown(answer))\n",
|
| 226 |
-
"competitors.append(model_name)\n",
|
| 227 |
-
"answers.append(answer)\n"
|
| 228 |
-
]
|
| 229 |
-
},
|
| 230 |
-
{
|
| 231 |
-
"cell_type": "markdown",
|
| 232 |
-
"metadata": {},
|
| 233 |
-
"source": [
|
| 234 |
-
"## For the next cell, we will use Ollama\n",
|
| 235 |
-
"\n",
|
| 236 |
-
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 237 |
-
"and runs models locally using high performance C++ code.\n",
|
| 238 |
-
"\n",
|
| 239 |
-
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 240 |
-
"\n",
|
| 241 |
-
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 242 |
-
"\n",
|
| 243 |
-
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 244 |
-
"\n",
|
| 245 |
-
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 246 |
-
"\n",
|
| 247 |
-
"`ollama pull <model_name>` downloads a model locally \n",
|
| 248 |
-
"`ollama ls` lists all the models you've downloaded \n",
|
| 249 |
-
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 250 |
-
]
|
| 251 |
-
},
|
| 252 |
-
{
|
| 253 |
-
"cell_type": "markdown",
|
| 254 |
-
"metadata": {},
|
| 255 |
-
"source": [
|
| 256 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 257 |
-
" <tr>\n",
|
| 258 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 259 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 260 |
-
" </td>\n",
|
| 261 |
-
" <td>\n",
|
| 262 |
-
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 263 |
-
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 264 |
-
" </span>\n",
|
| 265 |
-
" </td>\n",
|
| 266 |
-
" </tr>\n",
|
| 267 |
-
"</table>"
|
| 268 |
-
]
|
| 269 |
-
},
|
| 270 |
-
{
|
| 271 |
-
"cell_type": "code",
|
| 272 |
-
"execution_count": null,
|
| 273 |
-
"metadata": {},
|
| 274 |
-
"outputs": [],
|
| 275 |
-
"source": [
|
| 276 |
-
"!ollama pull llama3.2"
|
| 277 |
-
]
|
| 278 |
-
},
|
| 279 |
-
{
|
| 280 |
-
"cell_type": "code",
|
| 281 |
-
"execution_count": null,
|
| 282 |
-
"metadata": {},
|
| 283 |
-
"outputs": [],
|
| 284 |
-
"source": [
|
| 285 |
-
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 286 |
-
"model_name = \"llama3.2\"\n",
|
| 287 |
-
"\n",
|
| 288 |
-
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 289 |
-
"answer = response.choices[0].message.content\n",
|
| 290 |
-
"\n",
|
| 291 |
-
"display(Markdown(answer))\n",
|
| 292 |
-
"competitors.append(model_name)\n",
|
| 293 |
-
"answers.append(answer)"
|
| 294 |
-
]
|
| 295 |
-
},
|
| 296 |
-
{
|
| 297 |
-
"cell_type": "code",
|
| 298 |
-
"execution_count": null,
|
| 299 |
-
"metadata": {},
|
| 300 |
-
"outputs": [],
|
| 301 |
-
"source": [
|
| 302 |
-
"# So where are we?\n",
|
| 303 |
-
"\n",
|
| 304 |
-
"print(competitors)\n",
|
| 305 |
-
"print(answers)\n"
|
| 306 |
-
]
|
| 307 |
-
},
|
| 308 |
-
{
|
| 309 |
-
"cell_type": "code",
|
| 310 |
-
"execution_count": null,
|
| 311 |
-
"metadata": {},
|
| 312 |
-
"outputs": [],
|
| 313 |
-
"source": [
|
| 314 |
-
"# It's nice to know how to use \"zip\"\n",
|
| 315 |
-
"for competitor, answer in zip(competitors, answers):\n",
|
| 316 |
-
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 317 |
-
]
|
| 318 |
-
},
|
| 319 |
-
{
|
| 320 |
-
"cell_type": "code",
|
| 321 |
-
"execution_count": null,
|
| 322 |
-
"metadata": {},
|
| 323 |
-
"outputs": [],
|
| 324 |
-
"source": [
|
| 325 |
-
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 326 |
-
"\n",
|
| 327 |
-
"together = \"\"\n",
|
| 328 |
-
"for index, answer in enumerate(answers):\n",
|
| 329 |
-
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 330 |
-
" together += answer + \"\\n\\n\""
|
| 331 |
-
]
|
| 332 |
-
},
|
| 333 |
-
{
|
| 334 |
-
"cell_type": "code",
|
| 335 |
-
"execution_count": null,
|
| 336 |
-
"metadata": {},
|
| 337 |
-
"outputs": [],
|
| 338 |
-
"source": [
|
| 339 |
-
"# print(together)\n",
|
| 340 |
-
"display(Markdown(together))"
|
| 341 |
-
]
|
| 342 |
-
},
|
| 343 |
-
{
|
| 344 |
-
"cell_type": "code",
|
| 345 |
-
"execution_count": null,
|
| 346 |
-
"metadata": {},
|
| 347 |
-
"outputs": [],
|
| 348 |
-
"source": [
|
| 349 |
-
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 350 |
-
"Each model has been given this question:\n",
|
| 351 |
-
"\n",
|
| 352 |
-
"{question}\n",
|
| 353 |
-
"\n",
|
| 354 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 355 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 356 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 357 |
-
"\n",
|
| 358 |
-
"Here are the responses from each competitor:\n",
|
| 359 |
-
"\n",
|
| 360 |
-
"{together}\n",
|
| 361 |
-
"\n",
|
| 362 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 363 |
-
]
|
| 364 |
-
},
|
| 365 |
-
{
|
| 366 |
-
"cell_type": "code",
|
| 367 |
-
"execution_count": null,
|
| 368 |
-
"metadata": {},
|
| 369 |
-
"outputs": [],
|
| 370 |
-
"source": [
|
| 371 |
-
"print(judge)"
|
| 372 |
-
]
|
| 373 |
-
},
|
| 374 |
-
{
|
| 375 |
-
"cell_type": "code",
|
| 376 |
-
"execution_count": null,
|
| 377 |
-
"metadata": {},
|
| 378 |
-
"outputs": [],
|
| 379 |
-
"source": [
|
| 380 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 381 |
-
]
|
| 382 |
-
},
|
| 383 |
-
{
|
| 384 |
-
"cell_type": "code",
|
| 385 |
-
"execution_count": null,
|
| 386 |
-
"metadata": {},
|
| 387 |
-
"outputs": [],
|
| 388 |
-
"source": [
|
| 389 |
-
"# Judgement time!\n",
|
| 390 |
-
"\n",
|
| 391 |
-
"openai = OpenAI()\n",
|
| 392 |
-
"response = openai.chat.completions.create(\n",
|
| 393 |
-
" model=\"o3-mini\",\n",
|
| 394 |
-
" messages=judge_messages,\n",
|
| 395 |
-
")\n",
|
| 396 |
-
"results = response.choices[0].message.content\n",
|
| 397 |
-
"print(results)\n"
|
| 398 |
-
]
|
| 399 |
-
},
|
| 400 |
-
{
|
| 401 |
-
"cell_type": "code",
|
| 402 |
-
"execution_count": null,
|
| 403 |
-
"metadata": {},
|
| 404 |
-
"outputs": [],
|
| 405 |
-
"source": [
|
| 406 |
-
"# OK let's turn this into results!\n",
|
| 407 |
-
"\n",
|
| 408 |
-
"results_dict = json.loads(results)\n",
|
| 409 |
-
"ranks = results_dict[\"results\"]\n",
|
| 410 |
-
"for index, result in enumerate(ranks):\n",
|
| 411 |
-
" competitor = competitors[int(result)-1]\n",
|
| 412 |
-
" print(f\"Rank {index+1}: {competitor}\")"
|
| 413 |
-
]
|
| 414 |
-
},
|
| 415 |
-
{
|
| 416 |
-
"cell_type": "markdown",
|
| 417 |
-
"metadata": {},
|
| 418 |
-
"source": [
|
| 419 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 420 |
-
" <tr>\n",
|
| 421 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 422 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 423 |
-
" </td>\n",
|
| 424 |
-
" <td>\n",
|
| 425 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 426 |
-
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 427 |
-
" </span>\n",
|
| 428 |
-
" </td>\n",
|
| 429 |
-
" </tr>\n",
|
| 430 |
-
"</table>"
|
| 431 |
-
]
|
| 432 |
-
},
|
| 433 |
-
{
|
| 434 |
-
"cell_type": "code",
|
| 435 |
-
"execution_count": null,
|
| 436 |
-
"metadata": {},
|
| 437 |
-
"outputs": [],
|
| 438 |
-
"source": [
|
| 439 |
-
"# Implement Evaluator-Optimizer workflow design pattern - An Optimizer LLM analyzes the response of the top-ranked competitor\n",
|
| 440 |
-
"# and creates a system prompt designed to improve the response. The system prompot is then\n",
|
| 441 |
-
"# sent back to the top-ranked competitor to deliver a new response. \n",
|
| 442 |
-
"# The optimizer LLM then compares the new response to the old response and surmises\n",
|
| 443 |
-
"# what aspects of the system prompt may be responsible for the differences in the responses.\n",
|
| 444 |
-
"\n",
|
| 445 |
-
"\n",
|
| 446 |
-
"\n",
|
| 447 |
-
"# Get the top competitor (model name) and their response\n",
|
| 448 |
-
"top_rank_index = int(ranks[0]) - 1\n",
|
| 449 |
-
"top_competitor_name = competitors[top_rank_index]\n",
|
| 450 |
-
"top_competitor_response = answers[top_rank_index]\n",
|
| 451 |
-
"top_competitor_prompt = question\n",
|
| 452 |
-
"\n",
|
| 453 |
-
"# Compose a system prompt for GPT-5 to act as an expert evaluator of question quality and answer depth\n",
|
| 454 |
-
"system_prompt = (\n",
|
| 455 |
-
" \"You are an expert evaluator of LLM prompt quality and answer depth. \"\n",
|
| 456 |
-
" \"Your task is to analyze the comprehensiveness and depth of thought in the following answer, \"\n",
|
| 457 |
-
" \"which was generated by a language model in response to a challenging question. \"\n",
|
| 458 |
-
" \"Consider aspects such as completeness, insight, reasoning, and nuance. \"\n",
|
| 459 |
-
" \"Provide a detailed analysis of the answer's strengths and weaknesses and store in the 'markdown_analysis' property.\"\n",
|
| 460 |
-
" \"Generate a suggested system prompt that will improve the answer and store in the 'system_prompt' property.\"\n",
|
| 461 |
-
")\n",
|
| 462 |
-
"\n",
|
| 463 |
-
"# Compose the user prompt for GPT-5\n",
|
| 464 |
-
"user_prompt = (\n",
|
| 465 |
-
" f\"Prompt:\\n{top_competitor_prompt}\\n\\n\"\n",
|
| 466 |
-
" f\"Answer:\\n{top_competitor_response}\\n\\n\"\n",
|
| 467 |
-
" \"Please analyze the comprehensiveness and depth of thought of the above answer. \"\n",
|
| 468 |
-
" \"Discuss its strengths and weaknesses in detail.\"\n",
|
| 469 |
-
")\n",
|
| 470 |
-
"\n",
|
| 471 |
-
"# Call GPT-5 to perform the evaluation\n",
|
| 472 |
-
"gpt5 = OpenAI()\n",
|
| 473 |
-
"\n",
|
| 474 |
-
"# Define the tool schema\n",
|
| 475 |
-
"tools = [\n",
|
| 476 |
-
" {\n",
|
| 477 |
-
" \"type\": \"function\",\n",
|
| 478 |
-
" \"function\": {\n",
|
| 479 |
-
" \"name\": \"markdown_and_structured_data\",\n",
|
| 480 |
-
" \"description\": \"Provide both markdown analysis and structured data\",\n",
|
| 481 |
-
" \"parameters\": {\n",
|
| 482 |
-
" \"type\": \"object\",\n",
|
| 483 |
-
" \"properties\": {\n",
|
| 484 |
-
" \"markdown_analysis\": {\n",
|
| 485 |
-
" \"type\": \"string\",\n",
|
| 486 |
-
" \"description\": \"Detailed markdown analysis\"\n",
|
| 487 |
-
" },\n",
|
| 488 |
-
" \"system_prompt\": {\n",
|
| 489 |
-
" \"type\": \"string\"\n",
|
| 490 |
-
" }\n",
|
| 491 |
-
" },\n",
|
| 492 |
-
" \"required\": [\"markdown_analysis\", \"sentiment\", \"confidence\", \"key_phrases\"]\n",
|
| 493 |
-
" }\n",
|
| 494 |
-
" }\n",
|
| 495 |
-
" }\n",
|
| 496 |
-
"]\n",
|
| 497 |
-
"\n",
|
| 498 |
-
"gpt5_response = gpt5.chat.completions.create(\n",
|
| 499 |
-
" model=\"gpt-5\",\n",
|
| 500 |
-
" messages=[\n",
|
| 501 |
-
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
| 502 |
-
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
| 503 |
-
" ],\n",
|
| 504 |
-
" tools=tools,\n",
|
| 505 |
-
" tool_choice={\"type\": \"function\", \"function\": {\"name\": \"markdown_and_structured_data\"}}\n",
|
| 506 |
-
")\n",
|
| 507 |
-
"\n",
|
| 508 |
-
"tool_call = gpt5_response.choices[0].message.tool_calls[0]\n",
|
| 509 |
-
"arguments = json.loads(tool_call.function.arguments)\n",
|
| 510 |
-
"\n",
|
| 511 |
-
"markdown_analysis = arguments[\"markdown_analysis\"]\n",
|
| 512 |
-
"system_prompt = arguments[\"system_prompt\"]\n",
|
| 513 |
-
"\n",
|
| 514 |
-
"\n",
|
| 515 |
-
"\n",
|
| 516 |
-
"\n",
|
| 517 |
-
"# Display the evaluation\n",
|
| 518 |
-
"from IPython.display import Markdown, display\n",
|
| 519 |
-
"display(Markdown(\"### GPT-5 Evaluation of Top Competitor's Answer\"))\n",
|
| 520 |
-
"display(Markdown(f\"Top Competitor: {top_competitor_name}\"))\n",
|
| 521 |
-
"display(Markdown(markdown_analysis))\n",
|
| 522 |
-
"display(Markdown(\"### Suggested System Prompt\"))\n",
|
| 523 |
-
"display(Markdown(system_prompt))\n",
|
| 524 |
-
"\n",
|
| 525 |
-
"\n",
|
| 526 |
-
"# The top competitor was gemini-2.0-flash, so send the original question and suggested system prompt to generate a new response\n",
|
| 527 |
-
"# Send the system_prompt and original question to gemini-2.0-flash to generate a new answer\n",
|
| 528 |
-
"\n",
|
| 529 |
-
"gemini_response = gemini.chat.completions.create(\n",
|
| 530 |
-
" model=\"gemini-2.0-flash\",\n",
|
| 531 |
-
" messages=[\n",
|
| 532 |
-
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
| 533 |
-
" {\"role\": \"user\", \"content\": question}\n",
|
| 534 |
-
" ]\n",
|
| 535 |
-
")\n",
|
| 536 |
-
"\n",
|
| 537 |
-
"new_answer = gemini_response.choices[0].message.content\n",
|
| 538 |
-
"\n",
|
| 539 |
-
"display(Markdown(\"### Gemini-2.0-Flash New Answer with Suggested System Prompt\"))\n",
|
| 540 |
-
"display(Markdown(new_answer))\n",
|
| 541 |
-
"\n",
|
| 542 |
-
"comparison_prompt = f\"\"\"You are an expert LLM evaluator. Compare the following two answers to the same question, where the only difference is that the second answer was generated using a system prompt suggested by you (GPT-5) after evaluating the first answer.\n",
|
| 543 |
-
"\n",
|
| 544 |
-
"Original Answer (from {top_competitor_name}):\n",
|
| 545 |
-
"{top_competitor_response}\n",
|
| 546 |
-
"\n",
|
| 547 |
-
"New Answer (from {top_competitor_name} with your system prompt):\n",
|
| 548 |
-
"{new_answer}\n",
|
| 549 |
-
"\n",
|
| 550 |
-
"System Prompt Used for New Answer:\n",
|
| 551 |
-
"{system_prompt}\n",
|
| 552 |
-
"\n",
|
| 553 |
-
"Please analyze:\n",
|
| 554 |
-
"- What are the key differences between the two answers?\n",
|
| 555 |
-
"- What aspects of the system prompt likely contributed to these differences?\n",
|
| 556 |
-
"- Did the system prompt improve the quality, accuracy, or style of the answer? How?\n",
|
| 557 |
-
"- Any remaining limitations or further suggestions.\n",
|
| 558 |
-
"\n",
|
| 559 |
-
"Provide a detailed, structured analysis.\n",
|
| 560 |
-
"\"\"\"\n",
|
| 561 |
-
"\n",
|
| 562 |
-
"gpt5_comparison_response = gpt5.chat.completions.create(\n",
|
| 563 |
-
" model=\"gpt-5\",\n",
|
| 564 |
-
" messages=[\n",
|
| 565 |
-
" {\"role\": \"system\", \"content\": \"You are an expert LLM evaluator.\"},\n",
|
| 566 |
-
" {\"role\": \"user\", \"content\": comparison_prompt}\n",
|
| 567 |
-
" ]\n",
|
| 568 |
-
")\n",
|
| 569 |
-
"\n",
|
| 570 |
-
"comparison_analysis = gpt5_comparison_response.choices[0].message.content\n",
|
| 571 |
-
"\n",
|
| 572 |
-
"display(Markdown(\"### GPT-5 Analysis: Impact of System Prompt on Gemini-2.0-Flash's Answer\"))\n",
|
| 573 |
-
"display(Markdown(comparison_analysis))\n",
|
| 574 |
-
"\n",
|
| 575 |
-
"\n"
|
| 576 |
-
]
|
| 577 |
-
},
|
| 578 |
-
{
|
| 579 |
-
"cell_type": "markdown",
|
| 580 |
-
"metadata": {},
|
| 581 |
-
"source": [
|
| 582 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 583 |
-
" <tr>\n",
|
| 584 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 585 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 586 |
-
" </td>\n",
|
| 587 |
-
" <td>\n",
|
| 588 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 589 |
-
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 590 |
-
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 591 |
-
" to business projects where accuracy is critical.\n",
|
| 592 |
-
" </span>\n",
|
| 593 |
-
" </td>\n",
|
| 594 |
-
" </tr>\n",
|
| 595 |
-
"</table>"
|
| 596 |
-
]
|
| 597 |
-
}
|
| 598 |
-
],
|
| 599 |
-
"metadata": {
|
| 600 |
-
"kernelspec": {
|
| 601 |
-
"display_name": ".venv",
|
| 602 |
-
"language": "python",
|
| 603 |
-
"name": "python3"
|
| 604 |
-
},
|
| 605 |
-
"language_info": {
|
| 606 |
-
"codemirror_mode": {
|
| 607 |
-
"name": "ipython",
|
| 608 |
-
"version": 3
|
| 609 |
-
},
|
| 610 |
-
"file_extension": ".py",
|
| 611 |
-
"mimetype": "text/x-python",
|
| 612 |
-
"name": "python",
|
| 613 |
-
"nbconvert_exporter": "python",
|
| 614 |
-
"pygments_lexer": "ipython3",
|
| 615 |
-
"version": "3.12.7"
|
| 616 |
-
}
|
| 617 |
-
},
|
| 618 |
-
"nbformat": 4,
|
| 619 |
-
"nbformat_minor": 2
|
| 620 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_exercise.ipynb
DELETED
|
@@ -1,336 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# From Judging to Synthesizing — Evolving Multi-Agent Patterns\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"In the original 2_lab2.ipynb, we explored a powerful agentic design pattern: sending the same question to multiple large language models (LLMs), then using a separate “judge” agent to evaluate and rank their responses. This approach is valuable for identifying the single best answer among many, leveraging the strengths of ensemble reasoning and critical evaluation.\n",
|
| 10 |
-
"\n",
|
| 11 |
-
"However, selecting just one “winner” can leave valuable insights from other models untapped. To address this, I am shifting to a new agentic pattern in this notebook: the synthesizer/improver pattern. Instead of merely ranking responses, we will prompt a dedicated LLM to review all answers, extract the most compelling ideas from each, and synthesize them into a single, improved response. \n",
|
| 12 |
-
"\n",
|
| 13 |
-
"This approach aims to combine the collective intelligence of multiple models, producing an answer that is richer, more nuanced, and more robust than any individual response.\n"
|
| 14 |
-
]
|
| 15 |
-
},
|
| 16 |
-
{
|
| 17 |
-
"cell_type": "code",
|
| 18 |
-
"execution_count": 1,
|
| 19 |
-
"metadata": {},
|
| 20 |
-
"outputs": [],
|
| 21 |
-
"source": [
|
| 22 |
-
"import os\n",
|
| 23 |
-
"import json\n",
|
| 24 |
-
"from dotenv import load_dotenv\n",
|
| 25 |
-
"from openai import OpenAI\n",
|
| 26 |
-
"from anthropic import Anthropic\n",
|
| 27 |
-
"from IPython.display import Markdown, display"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "code",
|
| 32 |
-
"execution_count": null,
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"outputs": [],
|
| 35 |
-
"source": [
|
| 36 |
-
"load_dotenv(override=True)"
|
| 37 |
-
]
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"cell_type": "code",
|
| 41 |
-
"execution_count": null,
|
| 42 |
-
"metadata": {},
|
| 43 |
-
"outputs": [],
|
| 44 |
-
"source": [
|
| 45 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 46 |
-
"\n",
|
| 47 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 48 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 49 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 50 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 51 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 52 |
-
"\n",
|
| 53 |
-
"if openai_api_key:\n",
|
| 54 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 55 |
-
"else:\n",
|
| 56 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 57 |
-
" \n",
|
| 58 |
-
"if anthropic_api_key:\n",
|
| 59 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 60 |
-
"else:\n",
|
| 61 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 62 |
-
"\n",
|
| 63 |
-
"if google_api_key:\n",
|
| 64 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 65 |
-
"else:\n",
|
| 66 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 67 |
-
"\n",
|
| 68 |
-
"if deepseek_api_key:\n",
|
| 69 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 70 |
-
"else:\n",
|
| 71 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 72 |
-
"\n",
|
| 73 |
-
"if groq_api_key:\n",
|
| 74 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 75 |
-
"else:\n",
|
| 76 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 77 |
-
]
|
| 78 |
-
},
|
| 79 |
-
{
|
| 80 |
-
"cell_type": "code",
|
| 81 |
-
"execution_count": 7,
|
| 82 |
-
"metadata": {},
|
| 83 |
-
"outputs": [],
|
| 84 |
-
"source": [
|
| 85 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their collective intelligence. \"\n",
|
| 86 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 87 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 88 |
-
]
|
| 89 |
-
},
|
| 90 |
-
{
|
| 91 |
-
"cell_type": "code",
|
| 92 |
-
"execution_count": null,
|
| 93 |
-
"metadata": {},
|
| 94 |
-
"outputs": [],
|
| 95 |
-
"source": [
|
| 96 |
-
"messages"
|
| 97 |
-
]
|
| 98 |
-
},
|
| 99 |
-
{
|
| 100 |
-
"cell_type": "code",
|
| 101 |
-
"execution_count": null,
|
| 102 |
-
"metadata": {},
|
| 103 |
-
"outputs": [],
|
| 104 |
-
"source": [
|
| 105 |
-
"openai = OpenAI()\n",
|
| 106 |
-
"response = openai.chat.completions.create(\n",
|
| 107 |
-
" model=\"gpt-4o-mini\",\n",
|
| 108 |
-
" messages=messages,\n",
|
| 109 |
-
")\n",
|
| 110 |
-
"question = response.choices[0].message.content\n",
|
| 111 |
-
"print(question)\n"
|
| 112 |
-
]
|
| 113 |
-
},
|
| 114 |
-
{
|
| 115 |
-
"cell_type": "code",
|
| 116 |
-
"execution_count": 10,
|
| 117 |
-
"metadata": {},
|
| 118 |
-
"outputs": [],
|
| 119 |
-
"source": [
|
| 120 |
-
"teammates = []\n",
|
| 121 |
-
"answers = []\n",
|
| 122 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 123 |
-
]
|
| 124 |
-
},
|
| 125 |
-
{
|
| 126 |
-
"cell_type": "code",
|
| 127 |
-
"execution_count": null,
|
| 128 |
-
"metadata": {},
|
| 129 |
-
"outputs": [],
|
| 130 |
-
"source": [
|
| 131 |
-
"# The API we know well\n",
|
| 132 |
-
"\n",
|
| 133 |
-
"model_name = \"gpt-4o-mini\"\n",
|
| 134 |
-
"\n",
|
| 135 |
-
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 136 |
-
"answer = response.choices[0].message.content\n",
|
| 137 |
-
"\n",
|
| 138 |
-
"display(Markdown(answer))\n",
|
| 139 |
-
"teammates.append(model_name)\n",
|
| 140 |
-
"answers.append(answer)"
|
| 141 |
-
]
|
| 142 |
-
},
|
| 143 |
-
{
|
| 144 |
-
"cell_type": "code",
|
| 145 |
-
"execution_count": null,
|
| 146 |
-
"metadata": {},
|
| 147 |
-
"outputs": [],
|
| 148 |
-
"source": [
|
| 149 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 150 |
-
"\n",
|
| 151 |
-
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 152 |
-
"\n",
|
| 153 |
-
"claude = Anthropic()\n",
|
| 154 |
-
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 155 |
-
"answer = response.content[0].text\n",
|
| 156 |
-
"\n",
|
| 157 |
-
"display(Markdown(answer))\n",
|
| 158 |
-
"teammates.append(model_name)\n",
|
| 159 |
-
"answers.append(answer)"
|
| 160 |
-
]
|
| 161 |
-
},
|
| 162 |
-
{
|
| 163 |
-
"cell_type": "code",
|
| 164 |
-
"execution_count": null,
|
| 165 |
-
"metadata": {},
|
| 166 |
-
"outputs": [],
|
| 167 |
-
"source": [
|
| 168 |
-
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 169 |
-
"model_name = \"gemini-2.0-flash\"\n",
|
| 170 |
-
"\n",
|
| 171 |
-
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 172 |
-
"answer = response.choices[0].message.content\n",
|
| 173 |
-
"\n",
|
| 174 |
-
"display(Markdown(answer))\n",
|
| 175 |
-
"teammates.append(model_name)\n",
|
| 176 |
-
"answers.append(answer)"
|
| 177 |
-
]
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"cell_type": "code",
|
| 181 |
-
"execution_count": null,
|
| 182 |
-
"metadata": {},
|
| 183 |
-
"outputs": [],
|
| 184 |
-
"source": [
|
| 185 |
-
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 186 |
-
"model_name = \"deepseek-chat\"\n",
|
| 187 |
-
"\n",
|
| 188 |
-
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
-
"answer = response.choices[0].message.content\n",
|
| 190 |
-
"\n",
|
| 191 |
-
"display(Markdown(answer))\n",
|
| 192 |
-
"teammates.append(model_name)\n",
|
| 193 |
-
"answers.append(answer)"
|
| 194 |
-
]
|
| 195 |
-
},
|
| 196 |
-
{
|
| 197 |
-
"cell_type": "code",
|
| 198 |
-
"execution_count": null,
|
| 199 |
-
"metadata": {},
|
| 200 |
-
"outputs": [],
|
| 201 |
-
"source": [
|
| 202 |
-
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 203 |
-
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 204 |
-
"\n",
|
| 205 |
-
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
-
"answer = response.choices[0].message.content\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"display(Markdown(answer))\n",
|
| 209 |
-
"teammates.append(model_name)\n",
|
| 210 |
-
"answers.append(answer)"
|
| 211 |
-
]
|
| 212 |
-
},
|
| 213 |
-
{
|
| 214 |
-
"cell_type": "code",
|
| 215 |
-
"execution_count": null,
|
| 216 |
-
"metadata": {},
|
| 217 |
-
"outputs": [],
|
| 218 |
-
"source": [
|
| 219 |
-
"# So where are we?\n",
|
| 220 |
-
"\n",
|
| 221 |
-
"print(teammates)\n",
|
| 222 |
-
"print(answers)"
|
| 223 |
-
]
|
| 224 |
-
},
|
| 225 |
-
{
|
| 226 |
-
"cell_type": "code",
|
| 227 |
-
"execution_count": null,
|
| 228 |
-
"metadata": {},
|
| 229 |
-
"outputs": [],
|
| 230 |
-
"source": [
|
| 231 |
-
"# It's nice to know how to use \"zip\"\n",
|
| 232 |
-
"for teammate, answer in zip(teammates, answers):\n",
|
| 233 |
-
" print(f\"Teammate: {teammate}\\n\\n{answer}\")"
|
| 234 |
-
]
|
| 235 |
-
},
|
| 236 |
-
{
|
| 237 |
-
"cell_type": "code",
|
| 238 |
-
"execution_count": 23,
|
| 239 |
-
"metadata": {},
|
| 240 |
-
"outputs": [],
|
| 241 |
-
"source": [
|
| 242 |
-
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 243 |
-
"\n",
|
| 244 |
-
"together = \"\"\n",
|
| 245 |
-
"for index, answer in enumerate(answers):\n",
|
| 246 |
-
" together += f\"# Response from teammate {index+1}\\n\\n\"\n",
|
| 247 |
-
" together += answer + \"\\n\\n\""
|
| 248 |
-
]
|
| 249 |
-
},
|
| 250 |
-
{
|
| 251 |
-
"cell_type": "code",
|
| 252 |
-
"execution_count": null,
|
| 253 |
-
"metadata": {},
|
| 254 |
-
"outputs": [],
|
| 255 |
-
"source": [
|
| 256 |
-
"print(together)"
|
| 257 |
-
]
|
| 258 |
-
},
|
| 259 |
-
{
|
| 260 |
-
"cell_type": "code",
|
| 261 |
-
"execution_count": 36,
|
| 262 |
-
"metadata": {},
|
| 263 |
-
"outputs": [],
|
| 264 |
-
"source": [
|
| 265 |
-
"formatter = f\"\"\"You are taking the nost interesting ideas fron {len(teammates)} teammates.\n",
|
| 266 |
-
"Each model has been given this question:\n",
|
| 267 |
-
"\n",
|
| 268 |
-
"{question}\n",
|
| 269 |
-
"\n",
|
| 270 |
-
"Your job is to evaluate each response for clarity and strength of argument, select the most relevant ideas and make a report, including a title, subtitles to separate sections, and quoting the LLM providing the idea.\n",
|
| 271 |
-
"From that, you will create a new improved answer.\"\"\""
|
| 272 |
-
]
|
| 273 |
-
},
|
| 274 |
-
{
|
| 275 |
-
"cell_type": "code",
|
| 276 |
-
"execution_count": null,
|
| 277 |
-
"metadata": {},
|
| 278 |
-
"outputs": [],
|
| 279 |
-
"source": [
|
| 280 |
-
"print(formatter)"
|
| 281 |
-
]
|
| 282 |
-
},
|
| 283 |
-
{
|
| 284 |
-
"cell_type": "code",
|
| 285 |
-
"execution_count": 38,
|
| 286 |
-
"metadata": {},
|
| 287 |
-
"outputs": [],
|
| 288 |
-
"source": [
|
| 289 |
-
"formatter_messages = [{\"role\": \"user\", \"content\": formatter}]"
|
| 290 |
-
]
|
| 291 |
-
},
|
| 292 |
-
{
|
| 293 |
-
"cell_type": "code",
|
| 294 |
-
"execution_count": null,
|
| 295 |
-
"metadata": {},
|
| 296 |
-
"outputs": [],
|
| 297 |
-
"source": [
|
| 298 |
-
"openai = OpenAI()\n",
|
| 299 |
-
"response = openai.chat.completions.create(\n",
|
| 300 |
-
" model=\"o3-mini\",\n",
|
| 301 |
-
" messages=formatter_messages,\n",
|
| 302 |
-
")\n",
|
| 303 |
-
"results = response.choices[0].message.content\n",
|
| 304 |
-
"display(Markdown(results))"
|
| 305 |
-
]
|
| 306 |
-
},
|
| 307 |
-
{
|
| 308 |
-
"cell_type": "code",
|
| 309 |
-
"execution_count": null,
|
| 310 |
-
"metadata": {},
|
| 311 |
-
"outputs": [],
|
| 312 |
-
"source": []
|
| 313 |
-
}
|
| 314 |
-
],
|
| 315 |
-
"metadata": {
|
| 316 |
-
"kernelspec": {
|
| 317 |
-
"display_name": ".venv",
|
| 318 |
-
"language": "python",
|
| 319 |
-
"name": "python3"
|
| 320 |
-
},
|
| 321 |
-
"language_info": {
|
| 322 |
-
"codemirror_mode": {
|
| 323 |
-
"name": "ipython",
|
| 324 |
-
"version": 3
|
| 325 |
-
},
|
| 326 |
-
"file_extension": ".py",
|
| 327 |
-
"mimetype": "text/x-python",
|
| 328 |
-
"name": "python",
|
| 329 |
-
"nbconvert_exporter": "python",
|
| 330 |
-
"pygments_lexer": "ipython3",
|
| 331 |
-
"version": "3.12.7"
|
| 332 |
-
}
|
| 333 |
-
},
|
| 334 |
-
"nbformat": 4,
|
| 335 |
-
"nbformat_minor": 2
|
| 336 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_exercise_BrettSanders_ChainOfThought.ipynb
DELETED
|
@@ -1,241 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "raw",
|
| 5 |
-
"metadata": {
|
| 6 |
-
"vscode": {
|
| 7 |
-
"languageId": "raw"
|
| 8 |
-
}
|
| 9 |
-
},
|
| 10 |
-
"source": [
|
| 11 |
-
"# Lab 2 Exercise - Extending the Patterns\n",
|
| 12 |
-
"\n",
|
| 13 |
-
"This notebook extends the original lab by adding the Chain of Thought pattern to enhance the evaluation process.\n"
|
| 14 |
-
]
|
| 15 |
-
},
|
| 16 |
-
{
|
| 17 |
-
"cell_type": "code",
|
| 18 |
-
"execution_count": 1,
|
| 19 |
-
"metadata": {},
|
| 20 |
-
"outputs": [],
|
| 21 |
-
"source": [
|
| 22 |
-
"# Import required packages\n",
|
| 23 |
-
"import os\n",
|
| 24 |
-
"import json\n",
|
| 25 |
-
"from dotenv import load_dotenv\n",
|
| 26 |
-
"from openai import OpenAI\n",
|
| 27 |
-
"from anthropic import Anthropic\n",
|
| 28 |
-
"from IPython.display import Markdown, display\n"
|
| 29 |
-
]
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"cell_type": "code",
|
| 33 |
-
"execution_count": null,
|
| 34 |
-
"metadata": {},
|
| 35 |
-
"outputs": [],
|
| 36 |
-
"source": [
|
| 37 |
-
"# Load environment variables\n",
|
| 38 |
-
"load_dotenv(override=True)\n"
|
| 39 |
-
]
|
| 40 |
-
},
|
| 41 |
-
{
|
| 42 |
-
"cell_type": "code",
|
| 43 |
-
"execution_count": 3,
|
| 44 |
-
"metadata": {},
|
| 45 |
-
"outputs": [],
|
| 46 |
-
"source": [
|
| 47 |
-
"# Initialize API clients\n",
|
| 48 |
-
"openai = OpenAI()\n",
|
| 49 |
-
"claude = Anthropic()\n"
|
| 50 |
-
]
|
| 51 |
-
},
|
| 52 |
-
{
|
| 53 |
-
"cell_type": "code",
|
| 54 |
-
"execution_count": null,
|
| 55 |
-
"metadata": {},
|
| 56 |
-
"outputs": [],
|
| 57 |
-
"source": [
|
| 58 |
-
"# Original question generation\n",
|
| 59 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 60 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 61 |
-
"messages = [{\"role\": \"user\", \"content\": request}]\n",
|
| 62 |
-
"\n",
|
| 63 |
-
"response = openai.chat.completions.create(\n",
|
| 64 |
-
" model=\"gpt-4o-mini\",\n",
|
| 65 |
-
" messages=messages,\n",
|
| 66 |
-
")\n",
|
| 67 |
-
"question = response.choices[0].message.content\n",
|
| 68 |
-
"print(question)\n"
|
| 69 |
-
]
|
| 70 |
-
},
|
| 71 |
-
{
|
| 72 |
-
"cell_type": "code",
|
| 73 |
-
"execution_count": null,
|
| 74 |
-
"metadata": {},
|
| 75 |
-
"outputs": [],
|
| 76 |
-
"source": [
|
| 77 |
-
"# Get responses from multiple models\n",
|
| 78 |
-
"competitors = []\n",
|
| 79 |
-
"answers = []\n",
|
| 80 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n",
|
| 81 |
-
"\n",
|
| 82 |
-
"# OpenAI\n",
|
| 83 |
-
"response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
|
| 84 |
-
"answer = response.choices[0].message.content\n",
|
| 85 |
-
"competitors.append(\"gpt-4o-mini\")\n",
|
| 86 |
-
"answers.append(answer)\n",
|
| 87 |
-
"display(Markdown(answer))\n",
|
| 88 |
-
"\n",
|
| 89 |
-
"# Claude\n",
|
| 90 |
-
"response = claude.messages.create(model=\"claude-3-7-sonnet-latest\", messages=messages, max_tokens=1000)\n",
|
| 91 |
-
"answer = response.content[0].text\n",
|
| 92 |
-
"competitors.append(\"claude-3-7-sonnet-latest\")\n",
|
| 93 |
-
"answers.append(answer)\n",
|
| 94 |
-
"display(Markdown(answer))\n"
|
| 95 |
-
]
|
| 96 |
-
},
|
| 97 |
-
{
|
| 98 |
-
"cell_type": "code",
|
| 99 |
-
"execution_count": 6,
|
| 100 |
-
"metadata": {},
|
| 101 |
-
"outputs": [],
|
| 102 |
-
"source": [
|
| 103 |
-
"# NEW: Chain of Thought Evaluation\n",
|
| 104 |
-
"# First, let's create a detailed evaluation prompt that encourages step-by-step reasoning\n",
|
| 105 |
-
"\n",
|
| 106 |
-
"evaluation_prompt = f\"\"\"You are an expert evaluator of AI responses. Your task is to analyze and rank the following responses to this question:\n",
|
| 107 |
-
"\n",
|
| 108 |
-
"{question}\n",
|
| 109 |
-
"\n",
|
| 110 |
-
"Please follow these steps in your evaluation:\n",
|
| 111 |
-
"\n",
|
| 112 |
-
"1. For each response:\n",
|
| 113 |
-
" - Identify the main arguments presented\n",
|
| 114 |
-
" - Evaluate the clarity and coherence of the reasoning\n",
|
| 115 |
-
" - Assess the depth and breadth of the analysis\n",
|
| 116 |
-
" - Note any unique insights or perspectives\n",
|
| 117 |
-
"\n",
|
| 118 |
-
"2. Compare the responses:\n",
|
| 119 |
-
" - How do they differ in their approach?\n",
|
| 120 |
-
" - Which response demonstrates the most sophisticated understanding?\n",
|
| 121 |
-
" - Which response provides the most practical and actionable insights?\n",
|
| 122 |
-
"\n",
|
| 123 |
-
"3. Provide your final ranking with detailed justification for each position.\n",
|
| 124 |
-
"\n",
|
| 125 |
-
"Here are the responses:\n",
|
| 126 |
-
"\n",
|
| 127 |
-
"{'\\\\n\\\\n'.join([f'Response {i+1} ({competitors[i]}):\\\\n{answer}' for i, answer in enumerate(answers)])}\n",
|
| 128 |
-
"\n",
|
| 129 |
-
"Please provide your evaluation in JSON format with the following structure:\n",
|
| 130 |
-
"{{\n",
|
| 131 |
-
" \"detailed_analysis\": [\n",
|
| 132 |
-
" {{\"competitor\": \"name\", \"strengths\": [], \"weaknesses\": [], \"unique_aspects\": []}},\n",
|
| 133 |
-
" ...\n",
|
| 134 |
-
" ],\n",
|
| 135 |
-
" \"comparative_analysis\": \"detailed comparison of responses\",\n",
|
| 136 |
-
" \"final_ranking\": [\"ranked competitor numbers\"],\n",
|
| 137 |
-
" \"justification\": \"detailed explanation of the ranking\"\n",
|
| 138 |
-
"}}\"\"\"\n"
|
| 139 |
-
]
|
| 140 |
-
},
|
| 141 |
-
{
|
| 142 |
-
"cell_type": "code",
|
| 143 |
-
"execution_count": null,
|
| 144 |
-
"metadata": {},
|
| 145 |
-
"outputs": [],
|
| 146 |
-
"source": [
|
| 147 |
-
"# Get the detailed evaluation\n",
|
| 148 |
-
"evaluation_messages = [{\"role\": \"user\", \"content\": evaluation_prompt}]\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"response = openai.chat.completions.create(\n",
|
| 151 |
-
" model=\"gpt-4o-mini\",\n",
|
| 152 |
-
" messages=evaluation_messages,\n",
|
| 153 |
-
")\n",
|
| 154 |
-
"detailed_evaluation = response.choices[0].message.content\n",
|
| 155 |
-
"print(detailed_evaluation)\n"
|
| 156 |
-
]
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"cell_type": "code",
|
| 160 |
-
"execution_count": null,
|
| 161 |
-
"metadata": {},
|
| 162 |
-
"outputs": [],
|
| 163 |
-
"source": [
|
| 164 |
-
"# Parse and display the results in a more readable format\n",
|
| 165 |
-
"\n",
|
| 166 |
-
"# Clean up the JSON string by removing markdown code block markers\n",
|
| 167 |
-
"json_str = detailed_evaluation.replace(\"```json\", \"\").replace(\"```\", \"\").strip()\n",
|
| 168 |
-
"\n",
|
| 169 |
-
"evaluation_dict = json.loads(json_str)\n",
|
| 170 |
-
"\n",
|
| 171 |
-
"print(\"Detailed Analysis:\")\n",
|
| 172 |
-
"for analysis in evaluation_dict[\"detailed_analysis\"]:\n",
|
| 173 |
-
" print(f\"\\nCompetitor: {analysis['competitor']}\")\n",
|
| 174 |
-
" print(\"Strengths:\")\n",
|
| 175 |
-
" for strength in analysis['strengths']:\n",
|
| 176 |
-
" print(f\"- {strength}\")\n",
|
| 177 |
-
" print(\"\\nWeaknesses:\")\n",
|
| 178 |
-
" for weakness in analysis['weaknesses']:\n",
|
| 179 |
-
" print(f\"- {weakness}\")\n",
|
| 180 |
-
" print(\"\\nUnique Aspects:\")\n",
|
| 181 |
-
" for aspect in analysis['unique_aspects']:\n",
|
| 182 |
-
" print(f\"- {aspect}\")\n",
|
| 183 |
-
"\n",
|
| 184 |
-
"print(\"\\nComparative Analysis:\")\n",
|
| 185 |
-
"print(evaluation_dict[\"comparative_analysis\"])\n",
|
| 186 |
-
"\n",
|
| 187 |
-
"print(\"\\nFinal Ranking:\")\n",
|
| 188 |
-
"for i, rank in enumerate(evaluation_dict[\"final_ranking\"]):\n",
|
| 189 |
-
" print(f\"{i+1}. {competitors[int(rank)-1]}\")\n",
|
| 190 |
-
"\n",
|
| 191 |
-
"print(\"\\nJustification:\")\n",
|
| 192 |
-
"print(evaluation_dict[\"justification\"])\n"
|
| 193 |
-
]
|
| 194 |
-
},
|
| 195 |
-
{
|
| 196 |
-
"cell_type": "raw",
|
| 197 |
-
"metadata": {
|
| 198 |
-
"vscode": {
|
| 199 |
-
"languageId": "raw"
|
| 200 |
-
}
|
| 201 |
-
},
|
| 202 |
-
"source": [
|
| 203 |
-
"## Pattern Analysis\n",
|
| 204 |
-
"\n",
|
| 205 |
-
"This enhanced version uses several agentic design patterns:\n",
|
| 206 |
-
"\n",
|
| 207 |
-
"1. **Multi-agent Collaboration**: Sending the same question to multiple LLMs\n",
|
| 208 |
-
"2. **Evaluation/Judgment Pattern**: Using one LLM to evaluate responses from others\n",
|
| 209 |
-
"3. **Parallel Processing**: Running multiple models simultaneously\n",
|
| 210 |
-
"4. **Chain of Thought**: Added a structured, step-by-step evaluation process that breaks down the analysis into clear stages\n",
|
| 211 |
-
"\n",
|
| 212 |
-
"The Chain of Thought pattern is particularly valuable here because it:\n",
|
| 213 |
-
"- Forces the evaluator to consider multiple aspects of each response\n",
|
| 214 |
-
"- Provides more detailed and structured feedback\n",
|
| 215 |
-
"- Makes the evaluation process more transparent and explainable\n",
|
| 216 |
-
"- Helps identify specific strengths and weaknesses in each response\n"
|
| 217 |
-
]
|
| 218 |
-
}
|
| 219 |
-
],
|
| 220 |
-
"metadata": {
|
| 221 |
-
"kernelspec": {
|
| 222 |
-
"display_name": ".venv",
|
| 223 |
-
"language": "python",
|
| 224 |
-
"name": "python3"
|
| 225 |
-
},
|
| 226 |
-
"language_info": {
|
| 227 |
-
"codemirror_mode": {
|
| 228 |
-
"name": "ipython",
|
| 229 |
-
"version": 3
|
| 230 |
-
},
|
| 231 |
-
"file_extension": ".py",
|
| 232 |
-
"mimetype": "text/x-python",
|
| 233 |
-
"name": "python",
|
| 234 |
-
"nbconvert_exporter": "python",
|
| 235 |
-
"pygments_lexer": "ipython3",
|
| 236 |
-
"version": "3.12.7"
|
| 237 |
-
}
|
| 238 |
-
},
|
| 239 |
-
"nbformat": 4,
|
| 240 |
-
"nbformat_minor": 2
|
| 241 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_llm_reviewer.ipynb
DELETED
|
@@ -1,627 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"This notebook extends the original by adding a reviewer pattern to evaluate the impact on model performance.\n",
|
| 17 |
-
"\n",
|
| 18 |
-
"In the new workflow, each model's answer is provided to a \"reviewer LLM\" who is prompted to \"Evaluate the response for clarity and strength of argument, and provide constructive suggestions for improving the answer.\" Each model is then given the chance to revise its answer based on the feedback but is also told, \"You are not required to take any of the feedback into account, but you want to win the competition.\"\n",
|
| 19 |
-
"\n",
|
| 20 |
-
"<table>\n",
|
| 21 |
-
" <caption style=\"font-size: 1.2em; margin-bottom: 10px;\"><strong>Results for Representative Run</strong></caption>\n",
|
| 22 |
-
" <thead>\n",
|
| 23 |
-
" <tr>\n",
|
| 24 |
-
" <th>Model</th>\n",
|
| 25 |
-
" <th>Original Rank</th>\n",
|
| 26 |
-
" <th>Exclusive Feedback</th>\n",
|
| 27 |
-
" <th>With Feedback (all models)</th>\n",
|
| 28 |
-
" </tr>\n",
|
| 29 |
-
" </thead>\n",
|
| 30 |
-
" <tbody>\n",
|
| 31 |
-
" <tr>\n",
|
| 32 |
-
" <td>gpt-4o-mini</td>\n",
|
| 33 |
-
" <td>2</td>\n",
|
| 34 |
-
" <td>3</td>\n",
|
| 35 |
-
" <td>4</td>\n",
|
| 36 |
-
" </tr>\n",
|
| 37 |
-
" <tr>\n",
|
| 38 |
-
" <td>claude-3-7-sonnet-latest</td>\n",
|
| 39 |
-
" <td>6</td>\n",
|
| 40 |
-
" <td>1</td>\n",
|
| 41 |
-
" <td>1</td>\n",
|
| 42 |
-
" </tr>\n",
|
| 43 |
-
" <tr>\n",
|
| 44 |
-
" <td>gemini-2.0-flash</td>\n",
|
| 45 |
-
" <td>1</td>\n",
|
| 46 |
-
" <td>1</td>\n",
|
| 47 |
-
" <td>2</td>\n",
|
| 48 |
-
" </tr>\n",
|
| 49 |
-
" <tr>\n",
|
| 50 |
-
" <td>deepseek-chat</td>\n",
|
| 51 |
-
" <td>3</td>\n",
|
| 52 |
-
" <td>2</td>\n",
|
| 53 |
-
" <td>3</td>\n",
|
| 54 |
-
" </tr>\n",
|
| 55 |
-
" <tr>\n",
|
| 56 |
-
" <td>llama-3.3-70b-versatile</td>\n",
|
| 57 |
-
" <td>4</td>\n",
|
| 58 |
-
" <td>3</td>\n",
|
| 59 |
-
" <td>5</td>\n",
|
| 60 |
-
" </tr>\n",
|
| 61 |
-
" <tr>\n",
|
| 62 |
-
" <td>llama3.2</td>\n",
|
| 63 |
-
" <td>5</td>\n",
|
| 64 |
-
" <td>4</td>\n",
|
| 65 |
-
" <td>6</td>\n",
|
| 66 |
-
" </tr>\n",
|
| 67 |
-
" </tbody>\n",
|
| 68 |
-
"</table>\n",
|
| 69 |
-
"\n",
|
| 70 |
-
"The workflow is obviously non-deterministic and the results can vary greatly from run to run, but the introduction of a reviewer appeared to have a generaly positive impact on performance. The table above shows the results for a representative run. It compares each model's rank versus the other models when it exclusively received feedback. The table also shows the ranking when ALL models received feedback. Exclusive use of feedback improved a model's ranking for five out of six models and decreased it for one model.\n",
|
| 71 |
-
"\n",
|
| 72 |
-
"Inspired by some other contributions, this worksheet also makes LLM calls asyncrhonously to reduce wait time."
|
| 73 |
-
]
|
| 74 |
-
},
|
| 75 |
-
{
|
| 76 |
-
"cell_type": "code",
|
| 77 |
-
"execution_count": 23,
|
| 78 |
-
"metadata": {},
|
| 79 |
-
"outputs": [],
|
| 80 |
-
"source": [
|
| 81 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 82 |
-
"#!uv add prettytable\n",
|
| 83 |
-
"\n",
|
| 84 |
-
"import os\n",
|
| 85 |
-
"import asyncio\n",
|
| 86 |
-
"import json\n",
|
| 87 |
-
"from dotenv import load_dotenv\n",
|
| 88 |
-
"from openai import OpenAI, AsyncOpenAI\n",
|
| 89 |
-
"from anthropic import AsyncAnthropic\n",
|
| 90 |
-
"from IPython.display import display\n",
|
| 91 |
-
"from pydantic import BaseModel, Field\n",
|
| 92 |
-
"from string import Template\n",
|
| 93 |
-
"from prettytable import PrettyTable\n",
|
| 94 |
-
"\n",
|
| 95 |
-
"\n"
|
| 96 |
-
]
|
| 97 |
-
},
|
| 98 |
-
{
|
| 99 |
-
"cell_type": "code",
|
| 100 |
-
"execution_count": 24,
|
| 101 |
-
"metadata": {},
|
| 102 |
-
"outputs": [],
|
| 103 |
-
"source": [
|
| 104 |
-
"class LLMResult(BaseModel):\n",
|
| 105 |
-
" model: str\n",
|
| 106 |
-
" answer: str\n",
|
| 107 |
-
" feedback: str | None =Field(\n",
|
| 108 |
-
" default = None, \n",
|
| 109 |
-
" description=\"Mutable field. This will be set by the reviewer.\")\n",
|
| 110 |
-
" revised_answer: str | None =Field(\n",
|
| 111 |
-
" default = None, \n",
|
| 112 |
-
" description=\"Mutable field. This will be set by the answerer after the reviewer has provided feedback.\")\n",
|
| 113 |
-
" original_rank: int | None =Field(\n",
|
| 114 |
-
" default = None, \n",
|
| 115 |
-
" description=\"Mutable field. Rank when no feedback is used by any models.\")\n",
|
| 116 |
-
" exclusive_feedback: str | None =Field(\n",
|
| 117 |
-
" default = None, \n",
|
| 118 |
-
" description=\"Mutable field. Rank when only this model used feedback.\")\n",
|
| 119 |
-
" revised_rank: int | None =Field(\n",
|
| 120 |
-
" default = None, \n",
|
| 121 |
-
" description=\"Mutable field. Rank when all models used feedback.\")\n",
|
| 122 |
-
"\n",
|
| 123 |
-
"results : list[LLMResult] = []\n"
|
| 124 |
-
]
|
| 125 |
-
},
|
| 126 |
-
{
|
| 127 |
-
"cell_type": "code",
|
| 128 |
-
"execution_count": null,
|
| 129 |
-
"metadata": {},
|
| 130 |
-
"outputs": [],
|
| 131 |
-
"source": [
|
| 132 |
-
"# Always remember to do this!\n",
|
| 133 |
-
"load_dotenv(override=True)"
|
| 134 |
-
]
|
| 135 |
-
},
|
| 136 |
-
{
|
| 137 |
-
"cell_type": "code",
|
| 138 |
-
"execution_count": null,
|
| 139 |
-
"metadata": {},
|
| 140 |
-
"outputs": [],
|
| 141 |
-
"source": [
|
| 142 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 143 |
-
"\n",
|
| 144 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 145 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 146 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 147 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 148 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"if openai_api_key:\n",
|
| 151 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 152 |
-
"else:\n",
|
| 153 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 154 |
-
" \n",
|
| 155 |
-
"if anthropic_api_key:\n",
|
| 156 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 157 |
-
"else:\n",
|
| 158 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 159 |
-
"\n",
|
| 160 |
-
"if google_api_key:\n",
|
| 161 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 162 |
-
"else:\n",
|
| 163 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 164 |
-
"\n",
|
| 165 |
-
"if deepseek_api_key:\n",
|
| 166 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 167 |
-
"else:\n",
|
| 168 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"if groq_api_key:\n",
|
| 171 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 172 |
-
"else:\n",
|
| 173 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 174 |
-
]
|
| 175 |
-
},
|
| 176 |
-
{
|
| 177 |
-
"cell_type": "code",
|
| 178 |
-
"execution_count": 27,
|
| 179 |
-
"metadata": {},
|
| 180 |
-
"outputs": [],
|
| 181 |
-
"source": [
|
| 182 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 183 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 184 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 185 |
-
]
|
| 186 |
-
},
|
| 187 |
-
{
|
| 188 |
-
"cell_type": "code",
|
| 189 |
-
"execution_count": null,
|
| 190 |
-
"metadata": {},
|
| 191 |
-
"outputs": [],
|
| 192 |
-
"source": [
|
| 193 |
-
"messages"
|
| 194 |
-
]
|
| 195 |
-
},
|
| 196 |
-
{
|
| 197 |
-
"cell_type": "code",
|
| 198 |
-
"execution_count": null,
|
| 199 |
-
"metadata": {},
|
| 200 |
-
"outputs": [],
|
| 201 |
-
"source": [
|
| 202 |
-
"openai = OpenAI()\n",
|
| 203 |
-
"response = openai.chat.completions.create(\n",
|
| 204 |
-
" model=\"gpt-4o-mini\",\n",
|
| 205 |
-
" messages=messages,\n",
|
| 206 |
-
")\n",
|
| 207 |
-
"question = response.choices[0].message.content\n",
|
| 208 |
-
"print(question)\n"
|
| 209 |
-
]
|
| 210 |
-
},
|
| 211 |
-
{
|
| 212 |
-
"cell_type": "code",
|
| 213 |
-
"execution_count": 30,
|
| 214 |
-
"metadata": {},
|
| 215 |
-
"outputs": [],
|
| 216 |
-
"source": [
|
| 217 |
-
"competitors = []\n",
|
| 218 |
-
"answers = []\n",
|
| 219 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 220 |
-
]
|
| 221 |
-
},
|
| 222 |
-
{
|
| 223 |
-
"cell_type": "code",
|
| 224 |
-
"execution_count": 31,
|
| 225 |
-
"metadata": {},
|
| 226 |
-
"outputs": [],
|
| 227 |
-
"source": [
|
| 228 |
-
"# The API we know well\n",
|
| 229 |
-
"\n",
|
| 230 |
-
"async def openai_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 231 |
-
" openai = AsyncOpenAI()\n",
|
| 232 |
-
" response = await openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 233 |
-
" answer = response.choices[0].message.content\n",
|
| 234 |
-
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 235 |
-
" return answer\n"
|
| 236 |
-
]
|
| 237 |
-
},
|
| 238 |
-
{
|
| 239 |
-
"cell_type": "code",
|
| 240 |
-
"execution_count": 32,
|
| 241 |
-
"metadata": {},
|
| 242 |
-
"outputs": [],
|
| 243 |
-
"source": [
|
| 244 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 245 |
-
"\n",
|
| 246 |
-
"async def claude_anthropic_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 247 |
-
" claude = AsyncAnthropic()\n",
|
| 248 |
-
" response = await claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 249 |
-
" answer = response.content[0].text\n",
|
| 250 |
-
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 251 |
-
" return answer\n"
|
| 252 |
-
]
|
| 253 |
-
},
|
| 254 |
-
{
|
| 255 |
-
"cell_type": "code",
|
| 256 |
-
"execution_count": 33,
|
| 257 |
-
"metadata": {},
|
| 258 |
-
"outputs": [],
|
| 259 |
-
"source": [
|
| 260 |
-
"async def gemini_google_answer(messages: list[dict[str, str]], model_name : str) -> str: \n",
|
| 261 |
-
" gemini = AsyncOpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 262 |
-
" response = await gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 263 |
-
" answer = response.choices[0].message.content.strip()\n",
|
| 264 |
-
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 265 |
-
" return answer\n"
|
| 266 |
-
]
|
| 267 |
-
},
|
| 268 |
-
{
|
| 269 |
-
"cell_type": "code",
|
| 270 |
-
"execution_count": 34,
|
| 271 |
-
"metadata": {},
|
| 272 |
-
"outputs": [],
|
| 273 |
-
"source": [
|
| 274 |
-
"async def deepseek_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 275 |
-
" deepseek = AsyncOpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 276 |
-
" response = await deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 277 |
-
" answer = response.choices[0].message.content\n",
|
| 278 |
-
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 279 |
-
" return answer\n"
|
| 280 |
-
]
|
| 281 |
-
},
|
| 282 |
-
{
|
| 283 |
-
"cell_type": "code",
|
| 284 |
-
"execution_count": 35,
|
| 285 |
-
"metadata": {},
|
| 286 |
-
"outputs": [],
|
| 287 |
-
"source": [
|
| 288 |
-
"async def groq_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 289 |
-
" groq = AsyncOpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 290 |
-
" response = await groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 291 |
-
" answer = response.choices[0].message.content\n",
|
| 292 |
-
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 293 |
-
" return answer\n"
|
| 294 |
-
]
|
| 295 |
-
},
|
| 296 |
-
{
|
| 297 |
-
"cell_type": "markdown",
|
| 298 |
-
"metadata": {},
|
| 299 |
-
"source": [
|
| 300 |
-
"## For the next cell, we will use Ollama\n",
|
| 301 |
-
"\n",
|
| 302 |
-
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 303 |
-
"and runs models locally using high performance C++ code.\n",
|
| 304 |
-
"\n",
|
| 305 |
-
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 306 |
-
"\n",
|
| 307 |
-
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 308 |
-
"\n",
|
| 309 |
-
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 310 |
-
"\n",
|
| 311 |
-
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 312 |
-
"\n",
|
| 313 |
-
"`ollama pull <model_name>` downloads a model locally \n",
|
| 314 |
-
"`ollama ls` lists all the models you've downloaded \n",
|
| 315 |
-
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 316 |
-
]
|
| 317 |
-
},
|
| 318 |
-
{
|
| 319 |
-
"cell_type": "markdown",
|
| 320 |
-
"metadata": {},
|
| 321 |
-
"source": [
|
| 322 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 323 |
-
" <tr>\n",
|
| 324 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 325 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 326 |
-
" </td>\n",
|
| 327 |
-
" <td>\n",
|
| 328 |
-
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 329 |
-
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 330 |
-
" </span>\n",
|
| 331 |
-
" </td>\n",
|
| 332 |
-
" </tr>\n",
|
| 333 |
-
"</table>"
|
| 334 |
-
]
|
| 335 |
-
},
|
| 336 |
-
{
|
| 337 |
-
"cell_type": "code",
|
| 338 |
-
"execution_count": 36,
|
| 339 |
-
"metadata": {},
|
| 340 |
-
"outputs": [],
|
| 341 |
-
"source": [
|
| 342 |
-
"#!ollama pull llama3.2"
|
| 343 |
-
]
|
| 344 |
-
},
|
| 345 |
-
{
|
| 346 |
-
"cell_type": "code",
|
| 347 |
-
"execution_count": 37,
|
| 348 |
-
"metadata": {},
|
| 349 |
-
"outputs": [],
|
| 350 |
-
"source": [
|
| 351 |
-
"async def ollama_answer(messages: list[dict[str, str]], model_name : str) -> str:\n",
|
| 352 |
-
" ollama = AsyncOpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 353 |
-
" response = await ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 354 |
-
" answer = response.choices[0].message.content\n",
|
| 355 |
-
" print(f\"{model_name} answer: {answer[:50]}...\")\n",
|
| 356 |
-
" return answer\n"
|
| 357 |
-
]
|
| 358 |
-
},
|
| 359 |
-
{
|
| 360 |
-
"cell_type": "code",
|
| 361 |
-
"execution_count": null,
|
| 362 |
-
"metadata": {},
|
| 363 |
-
"outputs": [],
|
| 364 |
-
"source": [
|
| 365 |
-
"answerers = [openai_answer, claude_anthropic_answer, gemini_google_answer, deepseek_answer, groq_answer, ollama_answer]\n",
|
| 366 |
-
"models = [\"gpt-4o-mini\", \"claude-3-7-sonnet-latest\", \"gemini-2.0-flash\", \"deepseek-chat\", \"llama-3.3-70b-versatile\", \"llama3.2\"]\n",
|
| 367 |
-
"\n",
|
| 368 |
-
"tasks = [ answerer(messages, model) for answerer, model in zip(answerers, models)]\n",
|
| 369 |
-
"answers : list[str] = await asyncio.gather(*tasks)\n",
|
| 370 |
-
"results : list[LLMResult] = [LLMResult(model=model, answer=answer) for model, answer in zip(models, answers)]\n"
|
| 371 |
-
]
|
| 372 |
-
},
|
| 373 |
-
{
|
| 374 |
-
"cell_type": "code",
|
| 375 |
-
"execution_count": null,
|
| 376 |
-
"metadata": {},
|
| 377 |
-
"outputs": [],
|
| 378 |
-
"source": [
|
| 379 |
-
"answers "
|
| 380 |
-
]
|
| 381 |
-
},
|
| 382 |
-
{
|
| 383 |
-
"cell_type": "code",
|
| 384 |
-
"execution_count": 40,
|
| 385 |
-
"metadata": {},
|
| 386 |
-
"outputs": [],
|
| 387 |
-
"source": [
|
| 388 |
-
"reviewer = f\"\"\"You are reviewing a submission for a writing competition. The particpant has been given this question to answer:\n",
|
| 389 |
-
"\n",
|
| 390 |
-
"{question}\n",
|
| 391 |
-
"\n",
|
| 392 |
-
"Your job is to evaluate the response for clarity and strength of argument, and provide constructive suggestions for improving the answer.\n",
|
| 393 |
-
"Limit your feedback to 200 words.\n",
|
| 394 |
-
"\n",
|
| 395 |
-
"Here is the particpant's answer:\n",
|
| 396 |
-
"{{answer}}\n",
|
| 397 |
-
"\"\"\"\n",
|
| 398 |
-
"\n",
|
| 399 |
-
"async def review_answer(answer : str) -> str:\n",
|
| 400 |
-
" openai = AsyncOpenAI()\n",
|
| 401 |
-
" reviewer_messages = [{\"role\": \"user\", \"content\": reviewer.format(answer=answer)}]\n",
|
| 402 |
-
" reviewer_response = await openai.chat.completions.create(\n",
|
| 403 |
-
" model=\"gpt-4o-mini\",\n",
|
| 404 |
-
" messages=reviewer_messages,\n",
|
| 405 |
-
" )\n",
|
| 406 |
-
" feedback = reviewer_response.choices[0].message.content\n",
|
| 407 |
-
" print(f\"feedback: {feedback[:50]}...\")\n",
|
| 408 |
-
" return feedback"
|
| 409 |
-
]
|
| 410 |
-
},
|
| 411 |
-
{
|
| 412 |
-
"cell_type": "code",
|
| 413 |
-
"execution_count": null,
|
| 414 |
-
"metadata": {},
|
| 415 |
-
"outputs": [],
|
| 416 |
-
"source": [
|
| 417 |
-
"import asyncio\n",
|
| 418 |
-
"\n",
|
| 419 |
-
"tasks = [review_answer(answer) for answer in answers]\n",
|
| 420 |
-
"feedback = await asyncio.gather(*tasks)\n",
|
| 421 |
-
"\n",
|
| 422 |
-
"for result, feedback in zip(results, feedback):\n",
|
| 423 |
-
" result.feedback = feedback\n"
|
| 424 |
-
]
|
| 425 |
-
},
|
| 426 |
-
{
|
| 427 |
-
"cell_type": "code",
|
| 428 |
-
"execution_count": 42,
|
| 429 |
-
"metadata": {},
|
| 430 |
-
"outputs": [],
|
| 431 |
-
"source": [
|
| 432 |
-
"revision_prompt = f\"\"\"You are revising a submission you wrote for a writing competition based on feedback from a reviewer.\n",
|
| 433 |
-
"\n",
|
| 434 |
-
"You are not required to take any of the feedback into account but you want to win the competition.\n",
|
| 435 |
-
"\n",
|
| 436 |
-
"The question was: \n",
|
| 437 |
-
"{question}\n",
|
| 438 |
-
"\n",
|
| 439 |
-
"The feedback was:\n",
|
| 440 |
-
"{{feedback}}\n",
|
| 441 |
-
"\n",
|
| 442 |
-
"And your original answer was:\n",
|
| 443 |
-
"{{answer}}\n",
|
| 444 |
-
"\n",
|
| 445 |
-
"Please return your revised answer and nothing else.\n",
|
| 446 |
-
"\"\"\"\n"
|
| 447 |
-
]
|
| 448 |
-
},
|
| 449 |
-
{
|
| 450 |
-
"cell_type": "code",
|
| 451 |
-
"execution_count": null,
|
| 452 |
-
"metadata": {},
|
| 453 |
-
"outputs": [],
|
| 454 |
-
"source": [
|
| 455 |
-
"messages = [{\"role\": \"user\", \"content\": revision_prompt.format(answer=answer, feedback=feedback)} for answer, feedback in zip(answers, feedback)]\n",
|
| 456 |
-
"tasks = [ answerer(messages, model) for answerer, model in zip(answerers, models)]\n",
|
| 457 |
-
"revised_answers = await asyncio.gather(*tasks)\n",
|
| 458 |
-
"\n",
|
| 459 |
-
"for revised_answer, result in zip(revised_answers, results):\n",
|
| 460 |
-
" result.revised_answer = revised_answer\n",
|
| 461 |
-
"\n"
|
| 462 |
-
]
|
| 463 |
-
},
|
| 464 |
-
{
|
| 465 |
-
"cell_type": "code",
|
| 466 |
-
"execution_count": 44,
|
| 467 |
-
"metadata": {},
|
| 468 |
-
"outputs": [],
|
| 469 |
-
"source": [
|
| 470 |
-
"# need to use Template because we are making a later substitution for \"together\"\n",
|
| 471 |
-
"judge = Template(f\"\"\"You are judging a competition between {len(results)} competitors.\n",
|
| 472 |
-
"Each model has been given this question:\n",
|
| 473 |
-
"\n",
|
| 474 |
-
"{question}\n",
|
| 475 |
-
"\n",
|
| 476 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 477 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 478 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 479 |
-
"\n",
|
| 480 |
-
"Here are the responses from each competitor:\n",
|
| 481 |
-
"\n",
|
| 482 |
-
"$together\n",
|
| 483 |
-
"\n",
|
| 484 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\")\n",
|
| 485 |
-
"\n",
|
| 486 |
-
"\n"
|
| 487 |
-
]
|
| 488 |
-
},
|
| 489 |
-
{
|
| 490 |
-
"cell_type": "code",
|
| 491 |
-
"execution_count": 45,
|
| 492 |
-
"metadata": {},
|
| 493 |
-
"outputs": [],
|
| 494 |
-
"source": [
|
| 495 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 496 |
-
]
|
| 497 |
-
},
|
| 498 |
-
{
|
| 499 |
-
"cell_type": "code",
|
| 500 |
-
"execution_count": 46,
|
| 501 |
-
"metadata": {},
|
| 502 |
-
"outputs": [],
|
| 503 |
-
"source": [
|
| 504 |
-
"def come_together(results : list[LLMResult], revised_entry : int | None ) -> list[dict[str, str]]:\n",
|
| 505 |
-
" # include revised results for \"revised_entry\", or for all entries if revised_entry is None\n",
|
| 506 |
-
" together = \"\"\n",
|
| 507 |
-
" for index, result in enumerate(results):\n",
|
| 508 |
-
" together += f\"# Response from competitor {index}\\n\\n\"\n",
|
| 509 |
-
" together += result.answer if (index != revised_entry and revised_entry is not None) else result.revised_answer + \"\\n\\n\"\n",
|
| 510 |
-
" return [{\"role\": \"user\", \"content\": judge.substitute(together=together)}]\n",
|
| 511 |
-
"\n",
|
| 512 |
-
"\n",
|
| 513 |
-
"# Judgement time!\n",
|
| 514 |
-
"async def judgement_time(results : list[LLMResult], revised_entry : int ) -> str:\n",
|
| 515 |
-
" judge_messages = come_together(results, revised_entry)\n",
|
| 516 |
-
"\n",
|
| 517 |
-
" openai = AsyncOpenAI()\n",
|
| 518 |
-
" response = await openai.chat.completions.create(\n",
|
| 519 |
-
" model=\"o3-mini\",\n",
|
| 520 |
-
" messages=judge_messages,\n",
|
| 521 |
-
" )\n",
|
| 522 |
-
" results = response.choices[0].message.content\n",
|
| 523 |
-
" results_dict = json.loads(results)\n",
|
| 524 |
-
" results = { int(model) : int(rank) +1 for rank, model in enumerate(results_dict[\"results\"]) }\n",
|
| 525 |
-
" return results\n",
|
| 526 |
-
"\n"
|
| 527 |
-
]
|
| 528 |
-
},
|
| 529 |
-
{
|
| 530 |
-
"cell_type": "code",
|
| 531 |
-
"execution_count": 47,
|
| 532 |
-
"metadata": {},
|
| 533 |
-
"outputs": [],
|
| 534 |
-
"source": [
|
| 535 |
-
"#evaluate the impact of feedback on model performance\n",
|
| 536 |
-
"\n",
|
| 537 |
-
"no_feedback = await judgement_time(results, -1)\n",
|
| 538 |
-
"with_feedback = await judgement_time(results, None)\n",
|
| 539 |
-
"\n",
|
| 540 |
-
"tasks = [ judgement_time(results, i) for i in range(len(results))]\n",
|
| 541 |
-
"model_spefic_feedback = await asyncio.gather(*tasks)\n",
|
| 542 |
-
"\n",
|
| 543 |
-
"for index, result in enumerate(results):\n",
|
| 544 |
-
" result.original_rank = no_feedback[index]\n",
|
| 545 |
-
" result.exclusive_feedback = model_spefic_feedback[index][index]\n",
|
| 546 |
-
" result.revised_rank = with_feedback[index]\n",
|
| 547 |
-
"\n"
|
| 548 |
-
]
|
| 549 |
-
},
|
| 550 |
-
{
|
| 551 |
-
"cell_type": "code",
|
| 552 |
-
"execution_count": null,
|
| 553 |
-
"metadata": {},
|
| 554 |
-
"outputs": [],
|
| 555 |
-
"source": [
|
| 556 |
-
"\n",
|
| 557 |
-
"table = PrettyTable()\n",
|
| 558 |
-
"table.field_names = [\"Model\", \"Original Rank\", \"Exclusive Feedback\", \"With Feedback (all models)\"]\n",
|
| 559 |
-
"\n",
|
| 560 |
-
"for result in results:\n",
|
| 561 |
-
" table.add_row([result.model, result.original_rank, result.exclusive_feedback, result.revised_rank])\n",
|
| 562 |
-
"\n",
|
| 563 |
-
"print(table)\n",
|
| 564 |
-
"\n"
|
| 565 |
-
]
|
| 566 |
-
},
|
| 567 |
-
{
|
| 568 |
-
"cell_type": "markdown",
|
| 569 |
-
"metadata": {},
|
| 570 |
-
"source": [
|
| 571 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 572 |
-
" <tr>\n",
|
| 573 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 574 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 575 |
-
" </td>\n",
|
| 576 |
-
" <td>\n",
|
| 577 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 578 |
-
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 579 |
-
" </span>\n",
|
| 580 |
-
" </td>\n",
|
| 581 |
-
" </tr>\n",
|
| 582 |
-
"</table>"
|
| 583 |
-
]
|
| 584 |
-
},
|
| 585 |
-
{
|
| 586 |
-
"cell_type": "markdown",
|
| 587 |
-
"metadata": {},
|
| 588 |
-
"source": [
|
| 589 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 590 |
-
" <tr>\n",
|
| 591 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 592 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 593 |
-
" </td>\n",
|
| 594 |
-
" <td>\n",
|
| 595 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 596 |
-
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 597 |
-
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 598 |
-
" to business projects where accuracy is critical.\n",
|
| 599 |
-
" </span>\n",
|
| 600 |
-
" </td>\n",
|
| 601 |
-
" </tr>\n",
|
| 602 |
-
"</table>"
|
| 603 |
-
]
|
| 604 |
-
}
|
| 605 |
-
],
|
| 606 |
-
"metadata": {
|
| 607 |
-
"kernelspec": {
|
| 608 |
-
"display_name": ".venv",
|
| 609 |
-
"language": "python",
|
| 610 |
-
"name": "python3"
|
| 611 |
-
},
|
| 612 |
-
"language_info": {
|
| 613 |
-
"codemirror_mode": {
|
| 614 |
-
"name": "ipython",
|
| 615 |
-
"version": 3
|
| 616 |
-
},
|
| 617 |
-
"file_extension": ".py",
|
| 618 |
-
"mimetype": "text/x-python",
|
| 619 |
-
"name": "python",
|
| 620 |
-
"nbconvert_exporter": "python",
|
| 621 |
-
"pygments_lexer": "ipython3",
|
| 622 |
-
"version": "3.12.9"
|
| 623 |
-
}
|
| 624 |
-
},
|
| 625 |
-
"nbformat": 4,
|
| 626 |
-
"nbformat_minor": 2
|
| 627 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_moneek.ipynb
DELETED
|
@@ -1,173 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"This program uses the Evaluator-Optimizer pattern to improve the generator's response when creating marketing content for a smart keyboard."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "code",
|
| 14 |
-
"execution_count": null,
|
| 15 |
-
"metadata": {},
|
| 16 |
-
"outputs": [],
|
| 17 |
-
"source": [
|
| 18 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 19 |
-
"\n",
|
| 20 |
-
"import os\n",
|
| 21 |
-
"import json\n",
|
| 22 |
-
"from dotenv import load_dotenv\n",
|
| 23 |
-
"from openai import OpenAI\n",
|
| 24 |
-
"from anthropic import Anthropic\n",
|
| 25 |
-
"from IPython.display import Markdown, display"
|
| 26 |
-
]
|
| 27 |
-
},
|
| 28 |
-
{
|
| 29 |
-
"cell_type": "code",
|
| 30 |
-
"execution_count": null,
|
| 31 |
-
"metadata": {},
|
| 32 |
-
"outputs": [],
|
| 33 |
-
"source": [
|
| 34 |
-
"# Always remember to do this!\n",
|
| 35 |
-
"load_dotenv(override=True)"
|
| 36 |
-
]
|
| 37 |
-
},
|
| 38 |
-
{
|
| 39 |
-
"cell_type": "code",
|
| 40 |
-
"execution_count": null,
|
| 41 |
-
"metadata": {},
|
| 42 |
-
"outputs": [],
|
| 43 |
-
"source": [
|
| 44 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 45 |
-
"\n",
|
| 46 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 47 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 48 |
-
"\n",
|
| 49 |
-
"if openai_api_key:\n",
|
| 50 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 51 |
-
"else:\n",
|
| 52 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 53 |
-
" \n",
|
| 54 |
-
"if anthropic_api_key:\n",
|
| 55 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 56 |
-
"else:\n",
|
| 57 |
-
" print(\"Anthropic API Key not set (and this is optional)\")"
|
| 58 |
-
]
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"cell_type": "code",
|
| 62 |
-
"execution_count": null,
|
| 63 |
-
"metadata": {},
|
| 64 |
-
"outputs": [],
|
| 65 |
-
"source": [
|
| 66 |
-
"request = \"Provide a short marketing content for XYZ keyboard. \"\n",
|
| 67 |
-
"request += \"It should be eagaging and talks about innovative features.\"\n",
|
| 68 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 69 |
-
]
|
| 70 |
-
},
|
| 71 |
-
{
|
| 72 |
-
"cell_type": "code",
|
| 73 |
-
"execution_count": null,
|
| 74 |
-
"metadata": {},
|
| 75 |
-
"outputs": [],
|
| 76 |
-
"source": [
|
| 77 |
-
"messages"
|
| 78 |
-
]
|
| 79 |
-
},
|
| 80 |
-
{
|
| 81 |
-
"cell_type": "code",
|
| 82 |
-
"execution_count": null,
|
| 83 |
-
"metadata": {},
|
| 84 |
-
"outputs": [],
|
| 85 |
-
"source": [
|
| 86 |
-
"openai = OpenAI()\n",
|
| 87 |
-
"\n",
|
| 88 |
-
"response = openai.chat.completions.create(\n",
|
| 89 |
-
" model=\"gpt-4o-mini\",\n",
|
| 90 |
-
" messages=messages,\n",
|
| 91 |
-
")\n",
|
| 92 |
-
"marketing_statement= response.choices[0].message.content\n",
|
| 93 |
-
"print(marketing_statement)\n",
|
| 94 |
-
"\n"
|
| 95 |
-
]
|
| 96 |
-
},
|
| 97 |
-
{
|
| 98 |
-
"cell_type": "code",
|
| 99 |
-
"execution_count": null,
|
| 100 |
-
"metadata": {},
|
| 101 |
-
"outputs": [],
|
| 102 |
-
"source": [
|
| 103 |
-
"judge = f\"\"\"### Instruction ###\n",
|
| 104 |
-
"You are an expert tech gadget analyst. Your task is to evaluate a marketing material based on several criteria.\n",
|
| 105 |
-
"Please be brief.\n",
|
| 106 |
-
"\n",
|
| 107 |
-
"### Ad to Evaluate ###\n",
|
| 108 |
-
"{marketing_statement}\n",
|
| 109 |
-
"\n",
|
| 110 |
-
"### Evaluation Criteria ###\n",
|
| 111 |
-
"Evaluate the statement based on how engaging it is.\n",
|
| 112 |
-
"\n",
|
| 113 |
-
"### Expected Output Format ###\n",
|
| 114 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 115 |
-
"{{\"results\": {{\"statement\": \"{marketing_statement}\", \"engagability\": \"Comment on whether the content is engaging\", \"critique\": \"Offer a specific critique and suggest at least one way the recipe could be improved\", \"verdict\": \"This should have a value either 'accepted' or 'rejected' based on whether the statement requires improvement\"}}}}\n",
|
| 116 |
-
"\"\"\"\n",
|
| 117 |
-
"\n",
|
| 118 |
-
"print(judge)\n",
|
| 119 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]\n",
|
| 120 |
-
"\n",
|
| 121 |
-
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 122 |
-
"claude = Anthropic()\n",
|
| 123 |
-
"response = claude.messages.create(model=model_name, messages=judge_messages, max_tokens=1000)\n",
|
| 124 |
-
"marketing_statement_feedback = response.content[0].text\n",
|
| 125 |
-
"\n",
|
| 126 |
-
"print(marketing_statement_feedback)\n"
|
| 127 |
-
]
|
| 128 |
-
},
|
| 129 |
-
{
|
| 130 |
-
"cell_type": "code",
|
| 131 |
-
"execution_count": null,
|
| 132 |
-
"metadata": {},
|
| 133 |
-
"outputs": [],
|
| 134 |
-
"source": [
|
| 135 |
-
"results_dict = json.loads(marketing_statement_feedback)\n",
|
| 136 |
-
"feedback = results_dict[\"results\"]\n",
|
| 137 |
-
"print(feedback)\n",
|
| 138 |
-
"print(\"\\n\\n\")\n",
|
| 139 |
-
"display(Markdown(marketing_statement_feedback))\n",
|
| 140 |
-
"\n",
|
| 141 |
-
"print(f\"Marketing statement:\\n{feedback[\"statement\"]}\")\n",
|
| 142 |
-
"for key in feedback:\n",
|
| 143 |
-
" if key == \"verdict\":\n",
|
| 144 |
-
" if feedback[key] == \"accepted\":\n",
|
| 145 |
-
" print(\"Marketing statement was accepted.\")\n",
|
| 146 |
-
" break\n",
|
| 147 |
-
" else:\n",
|
| 148 |
-
" print(\"Marketing statement was rejected and requires revision. Please iterate over to call Generator and Evaluator for improvement\")"
|
| 149 |
-
]
|
| 150 |
-
}
|
| 151 |
-
],
|
| 152 |
-
"metadata": {
|
| 153 |
-
"kernelspec": {
|
| 154 |
-
"display_name": ".venv",
|
| 155 |
-
"language": "python",
|
| 156 |
-
"name": "python3"
|
| 157 |
-
},
|
| 158 |
-
"language_info": {
|
| 159 |
-
"codemirror_mode": {
|
| 160 |
-
"name": "ipython",
|
| 161 |
-
"version": 3
|
| 162 |
-
},
|
| 163 |
-
"file_extension": ".py",
|
| 164 |
-
"mimetype": "text/x-python",
|
| 165 |
-
"name": "python",
|
| 166 |
-
"nbconvert_exporter": "python",
|
| 167 |
-
"pygments_lexer": "ipython3",
|
| 168 |
-
"version": "3.12.11"
|
| 169 |
-
}
|
| 170 |
-
},
|
| 171 |
-
"nbformat": 4,
|
| 172 |
-
"nbformat_minor": 2
|
| 173 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_multi-evaluation-criteria.ipynb
DELETED
|
@@ -1,506 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
-
" <tr>\n",
|
| 18 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
-
" </td>\n",
|
| 21 |
-
" <td>\n",
|
| 22 |
-
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
-
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "code",
|
| 32 |
-
"execution_count": null,
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"outputs": [],
|
| 35 |
-
"source": [
|
| 36 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
-
"\n",
|
| 38 |
-
"import os\n",
|
| 39 |
-
"import json\n",
|
| 40 |
-
"from dotenv import load_dotenv\n",
|
| 41 |
-
"from openai import OpenAI\n",
|
| 42 |
-
"from anthropic import Anthropic\n",
|
| 43 |
-
"from IPython.display import Markdown, display"
|
| 44 |
-
]
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"cell_type": "code",
|
| 48 |
-
"execution_count": null,
|
| 49 |
-
"metadata": {},
|
| 50 |
-
"outputs": [],
|
| 51 |
-
"source": [
|
| 52 |
-
"# Always remember to do this!\n",
|
| 53 |
-
"load_dotenv(override=True)"
|
| 54 |
-
]
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"cell_type": "code",
|
| 58 |
-
"execution_count": null,
|
| 59 |
-
"metadata": {},
|
| 60 |
-
"outputs": [],
|
| 61 |
-
"source": [
|
| 62 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
-
"\n",
|
| 64 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
-
"\n",
|
| 70 |
-
"if openai_api_key:\n",
|
| 71 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 72 |
-
"else:\n",
|
| 73 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 74 |
-
" \n",
|
| 75 |
-
"if anthropic_api_key:\n",
|
| 76 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 77 |
-
"else:\n",
|
| 78 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 79 |
-
"\n",
|
| 80 |
-
"if google_api_key:\n",
|
| 81 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 82 |
-
"else:\n",
|
| 83 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 84 |
-
"\n",
|
| 85 |
-
"if deepseek_api_key:\n",
|
| 86 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 87 |
-
"else:\n",
|
| 88 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 89 |
-
"\n",
|
| 90 |
-
"if groq_api_key:\n",
|
| 91 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 92 |
-
"else:\n",
|
| 93 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 94 |
-
]
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"cell_type": "code",
|
| 98 |
-
"execution_count": null,
|
| 99 |
-
"metadata": {},
|
| 100 |
-
"outputs": [],
|
| 101 |
-
"source": [
|
| 102 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 103 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 104 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 105 |
-
]
|
| 106 |
-
},
|
| 107 |
-
{
|
| 108 |
-
"cell_type": "code",
|
| 109 |
-
"execution_count": null,
|
| 110 |
-
"metadata": {},
|
| 111 |
-
"outputs": [],
|
| 112 |
-
"source": [
|
| 113 |
-
"messages"
|
| 114 |
-
]
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"cell_type": "code",
|
| 118 |
-
"execution_count": null,
|
| 119 |
-
"metadata": {},
|
| 120 |
-
"outputs": [],
|
| 121 |
-
"source": [
|
| 122 |
-
"openai = OpenAI()\n",
|
| 123 |
-
"response = openai.chat.completions.create(\n",
|
| 124 |
-
" model=\"gpt-4o-mini\",\n",
|
| 125 |
-
" messages=messages,\n",
|
| 126 |
-
")\n",
|
| 127 |
-
"question = response.choices[0].message.content\n",
|
| 128 |
-
"print(question)\n"
|
| 129 |
-
]
|
| 130 |
-
},
|
| 131 |
-
{
|
| 132 |
-
"cell_type": "code",
|
| 133 |
-
"execution_count": null,
|
| 134 |
-
"metadata": {},
|
| 135 |
-
"outputs": [],
|
| 136 |
-
"source": [
|
| 137 |
-
"competitors = []\n",
|
| 138 |
-
"answers = []\n",
|
| 139 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 140 |
-
]
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"cell_type": "code",
|
| 144 |
-
"execution_count": null,
|
| 145 |
-
"metadata": {},
|
| 146 |
-
"outputs": [],
|
| 147 |
-
"source": [
|
| 148 |
-
"# The API we know well\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"model_name = \"gpt-4o-mini\"\n",
|
| 151 |
-
"\n",
|
| 152 |
-
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 153 |
-
"answer = response.choices[0].message.content\n",
|
| 154 |
-
"\n",
|
| 155 |
-
"display(Markdown(answer))\n",
|
| 156 |
-
"competitors.append(model_name)\n",
|
| 157 |
-
"answers.append(answer)"
|
| 158 |
-
]
|
| 159 |
-
},
|
| 160 |
-
{
|
| 161 |
-
"cell_type": "code",
|
| 162 |
-
"execution_count": null,
|
| 163 |
-
"metadata": {},
|
| 164 |
-
"outputs": [],
|
| 165 |
-
"source": [
|
| 166 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 167 |
-
"\n",
|
| 168 |
-
"model_name = \"claude-sonnet-4-latest\"\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"claude = Anthropic()\n",
|
| 171 |
-
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 172 |
-
"answer = response.content[0].text\n",
|
| 173 |
-
"\n",
|
| 174 |
-
"display(Markdown(answer))\n",
|
| 175 |
-
"competitors.append(model_name)\n",
|
| 176 |
-
"answers.append(answer)"
|
| 177 |
-
]
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"cell_type": "code",
|
| 181 |
-
"execution_count": null,
|
| 182 |
-
"metadata": {},
|
| 183 |
-
"outputs": [],
|
| 184 |
-
"source": [
|
| 185 |
-
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 186 |
-
"model_name = \"gemini-2.0-flash\"\n",
|
| 187 |
-
"\n",
|
| 188 |
-
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 189 |
-
"answer = response.choices[0].message.content\n",
|
| 190 |
-
"\n",
|
| 191 |
-
"display(Markdown(answer))\n",
|
| 192 |
-
"competitors.append(model_name)\n",
|
| 193 |
-
"answers.append(answer)"
|
| 194 |
-
]
|
| 195 |
-
},
|
| 196 |
-
{
|
| 197 |
-
"cell_type": "code",
|
| 198 |
-
"execution_count": null,
|
| 199 |
-
"metadata": {},
|
| 200 |
-
"outputs": [],
|
| 201 |
-
"source": [
|
| 202 |
-
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 203 |
-
"model_name = \"deepseek-chat\"\n",
|
| 204 |
-
"\n",
|
| 205 |
-
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 206 |
-
"answer = response.choices[0].message.content\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"display(Markdown(answer))\n",
|
| 209 |
-
"competitors.append(model_name)\n",
|
| 210 |
-
"answers.append(answer)"
|
| 211 |
-
]
|
| 212 |
-
},
|
| 213 |
-
{
|
| 214 |
-
"cell_type": "code",
|
| 215 |
-
"execution_count": null,
|
| 216 |
-
"metadata": {},
|
| 217 |
-
"outputs": [],
|
| 218 |
-
"source": [
|
| 219 |
-
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 220 |
-
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 221 |
-
"\n",
|
| 222 |
-
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 223 |
-
"answer = response.choices[0].message.content\n",
|
| 224 |
-
"\n",
|
| 225 |
-
"display(Markdown(answer))\n",
|
| 226 |
-
"competitors.append(model_name)\n",
|
| 227 |
-
"answers.append(answer)\n"
|
| 228 |
-
]
|
| 229 |
-
},
|
| 230 |
-
{
|
| 231 |
-
"cell_type": "markdown",
|
| 232 |
-
"metadata": {},
|
| 233 |
-
"source": [
|
| 234 |
-
"## For the next cell, we will use Ollama\n",
|
| 235 |
-
"\n",
|
| 236 |
-
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 237 |
-
"and runs models locally using high performance C++ code.\n",
|
| 238 |
-
"\n",
|
| 239 |
-
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 240 |
-
"\n",
|
| 241 |
-
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 242 |
-
"\n",
|
| 243 |
-
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 244 |
-
"\n",
|
| 245 |
-
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 246 |
-
"\n",
|
| 247 |
-
"`ollama pull <model_name>` downloads a model locally \n",
|
| 248 |
-
"`ollama ls` lists all the models you've downloaded \n",
|
| 249 |
-
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 250 |
-
]
|
| 251 |
-
},
|
| 252 |
-
{
|
| 253 |
-
"cell_type": "markdown",
|
| 254 |
-
"metadata": {},
|
| 255 |
-
"source": [
|
| 256 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 257 |
-
" <tr>\n",
|
| 258 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 259 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 260 |
-
" </td>\n",
|
| 261 |
-
" <td>\n",
|
| 262 |
-
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 263 |
-
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 264 |
-
" </span>\n",
|
| 265 |
-
" </td>\n",
|
| 266 |
-
" </tr>\n",
|
| 267 |
-
"</table>"
|
| 268 |
-
]
|
| 269 |
-
},
|
| 270 |
-
{
|
| 271 |
-
"cell_type": "code",
|
| 272 |
-
"execution_count": null,
|
| 273 |
-
"metadata": {},
|
| 274 |
-
"outputs": [],
|
| 275 |
-
"source": [
|
| 276 |
-
"!ollama pull llama3.2"
|
| 277 |
-
]
|
| 278 |
-
},
|
| 279 |
-
{
|
| 280 |
-
"cell_type": "code",
|
| 281 |
-
"execution_count": null,
|
| 282 |
-
"metadata": {},
|
| 283 |
-
"outputs": [],
|
| 284 |
-
"source": [
|
| 285 |
-
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 286 |
-
"model_name = \"llama3\"\n",
|
| 287 |
-
"\n",
|
| 288 |
-
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 289 |
-
"answer = response.choices[0].message.content\n",
|
| 290 |
-
"\n",
|
| 291 |
-
"display(Markdown(answer))\n",
|
| 292 |
-
"competitors.append(model_name)\n",
|
| 293 |
-
"answers.append(answer)"
|
| 294 |
-
]
|
| 295 |
-
},
|
| 296 |
-
{
|
| 297 |
-
"cell_type": "code",
|
| 298 |
-
"execution_count": null,
|
| 299 |
-
"metadata": {},
|
| 300 |
-
"outputs": [],
|
| 301 |
-
"source": [
|
| 302 |
-
"# So where are we?\n",
|
| 303 |
-
"\n",
|
| 304 |
-
"print(competitors)\n",
|
| 305 |
-
"print(answers)\n"
|
| 306 |
-
]
|
| 307 |
-
},
|
| 308 |
-
{
|
| 309 |
-
"cell_type": "code",
|
| 310 |
-
"execution_count": null,
|
| 311 |
-
"metadata": {},
|
| 312 |
-
"outputs": [],
|
| 313 |
-
"source": [
|
| 314 |
-
"# It's nice to know how to use \"zip\"\n",
|
| 315 |
-
"for competitor, answer in zip(competitors, answers):\n",
|
| 316 |
-
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 317 |
-
]
|
| 318 |
-
},
|
| 319 |
-
{
|
| 320 |
-
"cell_type": "code",
|
| 321 |
-
"execution_count": null,
|
| 322 |
-
"metadata": {},
|
| 323 |
-
"outputs": [],
|
| 324 |
-
"source": [
|
| 325 |
-
"for competitor, answer in zip(competitors, answers):\n",
|
| 326 |
-
" display(Markdown(f\"# Competitor: {competitor}\\n\\n{answer}\"))"
|
| 327 |
-
]
|
| 328 |
-
},
|
| 329 |
-
{
|
| 330 |
-
"cell_type": "code",
|
| 331 |
-
"execution_count": null,
|
| 332 |
-
"metadata": {},
|
| 333 |
-
"outputs": [],
|
| 334 |
-
"source": [
|
| 335 |
-
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 336 |
-
"\n",
|
| 337 |
-
"together = \"\"\n",
|
| 338 |
-
"for index, answer in enumerate(answers):\n",
|
| 339 |
-
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 340 |
-
" together += answer + \"\\n\\n\""
|
| 341 |
-
]
|
| 342 |
-
},
|
| 343 |
-
{
|
| 344 |
-
"cell_type": "code",
|
| 345 |
-
"execution_count": null,
|
| 346 |
-
"metadata": {},
|
| 347 |
-
"outputs": [],
|
| 348 |
-
"source": [
|
| 349 |
-
"print(together)"
|
| 350 |
-
]
|
| 351 |
-
},
|
| 352 |
-
{
|
| 353 |
-
"cell_type": "code",
|
| 354 |
-
"execution_count": null,
|
| 355 |
-
"metadata": {},
|
| 356 |
-
"outputs": [],
|
| 357 |
-
"source": [
|
| 358 |
-
"evaluation_criteria = [\"Effectiveness in resolving the conflict\", \"Clarity of argument\", \"Creativity of solution\", \"Strength of argument\", \"conciseness\", \"applicability to a business context\"]\n",
|
| 359 |
-
"\n",
|
| 360 |
-
"judgements = []\n",
|
| 361 |
-
"\n",
|
| 362 |
-
"for evaluation_criterion in evaluation_criteria:\n",
|
| 363 |
-
"\n",
|
| 364 |
-
" judgements.append (f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 365 |
-
" Each model has been given this question:\n",
|
| 366 |
-
"\n",
|
| 367 |
-
" {question}\n",
|
| 368 |
-
"\n",
|
| 369 |
-
" Your job is to evaluate each response for {evaluation_criterion}, and rank them in order of best to worst.\n",
|
| 370 |
-
" Respond with JSON, and only JSON, with the following format:\n",
|
| 371 |
-
" {{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 372 |
-
"\n",
|
| 373 |
-
" Here are the responses from each competitor:\n",
|
| 374 |
-
"\n",
|
| 375 |
-
" {together}\n",
|
| 376 |
-
"\n",
|
| 377 |
-
" Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\")\n"
|
| 378 |
-
]
|
| 379 |
-
},
|
| 380 |
-
{
|
| 381 |
-
"cell_type": "code",
|
| 382 |
-
"execution_count": null,
|
| 383 |
-
"metadata": {},
|
| 384 |
-
"outputs": [],
|
| 385 |
-
"source": [
|
| 386 |
-
"print(judgements[1])\n"
|
| 387 |
-
]
|
| 388 |
-
},
|
| 389 |
-
{
|
| 390 |
-
"cell_type": "code",
|
| 391 |
-
"execution_count": null,
|
| 392 |
-
"metadata": {},
|
| 393 |
-
"outputs": [],
|
| 394 |
-
"source": [
|
| 395 |
-
"\n",
|
| 396 |
-
"judge_messages = []\n",
|
| 397 |
-
"for judgement in judgements:\n",
|
| 398 |
-
" judge_messages.append ([{\"role\": \"user\", \"content\": judgement}])"
|
| 399 |
-
]
|
| 400 |
-
},
|
| 401 |
-
{
|
| 402 |
-
"cell_type": "code",
|
| 403 |
-
"execution_count": null,
|
| 404 |
-
"metadata": {},
|
| 405 |
-
"outputs": [],
|
| 406 |
-
"source": [
|
| 407 |
-
"results = []\n",
|
| 408 |
-
"# Judgement time!\n",
|
| 409 |
-
"for judge_message in judge_messages:\n",
|
| 410 |
-
" openai = OpenAI()\n",
|
| 411 |
-
" response = openai.chat.completions.create(\n",
|
| 412 |
-
" model=\"o3-mini\",\n",
|
| 413 |
-
" messages=judge_message,\n",
|
| 414 |
-
" )\n",
|
| 415 |
-
" results.append (response.choices[0].message.content)\n",
|
| 416 |
-
" print(results[0])\n"
|
| 417 |
-
]
|
| 418 |
-
},
|
| 419 |
-
{
|
| 420 |
-
"cell_type": "code",
|
| 421 |
-
"execution_count": null,
|
| 422 |
-
"metadata": {},
|
| 423 |
-
"outputs": [],
|
| 424 |
-
"source": [
|
| 425 |
-
"for result in results:\n",
|
| 426 |
-
" print(result)"
|
| 427 |
-
]
|
| 428 |
-
},
|
| 429 |
-
{
|
| 430 |
-
"cell_type": "code",
|
| 431 |
-
"execution_count": null,
|
| 432 |
-
"metadata": {},
|
| 433 |
-
"outputs": [],
|
| 434 |
-
"source": [
|
| 435 |
-
"# OK let's turn this into results!\n",
|
| 436 |
-
"\n",
|
| 437 |
-
"for result, evaluation_criterion in zip(results, evaluation_criteria):\n",
|
| 438 |
-
" results_dict = json.loads(result)\n",
|
| 439 |
-
" ranks = results_dict[\"results\"]\n",
|
| 440 |
-
" display(Markdown(f\"### {evaluation_criterion}\"))\n",
|
| 441 |
-
" for index, result in enumerate(ranks):\n",
|
| 442 |
-
" competitor = competitors[int(result)-1] \n",
|
| 443 |
-
" display(Markdown(f\"Rank {index+1}: {competitor}\"))"
|
| 444 |
-
]
|
| 445 |
-
},
|
| 446 |
-
{
|
| 447 |
-
"cell_type": "markdown",
|
| 448 |
-
"metadata": {},
|
| 449 |
-
"source": [
|
| 450 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 451 |
-
" <tr>\n",
|
| 452 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 453 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 454 |
-
" </td>\n",
|
| 455 |
-
" <td>\n",
|
| 456 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 457 |
-
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 458 |
-
" </span>\n",
|
| 459 |
-
" </td>\n",
|
| 460 |
-
" </tr>\n",
|
| 461 |
-
"</table>"
|
| 462 |
-
]
|
| 463 |
-
},
|
| 464 |
-
{
|
| 465 |
-
"cell_type": "markdown",
|
| 466 |
-
"metadata": {},
|
| 467 |
-
"source": [
|
| 468 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 469 |
-
" <tr>\n",
|
| 470 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 471 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 472 |
-
" </td>\n",
|
| 473 |
-
" <td>\n",
|
| 474 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 475 |
-
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 476 |
-
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 477 |
-
" to business projects where accuracy is critical.\n",
|
| 478 |
-
" </span>\n",
|
| 479 |
-
" </td>\n",
|
| 480 |
-
" </tr>\n",
|
| 481 |
-
"</table>"
|
| 482 |
-
]
|
| 483 |
-
}
|
| 484 |
-
],
|
| 485 |
-
"metadata": {
|
| 486 |
-
"kernelspec": {
|
| 487 |
-
"display_name": ".venv",
|
| 488 |
-
"language": "python",
|
| 489 |
-
"name": "python3"
|
| 490 |
-
},
|
| 491 |
-
"language_info": {
|
| 492 |
-
"codemirror_mode": {
|
| 493 |
-
"name": "ipython",
|
| 494 |
-
"version": 3
|
| 495 |
-
},
|
| 496 |
-
"file_extension": ".py",
|
| 497 |
-
"mimetype": "text/x-python",
|
| 498 |
-
"name": "python",
|
| 499 |
-
"nbconvert_exporter": "python",
|
| 500 |
-
"pygments_lexer": "ipython3",
|
| 501 |
-
"version": "3.12.10"
|
| 502 |
-
}
|
| 503 |
-
},
|
| 504 |
-
"nbformat": 4,
|
| 505 |
-
"nbformat_minor": 2
|
| 506 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_perplexity_support.ipynb
DELETED
|
@@ -1,497 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
-
" <tr>\n",
|
| 18 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
-
" </td>\n",
|
| 21 |
-
" <td>\n",
|
| 22 |
-
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
-
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "code",
|
| 32 |
-
"execution_count": 1,
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"outputs": [],
|
| 35 |
-
"source": [
|
| 36 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 37 |
-
"\n",
|
| 38 |
-
"import os\n",
|
| 39 |
-
"import json\n",
|
| 40 |
-
"from dotenv import load_dotenv\n",
|
| 41 |
-
"from openai import OpenAI\n",
|
| 42 |
-
"from anthropic import Anthropic\n",
|
| 43 |
-
"from IPython.display import Markdown, display"
|
| 44 |
-
]
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"cell_type": "code",
|
| 48 |
-
"execution_count": null,
|
| 49 |
-
"metadata": {},
|
| 50 |
-
"outputs": [],
|
| 51 |
-
"source": [
|
| 52 |
-
"# Always remember to do this!\n",
|
| 53 |
-
"load_dotenv(override=True)"
|
| 54 |
-
]
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"cell_type": "code",
|
| 58 |
-
"execution_count": null,
|
| 59 |
-
"metadata": {},
|
| 60 |
-
"outputs": [],
|
| 61 |
-
"source": [
|
| 62 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 63 |
-
"\n",
|
| 64 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 65 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 66 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 67 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 68 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 69 |
-
"perplexity_api_key = os.getenv('PERPLEXITY_API_KEY')\n",
|
| 70 |
-
"\n",
|
| 71 |
-
"if openai_api_key:\n",
|
| 72 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 73 |
-
"else:\n",
|
| 74 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 75 |
-
" \n",
|
| 76 |
-
"if anthropic_api_key:\n",
|
| 77 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 78 |
-
"else:\n",
|
| 79 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 80 |
-
"\n",
|
| 81 |
-
"if google_api_key:\n",
|
| 82 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 83 |
-
"else:\n",
|
| 84 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 85 |
-
"\n",
|
| 86 |
-
"if deepseek_api_key:\n",
|
| 87 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 88 |
-
"else:\n",
|
| 89 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 90 |
-
"\n",
|
| 91 |
-
"if groq_api_key:\n",
|
| 92 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 93 |
-
"else:\n",
|
| 94 |
-
" print(\"Groq API Key not set (and this is optional)\")\n",
|
| 95 |
-
"\n",
|
| 96 |
-
"if perplexity_api_key:\n",
|
| 97 |
-
" print(f\"Perplexity API Key exists and begins {perplexity_api_key[:4]}\")\n",
|
| 98 |
-
"else:\n",
|
| 99 |
-
" print(\"Perplexity API Key not set (and this is optional)\")"
|
| 100 |
-
]
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"cell_type": "code",
|
| 104 |
-
"execution_count": 4,
|
| 105 |
-
"metadata": {},
|
| 106 |
-
"outputs": [],
|
| 107 |
-
"source": [
|
| 108 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 109 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 110 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 111 |
-
]
|
| 112 |
-
},
|
| 113 |
-
{
|
| 114 |
-
"cell_type": "code",
|
| 115 |
-
"execution_count": null,
|
| 116 |
-
"metadata": {},
|
| 117 |
-
"outputs": [],
|
| 118 |
-
"source": [
|
| 119 |
-
"messages"
|
| 120 |
-
]
|
| 121 |
-
},
|
| 122 |
-
{
|
| 123 |
-
"cell_type": "code",
|
| 124 |
-
"execution_count": null,
|
| 125 |
-
"metadata": {},
|
| 126 |
-
"outputs": [],
|
| 127 |
-
"source": [
|
| 128 |
-
"openai = OpenAI()\n",
|
| 129 |
-
"response = openai.chat.completions.create(\n",
|
| 130 |
-
" model=\"gpt-4o-mini\",\n",
|
| 131 |
-
" messages=messages,\n",
|
| 132 |
-
")\n",
|
| 133 |
-
"question = response.choices[0].message.content\n",
|
| 134 |
-
"print(question)\n"
|
| 135 |
-
]
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"cell_type": "code",
|
| 139 |
-
"execution_count": null,
|
| 140 |
-
"metadata": {},
|
| 141 |
-
"outputs": [],
|
| 142 |
-
"source": [
|
| 143 |
-
"competitors = []\n",
|
| 144 |
-
"answers = []\n",
|
| 145 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 146 |
-
]
|
| 147 |
-
},
|
| 148 |
-
{
|
| 149 |
-
"cell_type": "code",
|
| 150 |
-
"execution_count": null,
|
| 151 |
-
"metadata": {},
|
| 152 |
-
"outputs": [],
|
| 153 |
-
"source": [
|
| 154 |
-
"# The API we know well\n",
|
| 155 |
-
"\n",
|
| 156 |
-
"model_name = \"gpt-4o-mini\"\n",
|
| 157 |
-
"\n",
|
| 158 |
-
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 159 |
-
"answer = response.choices[0].message.content\n",
|
| 160 |
-
"\n",
|
| 161 |
-
"display(Markdown(answer))\n",
|
| 162 |
-
"competitors.append(model_name)\n",
|
| 163 |
-
"answers.append(answer)"
|
| 164 |
-
]
|
| 165 |
-
},
|
| 166 |
-
{
|
| 167 |
-
"cell_type": "code",
|
| 168 |
-
"execution_count": null,
|
| 169 |
-
"metadata": {},
|
| 170 |
-
"outputs": [],
|
| 171 |
-
"source": [
|
| 172 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 173 |
-
"\n",
|
| 174 |
-
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 175 |
-
"\n",
|
| 176 |
-
"claude = Anthropic()\n",
|
| 177 |
-
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 178 |
-
"answer = response.content[0].text\n",
|
| 179 |
-
"\n",
|
| 180 |
-
"display(Markdown(answer))\n",
|
| 181 |
-
"competitors.append(model_name)\n",
|
| 182 |
-
"answers.append(answer)"
|
| 183 |
-
]
|
| 184 |
-
},
|
| 185 |
-
{
|
| 186 |
-
"cell_type": "code",
|
| 187 |
-
"execution_count": null,
|
| 188 |
-
"metadata": {},
|
| 189 |
-
"outputs": [],
|
| 190 |
-
"source": [
|
| 191 |
-
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 192 |
-
"model_name = \"gemini-2.0-flash\"\n",
|
| 193 |
-
"\n",
|
| 194 |
-
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 195 |
-
"answer = response.choices[0].message.content\n",
|
| 196 |
-
"\n",
|
| 197 |
-
"display(Markdown(answer))\n",
|
| 198 |
-
"competitors.append(model_name)\n",
|
| 199 |
-
"answers.append(answer)"
|
| 200 |
-
]
|
| 201 |
-
},
|
| 202 |
-
{
|
| 203 |
-
"cell_type": "code",
|
| 204 |
-
"execution_count": null,
|
| 205 |
-
"metadata": {},
|
| 206 |
-
"outputs": [],
|
| 207 |
-
"source": [
|
| 208 |
-
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 209 |
-
"model_name = \"deepseek-chat\"\n",
|
| 210 |
-
"\n",
|
| 211 |
-
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 212 |
-
"answer = response.choices[0].message.content\n",
|
| 213 |
-
"\n",
|
| 214 |
-
"display(Markdown(answer))\n",
|
| 215 |
-
"competitors.append(model_name)\n",
|
| 216 |
-
"answers.append(answer)"
|
| 217 |
-
]
|
| 218 |
-
},
|
| 219 |
-
{
|
| 220 |
-
"cell_type": "code",
|
| 221 |
-
"execution_count": null,
|
| 222 |
-
"metadata": {},
|
| 223 |
-
"outputs": [],
|
| 224 |
-
"source": [
|
| 225 |
-
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 226 |
-
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 227 |
-
"\n",
|
| 228 |
-
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 229 |
-
"answer = response.choices[0].message.content\n",
|
| 230 |
-
"\n",
|
| 231 |
-
"display(Markdown(answer))\n",
|
| 232 |
-
"competitors.append(model_name)\n",
|
| 233 |
-
"answers.append(answer)\n"
|
| 234 |
-
]
|
| 235 |
-
},
|
| 236 |
-
{
|
| 237 |
-
"cell_type": "code",
|
| 238 |
-
"execution_count": null,
|
| 239 |
-
"metadata": {},
|
| 240 |
-
"outputs": [],
|
| 241 |
-
"source": [
|
| 242 |
-
"perplexity = OpenAI(api_key=perplexity_api_key, base_url=\"https://api.perplexity.ai\")\n",
|
| 243 |
-
"model_name = \"sonar\"\n",
|
| 244 |
-
"\n",
|
| 245 |
-
"response = perplexity.chat.completions.create(model=model_name, messages=messages)\n",
|
| 246 |
-
"answer = response.choices[0].message.content\n",
|
| 247 |
-
"\n",
|
| 248 |
-
"display(Markdown(answer))\n",
|
| 249 |
-
"competitors.append(model_name)\n",
|
| 250 |
-
"answers.append(answer)"
|
| 251 |
-
]
|
| 252 |
-
},
|
| 253 |
-
{
|
| 254 |
-
"cell_type": "markdown",
|
| 255 |
-
"metadata": {},
|
| 256 |
-
"source": [
|
| 257 |
-
"## For the next cell, we will use Ollama\n",
|
| 258 |
-
"\n",
|
| 259 |
-
"Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
|
| 260 |
-
"and runs models locally using high performance C++ code.\n",
|
| 261 |
-
"\n",
|
| 262 |
-
"If you don't have Ollama, install it here by visiting https://ollama.com then pressing Download and following the instructions.\n",
|
| 263 |
-
"\n",
|
| 264 |
-
"After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
|
| 265 |
-
"\n",
|
| 266 |
-
"You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
|
| 267 |
-
"\n",
|
| 268 |
-
"Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
|
| 269 |
-
"\n",
|
| 270 |
-
"`ollama pull <model_name>` downloads a model locally \n",
|
| 271 |
-
"`ollama ls` lists all the models you've downloaded \n",
|
| 272 |
-
"`ollama rm <model_name>` deletes the specified model from your downloads"
|
| 273 |
-
]
|
| 274 |
-
},
|
| 275 |
-
{
|
| 276 |
-
"cell_type": "markdown",
|
| 277 |
-
"metadata": {},
|
| 278 |
-
"source": [
|
| 279 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 280 |
-
" <tr>\n",
|
| 281 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 282 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 283 |
-
" </td>\n",
|
| 284 |
-
" <td>\n",
|
| 285 |
-
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 286 |
-
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 287 |
-
" </span>\n",
|
| 288 |
-
" </td>\n",
|
| 289 |
-
" </tr>\n",
|
| 290 |
-
"</table>"
|
| 291 |
-
]
|
| 292 |
-
},
|
| 293 |
-
{
|
| 294 |
-
"cell_type": "code",
|
| 295 |
-
"execution_count": null,
|
| 296 |
-
"metadata": {},
|
| 297 |
-
"outputs": [],
|
| 298 |
-
"source": [
|
| 299 |
-
"!ollama pull llama3.2"
|
| 300 |
-
]
|
| 301 |
-
},
|
| 302 |
-
{
|
| 303 |
-
"cell_type": "code",
|
| 304 |
-
"execution_count": null,
|
| 305 |
-
"metadata": {},
|
| 306 |
-
"outputs": [],
|
| 307 |
-
"source": [
|
| 308 |
-
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 309 |
-
"model_name = \"llama3.2\"\n",
|
| 310 |
-
"\n",
|
| 311 |
-
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 312 |
-
"answer = response.choices[0].message.content\n",
|
| 313 |
-
"\n",
|
| 314 |
-
"display(Markdown(answer))\n",
|
| 315 |
-
"competitors.append(model_name)\n",
|
| 316 |
-
"answers.append(answer)"
|
| 317 |
-
]
|
| 318 |
-
},
|
| 319 |
-
{
|
| 320 |
-
"cell_type": "code",
|
| 321 |
-
"execution_count": null,
|
| 322 |
-
"metadata": {},
|
| 323 |
-
"outputs": [],
|
| 324 |
-
"source": [
|
| 325 |
-
"# So where are we?\n",
|
| 326 |
-
"\n",
|
| 327 |
-
"print(competitors)\n",
|
| 328 |
-
"print(answers)\n"
|
| 329 |
-
]
|
| 330 |
-
},
|
| 331 |
-
{
|
| 332 |
-
"cell_type": "code",
|
| 333 |
-
"execution_count": null,
|
| 334 |
-
"metadata": {},
|
| 335 |
-
"outputs": [],
|
| 336 |
-
"source": [
|
| 337 |
-
"# It's nice to know how to use \"zip\"\n",
|
| 338 |
-
"for competitor, answer in zip(competitors, answers):\n",
|
| 339 |
-
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 340 |
-
]
|
| 341 |
-
},
|
| 342 |
-
{
|
| 343 |
-
"cell_type": "code",
|
| 344 |
-
"execution_count": 20,
|
| 345 |
-
"metadata": {},
|
| 346 |
-
"outputs": [],
|
| 347 |
-
"source": [
|
| 348 |
-
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 349 |
-
"\n",
|
| 350 |
-
"together = \"\"\n",
|
| 351 |
-
"for index, answer in enumerate(answers):\n",
|
| 352 |
-
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 353 |
-
" together += answer + \"\\n\\n\""
|
| 354 |
-
]
|
| 355 |
-
},
|
| 356 |
-
{
|
| 357 |
-
"cell_type": "code",
|
| 358 |
-
"execution_count": null,
|
| 359 |
-
"metadata": {},
|
| 360 |
-
"outputs": [],
|
| 361 |
-
"source": [
|
| 362 |
-
"print(together)"
|
| 363 |
-
]
|
| 364 |
-
},
|
| 365 |
-
{
|
| 366 |
-
"cell_type": "code",
|
| 367 |
-
"execution_count": 22,
|
| 368 |
-
"metadata": {},
|
| 369 |
-
"outputs": [],
|
| 370 |
-
"source": [
|
| 371 |
-
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 372 |
-
"Each model has been given this question:\n",
|
| 373 |
-
"\n",
|
| 374 |
-
"{question}\n",
|
| 375 |
-
"\n",
|
| 376 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 377 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 378 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 379 |
-
"\n",
|
| 380 |
-
"Here are the responses from each competitor:\n",
|
| 381 |
-
"\n",
|
| 382 |
-
"{together}\n",
|
| 383 |
-
"\n",
|
| 384 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 385 |
-
]
|
| 386 |
-
},
|
| 387 |
-
{
|
| 388 |
-
"cell_type": "code",
|
| 389 |
-
"execution_count": null,
|
| 390 |
-
"metadata": {},
|
| 391 |
-
"outputs": [],
|
| 392 |
-
"source": [
|
| 393 |
-
"print(judge)"
|
| 394 |
-
]
|
| 395 |
-
},
|
| 396 |
-
{
|
| 397 |
-
"cell_type": "code",
|
| 398 |
-
"execution_count": 29,
|
| 399 |
-
"metadata": {},
|
| 400 |
-
"outputs": [],
|
| 401 |
-
"source": [
|
| 402 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 403 |
-
]
|
| 404 |
-
},
|
| 405 |
-
{
|
| 406 |
-
"cell_type": "code",
|
| 407 |
-
"execution_count": null,
|
| 408 |
-
"metadata": {},
|
| 409 |
-
"outputs": [],
|
| 410 |
-
"source": [
|
| 411 |
-
"# Judgement time!\n",
|
| 412 |
-
"\n",
|
| 413 |
-
"openai = OpenAI()\n",
|
| 414 |
-
"response = openai.chat.completions.create(\n",
|
| 415 |
-
" model=\"o3-mini\",\n",
|
| 416 |
-
" messages=judge_messages,\n",
|
| 417 |
-
")\n",
|
| 418 |
-
"results = response.choices[0].message.content\n",
|
| 419 |
-
"print(results)\n"
|
| 420 |
-
]
|
| 421 |
-
},
|
| 422 |
-
{
|
| 423 |
-
"cell_type": "code",
|
| 424 |
-
"execution_count": null,
|
| 425 |
-
"metadata": {},
|
| 426 |
-
"outputs": [],
|
| 427 |
-
"source": [
|
| 428 |
-
"# OK let's turn this into results!\n",
|
| 429 |
-
"\n",
|
| 430 |
-
"results_dict = json.loads(results)\n",
|
| 431 |
-
"ranks = results_dict[\"results\"]\n",
|
| 432 |
-
"for index, result in enumerate(ranks):\n",
|
| 433 |
-
" competitor = competitors[int(result)-1]\n",
|
| 434 |
-
" print(f\"Rank {index+1}: {competitor}\")"
|
| 435 |
-
]
|
| 436 |
-
},
|
| 437 |
-
{
|
| 438 |
-
"cell_type": "markdown",
|
| 439 |
-
"metadata": {},
|
| 440 |
-
"source": [
|
| 441 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 442 |
-
" <tr>\n",
|
| 443 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 444 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 445 |
-
" </td>\n",
|
| 446 |
-
" <td>\n",
|
| 447 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 448 |
-
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 449 |
-
" </span>\n",
|
| 450 |
-
" </td>\n",
|
| 451 |
-
" </tr>\n",
|
| 452 |
-
"</table>"
|
| 453 |
-
]
|
| 454 |
-
},
|
| 455 |
-
{
|
| 456 |
-
"cell_type": "markdown",
|
| 457 |
-
"metadata": {},
|
| 458 |
-
"source": [
|
| 459 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 460 |
-
" <tr>\n",
|
| 461 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 462 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 463 |
-
" </td>\n",
|
| 464 |
-
" <td>\n",
|
| 465 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 466 |
-
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 467 |
-
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 468 |
-
" to business projects where accuracy is critical.\n",
|
| 469 |
-
" </span>\n",
|
| 470 |
-
" </td>\n",
|
| 471 |
-
" </tr>\n",
|
| 472 |
-
"</table>"
|
| 473 |
-
]
|
| 474 |
-
}
|
| 475 |
-
],
|
| 476 |
-
"metadata": {
|
| 477 |
-
"kernelspec": {
|
| 478 |
-
"display_name": ".venv",
|
| 479 |
-
"language": "python",
|
| 480 |
-
"name": "python3"
|
| 481 |
-
},
|
| 482 |
-
"language_info": {
|
| 483 |
-
"codemirror_mode": {
|
| 484 |
-
"name": "ipython",
|
| 485 |
-
"version": 3
|
| 486 |
-
},
|
| 487 |
-
"file_extension": ".py",
|
| 488 |
-
"mimetype": "text/x-python",
|
| 489 |
-
"name": "python",
|
| 490 |
-
"nbconvert_exporter": "python",
|
| 491 |
-
"pygments_lexer": "ipython3",
|
| 492 |
-
"version": "3.12.3"
|
| 493 |
-
}
|
| 494 |
-
},
|
| 495 |
-
"nbformat": 4,
|
| 496 |
-
"nbformat_minor": 2
|
| 497 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_reflection_pattern.ipynb
DELETED
|
@@ -1,311 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Week 1, Day 3\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"Today we will work with lots of models! This is a way to get comfortable with APIs."
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 17 |
-
" <tr>\n",
|
| 18 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 19 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 20 |
-
" </td>\n",
|
| 21 |
-
" <td>\n",
|
| 22 |
-
" <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
|
| 23 |
-
" <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
|
| 24 |
-
" </span>\n",
|
| 25 |
-
" </td>\n",
|
| 26 |
-
" </tr>\n",
|
| 27 |
-
"</table>"
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "markdown",
|
| 32 |
-
"metadata": {},
|
| 33 |
-
"source": [
|
| 34 |
-
"This version adds Reflection pattern where we ask each model to critique and improve its own answer."
|
| 35 |
-
]
|
| 36 |
-
},
|
| 37 |
-
{
|
| 38 |
-
"cell_type": "code",
|
| 39 |
-
"execution_count": 9,
|
| 40 |
-
"metadata": {},
|
| 41 |
-
"outputs": [],
|
| 42 |
-
"source": [
|
| 43 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 44 |
-
"\n",
|
| 45 |
-
"import os\n",
|
| 46 |
-
"import json\n",
|
| 47 |
-
"from dotenv import load_dotenv\n",
|
| 48 |
-
"from openai import OpenAI\n",
|
| 49 |
-
"from anthropic import Anthropic\n",
|
| 50 |
-
"from IPython.display import Markdown, display"
|
| 51 |
-
]
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"cell_type": "markdown",
|
| 55 |
-
"metadata": {},
|
| 56 |
-
"source": []
|
| 57 |
-
},
|
| 58 |
-
{
|
| 59 |
-
"cell_type": "code",
|
| 60 |
-
"execution_count": 12,
|
| 61 |
-
"metadata": {},
|
| 62 |
-
"outputs": [],
|
| 63 |
-
"source": [
|
| 64 |
-
"request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
|
| 65 |
-
"request += \"Answer only with the question, no explanation.\"\n",
|
| 66 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 67 |
-
]
|
| 68 |
-
},
|
| 69 |
-
{
|
| 70 |
-
"cell_type": "code",
|
| 71 |
-
"execution_count": null,
|
| 72 |
-
"metadata": {},
|
| 73 |
-
"outputs": [],
|
| 74 |
-
"source": [
|
| 75 |
-
"messages"
|
| 76 |
-
]
|
| 77 |
-
},
|
| 78 |
-
{
|
| 79 |
-
"cell_type": "code",
|
| 80 |
-
"execution_count": 14,
|
| 81 |
-
"metadata": {},
|
| 82 |
-
"outputs": [],
|
| 83 |
-
"source": [
|
| 84 |
-
"competitors = []\n",
|
| 85 |
-
"answers = []\n",
|
| 86 |
-
"messages = [{\"role\": \"user\", \"content\": question}]"
|
| 87 |
-
]
|
| 88 |
-
},
|
| 89 |
-
{
|
| 90 |
-
"cell_type": "code",
|
| 91 |
-
"execution_count": null,
|
| 92 |
-
"metadata": {},
|
| 93 |
-
"outputs": [],
|
| 94 |
-
"source": [
|
| 95 |
-
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 96 |
-
"model_name = \"gemini-2.0-flash\"\n",
|
| 97 |
-
"\n",
|
| 98 |
-
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 99 |
-
"answer = response.choices[0].message.content\n",
|
| 100 |
-
"\n",
|
| 101 |
-
"display(Markdown(answer))\n",
|
| 102 |
-
"competitors.append(model_name)\n",
|
| 103 |
-
"answers.append(answer)"
|
| 104 |
-
]
|
| 105 |
-
},
|
| 106 |
-
{
|
| 107 |
-
"cell_type": "code",
|
| 108 |
-
"execution_count": null,
|
| 109 |
-
"metadata": {},
|
| 110 |
-
"outputs": [],
|
| 111 |
-
"source": [
|
| 112 |
-
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 113 |
-
"model_name = \"deepseek-chat\"\n",
|
| 114 |
-
"\n",
|
| 115 |
-
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 116 |
-
"answer = response.choices[0].message.content\n",
|
| 117 |
-
"\n",
|
| 118 |
-
"display(Markdown(answer))\n",
|
| 119 |
-
"competitors.append(model_name)\n",
|
| 120 |
-
"answers.append(answer)"
|
| 121 |
-
]
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"cell_type": "code",
|
| 125 |
-
"execution_count": null,
|
| 126 |
-
"metadata": {},
|
| 127 |
-
"outputs": [],
|
| 128 |
-
"source": [
|
| 129 |
-
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 130 |
-
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 131 |
-
"\n",
|
| 132 |
-
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 133 |
-
"answer = response.choices[0].message.content\n",
|
| 134 |
-
"\n",
|
| 135 |
-
"display(Markdown(answer))\n",
|
| 136 |
-
"competitors.append(model_name)\n",
|
| 137 |
-
"answers.append(answer)\n"
|
| 138 |
-
]
|
| 139 |
-
},
|
| 140 |
-
{
|
| 141 |
-
"cell_type": "markdown",
|
| 142 |
-
"metadata": {},
|
| 143 |
-
"source": [
|
| 144 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 145 |
-
" <tr>\n",
|
| 146 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 147 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 148 |
-
" </td>\n",
|
| 149 |
-
" <td>\n",
|
| 150 |
-
" <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
|
| 151 |
-
" <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See the <A href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
|
| 152 |
-
" </span>\n",
|
| 153 |
-
" </td>\n",
|
| 154 |
-
" </tr>\n",
|
| 155 |
-
"</table>"
|
| 156 |
-
]
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"cell_type": "code",
|
| 160 |
-
"execution_count": null,
|
| 161 |
-
"metadata": {},
|
| 162 |
-
"outputs": [],
|
| 163 |
-
"source": [
|
| 164 |
-
"!ollama pull llama3.2"
|
| 165 |
-
]
|
| 166 |
-
},
|
| 167 |
-
{
|
| 168 |
-
"cell_type": "code",
|
| 169 |
-
"execution_count": 33,
|
| 170 |
-
"metadata": {},
|
| 171 |
-
"outputs": [],
|
| 172 |
-
"source": [
|
| 173 |
-
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 174 |
-
"\n",
|
| 175 |
-
"together = \"\"\n",
|
| 176 |
-
"for index, answer in enumerate(answers):\n",
|
| 177 |
-
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 178 |
-
" together += answer + \"\\n\\n\""
|
| 179 |
-
]
|
| 180 |
-
},
|
| 181 |
-
{
|
| 182 |
-
"cell_type": "code",
|
| 183 |
-
"execution_count": 36,
|
| 184 |
-
"metadata": {},
|
| 185 |
-
"outputs": [],
|
| 186 |
-
"source": [
|
| 187 |
-
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 188 |
-
"Each model has been given this question:\n",
|
| 189 |
-
"\n",
|
| 190 |
-
"{question}\n",
|
| 191 |
-
"\n",
|
| 192 |
-
"Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
|
| 193 |
-
"Respond with JSON, and only JSON, with the following format:\n",
|
| 194 |
-
"{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
|
| 195 |
-
"\n",
|
| 196 |
-
"Here are the responses from each competitor:\n",
|
| 197 |
-
"\n",
|
| 198 |
-
"{together}\n",
|
| 199 |
-
"\n",
|
| 200 |
-
"Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
|
| 201 |
-
]
|
| 202 |
-
},
|
| 203 |
-
{
|
| 204 |
-
"cell_type": "code",
|
| 205 |
-
"execution_count": 38,
|
| 206 |
-
"metadata": {},
|
| 207 |
-
"outputs": [],
|
| 208 |
-
"source": [
|
| 209 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 210 |
-
]
|
| 211 |
-
},
|
| 212 |
-
{
|
| 213 |
-
"cell_type": "markdown",
|
| 214 |
-
"metadata": {},
|
| 215 |
-
"source": [
|
| 216 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 217 |
-
" <tr>\n",
|
| 218 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 219 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 220 |
-
" </td>\n",
|
| 221 |
-
" <td>\n",
|
| 222 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 223 |
-
" <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
|
| 224 |
-
" </span>\n",
|
| 225 |
-
" </td>\n",
|
| 226 |
-
" </tr>\n",
|
| 227 |
-
"</table>"
|
| 228 |
-
]
|
| 229 |
-
},
|
| 230 |
-
{
|
| 231 |
-
"cell_type": "markdown",
|
| 232 |
-
"metadata": {},
|
| 233 |
-
"source": [
|
| 234 |
-
"1. Ensemble (Model Competition) Pattern\n",
|
| 235 |
-
"Description: The same prompt/question is sent to multiple different LLMs (OpenAI, Anthropic, Ollama, etc.).\n",
|
| 236 |
-
"Purpose: To compare the quality, style, and content of responses from different models.\n",
|
| 237 |
-
"Where in notebook:\n",
|
| 238 |
-
"The code sends the same question to several models and collects their answers in the competitors and answers lists.\n",
|
| 239 |
-
"\n",
|
| 240 |
-
"2. Judging/Evaluator Pattern\n",
|
| 241 |
-
"Description: After collecting responses from all models, another LLM is used as a “judge” to evaluate and rank the responses.\n",
|
| 242 |
-
"Purpose: To automate the assessment of which model gave the best answer, based on clarity and strength of argument.\n",
|
| 243 |
-
"Where in notebook:\n",
|
| 244 |
-
"The judge prompt is constructed, and an LLM is asked to rank the responses in JSON format.\n",
|
| 245 |
-
"\n",
|
| 246 |
-
"3. Self-Improvement/Meta-Reasoning Pattern\n",
|
| 247 |
-
"Description: The system not only generates answers but also reflects on and evaluates its own outputs (or those of its peers).\n",
|
| 248 |
-
"Purpose: To iteratively improve or select the best output, often used in advanced agentic systems.\n",
|
| 249 |
-
"Where in notebook:\n",
|
| 250 |
-
"The “judge” LLM is an example of meta-reasoning, as it reasons about the quality of other LLMs’ outputs.\n",
|
| 251 |
-
"\n",
|
| 252 |
-
"4. Chain-of-Thought/Decomposition Pattern (to a lesser extent)\n",
|
| 253 |
-
"Description: Breaking down a complex task into subtasks (e.g., generate question → get answers → evaluate answers).\n",
|
| 254 |
-
"Purpose: To improve reliability and interpretability by structuring the workflow.\n",
|
| 255 |
-
"Where in notebook:\n",
|
| 256 |
-
"The workflow is decomposed into:\n",
|
| 257 |
-
"Generating a challenging question\n",
|
| 258 |
-
"Getting answers from multiple models\n",
|
| 259 |
-
"Judging the answers\n",
|
| 260 |
-
"\n",
|
| 261 |
-
"In short:\n",
|
| 262 |
-
"This notebook uses the Ensemble/Competition, Judging/Evaluator, and Meta-Reasoning agentic patterns, and also demonstrates a simple form of Decomposition by structuring the workflow into clear stages.\n",
|
| 263 |
-
"If you want to add more agentic patterns, you could try things like:\n",
|
| 264 |
-
"Reflexion (let models critique and revise their own answers)\n",
|
| 265 |
-
"Tool Use (let models call external tools or APIs)\n",
|
| 266 |
-
"Planning (let a model plan the steps before answering)"
|
| 267 |
-
]
|
| 268 |
-
},
|
| 269 |
-
{
|
| 270 |
-
"cell_type": "markdown",
|
| 271 |
-
"metadata": {},
|
| 272 |
-
"source": [
|
| 273 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 274 |
-
" <tr>\n",
|
| 275 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 276 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 277 |
-
" </td>\n",
|
| 278 |
-
" <td>\n",
|
| 279 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 280 |
-
" <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
|
| 281 |
-
" are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
|
| 282 |
-
" to business projects where accuracy is critical.\n",
|
| 283 |
-
" </span>\n",
|
| 284 |
-
" </td>\n",
|
| 285 |
-
" </tr>\n",
|
| 286 |
-
"</table>"
|
| 287 |
-
]
|
| 288 |
-
}
|
| 289 |
-
],
|
| 290 |
-
"metadata": {
|
| 291 |
-
"kernelspec": {
|
| 292 |
-
"display_name": ".venv",
|
| 293 |
-
"language": "python",
|
| 294 |
-
"name": "python3"
|
| 295 |
-
},
|
| 296 |
-
"language_info": {
|
| 297 |
-
"codemirror_mode": {
|
| 298 |
-
"name": "ipython",
|
| 299 |
-
"version": 3
|
| 300 |
-
},
|
| 301 |
-
"file_extension": ".py",
|
| 302 |
-
"mimetype": "text/x-python",
|
| 303 |
-
"name": "python",
|
| 304 |
-
"nbconvert_exporter": "python",
|
| 305 |
-
"pygments_lexer": "ipython3",
|
| 306 |
-
"version": "3.12.8"
|
| 307 |
-
}
|
| 308 |
-
},
|
| 309 |
-
"nbformat": 4,
|
| 310 |
-
"nbformat_minor": 2
|
| 311 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_reflection_pattern2.ipynb
DELETED
|
@@ -1,999 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Welcome to the Second Lab - Exercise: Advanced Agentic Design Patterns\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"This notebook extends the previous lab by adding the **Reflection Pattern** to improve response quality.\n",
|
| 10 |
-
"\n",
|
| 11 |
-
"### Patterns used in the original lab:\n",
|
| 12 |
-
"1. **Multi-Model Comparison Pattern** - Comparing multiple models\n",
|
| 13 |
-
"2. **Judge/Evaluator Pattern** - Evaluation by a judge model\n",
|
| 14 |
-
"\n",
|
| 15 |
-
"### New pattern added:\n",
|
| 16 |
-
"3. **Reflection Pattern** - Self-improvement of responses"
|
| 17 |
-
]
|
| 18 |
-
},
|
| 19 |
-
{
|
| 20 |
-
"cell_type": "markdown",
|
| 21 |
-
"metadata": {},
|
| 22 |
-
"source": [
|
| 23 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 24 |
-
" <tr>\n",
|
| 25 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 26 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 27 |
-
" </td>\n",
|
| 28 |
-
" <td>\n",
|
| 29 |
-
" <h2 style=\"color:#ff7800;\">New Pattern: Reflection</h2>\n",
|
| 30 |
-
" <span style=\"color:#ff7800;\">The Reflection Pattern allows a model to critique and improve its own response. This is particularly useful for complex tasks requiring nuance and precision.</span>\n",
|
| 31 |
-
" </td>\n",
|
| 32 |
-
" </tr>\n",
|
| 33 |
-
"</table>"
|
| 34 |
-
]
|
| 35 |
-
},
|
| 36 |
-
{
|
| 37 |
-
"cell_type": "code",
|
| 38 |
-
"execution_count": 1,
|
| 39 |
-
"metadata": {},
|
| 40 |
-
"outputs": [
|
| 41 |
-
{
|
| 42 |
-
"data": {
|
| 43 |
-
"text/plain": [
|
| 44 |
-
"True"
|
| 45 |
-
]
|
| 46 |
-
},
|
| 47 |
-
"execution_count": 1,
|
| 48 |
-
"metadata": {},
|
| 49 |
-
"output_type": "execute_result"
|
| 50 |
-
}
|
| 51 |
-
],
|
| 52 |
-
"source": [
|
| 53 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 54 |
-
"\n",
|
| 55 |
-
"import os\n",
|
| 56 |
-
"import json\n",
|
| 57 |
-
"from dotenv import load_dotenv\n",
|
| 58 |
-
"from openai import OpenAI\n",
|
| 59 |
-
"from anthropic import Anthropic\n",
|
| 60 |
-
"from IPython.display import Markdown, display\n",
|
| 61 |
-
"\n",
|
| 62 |
-
"# Always remember to do this!\n",
|
| 63 |
-
"load_dotenv(override=True)"
|
| 64 |
-
]
|
| 65 |
-
},
|
| 66 |
-
{
|
| 67 |
-
"cell_type": "code",
|
| 68 |
-
"execution_count": 2,
|
| 69 |
-
"metadata": {},
|
| 70 |
-
"outputs": [
|
| 71 |
-
{
|
| 72 |
-
"name": "stdout",
|
| 73 |
-
"output_type": "stream",
|
| 74 |
-
"text": [
|
| 75 |
-
"OpenAI API Key exists and begins sk-1kYcH\n",
|
| 76 |
-
"Anthropic API Key exists and begins sk-ant-\n",
|
| 77 |
-
"Google API Key not set (and this is optional)\n",
|
| 78 |
-
"DeepSeek API Key not set (and this is optional)\n",
|
| 79 |
-
"Groq API Key not set (and this is optional)\n"
|
| 80 |
-
]
|
| 81 |
-
}
|
| 82 |
-
],
|
| 83 |
-
"source": [
|
| 84 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 85 |
-
"\n",
|
| 86 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 87 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 88 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 89 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 90 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 91 |
-
"\n",
|
| 92 |
-
"if openai_api_key:\n",
|
| 93 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 94 |
-
"else:\n",
|
| 95 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 96 |
-
" \n",
|
| 97 |
-
"if anthropic_api_key:\n",
|
| 98 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 99 |
-
"else:\n",
|
| 100 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 101 |
-
"\n",
|
| 102 |
-
"if google_api_key:\n",
|
| 103 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 104 |
-
"else:\n",
|
| 105 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 106 |
-
"\n",
|
| 107 |
-
"if deepseek_api_key:\n",
|
| 108 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 109 |
-
"else:\n",
|
| 110 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 111 |
-
"\n",
|
| 112 |
-
"if groq_api_key:\n",
|
| 113 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 114 |
-
"else:\n",
|
| 115 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 116 |
-
]
|
| 117 |
-
},
|
| 118 |
-
{
|
| 119 |
-
"cell_type": "markdown",
|
| 120 |
-
"metadata": {},
|
| 121 |
-
"source": [
|
| 122 |
-
"## Step 1: Generate Initial Question (Multi-Model Pattern)"
|
| 123 |
-
]
|
| 124 |
-
},
|
| 125 |
-
{
|
| 126 |
-
"cell_type": "code",
|
| 127 |
-
"execution_count": 3,
|
| 128 |
-
"metadata": {},
|
| 129 |
-
"outputs": [
|
| 130 |
-
{
|
| 131 |
-
"name": "stdout",
|
| 132 |
-
"output_type": "stream",
|
| 133 |
-
"text": [
|
| 134 |
-
"Generated Question:\n",
|
| 135 |
-
"A wealthy philanthropist has developed a new drug that can cure a rare but fatal disease affecting a small population. However, the drug is expensive to produce and the philanthropist only has enough resources to manufacture a limited supply. At the same time, a competing pharmaceutical company has discovered the cure but plans to charge exorbitant prices, making it inaccessible for most patients. \n",
|
| 136 |
-
"\n",
|
| 137 |
-
"The philanthropist learns that if they invest their resources into manufacturing the drug, it can be distributed at a lower cost but only to a select few who are already on a waiting list, prioritizing those who are most likely to recover. Alternatively, the philanthropist could sell the formula to the competing company for a substantial profit, ensuring that a broader population can access the cure, albeit at high prices that many cannot afford.\n",
|
| 138 |
-
"\n",
|
| 139 |
-
"The dilemma: Should the philanthropist prioritize the immediate health of a few individuals by providing the cure at a lower cost, or should they consider the greater good by allowing the competitive company to distribute the cure to a wider audience at a higher price?\n"
|
| 140 |
-
]
|
| 141 |
-
}
|
| 142 |
-
],
|
| 143 |
-
"source": [
|
| 144 |
-
"# Generate a challenging question for the models to answer\n",
|
| 145 |
-
"\n",
|
| 146 |
-
"request = \"Please come up with a challenging ethical dilemma that requires careful moral reasoning and consideration of multiple perspectives. \"\n",
|
| 147 |
-
"request += \"The dilemma should involve conflicting values and have no clear-cut answer. Answer only with the dilemma, no explanation.\"\n",
|
| 148 |
-
"\n",
|
| 149 |
-
"messages = [{\"role\": \"user\", \"content\": request}]\n",
|
| 150 |
-
"\n",
|
| 151 |
-
"openai = OpenAI()\n",
|
| 152 |
-
"response = openai.chat.completions.create(\n",
|
| 153 |
-
" model=\"gpt-4o-mini\",\n",
|
| 154 |
-
" messages=messages,\n",
|
| 155 |
-
")\n",
|
| 156 |
-
"\n",
|
| 157 |
-
"question = response.choices[0].message.content\n",
|
| 158 |
-
"print(\"Generated Question:\")\n",
|
| 159 |
-
"print(question)"
|
| 160 |
-
]
|
| 161 |
-
},
|
| 162 |
-
{
|
| 163 |
-
"cell_type": "markdown",
|
| 164 |
-
"metadata": {},
|
| 165 |
-
"source": [
|
| 166 |
-
"## Step 2: Get Initial Responses from Multiple Models"
|
| 167 |
-
]
|
| 168 |
-
},
|
| 169 |
-
{
|
| 170 |
-
"cell_type": "code",
|
| 171 |
-
"execution_count": 4,
|
| 172 |
-
"metadata": {},
|
| 173 |
-
"outputs": [],
|
| 174 |
-
"source": [
|
| 175 |
-
"def get_initial_response(client, model_name, question, is_anthropic=False):\n",
|
| 176 |
-
" \"\"\"Get initial response from a model\"\"\"\n",
|
| 177 |
-
" messages = [{\"role\": \"user\", \"content\": question}]\n",
|
| 178 |
-
" \n",
|
| 179 |
-
" if is_anthropic:\n",
|
| 180 |
-
" response = client.messages.create(\n",
|
| 181 |
-
" model=model_name, \n",
|
| 182 |
-
" messages=messages, \n",
|
| 183 |
-
" max_tokens=1000\n",
|
| 184 |
-
" )\n",
|
| 185 |
-
" return response.content[0].text\n",
|
| 186 |
-
" else:\n",
|
| 187 |
-
" response = client.chat.completions.create(\n",
|
| 188 |
-
" model=model_name, \n",
|
| 189 |
-
" messages=messages\n",
|
| 190 |
-
" )\n",
|
| 191 |
-
" return response.choices[0].message.content"
|
| 192 |
-
]
|
| 193 |
-
},
|
| 194 |
-
{
|
| 195 |
-
"cell_type": "code",
|
| 196 |
-
"execution_count": 5,
|
| 197 |
-
"metadata": {},
|
| 198 |
-
"outputs": [],
|
| 199 |
-
"source": [
|
| 200 |
-
"# Configure clients\n",
|
| 201 |
-
"openai_client = OpenAI()\n",
|
| 202 |
-
"claude_client = Anthropic() if anthropic_api_key else None\n",
|
| 203 |
-
"gemini_client = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\") if google_api_key else None\n",
|
| 204 |
-
"deepseek_client = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\") if deepseek_api_key else None\n",
|
| 205 |
-
"groq_client = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\") if groq_api_key else None"
|
| 206 |
-
]
|
| 207 |
-
},
|
| 208 |
-
{
|
| 209 |
-
"cell_type": "code",
|
| 210 |
-
"execution_count": 6,
|
| 211 |
-
"metadata": {},
|
| 212 |
-
"outputs": [
|
| 213 |
-
{
|
| 214 |
-
"name": "stdout",
|
| 215 |
-
"output_type": "stream",
|
| 216 |
-
"text": [
|
| 217 |
-
"\n",
|
| 218 |
-
"=== INITIAL RESPONSES ===\n",
|
| 219 |
-
"\n",
|
| 220 |
-
"**gpt-4o-mini:**\n"
|
| 221 |
-
]
|
| 222 |
-
},
|
| 223 |
-
{
|
| 224 |
-
"data": {
|
| 225 |
-
"text/markdown": [
|
| 226 |
-
"This ethical dilemma presents a challenging decision for the philanthropist, who must weigh the immediate health needs of a few individuals against the broader societal implications of drug distribution and access.\n",
|
| 227 |
-
"\n",
|
| 228 |
-
"### Option 1: Prioritizing Immediate Health\n",
|
| 229 |
-
"\n",
|
| 230 |
-
"If the philanthropist chooses to manufacture the drug and distribute it at a lower cost to those on the waiting list, they are directly addressing the pressing health needs of a select few individuals who are already vulnerable. This action prioritizes compassion and the moral obligation to help those who are suffering. By ensuring that the drug is available to those with the highest likelihood of recovery, the philanthropist demonstrates an ethical commitment to saving lives and reducing suffering in the short term.\n",
|
| 231 |
-
"\n",
|
| 232 |
-
"However, this approach has limitations. By distributing the drug to only a small number of patients, the philanthropist may overlook other individuals who could benefit from the cure. Additionally, this solution does not address the systemic issue of access to healthcare and affordable medications for the larger population suffering from the disease.\n",
|
| 233 |
-
"\n",
|
| 234 |
-
"### Option 2: Considering the Greater Good\n",
|
| 235 |
-
"\n",
|
| 236 |
-
"On the other hand, selling the formula to the competing pharmaceutical company for a substantial profit could lead to a wider distribution of the drug, although at a higher price point that may make it inaccessible to many patients. In this scenario, the philanthropist uses their financial gain to potentially invest in other healthcare initiatives or research, thus contributing to the long-term improvement of medical care or addressing related health issues.\n",
|
| 237 |
-
"\n",
|
| 238 |
-
"This choice raises ethical concerns regarding the prioritization of profit over compassion and the risk that many individuals will remain unable to afford the life-saving treatment. It also creates a tension between the ideals of philanthropy and the realities of the pharmaceutical industry, which often operates on profit motives rather than altruistic goals.\n",
|
| 239 |
-
"\n",
|
| 240 |
-
"### Balancing the Two Options\n",
|
| 241 |
-
"\n",
|
| 242 |
-
"A possible compromise could be for the philanthropist to negotiate a deal with the pharmaceutical company that ensures a tiered pricing structure, where those who can afford the drug pay more while discounts or alternative funding are provided for low-income patients. This could help bridge the gap between immediate health needs and wider access.\n",
|
| 243 |
-
"\n",
|
| 244 |
-
"Ultimately, the decision comes down to the philanthropist's values and vision for their impact on public health. Do they prioritize saving a few lives in the short term or seek a more sustainable, albeit imperfect, solution that aims at broader access over a longer timeframe? The complexity of the dilemma emphasizes the need for thoughtful deliberation on how best to serve both individual health needs and the greater public good."
|
| 245 |
-
],
|
| 246 |
-
"text/plain": [
|
| 247 |
-
"<IPython.core.display.Markdown object>"
|
| 248 |
-
]
|
| 249 |
-
},
|
| 250 |
-
"metadata": {},
|
| 251 |
-
"output_type": "display_data"
|
| 252 |
-
},
|
| 253 |
-
{
|
| 254 |
-
"name": "stdout",
|
| 255 |
-
"output_type": "stream",
|
| 256 |
-
"text": [
|
| 257 |
-
"\n",
|
| 258 |
-
"==================================================\n",
|
| 259 |
-
"\n",
|
| 260 |
-
"**claude-3-7-sonnet-latest:**\n"
|
| 261 |
-
]
|
| 262 |
-
},
|
| 263 |
-
{
|
| 264 |
-
"data": {
|
| 265 |
-
"text/markdown": [
|
| 266 |
-
"# The Philanthropist's Dilemma\n",
|
| 267 |
-
"\n",
|
| 268 |
-
"This is a complex ethical dilemma that involves several important considerations:\n",
|
| 269 |
-
"\n",
|
| 270 |
-
"## Key Ethical Tensions\n",
|
| 271 |
-
"\n",
|
| 272 |
-
"- **Limited access at affordable prices** vs. **wider access at unaffordable prices**\n",
|
| 273 |
-
"- **Immediate relief for a few** vs. **potential long-term access for many**\n",
|
| 274 |
-
"- **Direct control over distribution** vs. **surrendering control to profit-motivated actors**\n",
|
| 275 |
-
"\n",
|
| 276 |
-
"## Considerations for Manufacturing the Drug Directly\n",
|
| 277 |
-
"\n",
|
| 278 |
-
"**Benefits:**\n",
|
| 279 |
-
"- Ensures the most vulnerable patients receive treatment based on medical need rather than ability to pay\n",
|
| 280 |
-
"- Maintains the philanthropist's ethical vision and control over distribution\n",
|
| 281 |
-
"- Sets a precedent for compassionate drug pricing\n",
|
| 282 |
-
"\n",
|
| 283 |
-
"**Drawbacks:**\n",
|
| 284 |
-
"- Limited overall reach due to resource constraints\n",
|
| 285 |
-
"- Potentially slower scaling of production\n",
|
| 286 |
-
"- Many patients may receive no treatment at all\n",
|
| 287 |
-
"\n",
|
| 288 |
-
"## Considerations for Selling to the Pharmaceutical Company\n",
|
| 289 |
-
"\n",
|
| 290 |
-
"**Benefits:**\n",
|
| 291 |
-
"- Potentially greater production capacity and distribution reach\n",
|
| 292 |
-
"- The philanthropist could use profits to subsidize costs for those who cannot afford it\n",
|
| 293 |
-
"- Might accelerate further research and development\n",
|
| 294 |
-
"\n",
|
| 295 |
-
"**Drawbacks:**\n",
|
| 296 |
-
"- Many patients would be excluded based on financial means\n",
|
| 297 |
-
"- Surrenders control over an essential medicine to profit-motivated decision-making\n",
|
| 298 |
-
"- Could establish a problematic precedent for pricing life-saving medications\n",
|
| 299 |
-
"\n",
|
| 300 |
-
"This dilemma reflects broader tensions in healthcare ethics between utilitarian approaches (helping the most people) and justice-based approaches (ensuring fair access based on need rather than wealth).\n",
|
| 301 |
-
"\n",
|
| 302 |
-
"There might be creative third options worth exploring, such as licensing agreements with price caps, creating a non-profit manufacturing entity, or partnering with governments to ensure broader affordable access."
|
| 303 |
-
],
|
| 304 |
-
"text/plain": [
|
| 305 |
-
"<IPython.core.display.Markdown object>"
|
| 306 |
-
]
|
| 307 |
-
},
|
| 308 |
-
"metadata": {},
|
| 309 |
-
"output_type": "display_data"
|
| 310 |
-
},
|
| 311 |
-
{
|
| 312 |
-
"name": "stdout",
|
| 313 |
-
"output_type": "stream",
|
| 314 |
-
"text": [
|
| 315 |
-
"\n",
|
| 316 |
-
"==================================================\n",
|
| 317 |
-
"\n"
|
| 318 |
-
]
|
| 319 |
-
}
|
| 320 |
-
],
|
| 321 |
-
"source": [
|
| 322 |
-
"# Collect initial responses\n",
|
| 323 |
-
"initial_responses = {}\n",
|
| 324 |
-
"competitors = []\n",
|
| 325 |
-
"\n",
|
| 326 |
-
"models = [\n",
|
| 327 |
-
" (\"gpt-4o-mini\", openai_client, False),\n",
|
| 328 |
-
" (\"claude-3-7-sonnet-latest\", claude_client, True),\n",
|
| 329 |
-
" (\"gemini-2.0-flash\", gemini_client, False),\n",
|
| 330 |
-
" (\"deepseek-chat\", deepseek_client, False),\n",
|
| 331 |
-
" (\"llama-3.3-70b-versatile\", groq_client, False),\n",
|
| 332 |
-
"]\n",
|
| 333 |
-
"\n",
|
| 334 |
-
"print(\"\\n=== INITIAL RESPONSES ===\\n\")\n",
|
| 335 |
-
"\n",
|
| 336 |
-
"for model_name, client, is_anthropic in models:\n",
|
| 337 |
-
" if client:\n",
|
| 338 |
-
" try:\n",
|
| 339 |
-
" response = get_initial_response(client, model_name, question, is_anthropic)\n",
|
| 340 |
-
" initial_responses[model_name] = response\n",
|
| 341 |
-
" competitors.append(model_name)\n",
|
| 342 |
-
" \n",
|
| 343 |
-
" print(f\"**{model_name}:**\")\n",
|
| 344 |
-
" display(Markdown(response))\n",
|
| 345 |
-
" print(\"\\n\" + \"=\"*50 + \"\\n\")\n",
|
| 346 |
-
" except Exception as e:\n",
|
| 347 |
-
" print(f\"Error with {model_name}: {e}\")"
|
| 348 |
-
]
|
| 349 |
-
},
|
| 350 |
-
{
|
| 351 |
-
"cell_type": "markdown",
|
| 352 |
-
"metadata": {},
|
| 353 |
-
"source": [
|
| 354 |
-
"## Step 3: NEW PATTERN - Reflection Pattern"
|
| 355 |
-
]
|
| 356 |
-
},
|
| 357 |
-
{
|
| 358 |
-
"cell_type": "code",
|
| 359 |
-
"execution_count": 7,
|
| 360 |
-
"metadata": {},
|
| 361 |
-
"outputs": [],
|
| 362 |
-
"source": [
|
| 363 |
-
"def apply_reflection_pattern(client, model_name, original_question, initial_response, is_anthropic=False):\n",
|
| 364 |
-
" \"\"\"Apply the Reflection Pattern to improve a response\"\"\"\n",
|
| 365 |
-
" \n",
|
| 366 |
-
" reflection_prompt = f\"\"\"\n",
|
| 367 |
-
"You previously received this question:\n",
|
| 368 |
-
"{original_question}\n",
|
| 369 |
-
"\n",
|
| 370 |
-
"Here was your initial response:\n",
|
| 371 |
-
"{initial_response}\n",
|
| 372 |
-
"\n",
|
| 373 |
-
"Now, as a critical expert, analyze your own response:\n",
|
| 374 |
-
"1. What are the strengths of this response?\n",
|
| 375 |
-
"2. What important perspectives are missing?\n",
|
| 376 |
-
"3. Are there any biases or blind spots in the analysis?\n",
|
| 377 |
-
"4. How could you improve this response?\n",
|
| 378 |
-
"\n",
|
| 379 |
-
"After this self-critique, provide an IMPROVED response that takes into account your observations.\n",
|
| 380 |
-
"\n",
|
| 381 |
-
"Response format:\n",
|
| 382 |
-
"## Self-Critique\n",
|
| 383 |
-
"[Your critical analysis of the initial response]\n",
|
| 384 |
-
"\n",
|
| 385 |
-
"## Improved Response\n",
|
| 386 |
-
"[Your revised and improved response]\n",
|
| 387 |
-
"\"\"\"\n",
|
| 388 |
-
" \n",
|
| 389 |
-
" messages = [{\"role\": \"user\", \"content\": reflection_prompt}]\n",
|
| 390 |
-
" \n",
|
| 391 |
-
" if is_anthropic:\n",
|
| 392 |
-
" response = client.messages.create(\n",
|
| 393 |
-
" model=model_name, \n",
|
| 394 |
-
" messages=messages, \n",
|
| 395 |
-
" max_tokens=1500\n",
|
| 396 |
-
" )\n",
|
| 397 |
-
" return response.content[0].text\n",
|
| 398 |
-
" else:\n",
|
| 399 |
-
" response = client.chat.completions.create(\n",
|
| 400 |
-
" model=model_name, \n",
|
| 401 |
-
" messages=messages\n",
|
| 402 |
-
" )\n",
|
| 403 |
-
" return response.choices[0].message.content"
|
| 404 |
-
]
|
| 405 |
-
},
|
| 406 |
-
{
|
| 407 |
-
"cell_type": "code",
|
| 408 |
-
"execution_count": 8,
|
| 409 |
-
"metadata": {},
|
| 410 |
-
"outputs": [
|
| 411 |
-
{
|
| 412 |
-
"name": "stdout",
|
| 413 |
-
"output_type": "stream",
|
| 414 |
-
"text": [
|
| 415 |
-
"\n",
|
| 416 |
-
"=== RESPONSES AFTER REFLECTION ===\n",
|
| 417 |
-
"\n",
|
| 418 |
-
"**gpt-4o-mini - After Reflection:**\n"
|
| 419 |
-
]
|
| 420 |
-
},
|
| 421 |
-
{
|
| 422 |
-
"data": {
|
| 423 |
-
"text/markdown": [
|
| 424 |
-
"## Self-Critique\n",
|
| 425 |
-
"1. **Strengths of this Response:**\n",
|
| 426 |
-
" - The response thoroughly outlines both options available to the philanthropist, providing a balanced view of the ethical implications of each choice.\n",
|
| 427 |
-
" - It acknowledges the immediate health needs of affected individuals as well as the broader societal implications of drug distribution.\n",
|
| 428 |
-
" - It introduces a potential compromise solution, which adds depth to the analysis and suggests a more nuanced approach to the dilemma.\n",
|
| 429 |
-
"\n",
|
| 430 |
-
"2. **Important Perspectives Missing:**\n",
|
| 431 |
-
" - The response does not adequately consider the potential operational and logistical challenges in manufacturing and distributing the drug at a lower cost, including regulatory hurdles and the scalability of production.\n",
|
| 432 |
-
" - There is limited discussion on the emotional impact of the decision on the patients and their families, which could influence the philanthropist's considerations.\n",
|
| 433 |
-
" - The perspective of other stakeholders, such as healthcare providers and ethicists, is not introduced.\n",
|
| 434 |
-
"\n",
|
| 435 |
-
"3. **Biases or Blind Spots in the Analysis:**\n",
|
| 436 |
-
" - The response may lean towards prioritizing compassion over economic pragmatism, possibly downplaying the complexities involved in pharmaceutical economics and the realities that arise from selling to a corporation with profit motives.\n",
|
| 437 |
-
" - It assumes a binary choice rather than considering other stakeholder impacts and longer-term systemic solutions.\n",
|
| 438 |
-
"\n",
|
| 439 |
-
"4. **How to Improve This Response:**\n",
|
| 440 |
-
" - Include more contextual factors that might affect the decision, such as regulatory considerations, patient demographics, and healthcare infrastructure.\n",
|
| 441 |
-
" - Expand on the emotional and psychological aspects of the decision-making process for both the philanthropist and the patients involved.\n",
|
| 442 |
-
" - Address the potential for future societal implications if the competing company monopolizes the market after acquiring the formula.\n",
|
| 443 |
-
"\n",
|
| 444 |
-
"## Improved Response\n",
|
| 445 |
-
"This ethical dilemma presents the philanthropist with a complex decision regarding how best to utilize limited resources to maximize the benefit for individuals suffering from a rare but fatal disease. The two primary options – providing a low-cost supply to a select few or selling the formula for broader but costly distribution – both highlight significant ethical considerations.\n",
|
| 446 |
-
"\n",
|
| 447 |
-
"### Option 1: Prioritizing Immediate Health\n",
|
| 448 |
-
"By choosing to manufacture the drug at a lower cost for those on the waiting list, the philanthropist opts to directly address the urgent health needs of vulnerable individuals. This approach reflects a moral obligation to alleviate suffering and save lives in the short term. Prioritizing individuals with the highest likelihood of recovery can lead to tangible, immediate outcomes for those patients and their families.\n",
|
| 449 |
-
"\n",
|
| 450 |
-
"However, there are operational challenges associated with this choice. Limited production capabilities may mean that only a fraction of those in need can actually receive the drug, leaving many others without hope. Additionally, this decision doesn't resolve the systemic issues within healthcare, such as overall treatment accessibility and drug pricing, which may persist if not tackled holistically.\n",
|
| 451 |
-
"\n",
|
| 452 |
-
"### Option 2: Considering the Greater Good\n",
|
| 453 |
-
"Alternatively, selling the formula to the competing pharmaceutical company could result in wider distribution of the drug and potentially more patients benefiting from the cure, albeit at higher prices. This choice could finance further philanthropic efforts or investments in healthcare that might ultimately lead to broader long-term improvements in public health.\n",
|
| 454 |
-
"\n",
|
| 455 |
-
"However, ethical concerns arise when considering the high pricing of the cure. The decision may disproportionately disadvantage lower-income patients, perpetuating healthcare inequities. Furthermore, there is the risk that this choice could enable the pharmaceutical company to monopolize treatment options, furthering exploitation in the industry.\n",
|
| 456 |
-
"\n",
|
| 457 |
-
"### A Balanced Approach\n",
|
| 458 |
-
"To navigate this complex dilemma more thoughtfully, the philanthropist could explore a compromise by negotiating with the pharmaceutical company to establish a tiered pricing structure. This could create a system where the drug is offered at a reduced price for low-income patients, while ensuring sustainability for the company through higher prices for those who can afford them. Additionally, the philanthropist might advocate for a commitment from the company to invest in generics or alternative distribution methods to enhance accessibility.\n",
|
| 459 |
-
"\n",
|
| 460 |
-
"### Conclusion\n",
|
| 461 |
-
"The choice ultimately hinges on the philanthropist's values and vision for their impact on public health. This decision requires careful consideration of immediate health benefits, long-term accessibility, and the emotional ramifications for affected individuals. By weighing the implications of each option and considering collaborative solutions, the philanthropist can work towards an outcome that promotes both individual care and broader societal well-being."
|
| 462 |
-
],
|
| 463 |
-
"text/plain": [
|
| 464 |
-
"<IPython.core.display.Markdown object>"
|
| 465 |
-
]
|
| 466 |
-
},
|
| 467 |
-
"metadata": {},
|
| 468 |
-
"output_type": "display_data"
|
| 469 |
-
},
|
| 470 |
-
{
|
| 471 |
-
"name": "stdout",
|
| 472 |
-
"output_type": "stream",
|
| 473 |
-
"text": [
|
| 474 |
-
"\n",
|
| 475 |
-
"==================================================\n",
|
| 476 |
-
"\n",
|
| 477 |
-
"**claude-3-7-sonnet-latest - After Reflection:**\n"
|
| 478 |
-
]
|
| 479 |
-
},
|
| 480 |
-
{
|
| 481 |
-
"data": {
|
| 482 |
-
"text/markdown": [
|
| 483 |
-
"## Self-Critique\n",
|
| 484 |
-
"\n",
|
| 485 |
-
"### Strengths of the initial response:\n",
|
| 486 |
-
"- Well-structured analysis that clearly outlines the ethical tensions\n",
|
| 487 |
-
"- Presents balanced considerations for both options\n",
|
| 488 |
-
"- Mentions potential third options beyond the binary choice\n",
|
| 489 |
-
"- Identifies the broader ethical frameworks at play (utilitarian vs. justice-based approaches)\n",
|
| 490 |
-
"\n",
|
| 491 |
-
"### Missing perspectives:\n",
|
| 492 |
-
"1. **Stakeholder analysis**: The response lacks a thorough examination of all affected parties (patients, healthcare systems, future patients, etc.)\n",
|
| 493 |
-
"2. **Timeline considerations**: No discussion of short-term vs. long-term consequences beyond immediate access\n",
|
| 494 |
-
"3. **Public health impact**: Limited analysis of how each option affects overall public health outcomes\n",
|
| 495 |
-
"4. **Precedent-setting effects**: Inadequate exploration of how this decision might influence future pharmaceutical development and pricing\n",
|
| 496 |
-
"5. **Regulatory context**: No mention of potential government intervention, price controls, or other regulatory factors\n",
|
| 497 |
-
"6. **Global justice perspective**: No consideration of how this decision affects different regions/countries\n",
|
| 498 |
-
"\n",
|
| 499 |
-
"### Biases and blind spots:\n",
|
| 500 |
-
"1. **False dichotomy**: Despite mentioning \"third options,\" the analysis primarily treats this as a binary choice\n",
|
| 501 |
-
"2. **Western/developed-world bias**: Assumes a market-based healthcare system without considering different global contexts\n",
|
| 502 |
-
"3. **Individual-focused ethics**: Overemphasizes individual choice rather than institutional or systemic responsibilities\n",
|
| 503 |
-
"4. **Overly abstract**: The analysis lacks concrete examples or case studies that might inform the decision\n",
|
| 504 |
-
"5. **Neglect of power dynamics**: Doesn't address the power imbalance between corporations, individuals, and patients\n",
|
| 505 |
-
"\n",
|
| 506 |
-
"### Improvement opportunities:\n",
|
| 507 |
-
"1. Provide a more nuanced spectrum of options beyond the binary choice\n",
|
| 508 |
-
"2. Include more stakeholder perspectives, particularly patient voices\n",
|
| 509 |
-
"3. Consider real-world case studies of similar pharmaceutical dilemmas\n",
|
| 510 |
-
"4. Address systemic issues in drug development and pharmaceutical pricing\n",
|
| 511 |
-
"5. Explore collaborative approaches that leverage multiple institutions\n",
|
| 512 |
-
"6. Discuss intellectual property rights and their ethical implications\n",
|
| 513 |
-
"\n",
|
| 514 |
-
"## Improved Response\n",
|
| 515 |
-
"\n",
|
| 516 |
-
"# The Philanthropist's Dilemma: A Multidimensional Ethical Analysis\n",
|
| 517 |
-
"\n",
|
| 518 |
-
"This scenario presents not simply a binary choice but a complex ethical landscape involving multiple stakeholders, systemic factors, and competing values.\n",
|
| 519 |
-
"\n",
|
| 520 |
-
"## Stakeholder Analysis\n",
|
| 521 |
-
"\n",
|
| 522 |
-
"**Patients and families:**\n",
|
| 523 |
-
"- Those currently suffering need immediate access regardless of mechanism\n",
|
| 524 |
-
"- Future patients have interests in sustainable development of treatments\n",
|
| 525 |
-
"- Economic diversity among patients means affordability affects different groups unequally\n",
|
| 526 |
-
"\n",
|
| 527 |
-
"**Healthcare systems:**\n",
|
| 528 |
-
"- Must allocate limited resources across competing priorities\n",
|
| 529 |
-
"- High-priced drugs can strain budgets and force difficult coverage decisions\n",
|
| 530 |
-
"- Precedents set now affect future negotiations with pharmaceutical companies\n",
|
| 531 |
-
"\n",
|
| 532 |
-
"**Research community:**\n",
|
| 533 |
-
"- Incentives for developing treatments for rare diseases are influenced by such cases\n",
|
| 534 |
-
"- How intellectual property is handled affects future research priorities\n",
|
| 535 |
-
"\n",
|
| 536 |
-
"## Ethical Frameworks Worth Considering\n",
|
| 537 |
-
"\n",
|
| 538 |
-
"1. **Distributive justice**: Who should receive limited resources? What constitutes fair allocation?\n",
|
| 539 |
-
"2. **Rights-based approach**: Do patients have a right to life-saving medication regardless of cost?\n",
|
| 540 |
-
"3. **Consequentialist assessment**: Which option produces the best outcomes for the most people over time?\n",
|
| 541 |
-
"4. **Virtue ethics**: What would a virtuous philanthropist do in this situation?\n",
|
| 542 |
-
"5. **Global justice**: How does this decision affect healthcare equity across different regions?\n",
|
| 543 |
-
"\n",
|
| 544 |
-
"## Spectrum of Options\n",
|
| 545 |
-
"\n",
|
| 546 |
-
"Rather than two mutually exclusive choices, consider a spectrum of possibilities:\n",
|
| 547 |
-
"\n",
|
| 548 |
-
"1. **Direct manufacturing with tiered pricing**: Manufacture independently but implement income-based pricing to maximize access while maintaining sustainability\n",
|
| 549 |
-
"\n",
|
| 550 |
-
"2. **Conditional licensing**: License the formula with contractual price controls, distribution requirements, and accessibility guarantees\n",
|
| 551 |
-
"\n",
|
| 552 |
-
"3. **Public-private partnership**: Collaborate with governments, NGOs, and selected pharmaceutical partners to ensure broad, affordable access\n",
|
| 553 |
-
"\n",
|
| 554 |
-
"4. **Open-source approach**: Release the formula publicly with certain patent protections waived, while establishing a foundation to support manufacturing\n",
|
| 555 |
-
"\n",
|
| 556 |
-
"5. **Hybrid distribution model**: Manufacture for highest-need populations while licensing to reach others, using licensing revenues to subsidize direct manufacturing\n",
|
| 557 |
-
"\n",
|
| 558 |
-
"## Case Study Context\n",
|
| 559 |
-
"\n",
|
| 560 |
-
"Similar dilemmas have occurred with treatments for HIV/AIDS, hepatitis C, and rare genetic disorders. The outcomes suggest:\n",
|
| 561 |
-
"\n",
|
| 562 |
-
"- Maintaining some control over intellectual property while ensuring broad access often yields better public health outcomes than either extreme option\n",
|
| 563 |
-
"- Patient advocacy can significantly influence corporate behavior and pricing\n",
|
| 564 |
-
"- International differences in pricing and patent enforcement create complex dynamics\n",
|
| 565 |
-
"- Government intervention through negotiation, compulsory licensing, or regulation often becomes necessary\n",
|
| 566 |
-
"\n",
|
| 567 |
-
"## Systems-Level Considerations\n",
|
| 568 |
-
"\n",
|
| 569 |
-
"This dilemma exists within broader systemic issues:\n",
|
| 570 |
-
"\n",
|
| 571 |
-
"- The current pharmaceutical development model creates inherent tensions between innovation, access, and affordability\n",
|
| 572 |
-
"- Rare disease treatments highlight market failures in drug development\n",
|
| 573 |
-
"- Healthcare financing systems vary globally, affecting how we should evaluate \"accessibility\"\n",
|
| 574 |
-
"- Intellectual property regimes may require reform to better balance innovation incentives with public health needs\n",
|
| 575 |
-
"\n",
|
| 576 |
-
"## Recommended Approach\n",
|
| 577 |
-
"\n",
|
| 578 |
-
"The philanthropist should pursue a hybrid strategy that:\n",
|
| 579 |
-
"\n",
|
| 580 |
-
"1. Maintains sufficient control to ensure the most vulnerable patients receive treatment regardless of ability to pay\n",
|
| 581 |
-
"\n",
|
| 582 |
-
"2. Leverages partnerships with multiple entities (pharmaceutical companies, governments, NGOs) to maximize production scale and geographic reach\n",
|
| 583 |
-
"\n",
|
| 584 |
-
"3. Implements contractual safeguards on pricing, with particular attention to low and middle-income regions\n",
|
| 585 |
-
"\n",
|
| 586 |
-
"4. Establishes a patient assistance foundation using a portion of any licensing revenues\n",
|
| 587 |
-
"\n",
|
| 588 |
-
"5. Advocates for systemic reforms that would prevent such dilemmas in the future\n",
|
| 589 |
-
"\n",
|
| 590 |
-
"This approach recognizes that the philanthropist's responsibility extends beyond the immediate distribution decision to include consideration of precedent-setting effects, stakeholder equity, and systemic change—balancing immediate needs with long-term public health impact."
|
| 591 |
-
],
|
| 592 |
-
"text/plain": [
|
| 593 |
-
"<IPython.core.display.Markdown object>"
|
| 594 |
-
]
|
| 595 |
-
},
|
| 596 |
-
"metadata": {},
|
| 597 |
-
"output_type": "display_data"
|
| 598 |
-
},
|
| 599 |
-
{
|
| 600 |
-
"name": "stdout",
|
| 601 |
-
"output_type": "stream",
|
| 602 |
-
"text": [
|
| 603 |
-
"\n",
|
| 604 |
-
"==================================================\n",
|
| 605 |
-
"\n"
|
| 606 |
-
]
|
| 607 |
-
}
|
| 608 |
-
],
|
| 609 |
-
"source": [
|
| 610 |
-
"# Apply Reflection Pattern\n",
|
| 611 |
-
"reflected_responses = {}\n",
|
| 612 |
-
"\n",
|
| 613 |
-
"print(\"\\n=== RESPONSES AFTER REFLECTION ===\\n\")\n",
|
| 614 |
-
"\n",
|
| 615 |
-
"for model_name, client, is_anthropic in models:\n",
|
| 616 |
-
" if client and model_name in initial_responses:\n",
|
| 617 |
-
" try:\n",
|
| 618 |
-
" reflected = apply_reflection_pattern(\n",
|
| 619 |
-
" client, model_name, question, \n",
|
| 620 |
-
" initial_responses[model_name], is_anthropic\n",
|
| 621 |
-
" )\n",
|
| 622 |
-
" reflected_responses[model_name] = reflected\n",
|
| 623 |
-
" \n",
|
| 624 |
-
" print(f\"**{model_name} - After Reflection:**\")\n",
|
| 625 |
-
" display(Markdown(reflected))\n",
|
| 626 |
-
" print(\"\\n\" + \"=\"*50 + \"\\n\")\n",
|
| 627 |
-
" except Exception as e:\n",
|
| 628 |
-
" print(f\"Error with reflection for {model_name}: {e}\")"
|
| 629 |
-
]
|
| 630 |
-
},
|
| 631 |
-
{
|
| 632 |
-
"cell_type": "markdown",
|
| 633 |
-
"metadata": {},
|
| 634 |
-
"source": [
|
| 635 |
-
"## Step 4: Comparative Evaluation (Extended Judge Pattern)"
|
| 636 |
-
]
|
| 637 |
-
},
|
| 638 |
-
{
|
| 639 |
-
"cell_type": "code",
|
| 640 |
-
"execution_count": 9,
|
| 641 |
-
"metadata": {},
|
| 642 |
-
"outputs": [],
|
| 643 |
-
"source": [
|
| 644 |
-
"def create_comparative_evaluation(question, initial_responses, reflected_responses):\n",
|
| 645 |
-
" \"\"\"Create a comparative evaluation of responses before/after reflection\"\"\"\n",
|
| 646 |
-
" \n",
|
| 647 |
-
" evaluation_prompt = f\"\"\"\n",
|
| 648 |
-
"You are evaluating the effectiveness of the \"Reflection Pattern\" on the following question:\n",
|
| 649 |
-
"{question}\n",
|
| 650 |
-
"\n",
|
| 651 |
-
"For each model, you have:\n",
|
| 652 |
-
"1. An initial response\n",
|
| 653 |
-
"2. A response after self-reflection\n",
|
| 654 |
-
"\n",
|
| 655 |
-
"Analyze and compare:\n",
|
| 656 |
-
"- Depth of analysis\n",
|
| 657 |
-
"- Consideration of multiple perspectives\n",
|
| 658 |
-
"- Nuance and sophistication of reasoning\n",
|
| 659 |
-
"- Improvement brought by reflection\n",
|
| 660 |
-
"\n",
|
| 661 |
-
"MODELS TO EVALUATE:\n",
|
| 662 |
-
"\"\"\"\n",
|
| 663 |
-
" \n",
|
| 664 |
-
" for model_name in initial_responses:\n",
|
| 665 |
-
" if model_name in reflected_responses:\n",
|
| 666 |
-
" evaluation_prompt += f\"\"\"\n",
|
| 667 |
-
"## {model_name}\n",
|
| 668 |
-
"\n",
|
| 669 |
-
"### Initial response:\n",
|
| 670 |
-
"{initial_responses[model_name][:500]}...\n",
|
| 671 |
-
"\n",
|
| 672 |
-
"### Response after reflection:\n",
|
| 673 |
-
"{reflected_responses[model_name][:800]}...\n",
|
| 674 |
-
"\n",
|
| 675 |
-
"\"\"\"\n",
|
| 676 |
-
" \n",
|
| 677 |
-
" evaluation_prompt += \"\"\"\n",
|
| 678 |
-
"Respond with structured JSON:\n",
|
| 679 |
-
"{\n",
|
| 680 |
-
" \"general_analysis\": \"Your analysis of the Reflection Pattern's effectiveness\",\n",
|
| 681 |
-
" \"initial_ranking\": [\"best initially ranked model\", \"second\", \"third\"],\n",
|
| 682 |
-
" \"post_reflection_ranking\": [\"best ranked model after reflection\", \"second\", \"third\"],\n",
|
| 683 |
-
" \"most_improved\": \"Which model improved the most\",\n",
|
| 684 |
-
" \"insights\": \"Insights about the usefulness of the Reflection Pattern\"\n",
|
| 685 |
-
"}\n",
|
| 686 |
-
"\"\"\"\n",
|
| 687 |
-
" \n",
|
| 688 |
-
" return evaluation_prompt"
|
| 689 |
-
]
|
| 690 |
-
},
|
| 691 |
-
{
|
| 692 |
-
"cell_type": "code",
|
| 693 |
-
"execution_count": 10,
|
| 694 |
-
"metadata": {},
|
| 695 |
-
"outputs": [
|
| 696 |
-
{
|
| 697 |
-
"name": "stdout",
|
| 698 |
-
"output_type": "stream",
|
| 699 |
-
"text": [
|
| 700 |
-
"\n",
|
| 701 |
-
"=== FINAL EVALUATION ===\n",
|
| 702 |
-
"\n",
|
| 703 |
-
"```json\n",
|
| 704 |
-
"{\n",
|
| 705 |
-
" \"general_analysis\": \"The Reflection Pattern effectively enhanced the depth of analysis and consideration of multiple perspectives in both models. However, the results differ in terms of sophistication and detail. The GPT-4 model provided initial observations that were relatively shallow but improved by incorporating logistical challenges and suggesting compromises during reflection. In contrast, Claude-3's initial response was more structured and sophisticated, covering a broader range of ethical frameworks, but still showed room for improvement regarding stakeholder analysis and long-term impacts.\",\n",
|
| 706 |
-
" \"initial_ranking\": [\"claude-3-7-sonnet-latest\", \"gpt-4o-mini\"],\n",
|
| 707 |
-
" \"post_reflection_ranking\": [\"claude-3-7-sonnet-latest\", \"gpt-4o-mini\"],\n",
|
| 708 |
-
" \"most_improved\": \"gpt-4o-mini\",\n",
|
| 709 |
-
" \"insights\": \"The Reflection Pattern revealed significant gaps in both models' initial analyses, encouraging deeper engagement with ethical implications and stakeholder considerations. It highlighted the importance of reflecting on logistical realities and the real-world impacts of decisions, marking it as a worthwhile practice for ethical dilemmas.\"\n",
|
| 710 |
-
"}\n",
|
| 711 |
-
"```\n",
|
| 712 |
-
"Could not parse JSON, raw output shown above\n"
|
| 713 |
-
]
|
| 714 |
-
}
|
| 715 |
-
],
|
| 716 |
-
"source": [
|
| 717 |
-
"# Final evaluation\n",
|
| 718 |
-
"if initial_responses and reflected_responses:\n",
|
| 719 |
-
" evaluation_prompt = create_comparative_evaluation(question, initial_responses, reflected_responses)\n",
|
| 720 |
-
" \n",
|
| 721 |
-
" judge_messages = [{\"role\": \"user\", \"content\": evaluation_prompt}]\n",
|
| 722 |
-
" \n",
|
| 723 |
-
" try:\n",
|
| 724 |
-
" judge_response = openai_client.chat.completions.create(\n",
|
| 725 |
-
" model=\"gpt-4o-mini\",\n",
|
| 726 |
-
" messages=judge_messages,\n",
|
| 727 |
-
" )\n",
|
| 728 |
-
" \n",
|
| 729 |
-
" evaluation_result = judge_response.choices[0].message.content\n",
|
| 730 |
-
" print(\"\\n=== FINAL EVALUATION ===\\n\")\n",
|
| 731 |
-
" print(evaluation_result)\n",
|
| 732 |
-
" \n",
|
| 733 |
-
" # Try to parse JSON for structured display\n",
|
| 734 |
-
" try:\n",
|
| 735 |
-
" eval_json = json.loads(evaluation_result)\n",
|
| 736 |
-
" print(\"\\n=== STRUCTURED RESULTS ===\\n\")\n",
|
| 737 |
-
" for key, value in eval_json.items():\n",
|
| 738 |
-
" print(f\"{key.replace('_', ' ').title()}: {value}\")\n",
|
| 739 |
-
" except:\n",
|
| 740 |
-
" print(\"Could not parse JSON, raw output shown above\")\n",
|
| 741 |
-
" \n",
|
| 742 |
-
" except Exception as e:\n",
|
| 743 |
-
" print(f\"Error during final evaluation: {e}\")"
|
| 744 |
-
]
|
| 745 |
-
},
|
| 746 |
-
{
|
| 747 |
-
"cell_type": "markdown",
|
| 748 |
-
"metadata": {},
|
| 749 |
-
"source": [
|
| 750 |
-
"## Simple Before/After Comparison"
|
| 751 |
-
]
|
| 752 |
-
},
|
| 753 |
-
{
|
| 754 |
-
"cell_type": "code",
|
| 755 |
-
"execution_count": 11,
|
| 756 |
-
"metadata": {},
|
| 757 |
-
"outputs": [
|
| 758 |
-
{
|
| 759 |
-
"name": "stdout",
|
| 760 |
-
"output_type": "stream",
|
| 761 |
-
"text": [
|
| 762 |
-
"\n",
|
| 763 |
-
"=== BEFORE vs AFTER COMPARISON ===\n",
|
| 764 |
-
"\n",
|
| 765 |
-
"\n",
|
| 766 |
-
"==================== GPT-4O-MINI ====================\n",
|
| 767 |
-
"\n",
|
| 768 |
-
"BEFORE REFLECTION:\n",
|
| 769 |
-
"--------------------------------------------------\n",
|
| 770 |
-
"This ethical dilemma presents a challenging decision for the philanthropist, who must weigh the immediate health needs of a few individuals against the broader societal implications of drug distribution and access.\n",
|
| 771 |
-
"\n",
|
| 772 |
-
"### Option 1: Prioritizing Immediate Health\n",
|
| 773 |
-
"\n",
|
| 774 |
-
"If the philanthropist chooses to manufa...\n",
|
| 775 |
-
"\n",
|
| 776 |
-
"AFTER REFLECTION:\n",
|
| 777 |
-
"--------------------------------------------------\n",
|
| 778 |
-
"This ethical dilemma presents the philanthropist with a complex decision regarding how best to utilize limited resources to maximize the benefit for individuals suffering from a rare but fatal disease. The two primary options – providing a low-cost supply to a select few or selling the formula for broader but costly distribution – both highlight significant ethical considerations.\n",
|
| 779 |
-
"\n",
|
| 780 |
-
"### Option 1: P...\n",
|
| 781 |
-
"\n",
|
| 782 |
-
"======================================================================\n",
|
| 783 |
-
"\n",
|
| 784 |
-
"\n",
|
| 785 |
-
"==================== CLAUDE-3-7-SONNET-LATEST ====================\n",
|
| 786 |
-
"\n",
|
| 787 |
-
"BEFORE REFLECTION:\n",
|
| 788 |
-
"--------------------------------------------------\n",
|
| 789 |
-
"# The Philanthropist's Dilemma\n",
|
| 790 |
-
"\n",
|
| 791 |
-
"This is a complex ethical dilemma that involves several important considerations:\n",
|
| 792 |
-
"\n",
|
| 793 |
-
"## Key Ethical Tensions\n",
|
| 794 |
-
"\n",
|
| 795 |
-
"- **Limited access at affordable prices** vs. **wider access at unaffordable prices**\n",
|
| 796 |
-
"- **Immediate relief for a few** vs. **potential long-term access for many...\n",
|
| 797 |
-
"\n",
|
| 798 |
-
"AFTER REFLECTION:\n",
|
| 799 |
-
"--------------------------------------------------\n",
|
| 800 |
-
"# The Philanthropist's Dilemma: A Multidimensional Ethical Analysis\n",
|
| 801 |
-
"\n",
|
| 802 |
-
"This scenario presents not simply a binary choice but a complex ethical landscape involving multiple stakeholders, systemic factors, and competing values.\n",
|
| 803 |
-
"\n",
|
| 804 |
-
"## Stakeholder Analysis\n",
|
| 805 |
-
"\n",
|
| 806 |
-
"**Patients and families:**\n",
|
| 807 |
-
"- Those currently suffering need immediate access regardless of mechanism\n",
|
| 808 |
-
"- Future patients have interests in sustainable d...\n",
|
| 809 |
-
"\n",
|
| 810 |
-
"======================================================================\n",
|
| 811 |
-
"\n"
|
| 812 |
-
]
|
| 813 |
-
}
|
| 814 |
-
],
|
| 815 |
-
"source": [
|
| 816 |
-
"# Display side-by-side comparison for each model\n",
|
| 817 |
-
"print(\"\\n=== BEFORE vs AFTER COMPARISON ===\\n\")\n",
|
| 818 |
-
"\n",
|
| 819 |
-
"for model_name in initial_responses:\n",
|
| 820 |
-
" if model_name in reflected_responses:\n",
|
| 821 |
-
" print(f\"\\n{'='*20} {model_name.upper()} {'='*20}\\n\")\n",
|
| 822 |
-
" \n",
|
| 823 |
-
" print(\"BEFORE REFLECTION:\")\n",
|
| 824 |
-
" print(\"-\" * 50)\n",
|
| 825 |
-
" print(initial_responses[model_name][:300] + \"...\")\n",
|
| 826 |
-
" \n",
|
| 827 |
-
" print(\"\\nAFTER REFLECTION:\")\n",
|
| 828 |
-
" print(\"-\" * 50)\n",
|
| 829 |
-
" # Extract just the \"Improved Response\" section if it exists\n",
|
| 830 |
-
" reflected = reflected_responses[model_name]\n",
|
| 831 |
-
" if \"## Improved Response\" in reflected:\n",
|
| 832 |
-
" improved_section = reflected.split(\"## Improved Response\")[1].strip()\n",
|
| 833 |
-
" print(improved_section[:400] + \"...\")\n",
|
| 834 |
-
" else:\n",
|
| 835 |
-
" print(reflected[:400] + \"...\")\n",
|
| 836 |
-
" \n",
|
| 837 |
-
" print(\"\\n\" + \"=\"*70 + \"\\n\")"
|
| 838 |
-
]
|
| 839 |
-
},
|
| 840 |
-
{
|
| 841 |
-
"cell_type": "markdown",
|
| 842 |
-
"metadata": {},
|
| 843 |
-
"source": [
|
| 844 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 845 |
-
" <tr>\n",
|
| 846 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 847 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 848 |
-
" </td>\n",
|
| 849 |
-
" <td>\n",
|
| 850 |
-
" <h2 style=\"color:#ff7800;\">Pattern Analysis</h2>\n",
|
| 851 |
-
" <span style=\"color:#ff7800;\">\n",
|
| 852 |
-
" <b>Patterns used:</b><br/>\n",
|
| 853 |
-
" 1. <b>Multi-Model Comparison:</b> Comparing multiple models on the same task<br/>\n",
|
| 854 |
-
" 2. <b>Judge/Evaluator:</b> Using a model to evaluate performances<br/>\n",
|
| 855 |
-
" 3. <b>Reflection (NEW):</b> Self-critique and improvement of responses<br/><br/>\n",
|
| 856 |
-
" <b>Possible experiments:</b><br/>\n",
|
| 857 |
-
" - Iterate the Reflection Pattern multiple times<br/>\n",
|
| 858 |
-
" - Add a \"Debate Pattern\" between models<br/>\n",
|
| 859 |
-
" - Implement a \"Consensus Pattern\"\n",
|
| 860 |
-
" </span>\n",
|
| 861 |
-
" </td>\n",
|
| 862 |
-
" </tr>\n",
|
| 863 |
-
"</table>"
|
| 864 |
-
]
|
| 865 |
-
},
|
| 866 |
-
{
|
| 867 |
-
"cell_type": "markdown",
|
| 868 |
-
"metadata": {},
|
| 869 |
-
"source": [
|
| 870 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 871 |
-
" <tr>\n",
|
| 872 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 873 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 874 |
-
" </td>\n",
|
| 875 |
-
" <td>\n",
|
| 876 |
-
" <h2 style=\"color:#00bfff;\">Commercial Applications</h2>\n",
|
| 877 |
-
" <span style=\"color:#00bfff;\">\n",
|
| 878 |
-
" The <b>Reflection Pattern</b> is particularly valuable for:<br/>\n",
|
| 879 |
-
" • Improving quality of complex analyses<br/>\n",
|
| 880 |
-
" • Reducing bias in AI recommendations<br/>\n",
|
| 881 |
-
" • Creating self-improving systems<br/>\n",
|
| 882 |
-
" • Developing more robust AI for critical decisions<br/><br/>\n",
|
| 883 |
-
" Use cases: Strategic consulting, risk analysis, ethical evaluation, medical diagnosis\n",
|
| 884 |
-
" </span>\n",
|
| 885 |
-
" </td>\n",
|
| 886 |
-
" </tr>\n",
|
| 887 |
-
"</table>"
|
| 888 |
-
]
|
| 889 |
-
},
|
| 890 |
-
{
|
| 891 |
-
"cell_type": "markdown",
|
| 892 |
-
"metadata": {},
|
| 893 |
-
"source": [
|
| 894 |
-
"## Additional Pattern Ideas for Future Implementation"
|
| 895 |
-
]
|
| 896 |
-
},
|
| 897 |
-
{
|
| 898 |
-
"cell_type": "code",
|
| 899 |
-
"execution_count": 12,
|
| 900 |
-
"metadata": {},
|
| 901 |
-
"outputs": [
|
| 902 |
-
{
|
| 903 |
-
"name": "stdout",
|
| 904 |
-
"output_type": "stream",
|
| 905 |
-
"text": [
|
| 906 |
-
"Exercise completed! Analyze the results to see the impact of the Reflection Pattern.\n"
|
| 907 |
-
]
|
| 908 |
-
}
|
| 909 |
-
],
|
| 910 |
-
"source": [
|
| 911 |
-
"# 1. Chain of Thought Pattern\n",
|
| 912 |
-
"\"\"\"\n",
|
| 913 |
-
"Add a pattern that asks models to show their reasoning step by step:\n",
|
| 914 |
-
"\n",
|
| 915 |
-
"def apply_chain_of_thought_pattern(client, question):\n",
|
| 916 |
-
" prompt = f\\\"\n",
|
| 917 |
-
" Question: {question}\n",
|
| 918 |
-
" \n",
|
| 919 |
-
" Please think through this step by step:\n",
|
| 920 |
-
" Step 1: [Identify the key issues]\n",
|
| 921 |
-
" Step 2: [Consider different perspectives]\n",
|
| 922 |
-
" Step 3: [Evaluate potential consequences]\n",
|
| 923 |
-
" Step 4: [Provide reasoned conclusion]\n",
|
| 924 |
-
" \\\"\n",
|
| 925 |
-
" return get_response(client, prompt)\n",
|
| 926 |
-
"\"\"\"\n",
|
| 927 |
-
"\n",
|
| 928 |
-
"# 2. Iterative Refinement Pattern\n",
|
| 929 |
-
"\"\"\"\n",
|
| 930 |
-
"Create a loop that progressively improves the response over multiple iterations:\n",
|
| 931 |
-
"\n",
|
| 932 |
-
"def iterative_refinement(client, question, iterations=3):\n",
|
| 933 |
-
" response = get_initial_response(client, question)\n",
|
| 934 |
-
" for i in range(iterations):\n",
|
| 935 |
-
" critique_prompt = f\\\"Improve this response: {response}\\\"\n",
|
| 936 |
-
" response = get_response(client, critique_prompt)\n",
|
| 937 |
-
" return response\n",
|
| 938 |
-
"\"\"\"\n",
|
| 939 |
-
"\n",
|
| 940 |
-
"# 3. Debate Pattern\n",
|
| 941 |
-
"\"\"\"\n",
|
| 942 |
-
"Make two models debate their respective responses:\n",
|
| 943 |
-
"\n",
|
| 944 |
-
"def create_debate(client1, client2, question):\n",
|
| 945 |
-
" response1 = get_response(client1, question)\n",
|
| 946 |
-
" response2 = get_response(client2, question)\n",
|
| 947 |
-
" \n",
|
| 948 |
-
" debate_prompt1 = f\\\"Argue against this position: {response2}\\\"\n",
|
| 949 |
-
" debate_prompt2 = f\\\"Argue against this position: {response1}\\\"\n",
|
| 950 |
-
" \n",
|
| 951 |
-
" counter1 = get_response(client1, debate_prompt1)\n",
|
| 952 |
-
" counter2 = get_response(client2, debate_prompt2)\n",
|
| 953 |
-
" \n",
|
| 954 |
-
" return counter1, counter2\n",
|
| 955 |
-
"\"\"\"\n",
|
| 956 |
-
"\n",
|
| 957 |
-
"# 4. Consensus Building Pattern\n",
|
| 958 |
-
"\"\"\"\n",
|
| 959 |
-
"Attempt to create a consensus response based on all individual responses:\n",
|
| 960 |
-
"\n",
|
| 961 |
-
"def build_consensus(all_responses, question):\n",
|
| 962 |
-
" consensus_prompt = f\\\"\n",
|
| 963 |
-
" Original question: {question}\n",
|
| 964 |
-
" \n",
|
| 965 |
-
" Here are multiple expert responses:\n",
|
| 966 |
-
" {all_responses}\n",
|
| 967 |
-
" \n",
|
| 968 |
-
" Create a consensus response that incorporates the best insights from all responses\n",
|
| 969 |
-
" while resolving contradictions.\n",
|
| 970 |
-
" \\\"\n",
|
| 971 |
-
" return get_response(openai_client, consensus_prompt)\n",
|
| 972 |
-
"\"\"\"\n",
|
| 973 |
-
"\n",
|
| 974 |
-
"print(\"Exercise completed! Analyze the results to see the impact of the Reflection Pattern.\")"
|
| 975 |
-
]
|
| 976 |
-
}
|
| 977 |
-
],
|
| 978 |
-
"metadata": {
|
| 979 |
-
"kernelspec": {
|
| 980 |
-
"display_name": ".venv",
|
| 981 |
-
"language": "python",
|
| 982 |
-
"name": "python3"
|
| 983 |
-
},
|
| 984 |
-
"language_info": {
|
| 985 |
-
"codemirror_mode": {
|
| 986 |
-
"name": "ipython",
|
| 987 |
-
"version": 3
|
| 988 |
-
},
|
| 989 |
-
"file_extension": ".py",
|
| 990 |
-
"mimetype": "text/x-python",
|
| 991 |
-
"name": "python",
|
| 992 |
-
"nbconvert_exporter": "python",
|
| 993 |
-
"pygments_lexer": "ipython3",
|
| 994 |
-
"version": "3.12.11"
|
| 995 |
-
}
|
| 996 |
-
},
|
| 997 |
-
"nbformat": 4,
|
| 998 |
-
"nbformat_minor": 4
|
| 999 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/2_lab2_six-thinking-hats-simulator.ipynb
DELETED
|
@@ -1,457 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Six Thinking Hats Simulator\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"## Objective\n",
|
| 10 |
-
"This notebook implements a simulator of the Six Thinking Hats technique to evaluate and improve technological solutions. The simulator will:\n",
|
| 11 |
-
"\n",
|
| 12 |
-
"1. Use an LLM to generate an initial technological solution idea for a specific daily task in a company.\n",
|
| 13 |
-
"2. Apply the Six Thinking Hats methodology to analyze and improve the proposed solution.\n",
|
| 14 |
-
"3. Provide a comprehensive evaluation from different perspectives.\n",
|
| 15 |
-
"\n",
|
| 16 |
-
"## About the Six Thinking Hats Technique\n",
|
| 17 |
-
"\n",
|
| 18 |
-
"The Six Thinking Hats is a powerful technique developed by Edward de Bono that helps people look at problems and decisions from different perspectives. Each \"hat\" represents a different thinking approach:\n",
|
| 19 |
-
"\n",
|
| 20 |
-
"- **White Hat (Facts):** Focuses on available information, facts, and data.\n",
|
| 21 |
-
"- **Red Hat (Feelings):** Represents emotions, intuition, and gut feelings.\n",
|
| 22 |
-
"- **Black Hat (Critical):** Identifies potential problems, risks, and negative aspects.\n",
|
| 23 |
-
"- **Yellow Hat (Positive):** Looks for benefits, opportunities, and positive aspects.\n",
|
| 24 |
-
"- **Green Hat (Creative):** Encourages new ideas, alternatives, and possibilities.\n",
|
| 25 |
-
"- **Blue Hat (Process):** Manages the thinking process and ensures all perspectives are considered.\n",
|
| 26 |
-
"\n",
|
| 27 |
-
"In this simulator, we'll use these different perspectives to thoroughly evaluate and improve technological solutions proposed by an LLM."
|
| 28 |
-
]
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"cell_type": "code",
|
| 32 |
-
"execution_count": 1,
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"outputs": [],
|
| 35 |
-
"source": [
|
| 36 |
-
"import os\n",
|
| 37 |
-
"import json\n",
|
| 38 |
-
"from dotenv import load_dotenv\n",
|
| 39 |
-
"from openai import OpenAI\n",
|
| 40 |
-
"from anthropic import Anthropic\n",
|
| 41 |
-
"from IPython.display import Markdown, display"
|
| 42 |
-
]
|
| 43 |
-
},
|
| 44 |
-
{
|
| 45 |
-
"cell_type": "code",
|
| 46 |
-
"execution_count": null,
|
| 47 |
-
"metadata": {},
|
| 48 |
-
"outputs": [],
|
| 49 |
-
"source": [
|
| 50 |
-
"load_dotenv(override=True)"
|
| 51 |
-
]
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"cell_type": "code",
|
| 55 |
-
"execution_count": null,
|
| 56 |
-
"metadata": {},
|
| 57 |
-
"outputs": [],
|
| 58 |
-
"source": [
|
| 59 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 60 |
-
"\n",
|
| 61 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 62 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 63 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 64 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 65 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 66 |
-
"\n",
|
| 67 |
-
"if openai_api_key:\n",
|
| 68 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 69 |
-
"else:\n",
|
| 70 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 71 |
-
" \n",
|
| 72 |
-
"if anthropic_api_key:\n",
|
| 73 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 74 |
-
"else:\n",
|
| 75 |
-
" print(\"Anthropic API Key not set\")\n",
|
| 76 |
-
"\n",
|
| 77 |
-
"if google_api_key:\n",
|
| 78 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 79 |
-
"else:\n",
|
| 80 |
-
" print(\"Google API Key not set\")\n",
|
| 81 |
-
"\n",
|
| 82 |
-
"if deepseek_api_key:\n",
|
| 83 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 84 |
-
"else:\n",
|
| 85 |
-
" print(\"DeepSeek API Key not set\")\n",
|
| 86 |
-
"\n",
|
| 87 |
-
"if groq_api_key:\n",
|
| 88 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 89 |
-
"else:\n",
|
| 90 |
-
" print(\"Groq API Key not set\")"
|
| 91 |
-
]
|
| 92 |
-
},
|
| 93 |
-
{
|
| 94 |
-
"cell_type": "code",
|
| 95 |
-
"execution_count": null,
|
| 96 |
-
"metadata": {},
|
| 97 |
-
"outputs": [],
|
| 98 |
-
"source": [
|
| 99 |
-
"request = \"Generate a technological solution to solve a specific workplace challenge. Choose an employee role, in a specific industry, and identify a time-consuming or error-prone daily task they face. Then, create an innovative yet practical technological solution that addresses this challenge. Include what technologies it uses (AI, automation, etc.), how it integrates with existing systems, its key benefits, and basic implementation requirements. Keep your solution realistic with current technology. \"\n",
|
| 100 |
-
"request += \"Answer only with the solution, no explanation.\"\n",
|
| 101 |
-
"messages = [{\"role\": \"user\", \"content\": request}]\n",
|
| 102 |
-
"\n",
|
| 103 |
-
"openai = OpenAI()\n",
|
| 104 |
-
"response = openai.chat.completions.create(\n",
|
| 105 |
-
" model=\"gpt-4o-mini\",\n",
|
| 106 |
-
" messages=messages,\n",
|
| 107 |
-
")\n",
|
| 108 |
-
"question = response.choices[0].message.content\n",
|
| 109 |
-
"print(question)"
|
| 110 |
-
]
|
| 111 |
-
},
|
| 112 |
-
{
|
| 113 |
-
"cell_type": "code",
|
| 114 |
-
"execution_count": null,
|
| 115 |
-
"metadata": {},
|
| 116 |
-
"outputs": [],
|
| 117 |
-
"source": [
|
| 118 |
-
"validation_prompt = f\"\"\"Validate and improve the following technological solution. For each iteration, check if the solution meets these criteria:\n",
|
| 119 |
-
"\n",
|
| 120 |
-
"1. Clarity:\n",
|
| 121 |
-
" - Is the problem clearly defined?\n",
|
| 122 |
-
" - Is the solution clearly explained?\n",
|
| 123 |
-
" - Are the technical components well-described?\n",
|
| 124 |
-
"\n",
|
| 125 |
-
"2. Specificity:\n",
|
| 126 |
-
" - Are there specific examples or use cases?\n",
|
| 127 |
-
" - Are the technologies and tools specifically named?\n",
|
| 128 |
-
" - Are the implementation steps detailed?\n",
|
| 129 |
-
"\n",
|
| 130 |
-
"3. Context:\n",
|
| 131 |
-
" - Is the industry/company context clear?\n",
|
| 132 |
-
" - Are the user roles and needs well-defined?\n",
|
| 133 |
-
" - Is the current workflow/problem well-described?\n",
|
| 134 |
-
"\n",
|
| 135 |
-
"4. Constraints:\n",
|
| 136 |
-
" - Are there clear technical limitations?\n",
|
| 137 |
-
" - Are there budget/time constraints mentioned?\n",
|
| 138 |
-
" - Are there integration requirements specified?\n",
|
| 139 |
-
"\n",
|
| 140 |
-
"If any of these criteria are not met, improve the solution by:\n",
|
| 141 |
-
"1. Adding missing details\n",
|
| 142 |
-
"2. Clarifying ambiguous points\n",
|
| 143 |
-
"3. Providing more specific examples\n",
|
| 144 |
-
"4. Including relevant constraints\n",
|
| 145 |
-
"\n",
|
| 146 |
-
"Here is the technological solution to validate and improve:\n",
|
| 147 |
-
"{question} \n",
|
| 148 |
-
"Provide an improved version that addresses any missing or unclear aspects. If this is the 5th iteration, return the final improved version without further changes.\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"Respond only with the Improved Solution:\n",
|
| 151 |
-
"[Your improved solution here]\"\"\"\n",
|
| 152 |
-
"\n",
|
| 153 |
-
"messages = [{\"role\": \"user\", \"content\": validation_prompt}]\n",
|
| 154 |
-
"\n",
|
| 155 |
-
"response = openai.chat.completions.create(model=\"gpt-4o\", messages=messages)\n",
|
| 156 |
-
"question = response.choices[0].message.content\n",
|
| 157 |
-
"\n",
|
| 158 |
-
"display(Markdown(question))"
|
| 159 |
-
]
|
| 160 |
-
},
|
| 161 |
-
{
|
| 162 |
-
"cell_type": "markdown",
|
| 163 |
-
"metadata": {},
|
| 164 |
-
"source": [
|
| 165 |
-
"\n",
|
| 166 |
-
"In this section, we will ask each AI model to analyze a technological solution using the Six Thinking Hats methodology. Each model will:\n",
|
| 167 |
-
"\n",
|
| 168 |
-
"1. First generate a technological solution for a workplace challenge\n",
|
| 169 |
-
"2. Then analyze that solution using each of the Six Thinking Hats\n",
|
| 170 |
-
"\n",
|
| 171 |
-
"Each model will provide:\n",
|
| 172 |
-
"1. An initial technological solution\n",
|
| 173 |
-
"2. A structured analysis using all six thinking hats\n",
|
| 174 |
-
"3. A final recommendation based on the comprehensive analysis\n",
|
| 175 |
-
"\n",
|
| 176 |
-
"This approach will allow us to:\n",
|
| 177 |
-
"- Compare how different models apply the Six Thinking Hats methodology\n",
|
| 178 |
-
"- Identify patterns and differences in their analytical approaches\n",
|
| 179 |
-
"- Gather diverse perspectives on the same solution\n",
|
| 180 |
-
"- Create a rich, multi-faceted evaluation of each proposed technological solution\n",
|
| 181 |
-
"\n",
|
| 182 |
-
"The responses will be collected and displayed below, showing how each model applies the Six Thinking Hats methodology to evaluate and improve the proposed solutions."
|
| 183 |
-
]
|
| 184 |
-
},
|
| 185 |
-
{
|
| 186 |
-
"cell_type": "code",
|
| 187 |
-
"execution_count": 6,
|
| 188 |
-
"metadata": {},
|
| 189 |
-
"outputs": [],
|
| 190 |
-
"source": [
|
| 191 |
-
"models = []\n",
|
| 192 |
-
"answers = []\n",
|
| 193 |
-
"combined_question = f\" Analyze the technological solution proposed in {question} using the Six Thinking Hats methodology. For each hat, provide a detailed analysis. Finally, provide a comprehensive recommendation based on all the above analyses.\"\n",
|
| 194 |
-
"messages = [{\"role\": \"user\", \"content\": combined_question}]"
|
| 195 |
-
]
|
| 196 |
-
},
|
| 197 |
-
{
|
| 198 |
-
"cell_type": "code",
|
| 199 |
-
"execution_count": null,
|
| 200 |
-
"metadata": {},
|
| 201 |
-
"outputs": [],
|
| 202 |
-
"source": [
|
| 203 |
-
"# GPT thinking process\n",
|
| 204 |
-
"\n",
|
| 205 |
-
"model_name = \"gpt-4o\"\n",
|
| 206 |
-
"\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 209 |
-
"answer = response.choices[0].message.content\n",
|
| 210 |
-
"\n",
|
| 211 |
-
"display(Markdown(answer))\n",
|
| 212 |
-
"models.append(model_name)\n",
|
| 213 |
-
"answers.append(answer)"
|
| 214 |
-
]
|
| 215 |
-
},
|
| 216 |
-
{
|
| 217 |
-
"cell_type": "code",
|
| 218 |
-
"execution_count": null,
|
| 219 |
-
"metadata": {},
|
| 220 |
-
"outputs": [],
|
| 221 |
-
"source": [
|
| 222 |
-
"# Claude thinking process\n",
|
| 223 |
-
"\n",
|
| 224 |
-
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 225 |
-
"\n",
|
| 226 |
-
"claude = Anthropic()\n",
|
| 227 |
-
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 228 |
-
"answer = response.content[0].text\n",
|
| 229 |
-
"\n",
|
| 230 |
-
"display(Markdown(answer))\n",
|
| 231 |
-
"models.append(model_name)\n",
|
| 232 |
-
"answers.append(answer)"
|
| 233 |
-
]
|
| 234 |
-
},
|
| 235 |
-
{
|
| 236 |
-
"cell_type": "code",
|
| 237 |
-
"execution_count": null,
|
| 238 |
-
"metadata": {},
|
| 239 |
-
"outputs": [],
|
| 240 |
-
"source": [
|
| 241 |
-
"# Gemini thinking process\n",
|
| 242 |
-
"\n",
|
| 243 |
-
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 244 |
-
"model_name = \"gemini-2.0-flash\"\n",
|
| 245 |
-
"\n",
|
| 246 |
-
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 247 |
-
"answer = response.choices[0].message.content\n",
|
| 248 |
-
"\n",
|
| 249 |
-
"display(Markdown(answer))\n",
|
| 250 |
-
"models.append(model_name)\n",
|
| 251 |
-
"answers.append(answer)"
|
| 252 |
-
]
|
| 253 |
-
},
|
| 254 |
-
{
|
| 255 |
-
"cell_type": "code",
|
| 256 |
-
"execution_count": null,
|
| 257 |
-
"metadata": {},
|
| 258 |
-
"outputs": [],
|
| 259 |
-
"source": [
|
| 260 |
-
"# Deepseek thinking process\n",
|
| 261 |
-
"\n",
|
| 262 |
-
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 263 |
-
"model_name = \"deepseek-chat\"\n",
|
| 264 |
-
"\n",
|
| 265 |
-
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 266 |
-
"answer = response.choices[0].message.content\n",
|
| 267 |
-
"\n",
|
| 268 |
-
"display(Markdown(answer))\n",
|
| 269 |
-
"models.append(model_name)\n",
|
| 270 |
-
"answers.append(answer)"
|
| 271 |
-
]
|
| 272 |
-
},
|
| 273 |
-
{
|
| 274 |
-
"cell_type": "code",
|
| 275 |
-
"execution_count": null,
|
| 276 |
-
"metadata": {},
|
| 277 |
-
"outputs": [],
|
| 278 |
-
"source": [
|
| 279 |
-
"# Groq thinking process\n",
|
| 280 |
-
"\n",
|
| 281 |
-
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 282 |
-
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 283 |
-
"\n",
|
| 284 |
-
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 285 |
-
"answer = response.choices[0].message.content\n",
|
| 286 |
-
"\n",
|
| 287 |
-
"display(Markdown(answer))\n",
|
| 288 |
-
"models.append(model_name)\n",
|
| 289 |
-
"answers.append(answer)"
|
| 290 |
-
]
|
| 291 |
-
},
|
| 292 |
-
{
|
| 293 |
-
"cell_type": "code",
|
| 294 |
-
"execution_count": null,
|
| 295 |
-
"metadata": {},
|
| 296 |
-
"outputs": [],
|
| 297 |
-
"source": [
|
| 298 |
-
"!ollama pull llama3.2"
|
| 299 |
-
]
|
| 300 |
-
},
|
| 301 |
-
{
|
| 302 |
-
"cell_type": "code",
|
| 303 |
-
"execution_count": null,
|
| 304 |
-
"metadata": {},
|
| 305 |
-
"outputs": [],
|
| 306 |
-
"source": [
|
| 307 |
-
"# Ollama thinking process\n",
|
| 308 |
-
"\n",
|
| 309 |
-
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 310 |
-
"model_name = \"llama3.2\"\n",
|
| 311 |
-
"\n",
|
| 312 |
-
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 313 |
-
"answer = response.choices[0].message.content\n",
|
| 314 |
-
"\n",
|
| 315 |
-
"display(Markdown(answer))\n",
|
| 316 |
-
"models.append(model_name)\n",
|
| 317 |
-
"answers.append(answer)"
|
| 318 |
-
]
|
| 319 |
-
},
|
| 320 |
-
{
|
| 321 |
-
"cell_type": "code",
|
| 322 |
-
"execution_count": null,
|
| 323 |
-
"metadata": {},
|
| 324 |
-
"outputs": [],
|
| 325 |
-
"source": [
|
| 326 |
-
"for model, answer in zip(models, answers):\n",
|
| 327 |
-
" print(f\"Model: {model}\\n\\n{answer}\")"
|
| 328 |
-
]
|
| 329 |
-
},
|
| 330 |
-
{
|
| 331 |
-
"cell_type": "markdown",
|
| 332 |
-
"metadata": {},
|
| 333 |
-
"source": [
|
| 334 |
-
"## Next Step: Solution Synthesis and Enhancement\n",
|
| 335 |
-
"\n",
|
| 336 |
-
"**Best Recommendation Selection and Extended Solution Development**\n",
|
| 337 |
-
"\n",
|
| 338 |
-
"After applying the Six Thinking Hats analysis to evaluate the initial technological solution from multiple perspectives, the simulator will:\n",
|
| 339 |
-
"\n",
|
| 340 |
-
"1. **Synthesize Analysis Results**: Compile insights from all six thinking perspectives (White, Red, Black, Yellow, Green, and Blue hats) to identify the most compelling recommendations and improvements.\n",
|
| 341 |
-
"\n",
|
| 342 |
-
"2. **Select Optimal Recommendation**: Using a weighted evaluation system that considers feasibility, impact, and alignment with organizational goals, the simulator will identify and present the single best recommendation that emerged from the Six Thinking Hats analysis.\n",
|
| 343 |
-
"\n",
|
| 344 |
-
"3. **Generate Extended Solution**: Building upon the selected best recommendation, the simulator will create a comprehensive, enhanced version of the original technological solution that incorporates:\n",
|
| 345 |
-
" - Key insights from the critical analysis (Black Hat)\n",
|
| 346 |
-
" - Positive opportunities identified (Yellow Hat)\n",
|
| 347 |
-
" - Creative alternatives and innovations (Green Hat)\n",
|
| 348 |
-
" - Factual considerations and data requirements (White Hat)\n",
|
| 349 |
-
" - User experience and emotional factors (Red Hat)\n",
|
| 350 |
-
"\n",
|
| 351 |
-
"4. **Multi-Model Enhancement**: To further strengthen the solution, the simulator will leverage additional AI models or perspectives to provide supplementary recommendations that complement the Six Thinking Hats analysis, offering a more robust and well-rounded final technological solution.\n",
|
| 352 |
-
"\n",
|
| 353 |
-
"This step transforms the analytical insights into actionable improvements, delivering a refined solution that has been thoroughly evaluated and enhanced through structured critical thinking."
|
| 354 |
-
]
|
| 355 |
-
},
|
| 356 |
-
{
|
| 357 |
-
"cell_type": "code",
|
| 358 |
-
"execution_count": 14,
|
| 359 |
-
"metadata": {},
|
| 360 |
-
"outputs": [],
|
| 361 |
-
"source": [
|
| 362 |
-
"together = \"\"\n",
|
| 363 |
-
"for index, answer in enumerate(answers):\n",
|
| 364 |
-
" together += f\"# Response from model {index+1}\\n\\n\"\n",
|
| 365 |
-
" together += answer + \"\\n\\n\""
|
| 366 |
-
]
|
| 367 |
-
},
|
| 368 |
-
{
|
| 369 |
-
"cell_type": "code",
|
| 370 |
-
"execution_count": null,
|
| 371 |
-
"metadata": {},
|
| 372 |
-
"outputs": [],
|
| 373 |
-
"source": [
|
| 374 |
-
"from IPython.display import Markdown, display\n",
|
| 375 |
-
"import re\n",
|
| 376 |
-
"\n",
|
| 377 |
-
"print(f\"Each model has been given this technological solution to analyze: {question}\")\n",
|
| 378 |
-
"\n",
|
| 379 |
-
"# First, get the best individual response\n",
|
| 380 |
-
"judge_prompt = f\"\"\"\n",
|
| 381 |
-
" You are judging the quality of {len(models)} responses.\n",
|
| 382 |
-
" Evaluate each response based on:\n",
|
| 383 |
-
" 1. Clarity and coherence\n",
|
| 384 |
-
" 2. Depth of analysis\n",
|
| 385 |
-
" 3. Practicality of recommendations\n",
|
| 386 |
-
" 4. Originality of insights\n",
|
| 387 |
-
" \n",
|
| 388 |
-
" Rank the responses from best to worst.\n",
|
| 389 |
-
" Respond with the model index of the best response, nothing else.\n",
|
| 390 |
-
" \n",
|
| 391 |
-
" Here are the responses:\n",
|
| 392 |
-
" {answers}\n",
|
| 393 |
-
" \"\"\"\n",
|
| 394 |
-
" \n",
|
| 395 |
-
"# Get the best response\n",
|
| 396 |
-
"judge_response = openai.chat.completions.create(\n",
|
| 397 |
-
" model=\"o3-mini\",\n",
|
| 398 |
-
" messages=[{\"role\": \"user\", \"content\": judge_prompt}]\n",
|
| 399 |
-
")\n",
|
| 400 |
-
"best_response = judge_response.choices[0].message.content\n",
|
| 401 |
-
"\n",
|
| 402 |
-
"print(f\"Best Response's Model: {models[int(best_response)]}\")\n",
|
| 403 |
-
"\n",
|
| 404 |
-
"synthesis_prompt = f\"\"\"\n",
|
| 405 |
-
" Here is the best response's model index from the judge:\n",
|
| 406 |
-
"\n",
|
| 407 |
-
" {best_response}\n",
|
| 408 |
-
"\n",
|
| 409 |
-
" And here are the responses from all the models:\n",
|
| 410 |
-
"\n",
|
| 411 |
-
" {together}\n",
|
| 412 |
-
"\n",
|
| 413 |
-
" Synthesize the responses from the non-best models into one comprehensive answer that:\n",
|
| 414 |
-
" 1. Captures the best insights from each response that could add value to the best response from the judge\n",
|
| 415 |
-
" 2. Resolves any contradictions between responses before extending the best response\n",
|
| 416 |
-
" 3. Presents a clear and coherent final answer that is a comprehensive extension of the best response from the judge\n",
|
| 417 |
-
" 4. Maintains the same format as the original best response from the judge\n",
|
| 418 |
-
" 5. Compiles all additional recommendations mentioned by all models\n",
|
| 419 |
-
"\n",
|
| 420 |
-
" Show the best response {answers[int(best_response)]} and then your synthesized response specifying which are additional recommendations to the best response:\n",
|
| 421 |
-
" \"\"\"\n",
|
| 422 |
-
"\n",
|
| 423 |
-
"# Get the synthesized response\n",
|
| 424 |
-
"synthesis_response = claude.messages.create(\n",
|
| 425 |
-
" model=\"claude-3-7-sonnet-latest\",\n",
|
| 426 |
-
" messages=[{\"role\": \"user\", \"content\": synthesis_prompt}],\n",
|
| 427 |
-
" max_tokens=10000\n",
|
| 428 |
-
")\n",
|
| 429 |
-
"synthesized_answer = synthesis_response.content[0].text\n",
|
| 430 |
-
"\n",
|
| 431 |
-
"converted_answer = re.sub(r'\\\\[\\[\\]]', '$$', synthesized_answer)\n",
|
| 432 |
-
"display(Markdown(converted_answer))"
|
| 433 |
-
]
|
| 434 |
-
}
|
| 435 |
-
],
|
| 436 |
-
"metadata": {
|
| 437 |
-
"kernelspec": {
|
| 438 |
-
"display_name": ".venv",
|
| 439 |
-
"language": "python",
|
| 440 |
-
"name": "python3"
|
| 441 |
-
},
|
| 442 |
-
"language_info": {
|
| 443 |
-
"codemirror_mode": {
|
| 444 |
-
"name": "ipython",
|
| 445 |
-
"version": 3
|
| 446 |
-
},
|
| 447 |
-
"file_extension": ".py",
|
| 448 |
-
"mimetype": "text/x-python",
|
| 449 |
-
"name": "python",
|
| 450 |
-
"nbconvert_exporter": "python",
|
| 451 |
-
"pygments_lexer": "ipython3",
|
| 452 |
-
"version": "3.12.10"
|
| 453 |
-
}
|
| 454 |
-
},
|
| 455 |
-
"nbformat": 4,
|
| 456 |
-
"nbformat_minor": 2
|
| 457 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/3_lab3_groq_llama_generator_gemini_evaluator.ipynb
DELETED
|
@@ -1,286 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Chat app with LinkedIn Profile Information - Groq Llama as Generator and Gemini as Evaluator\n"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "code",
|
| 12 |
-
"execution_count": 58,
|
| 13 |
-
"metadata": {},
|
| 14 |
-
"outputs": [],
|
| 15 |
-
"source": [
|
| 16 |
-
"# If you don't know what any of these packages do - you can always ask ChatGPT for a guide!\n",
|
| 17 |
-
"\n",
|
| 18 |
-
"from dotenv import load_dotenv\n",
|
| 19 |
-
"from openai import OpenAI\n",
|
| 20 |
-
"from pypdf import PdfReader\n",
|
| 21 |
-
"from groq import Groq\n",
|
| 22 |
-
"import gradio as gr"
|
| 23 |
-
]
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"cell_type": "code",
|
| 27 |
-
"execution_count": 59,
|
| 28 |
-
"metadata": {},
|
| 29 |
-
"outputs": [],
|
| 30 |
-
"source": [
|
| 31 |
-
"load_dotenv(override=True)\n",
|
| 32 |
-
"groq = Groq()"
|
| 33 |
-
]
|
| 34 |
-
},
|
| 35 |
-
{
|
| 36 |
-
"cell_type": "code",
|
| 37 |
-
"execution_count": 60,
|
| 38 |
-
"metadata": {},
|
| 39 |
-
"outputs": [],
|
| 40 |
-
"source": [
|
| 41 |
-
"reader = PdfReader(\"me/My_LinkedIn.pdf\")\n",
|
| 42 |
-
"linkedin = \"\"\n",
|
| 43 |
-
"for page in reader.pages:\n",
|
| 44 |
-
" text = page.extract_text()\n",
|
| 45 |
-
" if text:\n",
|
| 46 |
-
" linkedin += text"
|
| 47 |
-
]
|
| 48 |
-
},
|
| 49 |
-
{
|
| 50 |
-
"cell_type": "code",
|
| 51 |
-
"execution_count": null,
|
| 52 |
-
"metadata": {},
|
| 53 |
-
"outputs": [],
|
| 54 |
-
"source": [
|
| 55 |
-
"print(linkedin)"
|
| 56 |
-
]
|
| 57 |
-
},
|
| 58 |
-
{
|
| 59 |
-
"cell_type": "code",
|
| 60 |
-
"execution_count": 61,
|
| 61 |
-
"metadata": {},
|
| 62 |
-
"outputs": [],
|
| 63 |
-
"source": [
|
| 64 |
-
"with open(\"me/summary.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
| 65 |
-
" summary = f.read()"
|
| 66 |
-
]
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"cell_type": "code",
|
| 70 |
-
"execution_count": 62,
|
| 71 |
-
"metadata": {},
|
| 72 |
-
"outputs": [],
|
| 73 |
-
"source": [
|
| 74 |
-
"name = \"Maalaiappan Subramanian\""
|
| 75 |
-
]
|
| 76 |
-
},
|
| 77 |
-
{
|
| 78 |
-
"cell_type": "code",
|
| 79 |
-
"execution_count": 63,
|
| 80 |
-
"metadata": {},
|
| 81 |
-
"outputs": [],
|
| 82 |
-
"source": [
|
| 83 |
-
"system_prompt = f\"You are acting as {name}. You are answering questions on {name}'s website, \\\n",
|
| 84 |
-
"particularly questions related to {name}'s career, background, skills and experience. \\\n",
|
| 85 |
-
"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \\\n",
|
| 86 |
-
"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. \\\n",
|
| 87 |
-
"Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 88 |
-
"If you don't know the answer, say so.\"\n",
|
| 89 |
-
"\n",
|
| 90 |
-
"system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 91 |
-
"system_prompt += f\"With this context, please chat with the user, always staying in character as {name}.\"\n"
|
| 92 |
-
]
|
| 93 |
-
},
|
| 94 |
-
{
|
| 95 |
-
"cell_type": "code",
|
| 96 |
-
"execution_count": null,
|
| 97 |
-
"metadata": {},
|
| 98 |
-
"outputs": [],
|
| 99 |
-
"source": [
|
| 100 |
-
"system_prompt"
|
| 101 |
-
]
|
| 102 |
-
},
|
| 103 |
-
{
|
| 104 |
-
"cell_type": "code",
|
| 105 |
-
"execution_count": 65,
|
| 106 |
-
"metadata": {},
|
| 107 |
-
"outputs": [],
|
| 108 |
-
"source": [
|
| 109 |
-
"def chat(message, history):\n",
|
| 110 |
-
" # Below line is to remove the metadata and options from the history\n",
|
| 111 |
-
" history = [{k: v for k, v in item.items() if k not in ('metadata', 'options')} for item in history]\n",
|
| 112 |
-
" messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 113 |
-
" response = groq.chat.completions.create(model=\"llama-3.3-70b-versatile\", messages=messages)\n",
|
| 114 |
-
" return response.choices[0].message.content"
|
| 115 |
-
]
|
| 116 |
-
},
|
| 117 |
-
{
|
| 118 |
-
"cell_type": "code",
|
| 119 |
-
"execution_count": null,
|
| 120 |
-
"metadata": {},
|
| 121 |
-
"outputs": [],
|
| 122 |
-
"source": [
|
| 123 |
-
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 124 |
-
]
|
| 125 |
-
},
|
| 126 |
-
{
|
| 127 |
-
"cell_type": "code",
|
| 128 |
-
"execution_count": 67,
|
| 129 |
-
"metadata": {},
|
| 130 |
-
"outputs": [],
|
| 131 |
-
"source": [
|
| 132 |
-
"# Create a Pydantic model for the Evaluation\n",
|
| 133 |
-
"\n",
|
| 134 |
-
"from pydantic import BaseModel\n",
|
| 135 |
-
"\n",
|
| 136 |
-
"class Evaluation(BaseModel):\n",
|
| 137 |
-
" is_acceptable: bool\n",
|
| 138 |
-
" feedback: str\n"
|
| 139 |
-
]
|
| 140 |
-
},
|
| 141 |
-
{
|
| 142 |
-
"cell_type": "code",
|
| 143 |
-
"execution_count": 69,
|
| 144 |
-
"metadata": {},
|
| 145 |
-
"outputs": [],
|
| 146 |
-
"source": [
|
| 147 |
-
"evaluator_system_prompt = f\"You are an evaluator that decides whether a response to a question is acceptable. \\\n",
|
| 148 |
-
"You are provided with a conversation between a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. \\\n",
|
| 149 |
-
"The Agent is playing the role of {name} and is representing {name} on their website. \\\n",
|
| 150 |
-
"The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 151 |
-
"The Agent has been provided with context on {name} in the form of their summary and LinkedIn details. Here's the information:\"\n",
|
| 152 |
-
"\n",
|
| 153 |
-
"evaluator_system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 154 |
-
"evaluator_system_prompt += f\"With this context, please evaluate the latest response, replying with whether the response is acceptable and your feedback.\""
|
| 155 |
-
]
|
| 156 |
-
},
|
| 157 |
-
{
|
| 158 |
-
"cell_type": "code",
|
| 159 |
-
"execution_count": 70,
|
| 160 |
-
"metadata": {},
|
| 161 |
-
"outputs": [],
|
| 162 |
-
"source": [
|
| 163 |
-
"def evaluator_user_prompt(reply, message, history):\n",
|
| 164 |
-
" user_prompt = f\"Here's the conversation between the User and the Agent: \\n\\n{history}\\n\\n\"\n",
|
| 165 |
-
" user_prompt += f\"Here's the latest message from the User: \\n\\n{message}\\n\\n\"\n",
|
| 166 |
-
" user_prompt += f\"Here's the latest response from the Agent: \\n\\n{reply}\\n\\n\"\n",
|
| 167 |
-
" user_prompt += f\"Please evaluate the response, replying with whether it is acceptable and your feedback.\"\n",
|
| 168 |
-
" return user_prompt"
|
| 169 |
-
]
|
| 170 |
-
},
|
| 171 |
-
{
|
| 172 |
-
"cell_type": "code",
|
| 173 |
-
"execution_count": 71,
|
| 174 |
-
"metadata": {},
|
| 175 |
-
"outputs": [],
|
| 176 |
-
"source": [
|
| 177 |
-
"import os\n",
|
| 178 |
-
"gemini = OpenAI(\n",
|
| 179 |
-
" api_key=os.getenv(\"GOOGLE_API_KEY\"), \n",
|
| 180 |
-
" base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
|
| 181 |
-
")"
|
| 182 |
-
]
|
| 183 |
-
},
|
| 184 |
-
{
|
| 185 |
-
"cell_type": "code",
|
| 186 |
-
"execution_count": 72,
|
| 187 |
-
"metadata": {},
|
| 188 |
-
"outputs": [],
|
| 189 |
-
"source": [
|
| 190 |
-
"def evaluate(reply, message, history) -> Evaluation:\n",
|
| 191 |
-
"\n",
|
| 192 |
-
" messages = [{\"role\": \"system\", \"content\": evaluator_system_prompt}] + [{\"role\": \"user\", \"content\": evaluator_user_prompt(reply, message, history)}]\n",
|
| 193 |
-
" response = gemini.beta.chat.completions.parse(model=\"gemini-2.0-flash\", messages=messages, response_format=Evaluation)\n",
|
| 194 |
-
" return response.choices[0].message.parsed"
|
| 195 |
-
]
|
| 196 |
-
},
|
| 197 |
-
{
|
| 198 |
-
"cell_type": "code",
|
| 199 |
-
"execution_count": 73,
|
| 200 |
-
"metadata": {},
|
| 201 |
-
"outputs": [],
|
| 202 |
-
"source": [
|
| 203 |
-
"def rerun(reply, message, history, feedback):\n",
|
| 204 |
-
" # Below line is to remove the metadata and options from the history\n",
|
| 205 |
-
" history = [{k: v for k, v in item.items() if k not in ('metadata', 'options')} for item in history]\n",
|
| 206 |
-
" updated_system_prompt = system_prompt + f\"\\n\\n## Previous answer rejected\\nYou just tried to reply, but the quality control rejected your reply\\n\"\n",
|
| 207 |
-
" updated_system_prompt += f\"## Your attempted answer:\\n{reply}\\n\\n\"\n",
|
| 208 |
-
" updated_system_prompt += f\"## Reason for rejection:\\n{feedback}\\n\\n\"\n",
|
| 209 |
-
" messages = [{\"role\": \"system\", \"content\": updated_system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 210 |
-
" response = groq.chat.completions.create(model=\"llama-3.3-70b-versatile\", messages=messages)\n",
|
| 211 |
-
" return response.choices[0].message.content"
|
| 212 |
-
]
|
| 213 |
-
},
|
| 214 |
-
{
|
| 215 |
-
"cell_type": "code",
|
| 216 |
-
"execution_count": 74,
|
| 217 |
-
"metadata": {},
|
| 218 |
-
"outputs": [],
|
| 219 |
-
"source": [
|
| 220 |
-
"def chat(message, history):\n",
|
| 221 |
-
" if \"personal\" in message:\n",
|
| 222 |
-
" system = system_prompt + \"\\n\\nEverything in your reply needs to be in Gen Z language - \\\n",
|
| 223 |
-
" it is mandatory that you respond only and entirely in Gen Z language\"\n",
|
| 224 |
-
" else:\n",
|
| 225 |
-
" system = system_prompt\n",
|
| 226 |
-
" # Below line is to remove the metadata and options from the history\n",
|
| 227 |
-
" history = [{k: v for k, v in item.items() if k not in ('metadata', 'options')} for item in history]\n",
|
| 228 |
-
" messages = [{\"role\": \"system\", \"content\": system}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 229 |
-
" response = groq.chat.completions.create(model=\"llama-3.3-70b-versatile\", messages=messages)\n",
|
| 230 |
-
" reply =response.choices[0].message.content\n",
|
| 231 |
-
"\n",
|
| 232 |
-
" evaluation = evaluate(reply, message, history)\n",
|
| 233 |
-
" \n",
|
| 234 |
-
" if evaluation.is_acceptable:\n",
|
| 235 |
-
" print(\"Passed evaluation - returning reply\")\n",
|
| 236 |
-
" else:\n",
|
| 237 |
-
" print(\"Failed evaluation - retrying\")\n",
|
| 238 |
-
" print(evaluation.feedback)\n",
|
| 239 |
-
" reply = rerun(reply, message, history, evaluation.feedback) \n",
|
| 240 |
-
" return reply"
|
| 241 |
-
]
|
| 242 |
-
},
|
| 243 |
-
{
|
| 244 |
-
"cell_type": "code",
|
| 245 |
-
"execution_count": null,
|
| 246 |
-
"metadata": {},
|
| 247 |
-
"outputs": [],
|
| 248 |
-
"source": [
|
| 249 |
-
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 250 |
-
]
|
| 251 |
-
},
|
| 252 |
-
{
|
| 253 |
-
"cell_type": "markdown",
|
| 254 |
-
"metadata": {},
|
| 255 |
-
"source": []
|
| 256 |
-
},
|
| 257 |
-
{
|
| 258 |
-
"cell_type": "code",
|
| 259 |
-
"execution_count": null,
|
| 260 |
-
"metadata": {},
|
| 261 |
-
"outputs": [],
|
| 262 |
-
"source": []
|
| 263 |
-
}
|
| 264 |
-
],
|
| 265 |
-
"metadata": {
|
| 266 |
-
"kernelspec": {
|
| 267 |
-
"display_name": ".venv",
|
| 268 |
-
"language": "python",
|
| 269 |
-
"name": "python3"
|
| 270 |
-
},
|
| 271 |
-
"language_info": {
|
| 272 |
-
"codemirror_mode": {
|
| 273 |
-
"name": "ipython",
|
| 274 |
-
"version": 3
|
| 275 |
-
},
|
| 276 |
-
"file_extension": ".py",
|
| 277 |
-
"mimetype": "text/x-python",
|
| 278 |
-
"name": "python",
|
| 279 |
-
"nbconvert_exporter": "python",
|
| 280 |
-
"pygments_lexer": "ipython3",
|
| 281 |
-
"version": "3.12.10"
|
| 282 |
-
}
|
| 283 |
-
},
|
| 284 |
-
"nbformat": 4,
|
| 285 |
-
"nbformat_minor": 2
|
| 286 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/4_lab4_slack.ipynb
DELETED
|
@@ -1,469 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## The first big project - Professionally You!\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"### And, Tool use.\n",
|
| 10 |
-
"\n",
|
| 11 |
-
"### But first: introducing Slack\n",
|
| 12 |
-
"\n",
|
| 13 |
-
"Slack is a nifty tool for sending Push Notifications to your phone.\n",
|
| 14 |
-
"\n",
|
| 15 |
-
"It's super easy to set up and install!\n",
|
| 16 |
-
"\n",
|
| 17 |
-
"Simply visit https://api.slack.com and sign up for a free account, and create your new workspace and app.\n",
|
| 18 |
-
"\n",
|
| 19 |
-
"1. Create a Slack App:\n",
|
| 20 |
-
"- Go to the [Slack API portal](https://api.slack.com/apps) and click Create New App.\n",
|
| 21 |
-
"- Choose From scratch, provide an App Name (e.g., \"CustomerNotifier\"), and select the Slack workspace where you want to install the app.\n",
|
| 22 |
-
"- Click Create App.\n",
|
| 23 |
-
"\n",
|
| 24 |
-
"2. Add Required Permissions (Scopes):\n",
|
| 25 |
-
"- Navigate to OAuth & Permissions in the left sidebar of your app’s management page.\n",
|
| 26 |
-
"- Under Bot Token Scopes, add the chat:write scope to allow your app to post messages. If you need to send direct messages (DMs) to users, also add im:write and users:read to fetch user IDs.\n",
|
| 27 |
-
"- If you plan to post to specific channels, ensure the app has permissions like channels:write or groups:write for public or private channels, respectively.\n",
|
| 28 |
-
"\n",
|
| 29 |
-
"3. Install the App to Your Workspace:\n",
|
| 30 |
-
"- In the OAuth & Permissions section, click Install to Workspace.\n",
|
| 31 |
-
"- Authorize the app, selecting the channel where it will post messages (if using incoming webhooks) or granting the necessary permissions.\n",
|
| 32 |
-
"- After installation, you’ll receive a Bot User OAuth Token (starts with xoxb-). Copy this token, as it will be used for API authentication. Keep it secure and avoid hardcoding it in your source code.\n",
|
| 33 |
-
"\n",
|
| 34 |
-
"(This is so you could choose to organize your push notifications into different apps in the future.)\n",
|
| 35 |
-
"\n",
|
| 36 |
-
"4. Create a new private channel in the Slack app\n",
|
| 37 |
-
"- Opt to use Private Access\n",
|
| 38 |
-
"- After creating the private channel, type \"@<your bot name from step 1>\" in it so that Slack's default bot offers to invite your bot into the channel\n",
|
| 39 |
-
"- Go to \"About\" of your private chat. Copy the channel Id at the bottom\n",
|
| 40 |
-
"\n",
|
| 41 |
-
"5. Install slack_sdk==3.35.0 into your env\n",
|
| 42 |
-
"```\n",
|
| 43 |
-
"uv pip install slack_sdk==3.35.0\n",
|
| 44 |
-
"```\n",
|
| 45 |
-
"\n",
|
| 46 |
-
"Add to your `.env` file:\n",
|
| 47 |
-
"```\n",
|
| 48 |
-
"SLACK_AGENT_CHANNEL_ID=put_your_channel_id_here\n",
|
| 49 |
-
"SLACK_BOT_AGENT_OAUTH_TOKEN=put_your_bot_user_oauth_token_here\n",
|
| 50 |
-
"```\n",
|
| 51 |
-
"\n",
|
| 52 |
-
"And install the Slack app on your phone."
|
| 53 |
-
]
|
| 54 |
-
},
|
| 55 |
-
{
|
| 56 |
-
"cell_type": "code",
|
| 57 |
-
"execution_count": 2,
|
| 58 |
-
"metadata": {},
|
| 59 |
-
"outputs": [],
|
| 60 |
-
"source": [
|
| 61 |
-
"# imports\n",
|
| 62 |
-
"\n",
|
| 63 |
-
"from dotenv import load_dotenv\n",
|
| 64 |
-
"from openai import OpenAI\n",
|
| 65 |
-
"import json\n",
|
| 66 |
-
"import os\n",
|
| 67 |
-
"import requests\n",
|
| 68 |
-
"from pypdf import PdfReader\n",
|
| 69 |
-
"import gradio as gr\n",
|
| 70 |
-
"from slack_sdk import WebClient\n",
|
| 71 |
-
"from slack_sdk.errors import SlackApiError"
|
| 72 |
-
]
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"cell_type": "code",
|
| 76 |
-
"execution_count": 3,
|
| 77 |
-
"metadata": {},
|
| 78 |
-
"outputs": [],
|
| 79 |
-
"source": [
|
| 80 |
-
"# The usual start\n",
|
| 81 |
-
"\n",
|
| 82 |
-
"load_dotenv(override=True)\n",
|
| 83 |
-
"openai = OpenAI()"
|
| 84 |
-
]
|
| 85 |
-
},
|
| 86 |
-
{
|
| 87 |
-
"cell_type": "code",
|
| 88 |
-
"execution_count": 11,
|
| 89 |
-
"metadata": {},
|
| 90 |
-
"outputs": [],
|
| 91 |
-
"source": [
|
| 92 |
-
"# For slack\n",
|
| 93 |
-
"\n",
|
| 94 |
-
"slack_channel_id:str = str(os.getenv(\"SLACK_AGENT_CHANNEL_ID\"))\n",
|
| 95 |
-
"slack_oauth_token = os.getenv(\"SLACK_BOT_AGENT_OAUTH_TOKEN\")\n",
|
| 96 |
-
"slack_client = WebClient(token=slack_oauth_token)\n"
|
| 97 |
-
]
|
| 98 |
-
},
|
| 99 |
-
{
|
| 100 |
-
"cell_type": "code",
|
| 101 |
-
"execution_count": 12,
|
| 102 |
-
"metadata": {},
|
| 103 |
-
"outputs": [],
|
| 104 |
-
"source": [
|
| 105 |
-
"def push(message):\n",
|
| 106 |
-
" print(f\"Push: {message}\")\n",
|
| 107 |
-
" response = slack_client.chat_postMessage(\n",
|
| 108 |
-
" channel=slack_channel_id,\n",
|
| 109 |
-
" text=message\n",
|
| 110 |
-
" )"
|
| 111 |
-
]
|
| 112 |
-
},
|
| 113 |
-
{
|
| 114 |
-
"cell_type": "code",
|
| 115 |
-
"execution_count": null,
|
| 116 |
-
"metadata": {},
|
| 117 |
-
"outputs": [],
|
| 118 |
-
"source": [
|
| 119 |
-
"push(\"HEY!!\")"
|
| 120 |
-
]
|
| 121 |
-
},
|
| 122 |
-
{
|
| 123 |
-
"cell_type": "code",
|
| 124 |
-
"execution_count": 14,
|
| 125 |
-
"metadata": {},
|
| 126 |
-
"outputs": [],
|
| 127 |
-
"source": [
|
| 128 |
-
"def record_user_details(email, name=\"Name not provided\", notes=\"not provided\"):\n",
|
| 129 |
-
" push(f\"Recording interest from {name} with email {email} and notes {notes}\")\n",
|
| 130 |
-
" return {\"recorded\": \"ok\"}"
|
| 131 |
-
]
|
| 132 |
-
},
|
| 133 |
-
{
|
| 134 |
-
"cell_type": "code",
|
| 135 |
-
"execution_count": 15,
|
| 136 |
-
"metadata": {},
|
| 137 |
-
"outputs": [],
|
| 138 |
-
"source": [
|
| 139 |
-
"def record_unknown_question(question):\n",
|
| 140 |
-
" push(f\"Recording {question} asked that I couldn't answer\")\n",
|
| 141 |
-
" return {\"recorded\": \"ok\"}"
|
| 142 |
-
]
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"cell_type": "code",
|
| 146 |
-
"execution_count": 16,
|
| 147 |
-
"metadata": {},
|
| 148 |
-
"outputs": [],
|
| 149 |
-
"source": [
|
| 150 |
-
"record_user_details_json = {\n",
|
| 151 |
-
" \"name\": \"record_user_details\",\n",
|
| 152 |
-
" \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
|
| 153 |
-
" \"parameters\": {\n",
|
| 154 |
-
" \"type\": \"object\",\n",
|
| 155 |
-
" \"properties\": {\n",
|
| 156 |
-
" \"email\": {\n",
|
| 157 |
-
" \"type\": \"string\",\n",
|
| 158 |
-
" \"description\": \"The email address of this user\"\n",
|
| 159 |
-
" },\n",
|
| 160 |
-
" \"name\": {\n",
|
| 161 |
-
" \"type\": \"string\",\n",
|
| 162 |
-
" \"description\": \"The user's name, if they provided it\"\n",
|
| 163 |
-
" }\n",
|
| 164 |
-
" ,\n",
|
| 165 |
-
" \"notes\": {\n",
|
| 166 |
-
" \"type\": \"string\",\n",
|
| 167 |
-
" \"description\": \"Any additional information about the conversation that's worth recording to give context\"\n",
|
| 168 |
-
" }\n",
|
| 169 |
-
" },\n",
|
| 170 |
-
" \"required\": [\"email\"],\n",
|
| 171 |
-
" \"additionalProperties\": False\n",
|
| 172 |
-
" }\n",
|
| 173 |
-
"}"
|
| 174 |
-
]
|
| 175 |
-
},
|
| 176 |
-
{
|
| 177 |
-
"cell_type": "code",
|
| 178 |
-
"execution_count": 17,
|
| 179 |
-
"metadata": {},
|
| 180 |
-
"outputs": [],
|
| 181 |
-
"source": [
|
| 182 |
-
"record_unknown_question_json = {\n",
|
| 183 |
-
" \"name\": \"record_unknown_question\",\n",
|
| 184 |
-
" \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
|
| 185 |
-
" \"parameters\": {\n",
|
| 186 |
-
" \"type\": \"object\",\n",
|
| 187 |
-
" \"properties\": {\n",
|
| 188 |
-
" \"question\": {\n",
|
| 189 |
-
" \"type\": \"string\",\n",
|
| 190 |
-
" \"description\": \"The question that couldn't be answered\"\n",
|
| 191 |
-
" },\n",
|
| 192 |
-
" },\n",
|
| 193 |
-
" \"required\": [\"question\"],\n",
|
| 194 |
-
" \"additionalProperties\": False\n",
|
| 195 |
-
" }\n",
|
| 196 |
-
"}"
|
| 197 |
-
]
|
| 198 |
-
},
|
| 199 |
-
{
|
| 200 |
-
"cell_type": "code",
|
| 201 |
-
"execution_count": 18,
|
| 202 |
-
"metadata": {},
|
| 203 |
-
"outputs": [],
|
| 204 |
-
"source": [
|
| 205 |
-
"tools = [{\"type\": \"function\", \"function\": record_user_details_json},\n",
|
| 206 |
-
" {\"type\": \"function\", \"function\": record_unknown_question_json}]"
|
| 207 |
-
]
|
| 208 |
-
},
|
| 209 |
-
{
|
| 210 |
-
"cell_type": "code",
|
| 211 |
-
"execution_count": null,
|
| 212 |
-
"metadata": {},
|
| 213 |
-
"outputs": [],
|
| 214 |
-
"source": [
|
| 215 |
-
"tools"
|
| 216 |
-
]
|
| 217 |
-
},
|
| 218 |
-
{
|
| 219 |
-
"cell_type": "code",
|
| 220 |
-
"execution_count": 20,
|
| 221 |
-
"metadata": {},
|
| 222 |
-
"outputs": [],
|
| 223 |
-
"source": [
|
| 224 |
-
"# This function can take a list of tool calls, and run them. This is the IF statement!!\n",
|
| 225 |
-
"\n",
|
| 226 |
-
"def handle_tool_calls(tool_calls):\n",
|
| 227 |
-
" results = []\n",
|
| 228 |
-
" for tool_call in tool_calls:\n",
|
| 229 |
-
" tool_name = tool_call.function.name\n",
|
| 230 |
-
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 231 |
-
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 232 |
-
"\n",
|
| 233 |
-
" # THE BIG IF STATEMENT!!!\n",
|
| 234 |
-
"\n",
|
| 235 |
-
" if tool_name == \"record_user_details\":\n",
|
| 236 |
-
" result = record_user_details(**arguments)\n",
|
| 237 |
-
" elif tool_name == \"record_unknown_question\":\n",
|
| 238 |
-
" result = record_unknown_question(**arguments)\n",
|
| 239 |
-
"\n",
|
| 240 |
-
" results.append({\"role\": \"tool\",\"content\": json.dumps(result),\"tool_call_id\": tool_call.id})\n",
|
| 241 |
-
" return results"
|
| 242 |
-
]
|
| 243 |
-
},
|
| 244 |
-
{
|
| 245 |
-
"cell_type": "code",
|
| 246 |
-
"execution_count": null,
|
| 247 |
-
"metadata": {},
|
| 248 |
-
"outputs": [],
|
| 249 |
-
"source": [
|
| 250 |
-
"globals()[\"record_unknown_question\"](\"this is a really hard question\")"
|
| 251 |
-
]
|
| 252 |
-
},
|
| 253 |
-
{
|
| 254 |
-
"cell_type": "code",
|
| 255 |
-
"execution_count": 22,
|
| 256 |
-
"metadata": {},
|
| 257 |
-
"outputs": [],
|
| 258 |
-
"source": [
|
| 259 |
-
"# This is a more elegant way that avoids the IF statement.\n",
|
| 260 |
-
"\n",
|
| 261 |
-
"def handle_tool_calls(tool_calls):\n",
|
| 262 |
-
" results = []\n",
|
| 263 |
-
" for tool_call in tool_calls:\n",
|
| 264 |
-
" tool_name = tool_call.function.name\n",
|
| 265 |
-
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 266 |
-
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 267 |
-
" tool = globals().get(tool_name)\n",
|
| 268 |
-
" result = tool(**arguments) if tool else {}\n",
|
| 269 |
-
" results.append({\"role\": \"tool\",\"content\": json.dumps(result),\"tool_call_id\": tool_call.id})\n",
|
| 270 |
-
" return results"
|
| 271 |
-
]
|
| 272 |
-
},
|
| 273 |
-
{
|
| 274 |
-
"cell_type": "code",
|
| 275 |
-
"execution_count": 23,
|
| 276 |
-
"metadata": {},
|
| 277 |
-
"outputs": [],
|
| 278 |
-
"source": [
|
| 279 |
-
"reader = PdfReader(\"me/linkedin.pdf\")\n",
|
| 280 |
-
"linkedin = \"\"\n",
|
| 281 |
-
"for page in reader.pages:\n",
|
| 282 |
-
" text = page.extract_text()\n",
|
| 283 |
-
" if text:\n",
|
| 284 |
-
" linkedin += text\n",
|
| 285 |
-
"\n",
|
| 286 |
-
"with open(\"me/summary.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
| 287 |
-
" summary = f.read()\n",
|
| 288 |
-
"\n",
|
| 289 |
-
"name = \"Ed Donner\""
|
| 290 |
-
]
|
| 291 |
-
},
|
| 292 |
-
{
|
| 293 |
-
"cell_type": "code",
|
| 294 |
-
"execution_count": 24,
|
| 295 |
-
"metadata": {},
|
| 296 |
-
"outputs": [],
|
| 297 |
-
"source": [
|
| 298 |
-
"system_prompt = f\"You are acting as {name}. You are answering questions on {name}'s website, \\\n",
|
| 299 |
-
"particularly questions related to {name}'s career, background, skills and experience. \\\n",
|
| 300 |
-
"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \\\n",
|
| 301 |
-
"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. \\\n",
|
| 302 |
-
"Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 303 |
-
"If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \\\n",
|
| 304 |
-
"If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. \"\n",
|
| 305 |
-
"\n",
|
| 306 |
-
"system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 307 |
-
"system_prompt += f\"With this context, please chat with the user, always staying in character as {name}.\"\n"
|
| 308 |
-
]
|
| 309 |
-
},
|
| 310 |
-
{
|
| 311 |
-
"cell_type": "code",
|
| 312 |
-
"execution_count": 25,
|
| 313 |
-
"metadata": {},
|
| 314 |
-
"outputs": [],
|
| 315 |
-
"source": [
|
| 316 |
-
"def chat(message, history):\n",
|
| 317 |
-
" messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 318 |
-
" done = False\n",
|
| 319 |
-
" while not done:\n",
|
| 320 |
-
"\n",
|
| 321 |
-
" # This is the call to the LLM - see that we pass in the tools json\n",
|
| 322 |
-
"\n",
|
| 323 |
-
" response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages, tools=tools)\n",
|
| 324 |
-
"\n",
|
| 325 |
-
" finish_reason = response.choices[0].finish_reason\n",
|
| 326 |
-
" \n",
|
| 327 |
-
" # If the LLM wants to call a tool, we do that!\n",
|
| 328 |
-
" \n",
|
| 329 |
-
" if finish_reason==\"tool_calls\":\n",
|
| 330 |
-
" message = response.choices[0].message\n",
|
| 331 |
-
" tool_calls = message.tool_calls\n",
|
| 332 |
-
" results = handle_tool_calls(tool_calls)\n",
|
| 333 |
-
" messages.append(message)\n",
|
| 334 |
-
" messages.extend(results)\n",
|
| 335 |
-
" else:\n",
|
| 336 |
-
" done = True\n",
|
| 337 |
-
" return response.choices[0].message.content"
|
| 338 |
-
]
|
| 339 |
-
},
|
| 340 |
-
{
|
| 341 |
-
"cell_type": "code",
|
| 342 |
-
"execution_count": null,
|
| 343 |
-
"metadata": {},
|
| 344 |
-
"outputs": [],
|
| 345 |
-
"source": [
|
| 346 |
-
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 347 |
-
]
|
| 348 |
-
},
|
| 349 |
-
{
|
| 350 |
-
"cell_type": "markdown",
|
| 351 |
-
"metadata": {},
|
| 352 |
-
"source": [
|
| 353 |
-
"## And now for deployment\n",
|
| 354 |
-
"\n",
|
| 355 |
-
"This code is in `app.py`\n",
|
| 356 |
-
"\n",
|
| 357 |
-
"We will deploy to HuggingFace Spaces. Thank you student Robert M for improving these instructions.\n",
|
| 358 |
-
"\n",
|
| 359 |
-
"Before you start: remember to update the files in the \"me\" directory - your LinkedIn profile and summary.txt - so that it talks about you! \n",
|
| 360 |
-
"Also check that there's no README file within the 1_foundations directory. If there is one, please delete it. The deploy process creates a new README file in this directory for you.\n",
|
| 361 |
-
"\n",
|
| 362 |
-
"1. Visit https://huggingface.co and set up an account \n",
|
| 363 |
-
"2. From the Avatar menu on the top right, choose Access Tokens. Choose \"Create New Token\". Give it WRITE permissions.\n",
|
| 364 |
-
"3. Take this token and add it to your .env file: `HF_TOKEN=hf_xxx` and see note below if this token doesn't seem to get picked up during deployment \n",
|
| 365 |
-
"4. From the 1_foundations folder, enter: `uv run gradio deploy` and if for some reason this still wants you to enter your HF token, then interrupt it with ctrl+c and run this instead: `uv run dotenv -f ../.env run -- uv run gradio deploy` which forces your keys to all be set as environment variables \n",
|
| 366 |
-
"5. Follow its instructions: name it \"career_conversation\", specify app.py, choose cpu-basic as the hardware, say Yes to needing to supply secrets, provide your openai api key, your pushover user and token, and say \"no\" to github actions. \n",
|
| 367 |
-
"\n",
|
| 368 |
-
"#### Extra note about the HuggingFace token\n",
|
| 369 |
-
"\n",
|
| 370 |
-
"A couple of students have mentioned that HuggingFace doesn't detect their token, even though it's in the .env file. Here are things to try: \n",
|
| 371 |
-
"1. Restart Cursor \n",
|
| 372 |
-
"2. Rerun load_dotenv(override=True) and use a new terminal (the + button on the top right of the Terminal) \n",
|
| 373 |
-
"3. In the Terminal, run this before the gradio deploy: `$env:HF_TOKEN = \"hf_XXXX\"` \n",
|
| 374 |
-
"Thank you James and Martins for these tips. \n",
|
| 375 |
-
"\n",
|
| 376 |
-
"#### More about these secrets:\n",
|
| 377 |
-
"\n",
|
| 378 |
-
"If you're confused by what's going on with these secrets: it just wants you to enter the key name and value for each of your secrets -- so you would enter: \n",
|
| 379 |
-
"`OPENAI_API_KEY` \n",
|
| 380 |
-
"Followed by: \n",
|
| 381 |
-
"`sk-proj-...` \n",
|
| 382 |
-
"\n",
|
| 383 |
-
"And if you don't want to set secrets this way, or something goes wrong with it, it's no problem - you can change your secrets later: \n",
|
| 384 |
-
"1. Log in to HuggingFace website \n",
|
| 385 |
-
"2. Go to your profile screen via the Avatar menu on the top right \n",
|
| 386 |
-
"3. Select the Space you deployed \n",
|
| 387 |
-
"4. Click on the Settings wheel on the top right \n",
|
| 388 |
-
"5. You can scroll down to change your secrets, delete the space, etc.\n",
|
| 389 |
-
"\n",
|
| 390 |
-
"#### And now you should be deployed!\n",
|
| 391 |
-
"\n",
|
| 392 |
-
"Here is mine: https://huggingface.co/spaces/ed-donner/Career_Conversation\n",
|
| 393 |
-
"\n",
|
| 394 |
-
"I just got a push notification that a student asked me how they can become President of their country 😂😂\n",
|
| 395 |
-
"\n",
|
| 396 |
-
"For more information on deployment:\n",
|
| 397 |
-
"\n",
|
| 398 |
-
"https://www.gradio.app/guides/sharing-your-app#hosting-on-hf-spaces\n",
|
| 399 |
-
"\n",
|
| 400 |
-
"To delete your Space in the future: \n",
|
| 401 |
-
"1. Log in to HuggingFace\n",
|
| 402 |
-
"2. From the Avatar menu, select your profile\n",
|
| 403 |
-
"3. Click on the Space itself and select the settings wheel on the top right\n",
|
| 404 |
-
"4. Scroll to the Delete section at the bottom\n",
|
| 405 |
-
"5. ALSO: delete the README file that Gradio may have created inside this 1_foundations folder (otherwise it won't ask you the questions the next time you do a gradio deploy)\n"
|
| 406 |
-
]
|
| 407 |
-
},
|
| 408 |
-
{
|
| 409 |
-
"cell_type": "markdown",
|
| 410 |
-
"metadata": {},
|
| 411 |
-
"source": [
|
| 412 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 413 |
-
" <tr>\n",
|
| 414 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 415 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 416 |
-
" </td>\n",
|
| 417 |
-
" <td>\n",
|
| 418 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 419 |
-
" <span style=\"color:#ff7800;\">• First and foremost, deploy this for yourself! It's a real, valuable tool - the future resume.<br/>\n",
|
| 420 |
-
" • Next, improve the resources - add better context about yourself. If you know RAG, then add a knowledge base about you.<br/>\n",
|
| 421 |
-
" • Add in more tools! You could have a SQL database with common Q&A that the LLM could read and write from?<br/>\n",
|
| 422 |
-
" • Bring in the Evaluator from the last lab, and add other Agentic patterns.\n",
|
| 423 |
-
" </span>\n",
|
| 424 |
-
" </td>\n",
|
| 425 |
-
" </tr>\n",
|
| 426 |
-
"</table>"
|
| 427 |
-
]
|
| 428 |
-
},
|
| 429 |
-
{
|
| 430 |
-
"cell_type": "markdown",
|
| 431 |
-
"metadata": {},
|
| 432 |
-
"source": [
|
| 433 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 434 |
-
" <tr>\n",
|
| 435 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 436 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 437 |
-
" </td>\n",
|
| 438 |
-
" <td>\n",
|
| 439 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 440 |
-
" <span style=\"color:#00bfff;\">Aside from the obvious (your career alter-ego) this has business applications in any situation where you need an AI assistant with domain expertise and an ability to interact with the real world.\n",
|
| 441 |
-
" </span>\n",
|
| 442 |
-
" </td>\n",
|
| 443 |
-
" </tr>\n",
|
| 444 |
-
"</table>"
|
| 445 |
-
]
|
| 446 |
-
}
|
| 447 |
-
],
|
| 448 |
-
"metadata": {
|
| 449 |
-
"kernelspec": {
|
| 450 |
-
"display_name": ".venv",
|
| 451 |
-
"language": "python",
|
| 452 |
-
"name": "python3"
|
| 453 |
-
},
|
| 454 |
-
"language_info": {
|
| 455 |
-
"codemirror_mode": {
|
| 456 |
-
"name": "ipython",
|
| 457 |
-
"version": 3
|
| 458 |
-
},
|
| 459 |
-
"file_extension": ".py",
|
| 460 |
-
"mimetype": "text/x-python",
|
| 461 |
-
"name": "python",
|
| 462 |
-
"nbconvert_exporter": "python",
|
| 463 |
-
"pygments_lexer": "ipython3",
|
| 464 |
-
"version": "3.12.11"
|
| 465 |
-
}
|
| 466 |
-
},
|
| 467 |
-
"nbformat": 4,
|
| 468 |
-
"nbformat_minor": 2
|
| 469 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/4_lab4_spotify.ipynb
DELETED
|
@@ -1,829 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Adding a Spotify Tool - Musically You!\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"This version of the notebook introduces a Spotify tool that can query your listening history from Spotify to extend the domain of questions the chatbot can answer to include your musical tastes.\n",
|
| 10 |
-
"\n",
|
| 11 |
-
"Unfortunately, it's a bit of a PITA to get access and refresh tokens for Spotify. The process requires connecting to an authentication endpoint while logged in to Spotify and then processing a callback. To make this easier, instructions along with a small app that can be deployed to HuggingFace Spaces using Gradio are included at the end of this notebook. "
|
| 12 |
-
]
|
| 13 |
-
},
|
| 14 |
-
{
|
| 15 |
-
"cell_type": "markdown",
|
| 16 |
-
"metadata": {},
|
| 17 |
-
"source": [
|
| 18 |
-
"## The first big project - Professionally You!\n",
|
| 19 |
-
"\n",
|
| 20 |
-
"### And, Tool use.\n",
|
| 21 |
-
"\n",
|
| 22 |
-
"### But first: introducing Pushover\n",
|
| 23 |
-
"\n",
|
| 24 |
-
"Pushover is a nifty tool for sending Push Notifications to your phone.\n",
|
| 25 |
-
"\n",
|
| 26 |
-
"It's super easy to set up and install!\n",
|
| 27 |
-
"\n",
|
| 28 |
-
"Simply visit https://pushover.net/ and click 'Login or Signup' on the top right to sign up for a free account, and create your API keys.\n",
|
| 29 |
-
"\n",
|
| 30 |
-
"Once you've signed up, on the home screen, click \"Create an Application/API Token\", and give it any name (like Agents) and click Create Application.\n",
|
| 31 |
-
"\n",
|
| 32 |
-
"Then add 2 lines to your `.env` file:\n",
|
| 33 |
-
"\n",
|
| 34 |
-
"PUSHOVER_USER=_put the key that's on the top right of your Pushover home screen and probably starts with a u_ \n",
|
| 35 |
-
"PUSHOVER_TOKEN=_put the key when you click into your new application called Agents (or whatever) and probably starts with an a_\n",
|
| 36 |
-
"\n",
|
| 37 |
-
"Remember to save your `.env` file, and run `load_dotenv(override=True)` after saving, to set your environment variables.\n",
|
| 38 |
-
"\n",
|
| 39 |
-
"Finally, click \"Add Phone, Tablet or Desktop\" to install on your phone."
|
| 40 |
-
]
|
| 41 |
-
},
|
| 42 |
-
{
|
| 43 |
-
"cell_type": "code",
|
| 44 |
-
"execution_count": 23,
|
| 45 |
-
"metadata": {},
|
| 46 |
-
"outputs": [],
|
| 47 |
-
"source": [
|
| 48 |
-
"# imports\n",
|
| 49 |
-
"\n",
|
| 50 |
-
"from dotenv import load_dotenv\n",
|
| 51 |
-
"from openai import OpenAI\n",
|
| 52 |
-
"import json\n",
|
| 53 |
-
"import os\n",
|
| 54 |
-
"import requests\n",
|
| 55 |
-
"from pypdf import PdfReader\n",
|
| 56 |
-
"import gradio as gr"
|
| 57 |
-
]
|
| 58 |
-
},
|
| 59 |
-
{
|
| 60 |
-
"cell_type": "code",
|
| 61 |
-
"execution_count": 24,
|
| 62 |
-
"metadata": {},
|
| 63 |
-
"outputs": [],
|
| 64 |
-
"source": [
|
| 65 |
-
"# The usual start\n",
|
| 66 |
-
"\n",
|
| 67 |
-
"load_dotenv(override=True)\n",
|
| 68 |
-
"openai = OpenAI()"
|
| 69 |
-
]
|
| 70 |
-
},
|
| 71 |
-
{
|
| 72 |
-
"cell_type": "code",
|
| 73 |
-
"execution_count": null,
|
| 74 |
-
"metadata": {},
|
| 75 |
-
"outputs": [],
|
| 76 |
-
"source": [
|
| 77 |
-
"# For pushover\n",
|
| 78 |
-
"\n",
|
| 79 |
-
"pushover_user = os.getenv(\"PUSHOVER_USER\")\n",
|
| 80 |
-
"pushover_token = os.getenv(\"PUSHOVER_TOKEN\")\n",
|
| 81 |
-
"pushover_url = \"https://api.pushover.net/1/messages.json\"\n",
|
| 82 |
-
"\n",
|
| 83 |
-
"if pushover_user:\n",
|
| 84 |
-
" print(f\"Pushover user found and starts with {pushover_user[0]}\")\n",
|
| 85 |
-
"else:\n",
|
| 86 |
-
" print(\"Pushover user not found\")\n",
|
| 87 |
-
"\n",
|
| 88 |
-
"if pushover_token:\n",
|
| 89 |
-
" print(f\"Pushover token found and starts with {pushover_token[0]}\")\n",
|
| 90 |
-
"else:\n",
|
| 91 |
-
" print(\"Pushover token not found\")"
|
| 92 |
-
]
|
| 93 |
-
},
|
| 94 |
-
{
|
| 95 |
-
"cell_type": "code",
|
| 96 |
-
"execution_count": 26,
|
| 97 |
-
"metadata": {},
|
| 98 |
-
"outputs": [],
|
| 99 |
-
"source": [
|
| 100 |
-
"def push(message):\n",
|
| 101 |
-
" print(f\"Push: {message}\")\n",
|
| 102 |
-
" payload = {\"user\": pushover_user, \"token\": pushover_token, \"message\": message}\n",
|
| 103 |
-
" requests.post(pushover_url, data=payload)"
|
| 104 |
-
]
|
| 105 |
-
},
|
| 106 |
-
{
|
| 107 |
-
"cell_type": "code",
|
| 108 |
-
"execution_count": 27,
|
| 109 |
-
"metadata": {},
|
| 110 |
-
"outputs": [],
|
| 111 |
-
"source": [
|
| 112 |
-
"def record_user_details(email, name=\"Name not provided\", notes=\"not provided\"):\n",
|
| 113 |
-
" push(f\"Recording interest from {name} with email {email} and notes {notes}\")\n",
|
| 114 |
-
" return {\"recorded\": \"ok\"}"
|
| 115 |
-
]
|
| 116 |
-
},
|
| 117 |
-
{
|
| 118 |
-
"cell_type": "code",
|
| 119 |
-
"execution_count": 28,
|
| 120 |
-
"metadata": {},
|
| 121 |
-
"outputs": [],
|
| 122 |
-
"source": [
|
| 123 |
-
"def record_unknown_question(question):\n",
|
| 124 |
-
" push(f\"Recording {question} asked that I couldn't answer\")\n",
|
| 125 |
-
" return {\"recorded\": \"ok\"}"
|
| 126 |
-
]
|
| 127 |
-
},
|
| 128 |
-
{
|
| 129 |
-
"cell_type": "code",
|
| 130 |
-
"execution_count": null,
|
| 131 |
-
"metadata": {},
|
| 132 |
-
"outputs": [],
|
| 133 |
-
"source": [
|
| 134 |
-
"# For Spotify access token and refresh token\n",
|
| 135 |
-
"import base64\n",
|
| 136 |
-
"import time\n",
|
| 137 |
-
"import hashlib\n",
|
| 138 |
-
"import secrets\n",
|
| 139 |
-
"import urllib.parse\n",
|
| 140 |
-
"\n",
|
| 141 |
-
"spotify_client_id = os.getenv(\"SPOTIFY_CLIENT_ID\")\n",
|
| 142 |
-
"spotify_client_secret = os.getenv(\"SPOTIFY_CLIENT_SECRET\")\n",
|
| 143 |
-
"\n",
|
| 144 |
-
"if spotify_client_id:\n",
|
| 145 |
-
" print(f\"Spotify client ID found and starts with {spotify_client_id[:4]}\")\n",
|
| 146 |
-
"else:\n",
|
| 147 |
-
" print(\"Spotify client ID not found\")\n",
|
| 148 |
-
"\n",
|
| 149 |
-
"if spotify_client_secret:\n",
|
| 150 |
-
" print(f\"Spotify client secret found and starts with {spotify_client_secret[:4]}\")\n",
|
| 151 |
-
"else:\n",
|
| 152 |
-
" print(\"Spotify client secret not found\")\n",
|
| 153 |
-
"\n",
|
| 154 |
-
"spotify_access_token = os.getenv(\"SPOTIFY_ACCESS_TOKEN\")\n",
|
| 155 |
-
"spotify_refresh_token = os.getenv(\"SPOTIFY_REFRESH_TOKEN\")\n",
|
| 156 |
-
"\n",
|
| 157 |
-
"if spotify_access_token and spotify_refresh_token:\n",
|
| 158 |
-
" # Set expiry to past to force refresh on first use\n",
|
| 159 |
-
" spotify_token_expiry = time.time() - 60\n",
|
| 160 |
-
" print(\"Spotify tokens loaded from environment!\")\n",
|
| 161 |
-
" print(f\"Access token preview: {spotify_access_token[:20]}...\")\n",
|
| 162 |
-
" print(f\"Refresh token preview: {spotify_refresh_token[:20]}...\")\n",
|
| 163 |
-
"else:\n",
|
| 164 |
-
" print(\"No Spotify tokens found in environment. Run spotify_flask_auth.py to get them.\")"
|
| 165 |
-
]
|
| 166 |
-
},
|
| 167 |
-
{
|
| 168 |
-
"cell_type": "code",
|
| 169 |
-
"execution_count": 30,
|
| 170 |
-
"metadata": {},
|
| 171 |
-
"outputs": [],
|
| 172 |
-
"source": [
|
| 173 |
-
"def get_spotify_access_token():\n",
|
| 174 |
-
" global spotify_access_token, spotify_refresh_token, spotify_token_expiry\n",
|
| 175 |
-
" \n",
|
| 176 |
-
" # Check if we have a valid cached token\n",
|
| 177 |
-
" if spotify_access_token and time.time() < spotify_token_expiry:\n",
|
| 178 |
-
" return spotify_access_token\n",
|
| 179 |
-
" \n",
|
| 180 |
-
"\n",
|
| 181 |
-
" auth_url = \"https://accounts.spotify.com/api/token\"\n",
|
| 182 |
-
" \n",
|
| 183 |
-
" credentials = f\"{spotify_client_id}:{spotify_client_secret}\"\n",
|
| 184 |
-
" encoded_credentials = base64.b64encode(credentials.encode()).decode()\n",
|
| 185 |
-
" \n",
|
| 186 |
-
" headers = {\n",
|
| 187 |
-
" \"Authorization\": f\"Basic {encoded_credentials}\",\n",
|
| 188 |
-
" \"Content-Type\": \"application/x-www-form-urlencoded\"\n",
|
| 189 |
-
" }\n",
|
| 190 |
-
" \n",
|
| 191 |
-
" data = {\n",
|
| 192 |
-
" \"grant_type\": \"refresh_token\",\n",
|
| 193 |
-
" \"refresh_token\": spotify_refresh_token\n",
|
| 194 |
-
" }\n",
|
| 195 |
-
" \n",
|
| 196 |
-
" response = requests.post(auth_url, headers=headers, data=data)\n",
|
| 197 |
-
" \n",
|
| 198 |
-
" if response.status_code == 200:\n",
|
| 199 |
-
" token_data = response.json()\n",
|
| 200 |
-
" spotify_access_token = token_data[\"access_token\"]\n",
|
| 201 |
-
" # Update refresh token if a new one is provided\n",
|
| 202 |
-
" if \"refresh_token\" in token_data:\n",
|
| 203 |
-
" spotify_refresh_token = token_data[\"refresh_token\"]\n",
|
| 204 |
-
" # Set expiry time with a buffer\n",
|
| 205 |
-
" spotify_token_expiry = time.time() + token_data[\"expires_in\"] - 300\n",
|
| 206 |
-
" return spotify_access_token\n",
|
| 207 |
-
" else:\n",
|
| 208 |
-
" print(f\"Failed to refresh Spotify access token: {response.status_code}\")\n",
|
| 209 |
-
" print(f\"Response: {response.text}\")\n",
|
| 210 |
-
" return None\n"
|
| 211 |
-
]
|
| 212 |
-
},
|
| 213 |
-
{
|
| 214 |
-
"cell_type": "code",
|
| 215 |
-
"execution_count": 31,
|
| 216 |
-
"metadata": {},
|
| 217 |
-
"outputs": [],
|
| 218 |
-
"source": [
|
| 219 |
-
"def get_user_top_items(item_type=\"artists\", time_range=\"medium_term\", limit=10):\n",
|
| 220 |
-
" \"\"\"\n",
|
| 221 |
-
" Get the user's top artists or tracks from Spotify.\n",
|
| 222 |
-
" \n",
|
| 223 |
-
" Args:\n",
|
| 224 |
-
" item_type: 'artists' or 'tracks'\n",
|
| 225 |
-
" time_range: 'short_term' (4 weeks), 'medium_term' (6 months), 'long_term' (several years)\n",
|
| 226 |
-
" limit: Number of items to return (1-50)\n",
|
| 227 |
-
" \n",
|
| 228 |
-
" Returns:\n",
|
| 229 |
-
" Dictionary with top items data\n",
|
| 230 |
-
" \"\"\"\n",
|
| 231 |
-
" token = get_spotify_access_token()\n",
|
| 232 |
-
" if not token:\n",
|
| 233 |
-
" return {\"error\": \"Failed to get Spotify access token\"}\n",
|
| 234 |
-
" \n",
|
| 235 |
-
" # Make API request\n",
|
| 236 |
-
" url = f\"https://api.spotify.com/v1/me/top/{item_type}\"\n",
|
| 237 |
-
" headers = {\n",
|
| 238 |
-
" \"Authorization\": f\"Bearer {token}\"\n",
|
| 239 |
-
" }\n",
|
| 240 |
-
" params = {\n",
|
| 241 |
-
" \"time_range\": time_range,\n",
|
| 242 |
-
" \"limit\": limit\n",
|
| 243 |
-
" }\n",
|
| 244 |
-
" \n",
|
| 245 |
-
" response = requests.get(url, headers=headers, params=params)\n",
|
| 246 |
-
" \n",
|
| 247 |
-
" if response.status_code == 200:\n",
|
| 248 |
-
" data = response.json()\n",
|
| 249 |
-
" \n",
|
| 250 |
-
" formatted_items = []\n",
|
| 251 |
-
" for idx, item in enumerate(data.get(\"items\", []), 1):\n",
|
| 252 |
-
" if item_type == \"artists\":\n",
|
| 253 |
-
" formatted_items.append({\n",
|
| 254 |
-
" \"rank\": idx,\n",
|
| 255 |
-
" \"name\": item[\"name\"],\n",
|
| 256 |
-
" \"genres\": item.get(\"genres\", []),\n",
|
| 257 |
-
" \"popularity\": item.get(\"popularity\", 0),\n",
|
| 258 |
-
" \"spotify_url\": item[\"external_urls\"][\"spotify\"]\n",
|
| 259 |
-
" })\n",
|
| 260 |
-
" else: # tracks\n",
|
| 261 |
-
" formatted_items.append({\n",
|
| 262 |
-
" \"rank\": idx,\n",
|
| 263 |
-
" \"name\": item[\"name\"],\n",
|
| 264 |
-
" \"artist\": item[\"artists\"][0][\"name\"] if item.get(\"artists\") else \"Unknown\",\n",
|
| 265 |
-
" \"album\": item[\"album\"][\"name\"] if item.get(\"album\") else \"Unknown\",\n",
|
| 266 |
-
" \"popularity\": item.get(\"popularity\", 0),\n",
|
| 267 |
-
" \"spotify_url\": item[\"external_urls\"][\"spotify\"]\n",
|
| 268 |
-
" })\n",
|
| 269 |
-
" \n",
|
| 270 |
-
" return {\n",
|
| 271 |
-
" \"item_type\": item_type,\n",
|
| 272 |
-
" \"time_range\": time_range,\n",
|
| 273 |
-
" \"count\": len(formatted_items),\n",
|
| 274 |
-
" \"items\": formatted_items\n",
|
| 275 |
-
" }\n",
|
| 276 |
-
" else:\n",
|
| 277 |
-
" return {\"error\": f\"Failed to get top items: {response.status_code} - {response.text}\"}"
|
| 278 |
-
]
|
| 279 |
-
},
|
| 280 |
-
{
|
| 281 |
-
"cell_type": "code",
|
| 282 |
-
"execution_count": null,
|
| 283 |
-
"metadata": {},
|
| 284 |
-
"outputs": [],
|
| 285 |
-
"source": [
|
| 286 |
-
"# lets test the tool\n",
|
| 287 |
-
"get_user_top_items(item_type=\"artists\", time_range=\"medium_term\", limit=3)"
|
| 288 |
-
]
|
| 289 |
-
},
|
| 290 |
-
{
|
| 291 |
-
"cell_type": "code",
|
| 292 |
-
"execution_count": 33,
|
| 293 |
-
"metadata": {},
|
| 294 |
-
"outputs": [],
|
| 295 |
-
"source": [
|
| 296 |
-
"record_user_details_json = {\n",
|
| 297 |
-
" \"name\": \"record_user_details\",\n",
|
| 298 |
-
" \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
|
| 299 |
-
" \"parameters\": {\n",
|
| 300 |
-
" \"type\": \"object\",\n",
|
| 301 |
-
" \"properties\": {\n",
|
| 302 |
-
" \"email\": {\n",
|
| 303 |
-
" \"type\": \"string\",\n",
|
| 304 |
-
" \"description\": \"The email address of this user\"\n",
|
| 305 |
-
" },\n",
|
| 306 |
-
" \"name\": {\n",
|
| 307 |
-
" \"type\": \"string\",\n",
|
| 308 |
-
" \"description\": \"The user's name, if they provided it\"\n",
|
| 309 |
-
" }\n",
|
| 310 |
-
" ,\n",
|
| 311 |
-
" \"notes\": {\n",
|
| 312 |
-
" \"type\": \"string\",\n",
|
| 313 |
-
" \"description\": \"Any additional information about the conversation that's worth recording to give context\"\n",
|
| 314 |
-
" }\n",
|
| 315 |
-
" },\n",
|
| 316 |
-
" \"required\": [\"email\"],\n",
|
| 317 |
-
" \"additionalProperties\": False\n",
|
| 318 |
-
" }\n",
|
| 319 |
-
"}"
|
| 320 |
-
]
|
| 321 |
-
},
|
| 322 |
-
{
|
| 323 |
-
"cell_type": "code",
|
| 324 |
-
"execution_count": 34,
|
| 325 |
-
"metadata": {},
|
| 326 |
-
"outputs": [],
|
| 327 |
-
"source": [
|
| 328 |
-
"get_user_top_items_json = {\n",
|
| 329 |
-
" \"name\": \"get_user_top_items\",\n",
|
| 330 |
-
" \"description\": \"Get the user's top artists or tracks from Spotify based on their listening history\",\n",
|
| 331 |
-
" \"parameters\": {\n",
|
| 332 |
-
" \"type\": \"object\",\n",
|
| 333 |
-
" \"properties\": {\n",
|
| 334 |
-
" \"item_type\": {\n",
|
| 335 |
-
" \"type\": \"string\",\n",
|
| 336 |
-
" \"description\": \"Type of items to retrieve: 'artists' or 'tracks'\",\n",
|
| 337 |
-
" \"enum\": [\"artists\", \"tracks\"]\n",
|
| 338 |
-
" },\n",
|
| 339 |
-
" \"time_range\": {\n",
|
| 340 |
-
" \"type\": \"string\", \n",
|
| 341 |
-
" \"description\": \"Time range for the data: 'short_term' (4 weeks), 'medium_term' (6 months), or 'long_term' (several years)\",\n",
|
| 342 |
-
" \"enum\": [\"short_term\", \"medium_term\", \"long_term\"]\n",
|
| 343 |
-
" },\n",
|
| 344 |
-
" \"limit\": {\n",
|
| 345 |
-
" \"type\": \"integer\",\n",
|
| 346 |
-
" \"description\": \"Number of items to return (1-50)\",\n",
|
| 347 |
-
" \"minimum\": 1,\n",
|
| 348 |
-
" \"maximum\": 50\n",
|
| 349 |
-
" }\n",
|
| 350 |
-
" },\n",
|
| 351 |
-
" \"required\": [\"item_type\", \"time_range\", \"limit\"],\n",
|
| 352 |
-
" \"additionalProperties\": False\n",
|
| 353 |
-
" }\n",
|
| 354 |
-
"}"
|
| 355 |
-
]
|
| 356 |
-
},
|
| 357 |
-
{
|
| 358 |
-
"cell_type": "code",
|
| 359 |
-
"execution_count": 35,
|
| 360 |
-
"metadata": {},
|
| 361 |
-
"outputs": [],
|
| 362 |
-
"source": [
|
| 363 |
-
"record_unknown_question_json = {\n",
|
| 364 |
-
" \"name\": \"record_unknown_question\",\n",
|
| 365 |
-
" \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
|
| 366 |
-
" \"parameters\": {\n",
|
| 367 |
-
" \"type\": \"object\",\n",
|
| 368 |
-
" \"properties\": {\n",
|
| 369 |
-
" \"question\": {\n",
|
| 370 |
-
" \"type\": \"string\",\n",
|
| 371 |
-
" \"description\": \"The question that couldn't be answered\"\n",
|
| 372 |
-
" },\n",
|
| 373 |
-
" },\n",
|
| 374 |
-
" \"required\": [\"question\"],\n",
|
| 375 |
-
" \"additionalProperties\": False\n",
|
| 376 |
-
" }\n",
|
| 377 |
-
"}"
|
| 378 |
-
]
|
| 379 |
-
},
|
| 380 |
-
{
|
| 381 |
-
"cell_type": "code",
|
| 382 |
-
"execution_count": 36,
|
| 383 |
-
"metadata": {},
|
| 384 |
-
"outputs": [],
|
| 385 |
-
"source": [
|
| 386 |
-
"tools = [{\"type\": \"function\", \"function\": record_user_details_json},\n",
|
| 387 |
-
" {\"type\": \"function\", \"function\": record_unknown_question_json},\n",
|
| 388 |
-
" {\"type\": \"function\", \"function\": get_user_top_items_json}]"
|
| 389 |
-
]
|
| 390 |
-
},
|
| 391 |
-
{
|
| 392 |
-
"cell_type": "code",
|
| 393 |
-
"execution_count": null,
|
| 394 |
-
"metadata": {},
|
| 395 |
-
"outputs": [],
|
| 396 |
-
"source": [
|
| 397 |
-
"tools"
|
| 398 |
-
]
|
| 399 |
-
},
|
| 400 |
-
{
|
| 401 |
-
"cell_type": "code",
|
| 402 |
-
"execution_count": 38,
|
| 403 |
-
"metadata": {},
|
| 404 |
-
"outputs": [],
|
| 405 |
-
"source": [
|
| 406 |
-
"# This function can take a list of tool calls, and run them. This is the IF statement!!\n",
|
| 407 |
-
"\n",
|
| 408 |
-
"def handle_tool_calls(tool_calls):\n",
|
| 409 |
-
" results = []\n",
|
| 410 |
-
" for tool_call in tool_calls:\n",
|
| 411 |
-
" tool_name = tool_call.function.name\n",
|
| 412 |
-
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 413 |
-
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 414 |
-
"\n",
|
| 415 |
-
" # THE BIG IF STATEMENT!!!\n",
|
| 416 |
-
"\n",
|
| 417 |
-
" if tool_name == \"record_user_details\":\n",
|
| 418 |
-
" result = record_user_details(**arguments)\n",
|
| 419 |
-
" elif tool_name == \"record_unknown_question\":\n",
|
| 420 |
-
" result = record_unknown_question(**arguments)\n",
|
| 421 |
-
" elif tool_name == \"get_user_top_items\":\n",
|
| 422 |
-
" result = get_user_top_items(**arguments)\n",
|
| 423 |
-
"\n",
|
| 424 |
-
" results.append({\"role\": \"tool\",\"content\": json.dumps(result),\"tool_call_id\": tool_call.id})\n",
|
| 425 |
-
" return results"
|
| 426 |
-
]
|
| 427 |
-
},
|
| 428 |
-
{
|
| 429 |
-
"cell_type": "code",
|
| 430 |
-
"execution_count": null,
|
| 431 |
-
"metadata": {},
|
| 432 |
-
"outputs": [],
|
| 433 |
-
"source": [
|
| 434 |
-
"globals()[\"record_unknown_question\"](\"this is a really hard question\")"
|
| 435 |
-
]
|
| 436 |
-
},
|
| 437 |
-
{
|
| 438 |
-
"cell_type": "code",
|
| 439 |
-
"execution_count": 40,
|
| 440 |
-
"metadata": {},
|
| 441 |
-
"outputs": [],
|
| 442 |
-
"source": [
|
| 443 |
-
"# This is a more elegant way that avoids the IF statement.\n",
|
| 444 |
-
"\n",
|
| 445 |
-
"def handle_tool_calls(tool_calls):\n",
|
| 446 |
-
" results = []\n",
|
| 447 |
-
" for tool_call in tool_calls:\n",
|
| 448 |
-
" tool_name = tool_call.function.name\n",
|
| 449 |
-
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 450 |
-
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 451 |
-
" tool = globals().get(tool_name)\n",
|
| 452 |
-
" result = tool(**arguments) if tool else {}\n",
|
| 453 |
-
" results.append({\"role\": \"tool\",\"content\": json.dumps(result),\"tool_call_id\": tool_call.id})\n",
|
| 454 |
-
" return results"
|
| 455 |
-
]
|
| 456 |
-
},
|
| 457 |
-
{
|
| 458 |
-
"cell_type": "code",
|
| 459 |
-
"execution_count": 41,
|
| 460 |
-
"metadata": {},
|
| 461 |
-
"outputs": [],
|
| 462 |
-
"source": [
|
| 463 |
-
"reader = PdfReader(\"me/linkedin.pdf\")\n",
|
| 464 |
-
"linkedin = \"\"\n",
|
| 465 |
-
"for page in reader.pages:\n",
|
| 466 |
-
" text = page.extract_text()\n",
|
| 467 |
-
" if text:\n",
|
| 468 |
-
" linkedin += text\n",
|
| 469 |
-
"\n",
|
| 470 |
-
"with open(\"me/summary.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
| 471 |
-
" summary = f.read()\n",
|
| 472 |
-
"\n",
|
| 473 |
-
"name = \"Ed Donner\""
|
| 474 |
-
]
|
| 475 |
-
},
|
| 476 |
-
{
|
| 477 |
-
"cell_type": "code",
|
| 478 |
-
"execution_count": 42,
|
| 479 |
-
"metadata": {},
|
| 480 |
-
"outputs": [],
|
| 481 |
-
"source": [
|
| 482 |
-
"# We've added a \"If they ask you about your tastes in music you can use your get_user_top_items tool...\" \n",
|
| 483 |
-
"\n",
|
| 484 |
-
"system_prompt = f\"You are acting as {name}. You are answering questions on {name}'s website, \\\n",
|
| 485 |
-
"particularly questions related to {name}'s career, background, skills and experience. \\\n",
|
| 486 |
-
"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \\\n",
|
| 487 |
-
"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. \\\n",
|
| 488 |
-
"Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 489 |
-
"If they ask you about your tastes in music you can use your get_user_top_items tool to get information about your top artists and tracks. \\\n",
|
| 490 |
-
"If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \\\n",
|
| 491 |
-
"If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. \"\n",
|
| 492 |
-
"\n",
|
| 493 |
-
"system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 494 |
-
"system_prompt += f\"With this context, please chat with the user, always staying in character as {name}.\"\n"
|
| 495 |
-
]
|
| 496 |
-
},
|
| 497 |
-
{
|
| 498 |
-
"cell_type": "code",
|
| 499 |
-
"execution_count": 43,
|
| 500 |
-
"metadata": {},
|
| 501 |
-
"outputs": [],
|
| 502 |
-
"source": [
|
| 503 |
-
"def chat(message, history):\n",
|
| 504 |
-
" messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
|
| 505 |
-
" done = False\n",
|
| 506 |
-
" while not done:\n",
|
| 507 |
-
"\n",
|
| 508 |
-
" # This is the call to the LLM - see that we pass in the tools json\n",
|
| 509 |
-
"\n",
|
| 510 |
-
" response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages, tools=tools)\n",
|
| 511 |
-
"\n",
|
| 512 |
-
" finish_reason = response.choices[0].finish_reason\n",
|
| 513 |
-
" \n",
|
| 514 |
-
" # If the LLM wants to call a tool, we do that!\n",
|
| 515 |
-
" \n",
|
| 516 |
-
" if finish_reason==\"tool_calls\":\n",
|
| 517 |
-
" message = response.choices[0].message\n",
|
| 518 |
-
" tool_calls = message.tool_calls\n",
|
| 519 |
-
" results = handle_tool_calls(tool_calls)\n",
|
| 520 |
-
" messages.append(message)\n",
|
| 521 |
-
" messages.extend(results)\n",
|
| 522 |
-
" else:\n",
|
| 523 |
-
" done = True\n",
|
| 524 |
-
" return response.choices[0].message.content"
|
| 525 |
-
]
|
| 526 |
-
},
|
| 527 |
-
{
|
| 528 |
-
"cell_type": "code",
|
| 529 |
-
"execution_count": null,
|
| 530 |
-
"metadata": {},
|
| 531 |
-
"outputs": [],
|
| 532 |
-
"source": [
|
| 533 |
-
"gr.ChatInterface(chat, type=\"messages\").launch()"
|
| 534 |
-
]
|
| 535 |
-
},
|
| 536 |
-
{
|
| 537 |
-
"cell_type": "markdown",
|
| 538 |
-
"metadata": {},
|
| 539 |
-
"source": [
|
| 540 |
-
"## And now for deployment\n",
|
| 541 |
-
"\n",
|
| 542 |
-
"This code is in `app.py`\n",
|
| 543 |
-
"\n",
|
| 544 |
-
"We will deploy to HuggingFace Spaces.\n",
|
| 545 |
-
"\n",
|
| 546 |
-
"Before you start: remember to update the files in the \"me\" directory - your LinkedIn profile and summary.txt - so that it talks about you! Also change `self.name = \"Ed Donner\"` in `app.py`. \n",
|
| 547 |
-
"\n",
|
| 548 |
-
"Also check that there's no README file within the 1_foundations directory. If there is one, please delete it. The deploy process creates a new README file in this directory for you.\n",
|
| 549 |
-
"\n",
|
| 550 |
-
"1. Visit https://huggingface.co and set up an account \n",
|
| 551 |
-
"2. From the Avatar menu on the top right, choose Access Tokens. Choose \"Create New Token\". Give it WRITE permissions - it needs to have WRITE permissions! Keep a record of your new key. \n",
|
| 552 |
-
"3. In the Terminal, run: `uv tool install 'huggingface_hub[cli]'` to install the HuggingFace tool, then `hf auth login` to login at the command line with your key. Afterwards, run `hf auth whoami` to check you're logged in \n",
|
| 553 |
-
"4. Take your new token and add it to your .env file: `HF_TOKEN=hf_xxx` for the future\n",
|
| 554 |
-
"5. From the 1_foundations folder, enter: `uv run gradio deploy` \n",
|
| 555 |
-
"6. Follow its instructions: name it \"career_conversation\", specify app.py, choose cpu-basic as the hardware, say Yes to needing to supply secrets, provide your openai api key, your pushover user and token, and say \"no\" to github actions. \n",
|
| 556 |
-
"\n",
|
| 557 |
-
"Thank you Robert, James, Martins, Andras and Priya for these tips. \n",
|
| 558 |
-
"Please read the next 2 sections - how to change your Secrets, and how to redeploy your Space (you may need to delete the README.md that gets created in this 1_foundations directory).\n",
|
| 559 |
-
"\n",
|
| 560 |
-
"#### More about these secrets:\n",
|
| 561 |
-
"\n",
|
| 562 |
-
"If you're confused by what's going on with these secrets: it just wants you to enter the key name and value for each of your secrets -- so you would enter: \n",
|
| 563 |
-
"`OPENAI_API_KEY` \n",
|
| 564 |
-
"Followed by: \n",
|
| 565 |
-
"`sk-proj-...` \n",
|
| 566 |
-
"\n",
|
| 567 |
-
"And if you don't want to set secrets this way, or something goes wrong with it, it's no problem - you can change your secrets later: \n",
|
| 568 |
-
"1. Log in to HuggingFace website \n",
|
| 569 |
-
"2. Go to your profile screen via the Avatar menu on the top right \n",
|
| 570 |
-
"3. Select the Space you deployed \n",
|
| 571 |
-
"4. Click on the Settings wheel on the top right \n",
|
| 572 |
-
"5. You can scroll down to change your secrets (Variables and Secrets section), delete the space, etc.\n",
|
| 573 |
-
"\n",
|
| 574 |
-
"#### And now you should be deployed!\n",
|
| 575 |
-
"\n",
|
| 576 |
-
"If you want to completely replace everything and start again with your keys, you may need to delete the README.md that got created in this 1_foundations folder.\n",
|
| 577 |
-
"\n",
|
| 578 |
-
"Here is mine: https://huggingface.co/spaces/ed-donner/Career_Conversation\n",
|
| 579 |
-
"\n",
|
| 580 |
-
"I just got a push notification that a student asked me how they can become President of their country 😂😂\n",
|
| 581 |
-
"\n",
|
| 582 |
-
"For more information on deployment:\n",
|
| 583 |
-
"\n",
|
| 584 |
-
"https://www.gradio.app/guides/sharing-your-app#hosting-on-hf-spaces\n",
|
| 585 |
-
"\n",
|
| 586 |
-
"To delete your Space in the future: \n",
|
| 587 |
-
"1. Log in to HuggingFace\n",
|
| 588 |
-
"2. From the Avatar menu, select your profile\n",
|
| 589 |
-
"3. Click on the Space itself and select the settings wheel on the top right\n",
|
| 590 |
-
"4. Scroll to the Delete section at the bottom\n",
|
| 591 |
-
"5. ALSO: delete the README file that Gradio may have created inside this 1_foundations folder (otherwise it won't ask you the questions the next time you do a gradio deploy)\n"
|
| 592 |
-
]
|
| 593 |
-
},
|
| 594 |
-
{
|
| 595 |
-
"cell_type": "markdown",
|
| 596 |
-
"metadata": {},
|
| 597 |
-
"source": [
|
| 598 |
-
"## Spotify API Setup Instructions\n",
|
| 599 |
-
"\n",
|
| 600 |
-
"To use the Spotify tool in this notebook, you will need to undergo a one-time setup process to obtain access and refresh tokens from Spotify. This involves the following steps:\n",
|
| 601 |
-
"\n",
|
| 602 |
-
"1. **Create a Spotify App**:\n",
|
| 603 |
-
" - Go to https://developer.spotify.com/dashboard\n",
|
| 604 |
-
" - Click \"Create app\"\n",
|
| 605 |
-
" - Fill in the app details\n",
|
| 606 |
-
" - Set Redirect URI to: `https://your-username-your-space-name.hf.space/callback` (replace with your actual HuggingFace Space URL)\n",
|
| 607 |
-
" - Save your Client ID and Client Secret\n",
|
| 608 |
-
"\n",
|
| 609 |
-
"2. **Add to your `.env` file**:\n",
|
| 610 |
-
" ```\n",
|
| 611 |
-
" SPOTIFY_CLIENT_ID=your_client_id_here\n",
|
| 612 |
-
" SPOTIFY_CLIENT_SECRET=your_client_secret_here\n",
|
| 613 |
-
" ```\n",
|
| 614 |
-
"\n",
|
| 615 |
-
"3. **Deploy and authenticate**:\n",
|
| 616 |
-
" - Deploy the authentication app from the **Flask Authentication App for Spotify** cell below to HuggingFace Spaces\n",
|
| 617 |
-
" - Visit your deployed app and click \"Authorize with Spotify\"\n",
|
| 618 |
-
" - After authorizing, copy the tokens displayed\n",
|
| 619 |
-
" - ONCE YOU HAVE OBTAINED YOUR ACCESS AND REFRESH TOKENS, YOU CAN DELETE THIS DEPLOYMENT\n",
|
| 620 |
-
"\n",
|
| 621 |
-
"4. **Add tokens to .env and reload**:\n",
|
| 622 |
-
" ```\n",
|
| 623 |
-
" SPOTIFY_ACCESS_TOKEN=your_access_token\n",
|
| 624 |
-
" SPOTIFY_REFRESH_TOKEN=your_refresh_token\n",
|
| 625 |
-
" ```\n",
|
| 626 |
-
" Then run `load_dotenv(override=True)`\n",
|
| 627 |
-
"\n"
|
| 628 |
-
]
|
| 629 |
-
},
|
| 630 |
-
{
|
| 631 |
-
"cell_type": "code",
|
| 632 |
-
"execution_count": null,
|
| 633 |
-
"metadata": {},
|
| 634 |
-
"outputs": [],
|
| 635 |
-
"source": []
|
| 636 |
-
},
|
| 637 |
-
{
|
| 638 |
-
"cell_type": "markdown",
|
| 639 |
-
"metadata": {},
|
| 640 |
-
"source": [
|
| 641 |
-
"## Flask Authentication App for Spotify\n",
|
| 642 |
-
"\n",
|
| 643 |
-
"Deploy this code as `spotify_flask_auth.py` to HuggingFace Spaces using Gradio, following the same steps as above for\n",
|
| 644 |
-
"app.py. You need the following:\n",
|
| 645 |
-
"1. SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET defined in your environment\n",
|
| 646 |
-
"2. Need a requirements.txt with Flask listed as a dependency (no other dependencies are needed)\n",
|
| 647 |
-
"\n",
|
| 648 |
-
"```python\n",
|
| 649 |
-
"from flask import Flask, request, redirect, render_template_string\n",
|
| 650 |
-
"import requests\n",
|
| 651 |
-
"import base64\n",
|
| 652 |
-
"import os\n",
|
| 653 |
-
"from dotenv import load_dotenv\n",
|
| 654 |
-
"import urllib.parse\n",
|
| 655 |
-
"import secrets\n",
|
| 656 |
-
"import string\n",
|
| 657 |
-
"\n",
|
| 658 |
-
"load_dotenv(override=True)\n",
|
| 659 |
-
"\n",
|
| 660 |
-
"app = Flask(__name__)\n",
|
| 661 |
-
"app.secret_key = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(32))\n",
|
| 662 |
-
"\n",
|
| 663 |
-
"CLIENT_ID = os.getenv(\"SPOTIFY_CLIENT_ID\")\n",
|
| 664 |
-
"CLIENT_SECRET = os.getenv(\"SPOTIFY_CLIENT_SECRET\")\n",
|
| 665 |
-
"\n",
|
| 666 |
-
"REDIRECT_URI = f\"https://{os.getenv('SPACE_HOST')}/callback\"\n",
|
| 667 |
-
"SCOPE = \"user-top-read\"\n",
|
| 668 |
-
"tokens = {}\n",
|
| 669 |
-
"\n",
|
| 670 |
-
"# HTML template for the home page\n",
|
| 671 |
-
"HOME_TEMPLATE = \"\"\"\n",
|
| 672 |
-
"<!DOCTYPE html>\n",
|
| 673 |
-
"<html>\n",
|
| 674 |
-
"<head>\n",
|
| 675 |
-
" <title>Spotify OAuth Helper</title>\n",
|
| 676 |
-
"</head>\n",
|
| 677 |
-
"<body>\n",
|
| 678 |
-
" {% if has_credentials %}\n",
|
| 679 |
-
" <div style=\"margin-bottom: 20px;\">\n",
|
| 680 |
-
" <p>Make sure to add this redirect URI to your Spotify app settings:</p>\n",
|
| 681 |
-
" <code>{{ redirect_uri }}</code>\n",
|
| 682 |
-
" </div>\n",
|
| 683 |
-
" <button onclick=\"window.open('https://accounts.spotify.com/authorize?client_id={{ client_id }}&response_type=code&redirect_uri={{ redirect_uri | urlencode }}&scope={{ scope | urlencode }}&show_dialog=true', '_blank')\">Authorize with Spotify</button>\n",
|
| 684 |
-
" {% else %}\n",
|
| 685 |
-
" <div>Missing Spotify credentials in .env file</div>\n",
|
| 686 |
-
" {% endif %}\n",
|
| 687 |
-
"</body>\n",
|
| 688 |
-
"</html>\n",
|
| 689 |
-
"\"\"\"\n",
|
| 690 |
-
"\n",
|
| 691 |
-
"SUCCESS_TEMPLATE = \"\"\"\n",
|
| 692 |
-
"<!DOCTYPE html>\n",
|
| 693 |
-
"<html>\n",
|
| 694 |
-
"<head>\n",
|
| 695 |
-
" <title>Spotify OAuth - Success!</title>\n",
|
| 696 |
-
"</head>\n",
|
| 697 |
-
"<body>\n",
|
| 698 |
-
" <p>Authorization Complete</p>\n",
|
| 699 |
-
" <h3>Add to your .env file:</h3>\n",
|
| 700 |
-
" <pre>SPOTIFY_ACCESS_TOKEN={{ access_token }}\n",
|
| 701 |
-
"SPOTIFY_REFRESH_TOKEN={{ refresh_token }}</pre>\n",
|
| 702 |
-
"</body>\n",
|
| 703 |
-
"</html>\n",
|
| 704 |
-
"\"\"\"\n",
|
| 705 |
-
"\n",
|
| 706 |
-
"@app.route('/')\n",
|
| 707 |
-
"def home():\n",
|
| 708 |
-
" error = request.args.get('error')\n",
|
| 709 |
-
" has_credentials = CLIENT_ID and CLIENT_SECRET\n",
|
| 710 |
-
" return render_template_string(HOME_TEMPLATE, error=error, has_credentials=has_credentials, redirect_uri=REDIRECT_URI, client_id=CLIENT_ID, scope=SCOPE)\n",
|
| 711 |
-
"\n",
|
| 712 |
-
"@app.route('/authorize')\n",
|
| 713 |
-
"def authorize():\n",
|
| 714 |
-
" \"\"\"Redirect to Spotify authorization\"\"\"\n",
|
| 715 |
-
" if not CLIENT_ID:\n",
|
| 716 |
-
" return redirect('/?error=Missing SPOTIFY_CLIENT_ID')\n",
|
| 717 |
-
" \n",
|
| 718 |
-
" auth_url = \"https://accounts.spotify.com/authorize\"\n",
|
| 719 |
-
" params = {\n",
|
| 720 |
-
" \"client_id\": CLIENT_ID,\n",
|
| 721 |
-
" \"response_type\": \"code\",\n",
|
| 722 |
-
" \"redirect_uri\": REDIRECT_URI,\n",
|
| 723 |
-
" \"scope\": SCOPE,\n",
|
| 724 |
-
" \"show_dialog\": \"true\"\n",
|
| 725 |
-
" }\n",
|
| 726 |
-
" \n",
|
| 727 |
-
" url = f\"{auth_url}?{urllib.parse.urlencode(params)}\"\n",
|
| 728 |
-
" return redirect(url)\n",
|
| 729 |
-
"\n",
|
| 730 |
-
"@app.route('/callback')\n",
|
| 731 |
-
"def callback():\n",
|
| 732 |
-
" \"\"\"Handle the OAuth callback\"\"\"\n",
|
| 733 |
-
" error = request.args.get('error')\n",
|
| 734 |
-
" if error:\n",
|
| 735 |
-
" return redirect(f'/?error=Authorization failed: {error}')\n",
|
| 736 |
-
" \n",
|
| 737 |
-
" code = request.args.get('code')\n",
|
| 738 |
-
" if not code:\n",
|
| 739 |
-
" return redirect('/?error=No authorization code received')\n",
|
| 740 |
-
" \n",
|
| 741 |
-
" # Exchange code for tokens\n",
|
| 742 |
-
" token_url = \"https://accounts.spotify.com/api/token\"\n",
|
| 743 |
-
" \n",
|
| 744 |
-
" credentials = f\"{CLIENT_ID}:{CLIENT_SECRET}\"\n",
|
| 745 |
-
" encoded_credentials = base64.b64encode(credentials.encode()).decode()\n",
|
| 746 |
-
" \n",
|
| 747 |
-
" headers = {\n",
|
| 748 |
-
" \"Authorization\": f\"Basic {encoded_credentials}\",\n",
|
| 749 |
-
" \"Content-Type\": \"application/x-www-form-urlencoded\"\n",
|
| 750 |
-
" }\n",
|
| 751 |
-
" \n",
|
| 752 |
-
" data = {\n",
|
| 753 |
-
" \"grant_type\": \"authorization_code\",\n",
|
| 754 |
-
" \"code\": code,\n",
|
| 755 |
-
" \"redirect_uri\": REDIRECT_URI\n",
|
| 756 |
-
" }\n",
|
| 757 |
-
" \n",
|
| 758 |
-
" response = requests.post(token_url, headers=headers, data=data)\n",
|
| 759 |
-
" \n",
|
| 760 |
-
" if response.status_code == 200:\n",
|
| 761 |
-
" token_data = response.json()\n",
|
| 762 |
-
" tokens['access_token'] = token_data['access_token']\n",
|
| 763 |
-
" tokens['refresh_token'] = token_data['refresh_token']\n",
|
| 764 |
-
" \n",
|
| 765 |
-
" return render_template_string(\n",
|
| 766 |
-
" SUCCESS_TEMPLATE,\n",
|
| 767 |
-
" access_token=token_data['access_token'],\n",
|
| 768 |
-
" refresh_token=token_data['refresh_token']\n",
|
| 769 |
-
" )\n",
|
| 770 |
-
" else:\n",
|
| 771 |
-
" error_msg = response.json().get('error_description', 'Unknown error')\n",
|
| 772 |
-
" return redirect(f'/?error=Token exchange failed: {error_msg}')\n",
|
| 773 |
-
"\n",
|
| 774 |
-
"if __name__ == '__main__':\n",
|
| 775 |
-
" app.run(host='0.0.0.0', port=7860)\n",
|
| 776 |
-
"```\n",
|
| 777 |
-
"\n",
|
| 778 |
-
"**Deployment Instructions:**\n",
|
| 779 |
-
"1. You will need to provide `SPOTIFY_CLIENT_ID` and `SPOTIFY_CLIENT_SECRET` as secrets in HuggingFace Spaces\n",
|
| 780 |
-
"2. Create a `requirements.txt` file with a single entry: `Flask`\n",
|
| 781 |
-
"3. Deploy to HuggingFace Spaces using the instructions in the deployment section above"
|
| 782 |
-
]
|
| 783 |
-
},
|
| 784 |
-
{
|
| 785 |
-
"cell_type": "markdown",
|
| 786 |
-
"metadata": {},
|
| 787 |
-
"source": []
|
| 788 |
-
},
|
| 789 |
-
{
|
| 790 |
-
"cell_type": "markdown",
|
| 791 |
-
"metadata": {},
|
| 792 |
-
"source": [
|
| 793 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 794 |
-
" <tr>\n",
|
| 795 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 796 |
-
" <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 797 |
-
" </td>\n",
|
| 798 |
-
" <td>\n",
|
| 799 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 800 |
-
" <span style=\"color:#00bfff;\">Aside from the obvious (your career alter-ego) this has business applications in any situation where you need an AI assistant with domain expertise and an ability to interact with the real world.\n",
|
| 801 |
-
" </span>\n",
|
| 802 |
-
" </td>\n",
|
| 803 |
-
" </tr>\n",
|
| 804 |
-
"</table>"
|
| 805 |
-
]
|
| 806 |
-
}
|
| 807 |
-
],
|
| 808 |
-
"metadata": {
|
| 809 |
-
"kernelspec": {
|
| 810 |
-
"display_name": ".venv",
|
| 811 |
-
"language": "python",
|
| 812 |
-
"name": "python3"
|
| 813 |
-
},
|
| 814 |
-
"language_info": {
|
| 815 |
-
"codemirror_mode": {
|
| 816 |
-
"name": "ipython",
|
| 817 |
-
"version": 3
|
| 818 |
-
},
|
| 819 |
-
"file_extension": ".py",
|
| 820 |
-
"mimetype": "text/x-python",
|
| 821 |
-
"name": "python",
|
| 822 |
-
"nbconvert_exporter": "python",
|
| 823 |
-
"pygments_lexer": "ipython3",
|
| 824 |
-
"version": "3.12.11"
|
| 825 |
-
}
|
| 826 |
-
},
|
| 827 |
-
"nbformat": 4,
|
| 828 |
-
"nbformat_minor": 2
|
| 829 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/4_lab4_with_telegram.ipynb
DELETED
|
@@ -1,422 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"### Contributed by Faisal Alkheraiji\n",
|
| 8 |
-
"\n",
|
| 9 |
-
"LinkedIn: https://www.linkedin.com/in/faisalalkheraiji/\n"
|
| 10 |
-
]
|
| 11 |
-
},
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "markdown",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"source": [
|
| 16 |
-
"## The first big project - Professionally You!\n",
|
| 17 |
-
"\n",
|
| 18 |
-
"### And, Tool use.\n",
|
| 19 |
-
"\n",
|
| 20 |
-
"### But first: introducing Telegram\n",
|
| 21 |
-
"\n",
|
| 22 |
-
"We need to do the following to get our Telegram chatbot working:\n",
|
| 23 |
-
"\n",
|
| 24 |
-
"1. Create new telegram bot using @BotFather.\n",
|
| 25 |
-
"2. Get our bot token.\n",
|
| 26 |
-
"3. Get your chat ID.\n",
|
| 27 |
-
"\n",
|
| 28 |
-
"For a quick and easy walkthrough, follow this great tutorial from our friend:\n",
|
| 29 |
-
"\n",
|
| 30 |
-
"https://chatgpt.com/share/686eccf4-34b0-8000-8f34-a3d9269e0578\n",
|
| 31 |
-
"\n",
|
| 32 |
-
"Then add 2 lines to your `.env` file:\n",
|
| 33 |
-
"\n",
|
| 34 |
-
"`TELEGRAM_BOT_TOKEN=your_bot_token`\n",
|
| 35 |
-
"\n",
|
| 36 |
-
"`TELEGRAM_CHAT_ID=your_chat_id`\n"
|
| 37 |
-
]
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"cell_type": "code",
|
| 41 |
-
"execution_count": null,
|
| 42 |
-
"metadata": {},
|
| 43 |
-
"outputs": [],
|
| 44 |
-
"source": [
|
| 45 |
-
"# imports\n",
|
| 46 |
-
"\n",
|
| 47 |
-
"from dotenv import load_dotenv\n",
|
| 48 |
-
"from openai import OpenAI\n",
|
| 49 |
-
"import json\n",
|
| 50 |
-
"import os\n",
|
| 51 |
-
"import requests\n",
|
| 52 |
-
"from pypdf import PdfReader\n",
|
| 53 |
-
"import gradio as gr"
|
| 54 |
-
]
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"cell_type": "code",
|
| 58 |
-
"execution_count": null,
|
| 59 |
-
"metadata": {},
|
| 60 |
-
"outputs": [],
|
| 61 |
-
"source": [
|
| 62 |
-
"# The usual start\n",
|
| 63 |
-
"\n",
|
| 64 |
-
"load_dotenv(override=True)\n",
|
| 65 |
-
"openai = OpenAI()"
|
| 66 |
-
]
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"cell_type": "code",
|
| 70 |
-
"execution_count": null,
|
| 71 |
-
"metadata": {},
|
| 72 |
-
"outputs": [],
|
| 73 |
-
"source": [
|
| 74 |
-
"# Getting the Telegram bot token and chat ID from environment variables\n",
|
| 75 |
-
"# You can also replace these with your actual values directly\n",
|
| 76 |
-
"\n",
|
| 77 |
-
"TELEGRAM_BOT_TOKEN = os.getenv(\"TELEGRAM_BOT_TOKEN\", \"your_bot_token_here\")\n",
|
| 78 |
-
"TELEGRAM_CHAT_ID = os.getenv(\"TELEGRAM_CHAT_ID\", \"your_chat_id_here\")"
|
| 79 |
-
]
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"cell_type": "code",
|
| 83 |
-
"execution_count": null,
|
| 84 |
-
"metadata": {},
|
| 85 |
-
"outputs": [],
|
| 86 |
-
"source": [
|
| 87 |
-
"def send_telegram_message(text):\n",
|
| 88 |
-
" url = f\"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage\"\n",
|
| 89 |
-
" payload = {\"chat_id\": TELEGRAM_CHAT_ID, \"text\": text}\n",
|
| 90 |
-
"\n",
|
| 91 |
-
" response = requests.post(url, data=payload)\n",
|
| 92 |
-
"\n",
|
| 93 |
-
" if response.status_code == 200:\n",
|
| 94 |
-
" # print(\"Message sent successfully!\")\n",
|
| 95 |
-
" return {\"status\": \"success\", \"message\": text}\n",
|
| 96 |
-
" else:\n",
|
| 97 |
-
" # print(f\"Failed to send message. Status code: {response.status_code}\")\n",
|
| 98 |
-
" # print(response.text)\n",
|
| 99 |
-
" return {\"status\": \"error\", \"message\": response.text}"
|
| 100 |
-
]
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"cell_type": "code",
|
| 104 |
-
"execution_count": null,
|
| 105 |
-
"metadata": {},
|
| 106 |
-
"outputs": [],
|
| 107 |
-
"source": [
|
| 108 |
-
"# Example usage\n",
|
| 109 |
-
"send_telegram_message(\"Hello from python notebook !!\")"
|
| 110 |
-
]
|
| 111 |
-
},
|
| 112 |
-
{
|
| 113 |
-
"cell_type": "code",
|
| 114 |
-
"execution_count": null,
|
| 115 |
-
"metadata": {},
|
| 116 |
-
"outputs": [],
|
| 117 |
-
"source": [
|
| 118 |
-
"def record_user_details(email, name=\"Name not provided\", notes=\"not provided\"):\n",
|
| 119 |
-
" send_telegram_message(\n",
|
| 120 |
-
" f\"Recording interest from {name} with email {email} and notes {notes}\"\n",
|
| 121 |
-
" )\n",
|
| 122 |
-
" return {\"recorded\": \"ok\"}"
|
| 123 |
-
]
|
| 124 |
-
},
|
| 125 |
-
{
|
| 126 |
-
"cell_type": "code",
|
| 127 |
-
"execution_count": null,
|
| 128 |
-
"metadata": {},
|
| 129 |
-
"outputs": [],
|
| 130 |
-
"source": [
|
| 131 |
-
"def record_unknown_question(question):\n",
|
| 132 |
-
" send_telegram_message(f\"Recording {question} asked that I couldn't answer\")\n",
|
| 133 |
-
" return {\"recorded\": \"ok\"}"
|
| 134 |
-
]
|
| 135 |
-
},
|
| 136 |
-
{
|
| 137 |
-
"cell_type": "code",
|
| 138 |
-
"execution_count": null,
|
| 139 |
-
"metadata": {},
|
| 140 |
-
"outputs": [],
|
| 141 |
-
"source": [
|
| 142 |
-
"record_user_details_json = {\n",
|
| 143 |
-
" \"name\": \"record_user_details\",\n",
|
| 144 |
-
" \"description\": \"Use this tool to record that a user is interested in being in touch and provided an email address\",\n",
|
| 145 |
-
" \"parameters\": {\n",
|
| 146 |
-
" \"type\": \"object\",\n",
|
| 147 |
-
" \"properties\": {\n",
|
| 148 |
-
" \"email\": {\n",
|
| 149 |
-
" \"type\": \"string\",\n",
|
| 150 |
-
" \"description\": \"The email address of this user\",\n",
|
| 151 |
-
" },\n",
|
| 152 |
-
" \"name\": {\n",
|
| 153 |
-
" \"type\": \"string\",\n",
|
| 154 |
-
" \"description\": \"The user's name, if they provided it\",\n",
|
| 155 |
-
" },\n",
|
| 156 |
-
" \"notes\": {\n",
|
| 157 |
-
" \"type\": \"string\",\n",
|
| 158 |
-
" \"description\": \"Any additional information about the conversation that's worth recording to give context\",\n",
|
| 159 |
-
" },\n",
|
| 160 |
-
" },\n",
|
| 161 |
-
" \"required\": [\"email\"],\n",
|
| 162 |
-
" \"additionalProperties\": False,\n",
|
| 163 |
-
" },\n",
|
| 164 |
-
"}"
|
| 165 |
-
]
|
| 166 |
-
},
|
| 167 |
-
{
|
| 168 |
-
"cell_type": "code",
|
| 169 |
-
"execution_count": null,
|
| 170 |
-
"metadata": {},
|
| 171 |
-
"outputs": [],
|
| 172 |
-
"source": [
|
| 173 |
-
"record_unknown_question_json = {\n",
|
| 174 |
-
" \"name\": \"record_unknown_question\",\n",
|
| 175 |
-
" \"description\": \"Always use this tool to record any question that couldn't be answered as you didn't know the answer\",\n",
|
| 176 |
-
" \"parameters\": {\n",
|
| 177 |
-
" \"type\": \"object\",\n",
|
| 178 |
-
" \"properties\": {\n",
|
| 179 |
-
" \"question\": {\n",
|
| 180 |
-
" \"type\": \"string\",\n",
|
| 181 |
-
" \"description\": \"The question that couldn't be answered\",\n",
|
| 182 |
-
" },\n",
|
| 183 |
-
" },\n",
|
| 184 |
-
" \"required\": [\"question\"],\n",
|
| 185 |
-
" \"additionalProperties\": False,\n",
|
| 186 |
-
" },\n",
|
| 187 |
-
"}"
|
| 188 |
-
]
|
| 189 |
-
},
|
| 190 |
-
{
|
| 191 |
-
"cell_type": "code",
|
| 192 |
-
"execution_count": null,
|
| 193 |
-
"metadata": {},
|
| 194 |
-
"outputs": [],
|
| 195 |
-
"source": [
|
| 196 |
-
"tools = [\n",
|
| 197 |
-
" {\"type\": \"function\", \"function\": record_user_details_json},\n",
|
| 198 |
-
" {\"type\": \"function\", \"function\": record_unknown_question_json},\n",
|
| 199 |
-
"]"
|
| 200 |
-
]
|
| 201 |
-
},
|
| 202 |
-
{
|
| 203 |
-
"cell_type": "code",
|
| 204 |
-
"execution_count": null,
|
| 205 |
-
"metadata": {},
|
| 206 |
-
"outputs": [],
|
| 207 |
-
"source": [
|
| 208 |
-
"tools"
|
| 209 |
-
]
|
| 210 |
-
},
|
| 211 |
-
{
|
| 212 |
-
"cell_type": "code",
|
| 213 |
-
"execution_count": null,
|
| 214 |
-
"metadata": {},
|
| 215 |
-
"outputs": [],
|
| 216 |
-
"source": [
|
| 217 |
-
"# This function can take a list of tool calls, and run them. This is the IF statement!!\n",
|
| 218 |
-
"\n",
|
| 219 |
-
"\n",
|
| 220 |
-
"def handle_tool_calls(tool_calls):\n",
|
| 221 |
-
" results = []\n",
|
| 222 |
-
" for tool_call in tool_calls:\n",
|
| 223 |
-
" tool_name = tool_call.function.name\n",
|
| 224 |
-
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 225 |
-
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 226 |
-
"\n",
|
| 227 |
-
" # THE BIG IF STATEMENT!!!\n",
|
| 228 |
-
"\n",
|
| 229 |
-
" if tool_name == \"record_user_details\":\n",
|
| 230 |
-
" result = record_user_details(**arguments)\n",
|
| 231 |
-
" elif tool_name == \"record_unknown_question\":\n",
|
| 232 |
-
" result = record_unknown_question(**arguments)\n",
|
| 233 |
-
"\n",
|
| 234 |
-
" results.append(\n",
|
| 235 |
-
" {\n",
|
| 236 |
-
" \"role\": \"tool\",\n",
|
| 237 |
-
" \"content\": json.dumps(result),\n",
|
| 238 |
-
" \"tool_call_id\": tool_call.id,\n",
|
| 239 |
-
" }\n",
|
| 240 |
-
" )\n",
|
| 241 |
-
" return results"
|
| 242 |
-
]
|
| 243 |
-
},
|
| 244 |
-
{
|
| 245 |
-
"cell_type": "code",
|
| 246 |
-
"execution_count": null,
|
| 247 |
-
"metadata": {},
|
| 248 |
-
"outputs": [],
|
| 249 |
-
"source": [
|
| 250 |
-
"globals()[\"record_unknown_question\"](\"this is a really hard question\")"
|
| 251 |
-
]
|
| 252 |
-
},
|
| 253 |
-
{
|
| 254 |
-
"cell_type": "code",
|
| 255 |
-
"execution_count": null,
|
| 256 |
-
"metadata": {},
|
| 257 |
-
"outputs": [],
|
| 258 |
-
"source": [
|
| 259 |
-
"# This is a more elegant way that avoids the IF statement.\n",
|
| 260 |
-
"\n",
|
| 261 |
-
"\n",
|
| 262 |
-
"def handle_tool_calls(tool_calls):\n",
|
| 263 |
-
" results = []\n",
|
| 264 |
-
" for tool_call in tool_calls:\n",
|
| 265 |
-
" tool_name = tool_call.function.name\n",
|
| 266 |
-
" arguments = json.loads(tool_call.function.arguments)\n",
|
| 267 |
-
" print(f\"Tool called: {tool_name}\", flush=True)\n",
|
| 268 |
-
" tool = globals().get(tool_name)\n",
|
| 269 |
-
" result = tool(**arguments) if tool else {}\n",
|
| 270 |
-
" results.append(\n",
|
| 271 |
-
" {\n",
|
| 272 |
-
" \"role\": \"tool\",\n",
|
| 273 |
-
" \"content\": json.dumps(result),\n",
|
| 274 |
-
" \"tool_call_id\": tool_call.id,\n",
|
| 275 |
-
" }\n",
|
| 276 |
-
" )\n",
|
| 277 |
-
" return results"
|
| 278 |
-
]
|
| 279 |
-
},
|
| 280 |
-
{
|
| 281 |
-
"cell_type": "code",
|
| 282 |
-
"execution_count": null,
|
| 283 |
-
"metadata": {},
|
| 284 |
-
"outputs": [],
|
| 285 |
-
"source": [
|
| 286 |
-
"reader = PdfReader(\"../me/linkedin.pdf\")\n",
|
| 287 |
-
"linkedin = \"\"\n",
|
| 288 |
-
"for page in reader.pages:\n",
|
| 289 |
-
" text = page.extract_text()\n",
|
| 290 |
-
" if text:\n",
|
| 291 |
-
" linkedin += text\n",
|
| 292 |
-
"\n",
|
| 293 |
-
"with open(\"../me/summary.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
| 294 |
-
" summary = f.read()\n",
|
| 295 |
-
"\n",
|
| 296 |
-
"name = \"Ed Donner\""
|
| 297 |
-
]
|
| 298 |
-
},
|
| 299 |
-
{
|
| 300 |
-
"cell_type": "code",
|
| 301 |
-
"execution_count": null,
|
| 302 |
-
"metadata": {},
|
| 303 |
-
"outputs": [],
|
| 304 |
-
"source": [
|
| 305 |
-
"system_prompt = f\"You are acting as {name}. You are answering questions on {name}'s website, \\\n",
|
| 306 |
-
"particularly questions related to {name}'s career, background, skills and experience. \\\n",
|
| 307 |
-
"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \\\n",
|
| 308 |
-
"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. \\\n",
|
| 309 |
-
"Be professional and engaging, as if talking to a potential client or future employer who came across the website. \\\n",
|
| 310 |
-
"If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. \\\n",
|
| 311 |
-
"If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. \"\n",
|
| 312 |
-
"\n",
|
| 313 |
-
"system_prompt += f\"\\n\\n## Summary:\\n{summary}\\n\\n## LinkedIn Profile:\\n{linkedin}\\n\\n\"\n",
|
| 314 |
-
"system_prompt += f\"With this context, please chat with the user, always staying in character as {name}.\"\n"
|
| 315 |
-
]
|
| 316 |
-
},
|
| 317 |
-
{
|
| 318 |
-
"cell_type": "code",
|
| 319 |
-
"execution_count": null,
|
| 320 |
-
"metadata": {},
|
| 321 |
-
"outputs": [],
|
| 322 |
-
"source": [
|
| 323 |
-
"def chat(message, history):\n",
|
| 324 |
-
" messages = (\n",
|
| 325 |
-
" [{\"role\": \"system\", \"content\": system_prompt}]\n",
|
| 326 |
-
" + history\n",
|
| 327 |
-
" + [{\"role\": \"user\", \"content\": message}]\n",
|
| 328 |
-
" )\n",
|
| 329 |
-
" done = False\n",
|
| 330 |
-
" while not done:\n",
|
| 331 |
-
" # This is the call to the LLM - see that we pass in the tools json\n",
|
| 332 |
-
"\n",
|
| 333 |
-
" response = openai.chat.completions.create(\n",
|
| 334 |
-
" model=\"gpt-4o-mini\", messages=messages, tools=tools\n",
|
| 335 |
-
" )\n",
|
| 336 |
-
"\n",
|
| 337 |
-
" finish_reason = response.choices[0].finish_reason\n",
|
| 338 |
-
"\n",
|
| 339 |
-
" # If the LLM wants to call a tool, we do that!\n",
|
| 340 |
-
"\n",
|
| 341 |
-
" if finish_reason == \"tool_calls\":\n",
|
| 342 |
-
" message = response.choices[0].message\n",
|
| 343 |
-
" tool_calls = message.tool_calls\n",
|
| 344 |
-
" results = handle_tool_calls(tool_calls)\n",
|
| 345 |
-
" messages.append(message)\n",
|
| 346 |
-
" messages.extend(results)\n",
|
| 347 |
-
" else:\n",
|
| 348 |
-
" done = True\n",
|
| 349 |
-
" return response.choices[0].message.content"
|
| 350 |
-
]
|
| 351 |
-
},
|
| 352 |
-
{
|
| 353 |
-
"cell_type": "code",
|
| 354 |
-
"execution_count": null,
|
| 355 |
-
"metadata": {},
|
| 356 |
-
"outputs": [],
|
| 357 |
-
"source": [
|
| 358 |
-
"gr.ChatInterface(chat, type=\"messages\").launch(inbrowser=True)"
|
| 359 |
-
]
|
| 360 |
-
},
|
| 361 |
-
{
|
| 362 |
-
"cell_type": "markdown",
|
| 363 |
-
"metadata": {},
|
| 364 |
-
"source": [
|
| 365 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 366 |
-
" <tr>\n",
|
| 367 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 368 |
-
" <img src=\"../../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 369 |
-
" </td>\n",
|
| 370 |
-
" <td>\n",
|
| 371 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 372 |
-
" <span style=\"color:#ff7800;\">• First and foremost, deploy this for yourself! It's a real, valuable tool - the future resume..<br/>\n",
|
| 373 |
-
" • Next, improve the resources - add better context about yourself. If you know RAG, then add a knowledge base about you.<br/>\n",
|
| 374 |
-
" • Add in more tools! You could have a SQL database with common Q&A that the LLM could read and write from?<br/>\n",
|
| 375 |
-
" • Bring in the Evaluator from the last lab, and add other Agentic patterns.\n",
|
| 376 |
-
" </span>\n",
|
| 377 |
-
" </td>\n",
|
| 378 |
-
" </tr>\n",
|
| 379 |
-
"</table>\n"
|
| 380 |
-
]
|
| 381 |
-
},
|
| 382 |
-
{
|
| 383 |
-
"cell_type": "markdown",
|
| 384 |
-
"metadata": {},
|
| 385 |
-
"source": [
|
| 386 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 387 |
-
" <tr>\n",
|
| 388 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 389 |
-
" <img src=\"../../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 390 |
-
" </td>\n",
|
| 391 |
-
" <td>\n",
|
| 392 |
-
" <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
|
| 393 |
-
" <span style=\"color:#00bfff;\">Aside from the obvious (your career alter-ego) this has business applications in any situation where you need an AI assistant with domain expertise and an ability to interact with the real world.\n",
|
| 394 |
-
" </span>\n",
|
| 395 |
-
" </td>\n",
|
| 396 |
-
" </tr>\n",
|
| 397 |
-
"</table>\n"
|
| 398 |
-
]
|
| 399 |
-
}
|
| 400 |
-
],
|
| 401 |
-
"metadata": {
|
| 402 |
-
"kernelspec": {
|
| 403 |
-
"display_name": ".venv",
|
| 404 |
-
"language": "python",
|
| 405 |
-
"name": "python3"
|
| 406 |
-
},
|
| 407 |
-
"language_info": {
|
| 408 |
-
"codemirror_mode": {
|
| 409 |
-
"name": "ipython",
|
| 410 |
-
"version": 3
|
| 411 |
-
},
|
| 412 |
-
"file_extension": ".py",
|
| 413 |
-
"mimetype": "text/x-python",
|
| 414 |
-
"name": "python",
|
| 415 |
-
"nbconvert_exporter": "python",
|
| 416 |
-
"pygments_lexer": "ipython3",
|
| 417 |
-
"version": "3.12.11"
|
| 418 |
-
}
|
| 419 |
-
},
|
| 420 |
-
"nbformat": 4,
|
| 421 |
-
"nbformat_minor": 2
|
| 422 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/Business_Idea.ipynb
DELETED
|
@@ -1,388 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"## Business idea generator and evaluator \n",
|
| 8 |
-
"\n"
|
| 9 |
-
]
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"cell_type": "code",
|
| 13 |
-
"execution_count": 1,
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"outputs": [],
|
| 16 |
-
"source": [
|
| 17 |
-
"# Start with imports - ask ChatGPT to explain any package that you don't know\n",
|
| 18 |
-
"\n",
|
| 19 |
-
"import os\n",
|
| 20 |
-
"import json\n",
|
| 21 |
-
"from dotenv import load_dotenv\n",
|
| 22 |
-
"from openai import OpenAI\n",
|
| 23 |
-
"from anthropic import Anthropic\n",
|
| 24 |
-
"from IPython.display import Markdown, display"
|
| 25 |
-
]
|
| 26 |
-
},
|
| 27 |
-
{
|
| 28 |
-
"cell_type": "code",
|
| 29 |
-
"execution_count": null,
|
| 30 |
-
"metadata": {},
|
| 31 |
-
"outputs": [],
|
| 32 |
-
"source": [
|
| 33 |
-
"# Always remember to do this!\n",
|
| 34 |
-
"load_dotenv(override=True)"
|
| 35 |
-
]
|
| 36 |
-
},
|
| 37 |
-
{
|
| 38 |
-
"cell_type": "code",
|
| 39 |
-
"execution_count": null,
|
| 40 |
-
"metadata": {},
|
| 41 |
-
"outputs": [],
|
| 42 |
-
"source": [
|
| 43 |
-
"# Print the key prefixes to help with any debugging\n",
|
| 44 |
-
"\n",
|
| 45 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 46 |
-
"anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
|
| 47 |
-
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
| 48 |
-
"deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
|
| 49 |
-
"groq_api_key = os.getenv('GROQ_API_KEY')\n",
|
| 50 |
-
"\n",
|
| 51 |
-
"if openai_api_key:\n",
|
| 52 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 53 |
-
"else:\n",
|
| 54 |
-
" print(\"OpenAI API Key not set\")\n",
|
| 55 |
-
" \n",
|
| 56 |
-
"if anthropic_api_key:\n",
|
| 57 |
-
" print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
|
| 58 |
-
"else:\n",
|
| 59 |
-
" print(\"Anthropic API Key not set (and this is optional)\")\n",
|
| 60 |
-
"\n",
|
| 61 |
-
"if google_api_key:\n",
|
| 62 |
-
" print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
|
| 63 |
-
"else:\n",
|
| 64 |
-
" print(\"Google API Key not set (and this is optional)\")\n",
|
| 65 |
-
"\n",
|
| 66 |
-
"if deepseek_api_key:\n",
|
| 67 |
-
" print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
|
| 68 |
-
"else:\n",
|
| 69 |
-
" print(\"DeepSeek API Key not set (and this is optional)\")\n",
|
| 70 |
-
"\n",
|
| 71 |
-
"if groq_api_key:\n",
|
| 72 |
-
" print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
|
| 73 |
-
"else:\n",
|
| 74 |
-
" print(\"Groq API Key not set (and this is optional)\")"
|
| 75 |
-
]
|
| 76 |
-
},
|
| 77 |
-
{
|
| 78 |
-
"cell_type": "code",
|
| 79 |
-
"execution_count": 4,
|
| 80 |
-
"metadata": {},
|
| 81 |
-
"outputs": [],
|
| 82 |
-
"source": [
|
| 83 |
-
"request = (\n",
|
| 84 |
-
" \"Please generate three innovative business ideas aligned with the latest global trends. \"\n",
|
| 85 |
-
" \"For each idea, include a brief description (2–3 sentences).\"\n",
|
| 86 |
-
")\n",
|
| 87 |
-
"messages = [{\"role\": \"user\", \"content\": request}]"
|
| 88 |
-
]
|
| 89 |
-
},
|
| 90 |
-
{
|
| 91 |
-
"cell_type": "code",
|
| 92 |
-
"execution_count": null,
|
| 93 |
-
"metadata": {},
|
| 94 |
-
"outputs": [],
|
| 95 |
-
"source": [
|
| 96 |
-
"messages"
|
| 97 |
-
]
|
| 98 |
-
},
|
| 99 |
-
{
|
| 100 |
-
"cell_type": "code",
|
| 101 |
-
"execution_count": null,
|
| 102 |
-
"metadata": {},
|
| 103 |
-
"outputs": [],
|
| 104 |
-
"source": [
|
| 105 |
-
"\n",
|
| 106 |
-
"openai = OpenAI()\n",
|
| 107 |
-
"'''\n",
|
| 108 |
-
"response = openai.chat.completions.create(\n",
|
| 109 |
-
" model=\"gpt-4o-mini\",\n",
|
| 110 |
-
" messages=messages,\n",
|
| 111 |
-
")\n",
|
| 112 |
-
"question = response.choices[0].message.content\n",
|
| 113 |
-
"print(question)\n",
|
| 114 |
-
"'''"
|
| 115 |
-
]
|
| 116 |
-
},
|
| 117 |
-
{
|
| 118 |
-
"cell_type": "code",
|
| 119 |
-
"execution_count": 9,
|
| 120 |
-
"metadata": {},
|
| 121 |
-
"outputs": [],
|
| 122 |
-
"source": [
|
| 123 |
-
"competitors = []\n",
|
| 124 |
-
"answers = []\n",
|
| 125 |
-
"#messages = [{\"role\": \"user\", \"content\": question}]"
|
| 126 |
-
]
|
| 127 |
-
},
|
| 128 |
-
{
|
| 129 |
-
"cell_type": "code",
|
| 130 |
-
"execution_count": null,
|
| 131 |
-
"metadata": {},
|
| 132 |
-
"outputs": [],
|
| 133 |
-
"source": [
|
| 134 |
-
"# The API we know well\n",
|
| 135 |
-
"\n",
|
| 136 |
-
"model_name = \"gpt-4o-mini\"\n",
|
| 137 |
-
"\n",
|
| 138 |
-
"response = openai.chat.completions.create(model=model_name, messages=messages)\n",
|
| 139 |
-
"answer = response.choices[0].message.content\n",
|
| 140 |
-
"\n",
|
| 141 |
-
"display(Markdown(answer))\n",
|
| 142 |
-
"competitors.append(model_name)\n",
|
| 143 |
-
"answers.append(answer)"
|
| 144 |
-
]
|
| 145 |
-
},
|
| 146 |
-
{
|
| 147 |
-
"cell_type": "code",
|
| 148 |
-
"execution_count": null,
|
| 149 |
-
"metadata": {},
|
| 150 |
-
"outputs": [],
|
| 151 |
-
"source": [
|
| 152 |
-
"# Anthropic has a slightly different API, and Max Tokens is required\n",
|
| 153 |
-
"\n",
|
| 154 |
-
"model_name = \"claude-3-7-sonnet-latest\"\n",
|
| 155 |
-
"\n",
|
| 156 |
-
"claude = Anthropic()\n",
|
| 157 |
-
"response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
|
| 158 |
-
"answer = response.content[0].text\n",
|
| 159 |
-
"\n",
|
| 160 |
-
"display(Markdown(answer))\n",
|
| 161 |
-
"competitors.append(model_name)\n",
|
| 162 |
-
"answers.append(answer)"
|
| 163 |
-
]
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"cell_type": "code",
|
| 167 |
-
"execution_count": null,
|
| 168 |
-
"metadata": {},
|
| 169 |
-
"outputs": [],
|
| 170 |
-
"source": [
|
| 171 |
-
"gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
|
| 172 |
-
"model_name = \"gemini-2.0-flash\"\n",
|
| 173 |
-
"\n",
|
| 174 |
-
"response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
|
| 175 |
-
"answer = response.choices[0].message.content\n",
|
| 176 |
-
"\n",
|
| 177 |
-
"display(Markdown(answer))\n",
|
| 178 |
-
"competitors.append(model_name)\n",
|
| 179 |
-
"answers.append(answer)"
|
| 180 |
-
]
|
| 181 |
-
},
|
| 182 |
-
{
|
| 183 |
-
"cell_type": "code",
|
| 184 |
-
"execution_count": null,
|
| 185 |
-
"metadata": {},
|
| 186 |
-
"outputs": [],
|
| 187 |
-
"source": [
|
| 188 |
-
"deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
|
| 189 |
-
"model_name = \"deepseek-chat\"\n",
|
| 190 |
-
"\n",
|
| 191 |
-
"response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
|
| 192 |
-
"answer = response.choices[0].message.content\n",
|
| 193 |
-
"\n",
|
| 194 |
-
"display(Markdown(answer))\n",
|
| 195 |
-
"competitors.append(model_name)\n",
|
| 196 |
-
"answers.append(answer)"
|
| 197 |
-
]
|
| 198 |
-
},
|
| 199 |
-
{
|
| 200 |
-
"cell_type": "code",
|
| 201 |
-
"execution_count": null,
|
| 202 |
-
"metadata": {},
|
| 203 |
-
"outputs": [],
|
| 204 |
-
"source": [
|
| 205 |
-
"groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
|
| 206 |
-
"model_name = \"llama-3.3-70b-versatile\"\n",
|
| 207 |
-
"\n",
|
| 208 |
-
"response = groq.chat.completions.create(model=model_name, messages=messages)\n",
|
| 209 |
-
"answer = response.choices[0].message.content\n",
|
| 210 |
-
"\n",
|
| 211 |
-
"display(Markdown(answer))\n",
|
| 212 |
-
"competitors.append(model_name)\n",
|
| 213 |
-
"answers.append(answer)\n"
|
| 214 |
-
]
|
| 215 |
-
},
|
| 216 |
-
{
|
| 217 |
-
"cell_type": "code",
|
| 218 |
-
"execution_count": null,
|
| 219 |
-
"metadata": {},
|
| 220 |
-
"outputs": [],
|
| 221 |
-
"source": [
|
| 222 |
-
"!ollama pull llama3.2"
|
| 223 |
-
]
|
| 224 |
-
},
|
| 225 |
-
{
|
| 226 |
-
"cell_type": "code",
|
| 227 |
-
"execution_count": null,
|
| 228 |
-
"metadata": {},
|
| 229 |
-
"outputs": [],
|
| 230 |
-
"source": [
|
| 231 |
-
"ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
|
| 232 |
-
"model_name = \"llama3.2\"\n",
|
| 233 |
-
"\n",
|
| 234 |
-
"response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
|
| 235 |
-
"answer = response.choices[0].message.content\n",
|
| 236 |
-
"\n",
|
| 237 |
-
"display(Markdown(answer))\n",
|
| 238 |
-
"competitors.append(model_name)\n",
|
| 239 |
-
"answers.append(answer)"
|
| 240 |
-
]
|
| 241 |
-
},
|
| 242 |
-
{
|
| 243 |
-
"cell_type": "code",
|
| 244 |
-
"execution_count": null,
|
| 245 |
-
"metadata": {},
|
| 246 |
-
"outputs": [],
|
| 247 |
-
"source": [
|
| 248 |
-
"# So where are we?\n",
|
| 249 |
-
"\n",
|
| 250 |
-
"print(competitors)\n",
|
| 251 |
-
"print(answers)\n"
|
| 252 |
-
]
|
| 253 |
-
},
|
| 254 |
-
{
|
| 255 |
-
"cell_type": "code",
|
| 256 |
-
"execution_count": null,
|
| 257 |
-
"metadata": {},
|
| 258 |
-
"outputs": [],
|
| 259 |
-
"source": [
|
| 260 |
-
"# It's nice to know how to use \"zip\"\n",
|
| 261 |
-
"for competitor, answer in zip(competitors, answers):\n",
|
| 262 |
-
" print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
|
| 263 |
-
]
|
| 264 |
-
},
|
| 265 |
-
{
|
| 266 |
-
"cell_type": "code",
|
| 267 |
-
"execution_count": 14,
|
| 268 |
-
"metadata": {},
|
| 269 |
-
"outputs": [],
|
| 270 |
-
"source": [
|
| 271 |
-
"# Let's bring this together - note the use of \"enumerate\"\n",
|
| 272 |
-
"\n",
|
| 273 |
-
"together = \"\"\n",
|
| 274 |
-
"for index, answer in enumerate(answers):\n",
|
| 275 |
-
" together += f\"# Response from competitor {index+1}\\n\\n\"\n",
|
| 276 |
-
" together += answer + \"\\n\\n\""
|
| 277 |
-
]
|
| 278 |
-
},
|
| 279 |
-
{
|
| 280 |
-
"cell_type": "code",
|
| 281 |
-
"execution_count": null,
|
| 282 |
-
"metadata": {},
|
| 283 |
-
"outputs": [],
|
| 284 |
-
"source": [
|
| 285 |
-
"print(together)"
|
| 286 |
-
]
|
| 287 |
-
},
|
| 288 |
-
{
|
| 289 |
-
"cell_type": "code",
|
| 290 |
-
"execution_count": null,
|
| 291 |
-
"metadata": {},
|
| 292 |
-
"outputs": [],
|
| 293 |
-
"source": [
|
| 294 |
-
"judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
|
| 295 |
-
"Each model was asked to generate three innovative business ideas aligned with the latest global trends.\n",
|
| 296 |
-
"\n",
|
| 297 |
-
"Your job is to evaluate the likelihood of success for each idea on a scale from 0 to 100 percent. For each competitor, list the three percentages in the same order as their ideas.\n",
|
| 298 |
-
"\n",
|
| 299 |
-
"Respond only with JSON in this format:\n",
|
| 300 |
-
"{{\"results\": [\n",
|
| 301 |
-
" {{\"competitor\": 1, \"success_chances\": [perc1, perc2, perc3]}},\n",
|
| 302 |
-
" {{\"competitor\": 2, \"success_chances\": [perc1, perc2, perc3]}},\n",
|
| 303 |
-
" ...\n",
|
| 304 |
-
"]}}\n",
|
| 305 |
-
"\n",
|
| 306 |
-
"Here are the ideas from each competitor:\n",
|
| 307 |
-
"\n",
|
| 308 |
-
"{together}\n",
|
| 309 |
-
"\n",
|
| 310 |
-
"Now respond with only the JSON, nothing else.\"\"\"\n"
|
| 311 |
-
]
|
| 312 |
-
},
|
| 313 |
-
{
|
| 314 |
-
"cell_type": "code",
|
| 315 |
-
"execution_count": null,
|
| 316 |
-
"metadata": {},
|
| 317 |
-
"outputs": [],
|
| 318 |
-
"source": [
|
| 319 |
-
"print(judge)"
|
| 320 |
-
]
|
| 321 |
-
},
|
| 322 |
-
{
|
| 323 |
-
"cell_type": "code",
|
| 324 |
-
"execution_count": 18,
|
| 325 |
-
"metadata": {},
|
| 326 |
-
"outputs": [],
|
| 327 |
-
"source": [
|
| 328 |
-
"judge_messages = [{\"role\": \"user\", \"content\": judge}]"
|
| 329 |
-
]
|
| 330 |
-
},
|
| 331 |
-
{
|
| 332 |
-
"cell_type": "code",
|
| 333 |
-
"execution_count": null,
|
| 334 |
-
"metadata": {},
|
| 335 |
-
"outputs": [],
|
| 336 |
-
"source": [
|
| 337 |
-
"# Judgement time!\n",
|
| 338 |
-
"\n",
|
| 339 |
-
"openai = OpenAI()\n",
|
| 340 |
-
"response = openai.chat.completions.create(\n",
|
| 341 |
-
" model=\"o3-mini\",\n",
|
| 342 |
-
" messages=judge_messages,\n",
|
| 343 |
-
")\n",
|
| 344 |
-
"results = response.choices[0].message.content\n",
|
| 345 |
-
"print(results)\n"
|
| 346 |
-
]
|
| 347 |
-
},
|
| 348 |
-
{
|
| 349 |
-
"cell_type": "code",
|
| 350 |
-
"execution_count": null,
|
| 351 |
-
"metadata": {},
|
| 352 |
-
"outputs": [],
|
| 353 |
-
"source": [
|
| 354 |
-
"# Parse judge results JSON and display success probabilities\n",
|
| 355 |
-
"results_dict = json.loads(results)\n",
|
| 356 |
-
"for entry in results_dict[\"results\"]:\n",
|
| 357 |
-
" comp_num = entry[\"competitor\"]\n",
|
| 358 |
-
" comp_name = competitors[comp_num - 1]\n",
|
| 359 |
-
" chances = entry[\"success_chances\"]\n",
|
| 360 |
-
" print(f\"{comp_name}:\")\n",
|
| 361 |
-
" for idx, perc in enumerate(chances, start=1):\n",
|
| 362 |
-
" print(f\" Idea {idx}: {perc}% chance of success\")\n",
|
| 363 |
-
" print()\n"
|
| 364 |
-
]
|
| 365 |
-
}
|
| 366 |
-
],
|
| 367 |
-
"metadata": {
|
| 368 |
-
"kernelspec": {
|
| 369 |
-
"display_name": ".venv",
|
| 370 |
-
"language": "python",
|
| 371 |
-
"name": "python3"
|
| 372 |
-
},
|
| 373 |
-
"language_info": {
|
| 374 |
-
"codemirror_mode": {
|
| 375 |
-
"name": "ipython",
|
| 376 |
-
"version": 3
|
| 377 |
-
},
|
| 378 |
-
"file_extension": ".py",
|
| 379 |
-
"mimetype": "text/x-python",
|
| 380 |
-
"name": "python",
|
| 381 |
-
"nbconvert_exporter": "python",
|
| 382 |
-
"pygments_lexer": "ipython3",
|
| 383 |
-
"version": "3.12.7"
|
| 384 |
-
}
|
| 385 |
-
},
|
| 386 |
-
"nbformat": 4,
|
| 387 |
-
"nbformat_minor": 2
|
| 388 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/README.md
DELETED
|
@@ -1,97 +0,0 @@
|
|
| 1 |
-
# Smart RAG Chatbot
|
| 2 |
-
|
| 3 |
-
A conversational AI that answers questions from your documents first, then falls back to general knowledge when needed. Plus, it keeps you in the loop with smart notifications.
|
| 4 |
-
|
| 5 |
-
## What it does
|
| 6 |
-
|
| 7 |
-
Think of it as your personal AI assistant that:
|
| 8 |
-
- **Knows your stuff** - Searches your documents first to answer questions
|
| 9 |
-
- **Stays helpful** - Uses general AI knowledge when your docs don't have the answer
|
| 10 |
-
- **Keeps you informed** - Sends notifications when it goes beyond your knowledge base
|
| 11 |
-
- **Remembers conversations** - Maintains chat history and user details
|
| 12 |
-
|
| 13 |
-
## How it works
|
| 14 |
-
|
| 15 |
-
1. User asks a question
|
| 16 |
-
2. System searches your documents in `knowledge_base/`
|
| 17 |
-
3. **Found answer?** → Uses your docs and responds
|
| 18 |
-
4. **No answer?** → Uses general AI knowledge + sends you a notification
|
| 19 |
-
5. **Small talk?** → Quick friendly response
|
| 20 |
-
|
| 21 |
-
## Architecture
|
| 22 |
-
|
| 23 |
-
```
|
| 24 |
-
User Question → Search Your Docs → ChatGPT Response → Gemini Quality Check
|
| 25 |
-
↓ ↓
|
| 26 |
-
If no relevant docs If using general knowledge
|
| 27 |
-
↓ ↓
|
| 28 |
-
General AI Knowledge ← ← ← ← ← ← ← ← Pushover Notification
|
| 29 |
-
```
|
| 30 |
-
|
| 31 |
-
**Components:**
|
| 32 |
-
- **ChromaDB + LangChain**: Stores and searches your documents
|
| 33 |
-
- **ChatGPT**: Generates responses
|
| 34 |
-
- **Gemini**: Checks response quality
|
| 35 |
-
- **Pushover**: Sends notifications
|
| 36 |
-
- **Gradio**: Simple web interface
|
| 37 |
-
|
| 38 |
-
## Quick Setup
|
| 39 |
-
|
| 40 |
-
1. **Install dependencies:**
|
| 41 |
-
```bash
|
| 42 |
-
pip install -r requirements.txt
|
| 43 |
-
```
|
| 44 |
-
|
| 45 |
-
2. **Create `.env` file with your API keys:**
|
| 46 |
-
```bash
|
| 47 |
-
OPENAI_API_KEY=your_openai_key
|
| 48 |
-
GOOGLE_API_KEY=your_gemini_key
|
| 49 |
-
PUSHOVER_USER=your_pushover_user # optional
|
| 50 |
-
PUSHOVER_TOKEN=your_pushover_token # optional
|
| 51 |
-
```
|
| 52 |
-
|
| 53 |
-
3. **Add your documents:**
|
| 54 |
-
Drop your `.txt`, `.md`, or `.markdown` files into the `knowledge_base/` folder.
|
| 55 |
-
|
| 56 |
-
4. **Launch:**
|
| 57 |
-
```bash
|
| 58 |
-
python app.py
|
| 59 |
-
```
|
| 60 |
-
|
| 61 |
-
That's it! The web interface opens automatically.
|
| 62 |
-
|
| 63 |
-
## Key Features
|
| 64 |
-
|
| 65 |
-
- **Smart fallback**: Uses your docs first, general knowledge second
|
| 66 |
-
- **Quality control**: Built-in evaluator ensures good responses
|
| 67 |
-
- **Conversation memory**: Remembers chat history and user details
|
| 68 |
-
- **Smart notifications**: Only alerts when using general knowledge
|
| 69 |
-
- **Simple setup**: Just API keys and documents
|
| 70 |
-
|
| 71 |
-
## File Structure
|
| 72 |
-
|
| 73 |
-
```
|
| 74 |
-
├── app.py # Web interface
|
| 75 |
-
├── controller.py # Main logic
|
| 76 |
-
├── rag.py # Document search
|
| 77 |
-
├── evaluator.py # Quality checking
|
| 78 |
-
├── tools.py # Notifications
|
| 79 |
-
├── knowledge_base/ # Your documents
|
| 80 |
-
└── .env # API keys
|
| 81 |
-
```
|
| 82 |
-
|
| 83 |
-
## Example Usage
|
| 84 |
-
|
| 85 |
-
**Question about your docs:**
|
| 86 |
-
```
|
| 87 |
-
User: "What's our return policy?"
|
| 88 |
-
Bot: [Searches your docs] → [Finds policy] → [Answers from your content]
|
| 89 |
-
```
|
| 90 |
-
|
| 91 |
-
**General question:**
|
| 92 |
-
```
|
| 93 |
-
User: "What is machine learning?"
|
| 94 |
-
Bot: [No docs found] → [Uses AI knowledge] → [Sends notification] → [Helpful explanation]
|
| 95 |
-
```
|
| 96 |
-
|
| 97 |
-
Built with ChromaDB, LangChain, OpenAI, Gemini, and Gradio.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/app.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
from controller import ChatbotController
|
| 3 |
-
|
| 4 |
-
# Module-level controller instance; respond() routes every chat turn through it.
controller = ChatbotController()
|
| 5 |
-
|
| 6 |
-
def respond(user_msg, history, recorded_emails_state):
    """Handle one submitted chat message and return the updated UI state.

    Args:
        user_msg: Text submitted from the message box.
        history: Conversation so far, as a list of
            ``{"role": ..., "content": ...}`` dicts (``None`` is treated as
            an empty conversation).
        recorded_emails_state: Emails recorded so far, as a list
            (``None`` is treated as empty).

    Returns:
        A 3-tuple ``(chat_messages, history, recorded_emails)``; the first two
        items are the same updated conversation list, the third is the
        controller's email collection converted back to a list.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])
    known_emails = list(recorded_emails_state or [])

    # A blank submission carries no question: skip the controller call and
    # leave the conversation exactly as it was.
    if not user_msg or not user_msg.strip():
        return history, history, known_emails

    history.append({"role": "user", "content": user_msg})
    reply, emails = controller.get_response(
        message=user_msg,
        history=history,
        name=None,
        email=None,
        recorded_emails=set(known_emails),
    )
    history.append({"role": "assistant", "content": reply})
    return history, history, list(emails)
|
| 17 |
-
|
| 18 |
-
# Assemble the chat UI: a message-style chat window, a text input, and two
# gr.State holders (conversation history and recorded emails).
with gr.Blocks(title="RAG Chat") as demo:
    chat = gr.Chatbot(type="messages", min_height=600, label="Assistant")
    msg = gr.Textbox(label="Your message", placeholder="Type here…")
    history_state = gr.State([])
    processed_emails_state = gr.State([])

    # First submit handler: run the chat turn and refresh the chat window
    # together with both state holders.
    msg.submit(
        fn=respond,
        inputs=[msg, history_state, processed_emails_state],
        outputs=[chat, history_state, processed_emails_state],
    )
    # Second submit handler: reset the input box to an empty string.
    msg.submit(fn=lambda: "", inputs=None, outputs=msg)

demo.launch(inbrowser=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/career_db/6aee01b9-a616-491d-a196-c857cd862793/data_level0.bin
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:ea9ac6972cb4666769a17755f17c5727f676f11a742e9553bf3a21119ab54394
|
| 3 |
-
size 167600
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/career_db/6aee01b9-a616-491d-a196-c857cd862793/header.bin
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:a0e81c3b22454233bc12d0762f06dcca48261a75231cf87c79b75e69a6c00150
|
| 3 |
-
size 100
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/career_db/6aee01b9-a616-491d-a196-c857cd862793/length.bin
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:7a12e561363385e9dfeeab326368731c030ed4b374e7f5897ac819159d2884c5
|
| 3 |
-
size 400
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/career_db/6aee01b9-a616-491d-a196-c857cd862793/link_lists.bin
DELETED
|
File without changes
|
community_contributions/ChatBot_with_evaluator_and_notifier/career_db/chroma.sqlite3
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:460534870e395f01413300a55f0ee89062d0ffa8102796eb6365b15c8c2b0063
|
| 3 |
-
size 1597440
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/chat.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import json
|
| 3 |
-
from typing import List, Dict, Any
|
| 4 |
-
from openai import OpenAI
|
| 5 |
-
from dotenv import load_dotenv
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
load_dotenv(override=True)
|
| 9 |
-
|
| 10 |
-
# Default OpenAI chat model name; ChatGPTLLM uses it when no model is passed.
MODEL = "gpt-4o-mini"
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
class ChatGPTLLM:
    """Small wrapper around the OpenAI chat-completions client.

    Answers a query strictly from a list of context passages supplied by the
    caller.
    """

    def __init__(self, model: str = MODEL):
        """Reload the environment from ``.env`` and create the OpenAI client."""
        load_dotenv(override=True)
        self.model = model
        self.client = OpenAI()

    def format_context_prompt(self, query: str, context: List[str]) -> str:
        """Return the user prompt that embeds ``context`` and ``query``.

        Passages are separated by blank lines; an empty or missing context is
        rendered as the placeholder ``(no context)``.
        """
        context_block = "\n\n".join(context) if context else "(no context)"
        rules = (
            "You are a careful, concise assistant. Use ONLY the Context below.\n"
            "If the answer is not present in the Context, reply exactly: I don't know.\n"
            "Do not add external knowledge or speculate.\n\n"
        )
        body = f"Context:\n{context_block}\n\n" f"Question: {query}\n"
        return rules + body + "Answer succinctly in 1-3 sentences."

    def generate_response(self, query: str, context: List[str]) -> Dict[str, Any]:
        """Ask the model to answer ``query`` from ``context``.

        Returns:
            On success ``{"text": <answer or "">, "raw": <response as a dict>}``.
            On any failure the error is printed and
            ``{"error": <message>, "text": ""}`` is returned instead of raising.
        """
        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "Answer only from provided Context."},
                    {"role": "user", "content": self.format_context_prompt(query, context)},
                ],
            )
            answer = completion.choices[0].message.content
            # Round-trip through JSON so "raw" is a plain, serialisable dict.
            return {"text": answer or "", "raw": json.loads(completion.model_dump_json())}
        except Exception as e:
            # Best-effort: report the failure and hand back an empty answer.
            print(f"[llm1] error: {e}")
            return {"error": str(e), "text": ""}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/controller.py
DELETED
|
@@ -1,304 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
from typing import List, Tuple, Set
|
| 4 |
-
from dotenv import load_dotenv
|
| 5 |
-
from langchain_openai import ChatOpenAI
|
| 6 |
-
from langchain.schema import SystemMessage, HumanMessage, AIMessage
|
| 7 |
-
from rag import get_retriever, ingest as ingest_docs
|
| 8 |
-
from evaluator import GeminiEvaluator
|
| 9 |
-
from tools import notify
|
| 10 |
-
|
| 11 |
-
# Chat model name, taken from the OPENAI_MODEL environment variable with a
# "gpt-4o-mini" fallback. Resolved once, when this module is imported.
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
|
| 12 |
-
# Presumably prefixed to answers that are not backed by the document DB.
# NOTE(review): the use site is outside this view — confirm where it is applied.
DISCLAIMER = "This info does not exist in our DB, but according to your input this is your output: "
|
| 13 |
-
|
| 14 |
-
# --- Minimal LLM and Evaluator functions ---
|
| 15 |
-
|
| 16 |
-
def LLM(user_input, db_retrieved, history):
    """Generate a reply from the user input, retrieved context and chat history.

    Loads the environment, builds a ``ChatOpenAI`` client for the model named
    by ``OPENAI_MODEL`` (default ``gpt-4o-mini``), and sends one system message
    plus one human message that embeds the three inputs as text.

    Args:
        user_input: The user's question; interpolated into the prompt.
        db_retrieved: A list of context strings (joined with blank lines), or
            any other value, which is converted with ``str``.
        history: Prior conversation; embedded via ``str(history or [])``.

    Returns:
        The model reply with surrounding whitespace stripped, or ``""`` when
        the reply is empty.
    """
    load_dotenv(override=True)
    model_name = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
    chat = ChatOpenAI(model=model_name, temperature=0.2)

    if isinstance(db_retrieved, list):
        chunks = db_retrieved
    else:
        chunks = [str(db_retrieved)]
    context_text = "\n\n".join(chunks)
    history_text = str(history or [])

    system_prompt = (
        "Answer using ONLY the provided DB retrieval and keep consistency with the chat history. "
        "If the retrieval does not contain the answer, reply: I am unsure."
    )
    user_prompt = (
        f"This is the user input: {user_input}\n\n"
        f"This is the db_retrieval: {context_text}\n\n"
        f"This is the history of chat: {history_text}\n\n"
        "Based on these, generate a comprehensive response that answers the user's question using the retrieved context and maintaining consistency with chat history."
    )

    response = chat.invoke(
        [SystemMessage(content=system_prompt), HumanMessage(content=user_prompt)]
    )
    reply = response.content
    if not reply:
        return ""
    return reply.strip()
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
def _token_set(text: str) -> set:
|
| 42 |
-
t = (text or "").lower()
|
| 43 |
-
t = re.sub(r"[^a-z0-9\s]", " ", t)
|
| 44 |
-
return {w for w in t.split() if w}
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
def Evaluator(user_input, db_retrieved, llm_response, history):
    """Score an LLM response with deterministic lexical-overlap heuristics.

    All metrics are Jaccard similarities between token sets produced by
    ``_token_set`` (or values derived from them) and lie in ``[0, 1]``.

    Args:
        user_input: The user's question.
        db_retrieved: A list of retrieved context items (each converted with
            ``str`` and joined with blank lines), or any other value, which is
            converted with ``str``.
        llm_response: The response to score; ``None`` is treated as ``""``.
        history: Prior conversation; compared via ``str(history or [])``.

    Returns:
        A dict with the rounded metrics ``relevance``, ``accuracy``,
        ``completeness``, ``consistency``, ``faithfulness`` and ``overall``,
        a boolean ``passed`` (``overall >= 0.7``) and a ``feedback`` string.
    """
    # _token_set already tolerates None; normalise once so len() below does too.
    response_text = llm_response or ""

    if isinstance(db_retrieved, list):
        # str() each item so a non-string chunk cannot break the join.
        db_text = "\n\n".join(str(chunk) for chunk in db_retrieved)
    else:
        db_text = str(db_retrieved)

    q_set = _token_set(user_input)
    db_set = _token_set(db_text)
    r_set = _token_set(response_text)
    h_set = _token_set(str(history or []))

    def jaccard(a: set, b: set) -> float:
        # Empty sets share nothing; this also keeps the union non-zero below.
        if not a or not b:
            return 0.0
        return len(a & b) / len(a | b)

    relevance = jaccard(q_set, r_set)
    accuracy = jaccard(db_set, r_set)

    history_overlap = jaccard(h_set, r_set)
    # No history, or at least 10% overlap with it, counts as fully consistent.
    consistency = 1.0 if (history_overlap >= 0.1 or not h_set) else history_overlap

    # Length is a proxy for completeness (saturating at 300 characters), but
    # only when the response is at least loosely grounded in the retrieval.
    completeness = min(1.0, len(response_text) / 300.0) if accuracy >= 0.2 else 0.3
    faithfulness = accuracy

    weighted = (
        0.3 * relevance
        + 0.3 * accuracy
        + 0.15 * completeness
        + 0.15 * consistency
        + 0.1 * faithfulness
    )
    overall = max(0.0, min(1.0, weighted))
    passed = overall >= 0.7

    feedback_parts = []
    if relevance < 0.5:
        feedback_parts.append("Improve focus on the user's question.")
    if accuracy < 0.5:
        feedback_parts.append("Cite or use details from the retrieved context more precisely.")
    if completeness < 0.7:
        feedback_parts.append("Add missing details supported by context.")
    if consistency < 0.6:
        feedback_parts.append("Ensure alignment with prior conversation.")
    if faithfulness < 0.7:
        feedback_parts.append("Avoid claims not supported by retrieved context.")
    if not feedback_parts:
        feedback_parts.append("Good response: relevant, accurate, and grounded.")

    return {
        "relevance": round(relevance, 3),
        "accuracy": round(accuracy, 3),
        "completeness": round(completeness, 3),
        "consistency": round(consistency, 3),
        "faithfulness": round(faithfulness, 3),
        "overall": round(overall, 3),
        "passed": passed,
        "feedback": " ".join(feedback_parts),
    }
|
| 99 |
-
|
| 100 |
-
class ChatbotController:
|
| 101 |
-
def __init__(self):
|
| 102 |
-
load_dotenv(override=True)
|
| 103 |
-
self.llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0.2)
|
| 104 |
-
self.evaluator = GeminiEvaluator()
|
| 105 |
-
self._smalltalk_patterns = [
|
| 106 |
-
(re.compile(r"^(hi|hello|hey|yo)\b", re.I), "Hello! How can I help today?"),
|
| 107 |
-
(re.compile(r"how\s+are\s+you\b", re.I), "I'm doing well, thanks for asking. How can I help?"),
|
| 108 |
-
(re.compile(r"(good\s+(morning|afternoon|evening))\b", re.I), "Hello! How can I help?"),
|
| 109 |
-
(re.compile(r"\b(thank(s)?|thanks a lot|ty)\b", re.I), "You're welcome!"),
|
| 110 |
-
(re.compile(r"\b(bye|goodbye|see\s+you)\b", re.I), "Goodbye!"),
|
| 111 |
-
(re.compile(r"tell\s+me\s+a\s+joke", re.I), "Why did the developer go broke? Because they used up all their cache."),
|
| 112 |
-
(re.compile(r"\b(help|what\s+can\s+you\s+do)\b", re.I), "I can answer questions based on our knowledge base or just chat!"),
|
| 113 |
-
]
|
| 114 |
-
|
| 115 |
-
def ingest(self, data_dir: str = None) -> str:
|
| 116 |
-
return ingest_docs(data_dir) if data_dir else ingest_docs()
|
| 117 |
-
|
| 118 |
-
def _extract_emails(self, text: str) -> Set[str]:
|
| 119 |
-
return set(re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", text or ""))
|
| 120 |
-
|
| 121 |
-
def _extract_name(self, text: str) -> str | None:
|
| 122 |
-
t = (text or "").strip()
|
| 123 |
-
m = re.search(r"\bmy name is\s+([A-Z][a-zA-Z'.-]{1,40}(\s+[A-Z][a-zA-Z'.-]{1,40}){0,2})\b", t, re.I)
|
| 124 |
-
if m:
|
| 125 |
-
return m.group(1).strip()
|
| 126 |
-
m = re.search(r"\bi am\s+([A-Z][a-zA-Z'.-]{1,40}(\s+[A-Z][a-zA-Z'.-]{1,40}){0,2})\b", t, re.I)
|
| 127 |
-
if m:
|
| 128 |
-
return m.group(1).strip()
|
| 129 |
-
m = re.search(r"\bthis is\s+([A-Z][a-zA-Z'.-]{1,40}(\s+[A-Z][a-zA-Z'.-]{1,40}){0,2})\b", t, re.I)
|
| 130 |
-
if m:
|
| 131 |
-
return m.group(1).strip()
|
| 132 |
-
return None
|
| 133 |
-
|
| 134 |
-
def _extract_emails_from_conversation(self, current_message: str, history: List[dict]) -> Set[str]:
|
| 135 |
-
all_emails = set()
|
| 136 |
-
|
| 137 |
-
# Extract from current message
|
| 138 |
-
all_emails.update(self._extract_emails(current_message))
|
| 139 |
-
|
| 140 |
-
# Extract from chat history (user messages only)
|
| 141 |
-
for msg in (history or []):
|
| 142 |
-
if msg.get("role") == "user":
|
| 143 |
-
content = msg.get("content", "")
|
| 144 |
-
all_emails.update(self._extract_emails(content))
|
| 145 |
-
|
| 146 |
-
return all_emails
|
| 147 |
-
|
| 148 |
-
def _extract_name_from_conversation(self, current_message: str, history: List[dict]) -> str | None:
|
| 149 |
-
# First try current message
|
| 150 |
-
name = self._extract_name(current_message)
|
| 151 |
-
if name:
|
| 152 |
-
return name
|
| 153 |
-
|
| 154 |
-
# Then search through chat history (user messages only, most recent first)
|
| 155 |
-
for msg in reversed(history or []):
|
| 156 |
-
if msg.get("role") == "user":
|
| 157 |
-
content = msg.get("content", "")
|
| 158 |
-
name = self._extract_name(content)
|
| 159 |
-
if name:
|
| 160 |
-
return name
|
| 161 |
-
|
| 162 |
-
return None
|
| 163 |
-
|
| 164 |
-
def _build_prompt(self, q, hits) -> Tuple[str, str]:
|
| 165 |
-
ctx = "\n\n".join([f"[Doc {i+1}]\n{d.page_content}" for i, d in enumerate(hits)])
|
| 166 |
-
sys = (
|
| 167 |
-
"You are a concise assistant. Answer ONLY using the provided Context. "
|
| 168 |
-
"If the Context does not contain the answer, reply exactly: 'I am unsure'. "
|
| 169 |
-
"Do not invent facts or pull from outside knowledge."
|
| 170 |
-
)
|
| 171 |
-
prompt = (
|
| 172 |
-
f"User Question:\n{q}\n\n"
|
| 173 |
-
f"Context (Top {len(hits)}):\n{ctx}\n\n"
|
| 174 |
-
"Provide a short, direct answer grounded in the Context."
|
| 175 |
-
)
|
| 176 |
-
return sys, prompt
|
| 177 |
-
|
| 178 |
-
def _build_conversation_with_history(self, current_message: str, history: List[dict], include_context: bool = False, context_chunks: List[str] = None):
|
| 179 |
-
messages = []
|
| 180 |
-
|
| 181 |
-
if include_context and context_chunks:
|
| 182 |
-
# RAG mode with context
|
| 183 |
-
ctx = "\n\n".join([f"[Doc {i+1}]\n{chunk}" for i, chunk in enumerate(context_chunks)])
|
| 184 |
-
system_msg = (
|
| 185 |
-
"You are a helpful assistant. Use the provided Context to answer questions accurately. "
|
| 186 |
-
"If the Context doesn't contain the answer, say 'I am unsure'. "
|
| 187 |
-
"Maintain conversation continuity and refer to previous messages when relevant.\n\n"
|
| 188 |
-
f"Context:\n{ctx}"
|
| 189 |
-
)
|
| 190 |
-
else:
|
| 191 |
-
# General mode without context
|
| 192 |
-
system_msg = (
|
| 193 |
-
"You are a helpful, practical, and concise assistant. "
|
| 194 |
-
"Maintain conversation continuity and refer to previous messages when relevant."
|
| 195 |
-
)
|
| 196 |
-
|
| 197 |
-
messages.append(SystemMessage(content=system_msg))
|
| 198 |
-
|
| 199 |
-
# Add recent history (last 10 messages to avoid token limits)
|
| 200 |
-
recent_history = (history or [])[-10:] if history else []
|
| 201 |
-
for msg in recent_history:
|
| 202 |
-
role = msg.get("role", "")
|
| 203 |
-
content = msg.get("content", "")
|
| 204 |
-
if role == "user":
|
| 205 |
-
messages.append(HumanMessage(content=content))
|
| 206 |
-
elif role == "assistant":
|
| 207 |
-
messages.append(AIMessage(content=content))
|
| 208 |
-
|
| 209 |
-
# Add current message
|
| 210 |
-
messages.append(HumanMessage(content=current_message))
|
| 211 |
-
|
| 212 |
-
return messages
|
| 213 |
-
|
| 214 |
-
def _smalltalk_reply(self, text: str):
|
| 215 |
-
s = (text or "").strip()
|
| 216 |
-
if not s:
|
| 217 |
-
return None
|
| 218 |
-
for pattern, reply in self._smalltalk_patterns:
|
| 219 |
-
if pattern.search(s):
|
| 220 |
-
return reply
|
| 221 |
-
return None
|
| 222 |
-
|
| 223 |
-
def _is_conversational(self, text: str) -> bool:
|
| 224 |
-
t = (text or "").strip().lower()
|
| 225 |
-
conversational_phrases = [
|
| 226 |
-
"how are you",
|
| 227 |
-
"what's up",
|
| 228 |
-
"whats up",
|
| 229 |
-
"tell me a joke",
|
| 230 |
-
"what do you think",
|
| 231 |
-
"your opinion",
|
| 232 |
-
"talk to me",
|
| 233 |
-
"let's chat",
|
| 234 |
-
"lets chat",
|
| 235 |
-
"who are you",
|
| 236 |
-
"help",
|
| 237 |
-
"thank you",
|
| 238 |
-
"thanks",
|
| 239 |
-
"good morning",
|
| 240 |
-
"good evening",
|
| 241 |
-
]
|
| 242 |
-
return any(p in t for p in conversational_phrases)
|
| 243 |
-
|
| 244 |
-
def get_response(self, message: str, history: List[dict], name: str = None, email: str = None, recorded_emails: Set[str] = None):
|
| 245 |
-
quick = self._smalltalk_reply(message)
|
| 246 |
-
if quick is not None:
|
| 247 |
-
ans = quick
|
| 248 |
-
found_emails = self._extract_emails_from_conversation(message, history)
|
| 249 |
-
if email:
|
| 250 |
-
found_emails.add(email)
|
| 251 |
-
seen = recorded_emails or set()
|
| 252 |
-
new_seen = seen | found_emails
|
| 253 |
-
return ans or "Hello!", new_seen
|
| 254 |
-
retriever = get_retriever()
|
| 255 |
-
hits = retriever.get_relevant_documents(message)
|
| 256 |
-
context_chunks = [d.page_content for d in hits]
|
| 257 |
-
|
| 258 |
-
# Check if context is actually relevant using a quick relevance test
|
| 259 |
-
if context_chunks:
|
| 260 |
-
context_text = " ".join(context_chunks)
|
| 261 |
-
relevance_prompt = f"Does this context contain information relevant to answering: '{message}'?\nContext: {context_text[:500]}...\nAnswer only YES or NO."
|
| 262 |
-
relevance_check = self.llm.invoke([HumanMessage(content=relevance_prompt)]).content.strip().upper()
|
| 263 |
-
context_is_relevant = "YES" in relevance_check
|
| 264 |
-
else:
|
| 265 |
-
context_is_relevant = False
|
| 266 |
-
|
| 267 |
-
if not context_chunks or not context_is_relevant:
|
| 268 |
-
# No RAG support or irrelevant context → allow general LLM answer with history
|
| 269 |
-
messages = self._build_conversation_with_history(message, history, include_context=False)
|
| 270 |
-
ans = self.llm.invoke(messages).content.strip()
|
| 271 |
-
decision = self.evaluator.evaluate_no_context(message, ans)
|
| 272 |
-
# Mark this as needing notification since we used general LLM knowledge
|
| 273 |
-
decision["used_general_knowledge"] = True
|
| 274 |
-
else:
|
| 275 |
-
# RAG response with history
|
| 276 |
-
messages = self._build_conversation_with_history(message, history, include_context=True, context_chunks=context_chunks)
|
| 277 |
-
ans = self.llm.invoke(messages).content.strip()
|
| 278 |
-
decision = self.evaluator.evaluate_response(message, context_chunks, ans)
|
| 279 |
-
decision["used_general_knowledge"] = False
|
| 280 |
-
found_emails = self._extract_emails_from_conversation(message, history)
|
| 281 |
-
if email:
|
| 282 |
-
found_emails.add(email)
|
| 283 |
-
found_name = name or self._extract_name_from_conversation(message, history)
|
| 284 |
-
seen = recorded_emails or set()
|
| 285 |
-
new_seen = seen | found_emails
|
| 286 |
-
# Check if we used general knowledge and should send notification
|
| 287 |
-
if decision.get("used_general_knowledge") and ans and ans.lower() != "i am unsure":
|
| 288 |
-
if self._is_conversational(message):
|
| 289 |
-
return ans, new_seen
|
| 290 |
-
fields = []
|
| 291 |
-
if found_name:
|
| 292 |
-
fields.append(f"name={found_name}")
|
| 293 |
-
if found_emails:
|
| 294 |
-
fields.append(f"emails={','.join(sorted(found_emails))}")
|
| 295 |
-
meta = (" | ".join(fields) + " | ") if fields else ""
|
| 296 |
-
title = "RAG missing knowledge"
|
| 297 |
-
message_payload = f"{meta}question={message}"
|
| 298 |
-
notify(title, message_payload)
|
| 299 |
-
return ans, new_seen
|
| 300 |
-
|
| 301 |
-
if decision.get("decision") == "APPROVED":
|
| 302 |
-
return ans or "i am unsure", new_seen
|
| 303 |
-
|
| 304 |
-
return "Insufficient support in our DB.", new_seen
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/evaluator.py
DELETED
|
@@ -1,108 +0,0 @@
|
|
| 1 |
-
from pydantic import BaseModel, Field, ValidationError
|
| 2 |
-
from openai import OpenAI
|
| 3 |
-
from dotenv import load_dotenv
|
| 4 |
-
import os
|
| 5 |
-
import json
|
| 6 |
-
from typing import List, Dict, Any
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
GEMINI_MODEL = "gemini-2.0-flash"


class GeminiEvaluator:
    """Judge chatbot answers with a Gemini model via the OpenAI-compatible API.

    Both public methods return a dict with the keys ``decision``
    (``"APPROVED"`` or ``"REJECTED"``), ``confidence`` (int), ``reason`` (str)
    and ``has_external_info`` (bool), and never raise: failures are reported
    through the returned dict.
    """

    def __init__(self, model: str = GEMINI_MODEL):
        load_dotenv(override=True)
        google_api_key = os.getenv('GOOGLE_API_KEY')
        self.client = OpenAI(api_key=google_api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
        self.model = model

    @staticmethod
    def _coerce_confidence(value: Any, default: int = 50) -> int:
        """Convert ``value`` to an int clamped to 0..100, or ``default`` if it is not numeric."""
        try:
            return max(0, min(100, int(float(value))))
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _parse_json_object(text: str) -> Dict[str, Any]:
        """Parse the outermost ``{...}`` span of ``text``.

        Text without both braces is treated as ``{}``. Raises
        ``json.JSONDecodeError`` when the span is not valid JSON.
        """
        text = text or "{}"
        start = text.find("{")
        end = text.rfind("}")
        blob = text[start:end + 1] if start != -1 and end != -1 else "{}"
        return json.loads(blob)

    def _ask_for_json(self, prompt: str) -> Dict[str, Any]:
        """Send ``prompt`` to the model at temperature 0 and parse the JSON reply."""
        messages = [
            {"role": "system", "content": "Return strict JSON only."},
            {"role": "user", "content": prompt},
        ]
        resp = self.client.chat.completions.create(model=self.model, messages=messages, temperature=0)
        return self._parse_json_object(resp.choices[0].message.content)

    def create_evaluation_prompt(self, query: str, context: List[str], response: str) -> str:
        """Build the grounding-check prompt; context chunks are rendered as a bullet list."""
        ctx = "\n- " + "\n- ".join(context) if context else "(no context retrieved)"
        return (
            "You are a strict evaluator. Decide if the Response is supported by the Context.\n"
            "Rules:\n"
            "- If the key facts of the Response are not present in the Context and Context is empty, mark REJECTED and set has_external_info=True.\n"
            "- If partially supported but missing crucial facts, mark REJECTED.\n"
            "- Otherwise APPROVED.\n"
            "Return ONLY a compact JSON with keys: decision, confidence, reason, has_external_info.\n"
            "Respond with JSON only, no prose.\n\n"
            f"Context:\n{ctx}\n\n"
            f"Query: {query}\n"
            f"Response: {response}\n"
        )

    def evaluate_response(self, query: str, context: List[str], llm_response: str) -> Dict[str, Any]:
        """Ask the model whether ``llm_response`` is supported by ``context``.

        The model's JSON verdict is normalised and validated. A missing or
        invalid ``decision`` and any request/parsing error result in
        ``REJECTED`` with confidence 0. A malformed ``confidence`` value alone
        no longer discards an otherwise valid verdict; it falls back to 50.
        """
        try:
            data = self._ask_for_json(self.create_evaluation_prompt(query, context, llm_response))

            class EvaluationResult(BaseModel):
                decision: str = Field(pattern=r"^(APPROVED|REJECTED)$")
                confidence: int = Field(ge=0, le=100)
                reason: str
                has_external_info: bool = False

            try:
                validated = EvaluationResult(
                    # Upper-case so "approved" / "Approved" pass the pattern.
                    decision=str(data.get("decision", "REJECTED")).upper(),
                    confidence=self._coerce_confidence(data.get("confidence", 50)),
                    # A null reason would otherwise fail string validation.
                    reason=str(data.get("reason") or ""),
                    has_external_info=bool(data.get("has_external_info", False)),
                )
                return validated.model_dump()
            except ValidationError as ve:
                print(f"[evaluator] validation error: {ve}")
                return {"decision": "REJECTED", "confidence": 0, "reason": "validation_error", "has_external_info": False}
        except Exception as e:
            # Fail closed: an evaluator failure must not approve an answer.
            print(f"[evaluator] error: {e}")
            return {"decision": "REJECTED", "confidence": 0, "reason": str(e), "has_external_info": False}

    def evaluate_no_context(self, query: str, llm_response: str) -> Dict[str, Any]:
        """Judge an answer that was produced without any database context.

        ``has_external_info`` is always ``True`` in the result. If the model
        call or JSON parsing fails, a lexical fallback is returned with reason
        ``"fallback_no_context"``.
        """
        try:
            prompt = (
                "You are a strict evaluator. There is NO database context for this query.\n"
                "Evaluate how well the Response addresses the Query in terms of relevance, helpfulness and clarity.\n"
                "Return ONLY JSON with keys: decision (APPROVED|REJECTED), confidence (0-100), reason, has_external_info (true).\n\n"
                f"Query: {query}\n"
                f"Response: {llm_response}\n"
            )
            data = self._ask_for_json(prompt)
            decision = str(data.get("decision", "APPROVED")).upper()
            return {
                "decision": "APPROVED" if decision == "APPROVED" else "REJECTED",
                "confidence": self._coerce_confidence(data.get("confidence", 75)),
                "reason": data.get("reason", "no_context"),
                "has_external_info": True,
            }
        except Exception as e:
            print(f"[evaluator] error: {e}")
            # Heuristic fallback: full confidence when the first query word
            # appears in the response, otherwise 50. split() is empty for a
            # blank query, so guard before indexing.
            words = (query or "").split()
            response_lower = (llm_response or "").lower()
            rel = 1.0 if (words and llm_response and words[0].lower() in response_lower) else 0.5
            # Both outcomes meet the 0.5 approval bar, so the fallback always approves.
            return {"decision": "APPROVED", "confidence": int(rel * 100), "reason": "fallback_no_context", "has_external_info": True}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/knowledge_base/summary.txt
DELETED
|
@@ -1,499 +0,0 @@
|
|
| 1 |
-
San José State University
|
| 2 |
-
|
| 3 |
-
Former name Minns' Evening Normal School (1857–1862)
|
| 4 |
-
California State Normal School (1862–1921)
|
| 5 |
-
San Jose State Teachers College (1921–1935)
|
| 6 |
-
San Jose State College (1935–1972)
|
| 7 |
-
California State University, San Jose (1972–1974)
|
| 8 |
-
Motto "Powering Silicon Valley"
|
| 9 |
-
Type Public research university
|
| 10 |
-
Established 1857; 168 years ago
|
| 11 |
-
Founder George W. Minns
|
| 12 |
-
Parent institution California State University
|
| 13 |
-
Accreditation WSCUC
|
| 14 |
-
Academic affiliations
|
| 15 |
-
USUSpace-grant
|
| 16 |
-
Endowment $242 million (2024–25)[1]
|
| 17 |
-
Budget $481.8 million (2023–24)[2]
|
| 18 |
-
President Cynthia Teniente-Matson
|
| 19 |
-
Provost Vincent Del Casino[3]
|
| 20 |
-
Academic staff 2,177 (Fall 2024)[4]
|
| 21 |
-
Administrative staff 1,424 (Fall 2022)[4]
|
| 22 |
-
Students 37,661 (Fall 2024)[5]
|
| 23 |
-
Undergraduates 27,354 (Fall 2024)[5]
|
| 24 |
-
Postgraduates 5,468 (Fall 2024)[5]
|
| 25 |
-
Location San Jose, California, U.S.
|
| 26 |
-
37°20′07″N 121°52′53″W
|
| 27 |
-
Campus Large city[6], 154 acres (62 ha) on main campus and 62 acres (25 ha) on south campus
|
| 28 |
-
Newspaper The Spartan Daily
|
| 29 |
-
Colors Blue and gold[7]
|
| 30 |
-
|
| 31 |
-
Nickname Spartans
|
| 32 |
-
Sporting affiliations
|
| 33 |
-
NCAA Division I FBS - Mountain WestWACMPSFWCC
|
| 34 |
-
Mascot Sammy Spartan
|
| 35 |
-
Website www.sjsu.edu Edit this at Wikidata
|
| 36 |
-
|
| 37 |
-
Map
|
| 38 |
-
Wikimedia | © OpenStreetMap
|
| 39 |
-
Show zoomed in
|
| 40 |
-
Show zoomed midway
|
| 41 |
-
Show zoomed out
|
| 42 |
-
Show all
|
| 43 |
-
California Historical Landmark
|
| 44 |
-
Official name First Normal School in California (San Jose State College)
|
| 45 |
-
Designated 1/6/1949
|
| 46 |
-
Reference no. 417[8]
|
| 47 |
-
San José State University (San Jose State or SJSU) is a public research university in San Jose, California. Established in 1857 as the state's first normal school, it is the oldest public university in the western United States[9] and is the founding campus of the California State University system.[10][11]
|
| 48 |
-
|
| 49 |
-
Located in downtown San Jose, San Jose State's main campus spans 154 acres (62 ha), or roughly 19 square blocks. It is accredited by the WASC Senior College and University Commission[12] and is classified among "R2: High Research Spending and Doctorate Production".[13] It is a federally-designated Hispanic-Serving Institution as well as an Asian American and Native American Pacific Islander-Serving Institution.[14]
|
| 50 |
-
|
| 51 |
-
SJSU comprises nine academic colleges, which offer over 250 undergraduate and graduate degree programs.[15] Its enrollment is about 37,000 students annually, including around 28,000 undergraduate and 9,000 graduate and professional students.[5] As of fall 2022, graduate, Asian, and international student enrollments at SJSU were the highest of any campus in the CSU system.[5]
|
| 52 |
-
|
| 53 |
-
San Jose State's sports teams compete as the Spartans in the NCAA Division I Mountain West Conference and have won 10 team national championships and 50 individual national championships. SJSU athletes have competed in every Olympics since 1948 and have amassed 21 medals.[16]
|
| 54 |
-
|
| 55 |
-
History
|
| 56 |
-
Main article: History of San Jose State University
|
| 57 |
-
|
| 58 |
-
Dashaway Hall, one of six sites in San Francisco that housed the State Normal School before a permanent location was chosen in San Jose.[17]
|
| 59 |
-
Establishment
|
| 60 |
-
After a private normal school closed in San Francisco after only one year, politicians John Swett and Henry B. Janes sought to establish a normal school for San Francisco's public school system, and approached George W. Minns to be the principal for the nascent institution.[18][19] The normal school began operations in 1857 and became known as the Minns Evening Normal School. Classes were held only once a week, and the school graduated only 54 female students across its existence; however, the program proved to be enough of a success for increased funding to be approved.[10]
|
| 61 |
-
|
| 62 |
-
In 1861, after the continued success of the Evening School, a committee was formed to create a report on the merits of fully funding a state normal school and presented its report to the California State Legislature in January 1862. On May 2, 1862, the California State Senate elected to fund a state normal school and to appoint a board of trustees.[20] The California State Normal School was then opened on July 21, 1862.[21]
|
| 63 |
-
|
| 64 |
-
Despite continued success, with increasing enrollment and funding, the California State Normal School quickly began to hold contention with the San Francisco Board of Education, which poached students and withheld sufficient school facilities.[22] Because of these issues, the Normal School moved sites six times while in San Francisco, citing noise complaints, sanitary concerns, and lack of access to proper facilities and materials.[23]
|
| 65 |
-
|
| 66 |
-
In 1868, more serious talks of finding a permanent location for the Normal School began, with a general consensus that the school needed to cut ties with the San Francisco Board of Education and move out of San Francisco. After it became public that the Normal School was looking to move to a permanent location, several cities put in bids to host the school; however, after the San Jose Railroad Company paid to have the entire student and faculty body tour the city and potential locations for the school, San Jose became the preferred site.[24] The school moved to San Jose in 1871 and was given Washington Square Park at S. 4th and San Carlos Streets, where the campus remains to this day.[25]
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
The first building on Washington Square, which was destroyed in a fire in 1880.
|
| 70 |
-
The first building on Washington Square was opened in 1871 and fully completed in 1876; however, in 1880 the building was destroyed in a fire. After its destruction, Principal Charles H. Allen journeyed to Sacramento to ask the California State Legislature for emergency funds for a new building. This caused significant debate in the senate about the effectiveness of the school and if it would be better served elsewhere. The California State Senate voted to move the school to Los Angeles, but the school was ultimately kept in San Jose after objections by the California State Assembly.[26] The legislature ultimately agreed to give partial emergency funds to the school for the construction of a new building, which finished construction in 1881.[citation needed]
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
Initially built to replace the building that was destroyed in 1880, the second State Normal School Building was destroyed in the 1906 San Francisco Earthquake[27]
|
| 74 |
-
|
| 75 |
-
The California State Normal School Bell, forged in 1881, still graces the San Jose campus.
|
| 76 |
-
As a part of the construction of the new building, a large bell was forged to commemorate the school. The bell cost $1,200 ($39,099 in 2024),[28] and was inscribed with the words "California State Normal School, A.D. 1881," and would sound on special occasions until 1946 when the college obtained new chimes.[29] The original bell appears on the SJSU campus to this day and is still associated with various student traditions and rituals.[citation needed]
|
| 77 |
-
|
| 78 |
-
Immediately after the failed attempt to move the State Normal School to Los Angeles, California State Senator J.P. West sponsored a bill to create a "Branch State Normal School" in Los Angeles. The bill was passed by both houses, and the branch opened in August 1882. The southern branch campus remained under administrative control of the San Jose campus until 1887.[30] In 1919, the school became the southern branch of the University of California, and later became the University of California, Los Angeles.[31][32]
|
| 79 |
-
|
| 80 |
-
20th Century
|
| 81 |
-
In 1921, the California State Normal School changed its name to the State Teachers College at San Jose.[citation needed]
|
| 82 |
-
|
| 83 |
-
In 1922, the State Teachers College at San Jose adopted the Spartans as the school's official mascot and nickname. Mascots and nicknames prior to 1922 included the Daniels, the Teachers, the Pedagogues, the Normals and the Normalites.[citation needed]
|
| 84 |
-
|
| 85 |
-
In 1930, the Justice Studies Department was founded as a two-year police science degree program. It holds the distinction of offering the first policing degree in the United States. A stone monument and plaque are displayed close to the site of the original police school near Tower Hall.[33]
|
| 86 |
-
|
| 87 |
-
In 1935, the State Teachers Colleges became the California State Colleges, and the school's name was changed again, this time to San Jose State College.[citation needed]
|
| 88 |
-
|
| 89 |
-
In 1942, the old gym (now named Yoshihiro Uchida Hall, after SJSU judo coach Yosh Uchida) was used to register and collect Japanese Americans before sending them to internment camps. Uchida's own family members were interned at some of these camps.[34]
|
| 90 |
-
|
| 91 |
-
In 1963, in an effort to save Tower Hall from demolition, SJSU students and alumni organized testimonials before the State College Board of Trustees, sent telegrams and provided signed petitions. As a result of those efforts, the tower, a principal campus landmark and SJSU icon, was refurbished and reopened in 1966. The tower was again renovated and restored in 2007. Tower Hall is registered with the California Office of Historic Preservation.[35][36]
|
| 92 |
-
|
| 93 |
-
During the 1960s and early 1970s, San Jose State College witnessed a rise in political activism and civic awareness among its student body, including major student protests against the Vietnam War. One of the largest campus protests took place in 1967 when Dow Chemical Company — a major manufacturer of napalm used in the war — came to campus to conduct job recruiting. An estimated 3,000 students and bystanders surrounded the 7th Street administration building, and more than 200 students and teachers lay down on the ground in front of the recruiters.[37]
|
| 94 |
-
|
| 95 |
-
In 1972, upon meeting criteria established by the board of trustees and the Coordinating Council for Higher Education, SJSC was granted university status, and the name was changed to California State University, San Jose.[38] However, in 1974, the California legislature voted to change the school's name to San José State University.[38]
|
| 96 |
-
|
| 97 |
-
In 1982, the English department began sponsoring the annual Bulwer-Lytton Fiction Contest.[39]
|
| 98 |
-
|
| 99 |
-
In 1985, the CADRE Laboratory for New Media was established. It is believed to be the second oldest media lab of its kind in the United States.[40]
|
| 100 |
-
|
| 101 |
-
In 1999, San Jose State and the City of San Jose agreed to combine their main libraries to form a joint city-university library located on campus, the first known collaboration of this type in the United States. The combined library faced opposition, with critics stating the two libraries have very different objectives and that the project would be too expensive. Despite opposition, the $177 million project proceeded, and the Dr. Martin Luther King Jr. Library opened on time and on budget in 2003. The library has won several national awards since its initial opening.[41]
|
| 102 |
-
|
| 103 |
-
21st Century
|
| 104 |
-
During its 2006–07 fiscal year, SJSU received a record $50+ million in private gifts and $84 million in capital campaign contributions.[42]
|
| 105 |
-
|
| 106 |
-
In 2008, SJSU received a CASE WealthEngine Award in recognition of raising over $100 million. SJSU was one of approximately 50 institutions nationwide honored by CASE in 2008 for overall performance in educational fundraising.[43]
|
| 107 |
-
|
| 108 |
-
In October 2010, SJSU President Don Kassing publicly launched SJSU's first-ever comprehensive capital fundraising campaign dubbed "Acceleration: the Campaign for San Jose State University."[44] The original goal of the multi-year campaign was to raise $150 million but was later increased to $200 million because of the rapid success of the campaign. The campaign would eventually exceed its goal one year earlier than anticipated, raising more than $208 million by 2013.[45]
|
| 109 |
-
|
| 110 |
-
In 2012, the NASA Ames Research Center in Mountain View, California, awarded SJSU $73.3 million to participate in the development of systems for improving the safety and efficiency of air and space travel. NASA scientists, SJSU faculty and graduate students worked collaboratively on this effort. The grant was the largest federal award in SJSU history.[46]
|
| 111 |
-
|
| 112 |
-
University principals and presidents
|
| 113 |
-
Main article: List of presidents of San Jose State University
|
| 114 |
-
Thirty-two people have led San Jose State since its founding including 8 principals, 15 presidents, 5 acting presidents, and 4 interim presidents.[47]
|
| 115 |
-
|
| 116 |
-
Principals Presidents
|
| 117 |
-
# Name Years served # Name Years served # Name Years served # Name Years served
|
| 118 |
-
1 George W. Minns 1857–1862 9 James McNaughton 1899–1900 19 Hobert W. BurnsA 1969–1970 26 Don W. KassingT 2010–2011
|
| 119 |
-
2 Ahira Holmes 1862–1865 10 Morris Elmer Dailey 1900–1918 20 John H. Bunzel 1970–1978 28 Mohammad Qayoumi 2011–2015
|
| 120 |
-
1 George W. Minns 1865–1866 11 Lewis Ben WilsonA 1919–1920 21 Gail Fullerton 1978–1991 29 Susan W. MartinT 2015–2016
|
| 121 |
-
3 Henry P. Carlton 1866–1867 12 William Webb Kemp 1920–1923 22 J. Handel EvansA 1991–1994 30 Mary A. Papazian 2016–2021
|
| 122 |
-
4 George E. Tait 1867–1868 13 Alexander Richard HeronA 1923–1923 23 Robert L. Caret 1995–2003 31 Stephen PerezT 2022–2023
|
| 123 |
-
3 Henry P. Carlton 1868–1868 14 Edwin Reagan Snyder 1923–1925 24 Joseph N. CrowleyT 2003–2004 32 Cynthia Teniente-Matson 2023–Present
|
| 124 |
-
5 William T. Lucky 1868–1873 15 Herman F. MinssenA 1923–1927 25 Paul Yu 2004–2004
|
| 125 |
-
6 Charles H. Allen 1873–1889 16 Thomas William MacQuarrie 1927–1952 26 Don W. KassingT 2004–2005
|
| 126 |
-
7 Charles W. Childs 1889–1896 17 John T. Wahlquist 1952–1964 26 Don W. Kassing 2005–2008
|
| 127 |
-
8 Ambrose Randall 1896–1899 18 Robert D. Clark 1964–1969 27 Jon Whitmore 2008–2010
|
| 128 |
-
A = Acting President T = Interim President
|
| 129 |
-
Campus
|
| 130 |
-
|
| 131 |
-
Built in 1910, Tower Hall is the oldest structure on the SJSU campus.
|
| 132 |
-
|
| 133 |
-
The Central Classroom Building is the third oldest structure on campus.
|
| 134 |
-
See also: Tower Hall
|
| 135 |
-
The SJSU main campus comprises approximately 55 buildings situated on a rectangular, 154-acre (62.3 ha) area in downtown San Jose. The campus is bordered by San Fernando Street to the north, San Salvador Street to the south, South 4th Street to the west, and South 10th Street to the east. The south campus, which is home to many of the school's athletics facilities, is located approximately 1.5 miles (2.4 kilometres) south of the main campus on South 7th Street.[citation needed]
|
| 136 |
-
|
| 137 |
-
California State Normal School did not receive a permanent home until it moved from San Francisco to San Jose in 1871. The original California State Normal School campus in San Jose consisted of several rectangular, wooden buildings with a central grass quadrangle. The wooden buildings were destroyed by fire in 1880 and were replaced by interconnected stone and masonry structures of roughly the same configuration in 1881.[citation needed]
|
| 138 |
-
|
| 139 |
-
These buildings were declared unsafe following the 1906 San Francisco earthquake and were being torn down when an aftershock of the magnitude that was predicted to destroy the buildings occurred and no damage was observed. Accordingly, demolition was stopped, and the portions of the buildings still standing were subsequently transformed into four halls: Tower Hall, Morris Dailey Auditorium, Washington Square Hall and Dwight Bentel Hall. These four structures remain standing to this day and are the oldest buildings on campus.[citation needed]
|
| 140 |
-
|
| 141 |
-
Beginning in the fall of 1994, the on-campus segments of San Carlos Street, 7th Street and 9th Street were closed to automobile traffic and converted to pedestrian walkways and green belts within the campus. San Carlos Street was renamed Paseo de San Carlos, 7th Street became Paseo de César Chávez, and 9th Street is now called the Ninth Street Plaza. The project was completed in 1996.[citation needed]
|
| 142 |
-
|
| 143 |
-
Completed in 1999, the Business Classroom Project was a $16 million renovation of the James F. Boccardo Business Education Center. The $1.5 million Heritage Gateway project was completed in the same year. The privately funded project featured construction of eight oversized gateways around the main campus perimeter.[citation needed]
|
| 144 |
-
|
| 145 |
-
In the fall of 2000, the SJSU Police Department, which is part of the larger California State University Police Department, opened a new on-campus, multi-level facility on 7th Street.[citation needed]
|
| 146 |
-
|
| 147 |
-
The $177 million Dr. Martin Luther King Jr. Library, which opened its doors on August 1, 2003, won the Library Journal's 2004 Library of the Year award, the publication's highest honor.[48] The King Library represents the first collaboration of its kind between a university and a major U.S. city. The library is eight stories high, has 475,000 square feet (44,100 m2) of floor space, and houses approximately 1.3 million volumes.[49] San Jose's first public library occupied the same site from 1901 to 1936, and SJSU's Wahlquist Library occupied the site from 1961 to 2000.[citation needed]
|
| 148 |
-
|
| 149 |
-
In 2007, a $2 million renovation of Tower Hall was completed. Tower Hall is among the oldest and most recognizable buildings on campus. It was registered as an official California Historical Landmark in 1949.[50] The building was rededicated in 1910 after numerous campus structures were either destroyed or heavily damaged in the 1906 earthquake. Tower Hall, Morris Dailey Auditorium, Washington Square Hall and Dwight Bentel Hall are the four oldest buildings on campus.[51]
|
| 150 |
-
|
| 151 |
-
The Diaz Compean Student Union is a four-story, stand-alone facility that features a food court, the Spartan Bookstore, a multi-level study area, ballrooms, a bowling alley, music room and large game room. In September 2010, a $90 million expansion and renovation of the student union commenced. The project added approximately 100,000 square feet (9,300 m2) including construction of new ballrooms, food court, theater, meeting rooms and student program spaces. The expansion phase of the project was completed in June 2014. The renovation phase of the project was completed in August 2015.[52]
|
| 152 |
-
|
| 153 |
-
Construction of a new, three-story, 52,000-square-foot (4,800 m2) on-campus health center at 7th Street and Paseo de San Carlos was completed in March 2015. The building houses the Student Health Center, Student Affairs office, Counseling Services and Wellness Center. The project was completed at a cost of over $36 million.[52][53][54]
|
| 154 |
-
|
| 155 |
-
In August 2015, a $55 million renovation of the Spartan Complex was completed.[52] The Spartan Complex houses open recreation spaces, gymnasiums, an indoor aquatics center, the kinesiology department, weight rooms, locker rooms, dance and judo studios, and other classroom space. The primary project objectives were to expand existing structures, upgrade the structures to make them compliant with current building codes, correct ADA deficiencies, remove hazardous materials and correct fire safety deficiencies.[citation needed]
|
| 156 |
-
|
| 157 |
-
Residence halls
|
| 158 |
-
|
| 159 |
-
One of three Campus Village student residence buildings towers over the southeast corner of the SJSU main campus. A total of seven residence halls provide on-campus housing for 4,458 students.
|
| 160 |
-
The SJSU on-campus housing community comprises seven residence halls, which can accommodate a combined total of 4,458 students. When the third phase of the Campus Village is completed, SJSU's total on-campus student housing capacity should increase from 4,458 to 4,928. The projected total cost for this project is approximately $334 million.[55]
|
| 161 |
-
|
| 162 |
-
In January 2023, the California State University Board of Trustees approved a public-private partnership between SJSU and local investors that will allow the former Alfred E. Alquist state office building site to be transformed into new housing for SJSU faculty, staff, and graduate students.[56] Located one block west of the SJSU main campus, the 1.6-acre (0.65 ha) parcel will be the site of approximately 1,000 new housing rental units. Up to half of those units will be reserved for graduate students.[57] The new housing development will comprise one or more high-rise structures up to 300 feet (91.4 m) tall. The estimated total cost of the project is $750 million.[58] The project's design phase is projected to be completed by early 2024. Construction is projected to begin in late 2024 and be completed in 2027.[59]
|
| 163 |
-
|
| 164 |
-
Additional on-campus facilities
|
| 165 |
-
|
| 166 |
-
The Arch of Dignity, Equality and Justice, 2008 by Judy Baca, on the Paseo de César Chávez.
|
| 167 |
-
SJSU is home to the 10,000-square-foot (930 m2), three-story Nuclear Science Facility. It is the only nuclear science facility of its kind in the California State University system.[60]
|
| 168 |
-
|
| 169 |
-
Located on the main campus, the Provident Credit Union Event Center seats approximately 5,000 people for athletic events and over 6,500 for concerts.[citation needed]
|
| 170 |
-
|
| 171 |
-
A new student recreation and aquatic center opened in April 2019. At a cost of $132 million, the new facility houses multiple gymnasiums, basketball courts, multiple weight and fitness centers, exercise rooms, rock climbing wall, indoor track, indoor soccer fields, and competition and recreation pools with support spaces. The new facility is located on the main campus at the corner of 7th Street and San Carlos on the site of the old aquatic center, which was demolished in 2017.[52]
|
| 172 |
-
|
| 173 |
-
Construction of a new interdisciplinary science building broke ground in April 2019. At a projected cost of $181 million, the new facility will house teaching labs, research labs, faculty offices, a dean's suite and interdisciplinary spaces totaling 164,000 square feet (15,200 m2). The project site is located on the southwest quadrant of campus just north of Duncan Hall. The new building was completed in 2023.[61]
|
| 174 |
-
|
| 175 |
-
South Campus
|
| 176 |
-
|
| 177 |
-
A 2017 view of South Campus, stretching from the parking lot west of CEFCU Stadium to the golf course.
|
| 178 |
-
SJSU's South Campus is located in the Spartan Keyes neighborhood, just south of Downtown San Jose. Many of SJSU's athletics facilities, including CEFCU Stadium (formerly known as Spartan Stadium) and the Spartan Golf Complex, along with the athletics department administrative offices and multiple training, practice and competition facilities, are located on the 62-acre (25.1 ha) south campus approximately 1.5 miles (2.4 kilometres) south of the main campus near 7th Street. The south campus also is home to student overflow parking. Shuttle buses run between the main campus and south campus every 10 to 15 minutes Monday through Thursday.[citation needed]
|
| 179 |
-
|
| 180 |
-
In April 2014, a new $76 million master plan to renovate the entire South Campus was unveiled. The estimated cost was later increased to $150 million. The plan called for construction of a golf training facility, new baseball and softball stadiums, new outdoor recreation and intramural facility, new soccer and tennis facilities, three beach volleyball courts, a new multilevel parking garage, a new track and field facility, and a football stadium addition and renovation. The new golf, soccer and tennis facilities opened in 2017. The new softball facility opened in 2018, and the beach volleyball courts were completed in 2019. The intramural facility and parking garage were completed in 2021 along with the first phase of a new baseball facility.[citation needed]
|
| 181 |
-
|
| 182 |
-
In August 2023, the first phase of the football stadium project was completed at an approximate cost of $70 million.[62] Known as the Spartan Athletics Center, the 55,000 square-foot, multi-story facility houses a new football operations center, locker rooms, offices, meeting and training rooms and a sports medicine center.[63] The facility also includes soccer team offices and locker rooms, as well as dining and hospitality facilities, event spaces and premium viewing areas.[64] Phase II, which is tentatively slated to include installation of premium spectator seating on the stadium's east side, remains in the planning stages as of 2023.[citation needed]
|
| 183 |
-
|
| 184 |
-
Remaining South Campus projects are either under construction or still in the planning stages, as of 2023.[65]
|
| 185 |
-
|
| 186 |
-
Off-campus facilities
|
| 187 |
-
SJSU Simpkins International House (360 S. 11th Street, San Jose) provides housing for domestic as well as international students of the university. International House (also known as I-House) is a co-ed residence facility for 70 U.S. and international students attending San José State University. The building has served as a residence hall since 1980, and offers cultural exchanges for U.S. students as well as residents from abroad.[citation needed]
|
| 188 |
-
|
| 189 |
-
The SJSU Department of Aviation and Technology maintains a 6,000-square-foot (560 m2) academic facility at the Reid-Hillview Airport.[citation needed]
|
| 190 |
-
|
| 191 |
-
SJSU manages the Moss Landing Marine Laboratories (MLML) in Moss Landing, California, at Monterey Bay. MLML is a cooperative research facility of seven CSU campuses. Construction of an aquaculture laboratory at the MLML site was completed in August 2014. The building project included construction of a 1,400-square-foot (130 m2) aquaculture lab building and installation of a 1,584-square-foot (147.2 m2) tank slab area. The project was made possible by grants from the Packard Foundation.[52][66]
|
| 192 |
-
|
| 193 |
-
SJSU International and Extended Studies facility (384 S. 2nd Street, San Jose). This off-campus classroom building houses SJSU's International Gateway Programs, a collection of classes geared toward introducing international students to the English language and American culture.[67]
|
| 194 |
-
|
| 195 |
-
University Club (408 S. 8th Street, San Jose), is a 16-room, multi-level dining, special events, and bed-and-breakfast style residence facility for faculty, staff, visiting scholars and graduate students of the university. This building is currently occupied by Alpha Omicron Pi sorority in agreement with the university.[citation needed]
|
| 196 |
-
|
| 197 |
-
Known simply as North Fourth Street (210 N. 4th Street, San Jose), this four-story facility houses the Global Studies Institute, Governmental and External Affairs, International and Extended Studies, the Mineta Transportation Institute, the Processed Foods Institute, and the SJSU Research Foundation.[citation needed]
|
| 198 |
-
|
| 199 |
-
Organization
|
| 200 |
-
|
| 201 |
-
The Boccardo Business Complex at the Lucas College and Graduate School of Business.
|
| 202 |
-
As a member institution of the California State University System, San Jose State falls under the jurisdiction of the California State University Board of Trustees and the chancellor of the California State University.[citation needed]
|
| 203 |
-
|
| 204 |
-
The chief executive of San José State University is the university president. In November 2022, the California State University Board of Trustees named Cynthia Teniente-Matson as the new SJSU president. Teniente-Matson previously served as the president of Texas A&M University–San Antonio and began her tenure at San Jose State on January 16, 2023.[68]
|
| 205 |
-
|
| 206 |
-
The university is organized into nine colleges:
|
| 207 |
-
|
| 208 |
-
Lucas College and Graduate School of Business[69]
|
| 209 |
-
Connie L. Lurie College of Education[70]
|
| 210 |
-
Charles W. Davidson College of Engineering[71]
|
| 211 |
-
College of Graduate Studies[72]
|
| 212 |
-
College of Health and Human Sciences (formerly the College of Applied Sciences and Arts)[73]
|
| 213 |
-
College of Humanities and the Arts[74]
|
| 214 |
-
College of Information, Data & Society (formerly the College of Professional and Global Education)[75]
|
| 215 |
-
College of Science[76]
|
| 216 |
-
College of Social Sciences[77]
|
| 217 |
-
Additionally, SJSU has seven focused schools:
|
| 218 |
-
|
| 219 |
-
School of Art and Design[78]
|
| 220 |
-
Lucas College and Graduate School of Business[79]
|
| 221 |
-
School of Information[80]
|
| 222 |
-
School of Journalism and Mass Communications[81]
|
| 223 |
-
School of Music and Dance[82]
|
| 224 |
-
The Valley Foundation School of Nursing[83]
|
| 225 |
-
School of Social Work[84]
|
| 226 |
-
Academics
|
| 227 |
-
|
| 228 |
-
The Dr. Martin Luther King Jr. Library houses over 1.6 million volumes.
|
| 229 |
-
As of spring 2023, San José State University offered 150 bachelor's degree programs, 95 master's degrees, 5 doctoral degrees, 11 different credential programs, and 42 certificates.[15] SJSU is accredited by the Western Association of Schools and Colleges (WASC).[12]
|
| 230 |
-
|
| 231 |
-
SJSU's doctoral degree offerings include a Ph.D. program in library and information science offered jointly through Manchester Metropolitan University in Manchester, England,[85] a doctor of audiology (Au.D.), an Ed.D. program in educational leadership, a doctor of nursing practice (DNP), and an occupational therapy doctorate (OTD).[15]
|
| 232 |
-
|
| 233 |
-
As of fall 2024, the university's Charles W. Davidson College of Engineering, with 7,133 undergraduate and graduate students, was the largest college on campus.[86] SJSU's Lucas College and Graduate School of Business was the second largest college on campus with a total enrollment of 6,745 undergraduate and graduate students.[86] The university's College of Social Sciences, with 5,442 undergraduate and graduate students, was the third-largest college at SJSU.[86] By enrollment, the Lucas College of Business is among the largest business schools in the country.[87] It is accredited by the Association to Advance Collegiate Schools of Business (AACSB) at both the graduate and undergraduate levels.[88]
|
| 234 |
-
|
| 235 |
-
Rankings
|
| 236 |
-
Academic rankings
|
| 237 |
-
Master's
|
| 238 |
-
Washington Monthly[89] 30
|
| 239 |
-
Regional
|
| 240 |
-
U.S. News & World Report[90] 4
|
| 241 |
-
National
|
| 242 |
-
Forbes[91] 87
|
| 243 |
-
WSJ/College Pulse[92] 16
|
| 244 |
-
Global
|
| 245 |
-
THE[93] 1001–1200
|
| 246 |
-
U.S. News & World Report[94] 1624
|
| 247 |
-
2024–2025 USNWR Rankings Regional Universities in the West[95]
|
| 248 |
-
Most Innovative Schools 3
|
| 249 |
-
Top Public Schools 3
|
| 250 |
-
Best Colleges for Veterans 3
|
| 251 |
-
Top Performers on Social Mobility 12
|
| 252 |
-
Best Value Schools 11
|
| 253 |
-
Best Undergraduate Engineering Programs 15 (At schools where doctorate not offered)
|
| 254 |
-
Computer Engineering 9
|
| 255 |
-
Electrical Engineering / Electronic / Communications 10
|
| 256 |
-
Mechanical Engineering 12
|
| 257 |
-
Nursing 218
|
| 258 |
-
Economics 214
|
| 259 |
-
2024–2025 USNWR Best Graduate School Rankings[96]
|
| 260 |
-
Library and Information Studies 19
|
| 261 |
-
Occupational Therapy 30
|
| 262 |
-
Social Work 77
|
| 263 |
-
Speech-Language Pathology 74
|
| 264 |
-
Fine Arts 110
|
| 265 |
-
Public Affairs 121
|
| 266 |
-
Public Health 116
|
| 267 |
-
Nursing Master's 135
|
| 268 |
-
Part-time MBA 143
|
| 269 |
-
Education 120
|
| 270 |
-
According to the 2024 U.S. News & World Report college rankings, San Jose State was ranked No. 3 in the western United States. SJSU was ranked No. 16 among all 120 "regional universities" in the western U.S.[97]
|
| 271 |
-
|
| 272 |
-
SJSU's undergraduate engineering program was ranked tied for No. 12 nationally among 230 public and private colleges that do not offer doctoral degrees in engineering, according to the 2022-2023 U.S. News & World Report college rankings.[98]
|
| 273 |
-
|
| 274 |
-
SJSU was ranked No. 107 out of approximately 500 institutions nationwide on the 2022 Forbes America's Top Colleges list. SJSU was ranked No. 43 nationally on the Forbes list of top public universities and colleges. Forbes also ranked SJSU No. 40 nationally out of approximately 300 colleges and universities on the most recent Forbes list of America's Best Value Colleges (2019).[99]
|
| 275 |
-
|
| 276 |
-
Money magazine ranked San Jose State No. 31 nationally out of approximately 625 schools it evaluated for its 2022 "Best Colleges in America" ranking.[100] Money also ranked SJSU No. 27 nationally on its 2022 list of Best Public Colleges,[101] No. 39 on its list of Best Colleges for Engineering Majors,[102] and No. 19 on Money's list of Best Colleges in the West.[103] Finally, Money magazine ranked San Jose State No. 1 nationally on its 2020 list of "Most Transformative Colleges."[104]
|
| 277 |
-
|
| 278 |
-
SJSU was ranked No. 16 out of more than 800 U.S. colleges and universities in the Wall Street Journal/Times Higher Education College Rankings 2025. The ranking was based on 15 individual performance indicators and responses from more than 170,000 current college students.[105]
|
| 279 |
-
|
| 280 |
-
Washington Monthly ranked SJSU No. 53 nationally out of 603 master's universities (2022). Washington Monthly ranks colleges based on their "contribution to the public good in three broad categories: social mobility, research, and promoting public service."[106]
|
| 281 |
-
|
| 282 |
-
The Webometrics Ranking of World Universities, which provides an assessment of the scholarly contents, visibility and impact of universities on the web, ranked SJSU No. 701 out of approximately 12,000 universities worldwide, and No. 200 out of approximately 3,200 U.S. colleges and universities (2022).[107][108]
|
| 283 |
-
|
| 284 |
-
Undergraduate admissions
|
| 285 |
-
Admission to SJSU is based on a combination of the applicant's high school cumulative grade point average (GPA) and standardized test scores. These factors are used to determine the applicant's California State University (CSU) eligibility index. More specifically, the eligibility index is a weighted combination of the applicant's high school grade point average during the final three years of high school and either the SAT or ACT score.
|
| 286 |
-
|
| 287 |
-
The CSU eligibility index is calculated by using either the SAT or ACT as follows: (Sum of SAT scores in mathematics and critical reading) + (800 x high school GPA) or (10 x ACT composite score without the writing score) + (200 x high school GPA).
|
| 288 |
-
|
| 289 |
-
In fall 2022, a total of 34,783 first-time, first-year (freshmen) applications were submitted, with 26,083 applicants accepted (75.0%) and 4,036 enrolling (15.5% of those accepted).[109]
|
| 290 |
-
|
| 291 |
-
Freshman Admission Statistics[110][110][111][112][113]
|
| 292 |
-
2024 2023 2022 2021 2020
|
| 293 |
-
Applicants 37,132 35,780 34,783 30,441 32,375
|
| 294 |
-
Admits 31,419 28,708 26,083 25,682 21,810
|
| 295 |
-
% Admitted 84.6 80.2 75.0 84.4 67.4
|
| 296 |
-
Enrolled 4,604 4,519 4,036 4,220 3,325
|
| 297 |
-
SAT composite (middle 50% range) 1090–1330 1070–1320 1070–1370 1030–1310 1030–1240
|
| 298 |
-
ACT composite (middle 50% range) 24–30 22–28 21–29 20–31 19–26
|
| 299 |
-
Average High School GPA 3.53 3.50 3.60 3.54 3.55
|
| 300 |
-
Transfer Admission Statistics[114][111][112][113]
|
| 301 |
-
2024 2023 2022 2021 2020
|
| 302 |
-
Applicants 11,504 10,880 12,458 14,337 14,929
|
| 303 |
-
Admits 8,036 7,806 8,720 10,120 10,329
|
| 304 |
-
% Admitted 69.8 71.7 70.0 70.6 69.2
|
| 305 |
-
Enrolled 3,144 2,939 3,220 3,739 4,328
|
| 306 |
-
Among first-time, first-year (freshmen) students who enrolled in fall 2021, SAT scores for the middle 50.0% ranged from 1030–1310.[112] ACT composite scores for the middle 50.0% ranged from 20–31.[111] The average high school GPA for incoming freshmen was 3.54. Approximately 39.0% of all incoming freshmen had a high school GPA between 3.75 and 4.0, and 18% had an average high school GPA of 4.0.[111]
|
| 307 |
-
|
| 308 |
-
In recent years, enrollment at SJSU has become impacted in all undergraduate majors, which means the university no longer has the enrollment capacity to accept all CSU-eligible applicants, including some from local high schools and community colleges. Although an applicant may meet the minimum CSU admission requirements, CSU-eligible applicants are no longer guaranteed admission.[115][116]
|
| 309 |
-
|
| 310 |
-
Undergraduate graduation and retention
|
| 311 |
-
Among all first-time freshmen students who enrolled at SJSU in fall 2017, 30% graduated within four years; 68% who enrolled in fall 2015 graduated within six years.[117] Among new undergraduate transfer students who enrolled at SJSU in fall 2017, 33.0% graduated within two years, 69% graduated within three years, and 80.0% graduated within four years. Among first-time graduate students who enrolled at SJSU in fall 2017, 52.0% graduated within two years, 78% graduated within three years, and 83.0% graduated within four years.[117]
|
| 312 |
-
|
| 313 |
-
The percentage of undergraduate students from the fall 2019 cohort returning in fall 2020 was 86.0% for full-time freshman students, 90.0% for new undergraduate transfer students, and 92.0% for first-time graduate students.[117]
|
| 314 |
-
|
| 315 |
-
Faculty and research
|
| 316 |
-
|
| 317 |
-
The Moss Landing Marine Laboratories.
|
| 318 |
-
The university is classified among "R2: High Research Spending and Doctorate Production".[13] As of fall 2024, San José State University employed 2,177 faculty, 1,300 of whom (or about 60%) were full-time or equivalent (FTEF).[118]
|
| 319 |
-
|
| 320 |
-
According to National Science Foundation survey data, in 2023 San Jose State's research and development expenditures totaled $83.4 million, placing it second in total R&D expenditures out of all 23 California State University (CSU) campuses and No. 185 out of more than 900 colleges and universities nationwide.[119]
|
| 321 |
-
|
| 322 |
-
Research collections located at SJSU include the Ira F. Brilliant Center for Beethoven Studies, the Martha Heasley Cox Center for Steinbeck Studies, the J. Gordon Edwards Entomology Museum and the Carl W. Sharsmith Herbarium.
|
| 323 |
-
|
| 324 |
-
SJSU research partnerships include the SJSU Metropolitan Technology Center at NASA Ames Research Center, Moffett Field, the Cisco Networking Laboratory, and the Moss Landing Marine Laboratories. SJSU is also home to the Mineta Transportation Institute.
|
| 325 |
-
|
| 326 |
-
Additionally, the university operates the Survey and Policy Research Institute (SPRI), which conducts the quarterly, high-profile California Consumer Confidence Survey and many other research projects.
|
| 327 |
-
|
| 328 |
-
SJSU is a member institution of the National Space Grant College and Fellowship Program.[120]
|
| 329 |
-
|
| 330 |
-
Since 1979, the SJSU Department of Kinesiology has operated the Timpany Center (located at 730 Empey Way), a non-profit therapeutic facility open to all and owned by the County of Santa Clara. The center is dedicated to the health and fitness of those with a disability or age-related concerns.[121]
|
| 331 |
-
|
| 332 |
-
From 1989 to 2024, the SJSU Environmental Studies Department headquartered and operated the Center for the Development of Recycling, an environmental research and service organization.[122]
|
| 333 |
-
|
| 334 |
-
On July 21, 2012, SJSU launched its first miniaturized satellite used for space research, TechEdSat, in a partnership with the NASA Ames Research Center.[123]
|
| 335 |
-
|
| 336 |
-
Since 2014, SJSU has operated the Silicon Valley Big Data and Cybersecurity Center (BDCC). The center serves as a cybersecurity research and knowledge hub by creating multidisciplinary collaborations between faculty members from across the university and Silicon Valley tech companies.
|
| 337 |
-
|
| 338 |
-
Air Force ROTC
|
| 339 |
-
Known academically as the Department of Aerospace Studies, SJSU's Detachment 045 is one of only two Air Force Reserve Officer Training Corps detachments in the San Francisco Bay Area.[124] As such, Detachment 045 hosts "crosstown cadets" from other Bay Area schools including Santa Clara University, Stanford University and UC Santa Cruz.[125] San Jose State students and crosstown cadets enrolled in the AFROTC program learn leadership skills and participate in a number of other mandatory activities leading to an active-duty U.S. military officer commission.[citation needed]
|
| 340 |
-
|
| 341 |
-
Student life
|
| 342 |
-
Undergraduate demographics as of Fall 2024
|
| 343 |
-
Race and ethnicity[126] Total
|
| 344 |
-
Asian 35.9%
|
| 345 |
-
|
| 346 |
-
Hispanic/Latino 29.6%
|
| 347 |
-
|
| 348 |
-
White 13.6%
|
| 349 |
-
|
| 350 |
-
Other[a] 9.4%
|
| 351 |
-
|
| 352 |
-
Foreign national 7.9%
|
| 353 |
-
|
| 354 |
-
Black or African American 3.3%
|
| 355 |
-
|
| 356 |
-
Native Hawaiian or Other Pacific Islander 0.4%
|
| 357 |
-
|
| 358 |
-
American Indian or Alaskan Native 0.1%
|
| 359 |
-
|
| 360 |
-
Economic diversity
|
| 361 |
-
Low-income[b] 40%
|
| 362 |
-
|
| 363 |
-
Affluent[c] 60%
|
| 364 |
-
|
| 365 |
-
Student Body Origin (Returning students) Fall 2024[5][127]
|
| 366 |
-
California: Santa Clara County 44.6%
|
| 367 |
-
California: Bay Area (Outside Santa Clara County) 35.0%
|
| 368 |
-
California: Non-local 12.6%
|
| 369 |
-
International 6.8%
|
| 370 |
-
Other U.S. 0.9%
|
| 371 |
-
As the oldest and one of the largest universities in the CSU system, SJSU attracts students from California, the United States, and 100 countries around the world.[128] As of fall 2022, 35,751 students were enrolled at SJSU including 26,863 undergraduate students and 8,888 graduate and credential students. Approximately 51% of students were male and 49% were female. Graduate student enrollment at SJSU was the highest of any campus in the CSU system.[4][5]
|
| 372 |
-
|
| 373 |
-
As of fall 2022, the average age of undergraduate students at SJSU was 22.2. The average age of graduate students was 29.0, and the average age of credential students was 31.7.[4]
|
| 374 |
-
|
| 375 |
-
Approximately 4,500 students (12.5%) live in campus housing and community impact studies show an estimated 5,000 more students live within easy walking or biking distance of the campus.[129] Additionally, approximately 45% of all first-year (freshman) students live in campus residence facilities.[111]
|
| 376 |
-
|
| 377 |
-
As of 2022, there were over 475 recognized student organizations at SJSU.[130] These include academic and honorary organizations, cultural and religious organizations, special interest organizations, fraternities and sororities, and a wide variety of club sports organizations.
|
| 378 |
-
|
| 379 |
-
Fraternities and sororities
|
| 380 |
-
Fraternities and sororities have existed at SJSU since 1896.[131] SJSU is home to 43 social fraternity and sorority chapters managed by Student Involvement. Greek life at SJSU comprises both social (NIC & NPC) and cultural (NPHC & USFC) organizations. Eighteen fraternities and sororities maintain chapter homes in the residential community east of campus along S. 10th and 11th streets, north of campus along San Fernando Street, and south of campus along San Salvador Street, S. 8th Street, and E. Reed Street, in downtown San Jose.[132]
|
| 381 |
-
|
| 382 |
-
An additional 26 fraternities are co-ed and are either major-related, honors-related, or community service-related. The United Sorority and Fraternity Council (USFC) at San José State University was established in 2003. USFC is the coordinating body for the 17 cultural interest fraternities and sororities at SJSU.[133] Approximately 6% of male students join social fraternities, and 6% of female students join social sororities.
|
| 383 |
-
|
| 384 |
-
Spartan Marching Band
|
| 385 |
-
|
| 386 |
-
A student drum major conducts the Pride of the Spartans marching band during a football game at Stanford University.
|
| 387 |
-
The Spartan marching band comprises students from every field of study on campus, from first year undergraduates through graduate students, as well as several "open university" members. At each home football game, the Spartan marching band performs a completely new halftime show, plus a pre-game show and a post-game concert. The band reflects all the color and fanfare of major university sports pageantry. The band is unofficially known as "The Pride of the Spartans," and generally performs with a color guard and dance team. The band performs at all home football games, and also travels with the team for select road games.[134]
|
| 388 |
-
|
| 389 |
-
Student press
|
| 390 |
-
Main article: The Spartan Daily
|
| 391 |
-
|
| 392 |
-
The Dwight Bentel Hall houses the School of Journalism and Mass Communications.
|
| 393 |
-
The school newspaper, The Spartan Daily, was founded in 1934 and is published three days a week when classes are in session. The publication follows a broadsheet format and has a daily print circulation of over 6,000, as well as a daily on-line edition. The newspaper is produced by journalism and advertising students enrolled in SJSU's School of Journalism and Mass Communications. The journalism school, including The Spartan Daily newsroom and other student press facilities, is housed inside Dwight Bentel Hall. The building was named after the department's founder and longtime chairman, Dwight Bentel. The journalism school also runs an on-campus advertising agency, Dwight, Bentel and Hall Communications.[citation needed]
|
| 394 |
-
|
| 395 |
-
Update News is a weekly, student-produced television newscast that airs every weekend on KICU, Channel 36 in San Jose. The newscast is produced by San Jose State broadcast journalism students, and has aired in the Bay Area since 1982.[135] The newscast previously aired on educational station KTEH. Update News also features a daily live webcast.[citation needed]
|
| 396 |
-
|
| 397 |
-
Equal Time is a news magazine show produced by the San Jose State School of Journalism and Mass Communications. Each half-hour episode examines a different issue in depth, and ends with a roundtable discussion featuring professors and other experts in search of solutions. Equal Time airs Saturday afternoons on KQED+ (Channel 54 or Comcast Channel 10) in the Bay Area.[136]
|
| 398 |
-
|
| 399 |
-
Established in 1963, KSJS, 90.5 FM, is the university's student-run radio station. KSJS features live broadcasts of San Jose State athletic events, various types of music including electronic, urban, jazz, subversive rock, and rock en Español, as well as specialty talk shows.[137]
|
| 400 |
-
|
| 401 |
-
Notable student organizations
|
| 402 |
-
W6YL is a student-run amateur radio station that has been in continuous operation for 97–98 years.[138][139] Originally founded in 1927 when SJSU was still known as San Jose State Teachers College, SJSU Amateur Radio Club W6YL is recognized as one of the oldest continuously operating student organizations on campus.[140] The SJSU Amateur Radio Club is a federally licensed radio station that operates under the callsign W6YL on amateur radio bands.[141]
|
| 403 |
-
|
| 404 |
-
Athletics
|
| 405 |
-
Main article: San Jose State Spartans
|
| 406 |
-
|
| 407 |
-
California State Normal School football. (1910)
|
| 408 |
-
San José State University has participated in athletics since it first fielded a baseball team in 1890. SJSU sports teams are known as the Spartans, and compete in the Mountain West Conference (MWC) in NCAA Division I.[citation needed]
|
| 409 |
-
|
| 410 |
-
San José State University sports teams have won NCAA national titles in track and field, golf, boxing, fencing and tennis.[142] As of December 2022, SJSU has won 10 NCAA national Division 1 team championships[143] and produced 50 NCAA national Division 1 individual champions.[142] SJSU also has achieved an international reputation for its judo program, winning 52 National Collegiate Judo Association (NCJA) men's team championship titles and 26 NCJA women's team championship titles between 1962 and 2024.[144][145][146][147][148][149]
|
| 411 |
-
|
| 412 |
-
SJSU alumni have won 20 Olympic medals (including seven gold medals) dating back to the first gold medal won by Willie Steele in track and field in the 1948 Summer Olympics. Alumni also have won medals in swimming, judo, water polo and boxing. The track team coached by "Bud" Winter earned San Jose State the nickname "Speed City," and produced Olympic medalists and social activists Lee Evans, Tommie Smith and John Carlos. Smith and Carlos are perhaps best remembered for giving the raised fist salute from the medalists' podium during the 1968 Summer Olympics in Mexico City. In 2005, a monument to the protest, designed by artist Rigo 23 and titled Victory Salute, was built on Tower Lawn; the monument encourages passersby to recreate the historic moment.[150] The track and field program was canceled in 1988 after a series of budget cuts and Title IX-related decisions decimated the program. The program was reinstated in 2016.[151]
|
| 413 |
-
|
| 414 |
-
After an 11-2 finish in 2012, SJSU's football team achieved its first-ever BCS ranking and first national ranking since 1990.[152] SJSU was ranked No. 21 in both the 2012 post-season Associated Press Poll and the USA Today Coaches' Poll.
|
| 415 |
-
|
| 416 |
-
The Spartan football team had another breakout season in 2020, cracking the AP Poll top-25 for the first time since 2012 and appearing in the College Football Playoff ranking at No. 24. The team also won its first conference championship title since 1991. The Spartans finished the 2020 season 7-1 and ranked No. 24 in the AP Poll.
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
Utah vs. San Jose State at Spartan Stadium (2009)
|
| 420 |
-
Club sports
|
| 421 |
-
In addition to its various NCAA Division I sports programs, San José State University has a very active club sports community consisting of approximately 25 sports and 50 teams.[153] Many of the club sports teams are run and organized by students, although some of the more established teams employ full-time paid coaches and enjoy strong alumni support. The list of club sports active at SJSU includes:
|
| 422 |
-
|
| 423 |
-
Men's and women's archery, men's and women's badminton, baseball, men's and women's basketball, men's and women's bowling, men's and women's boxing, men's and women's cycling, dancesport, men's and women's dragon boat racing, esports, men's and women's fencing, men's and women's figure skating, men's and women's gymnastics, ACHA Division II and Division III men's ice hockey, women's ice hockey, men's and women's judo, MCLA Division II men's lacrosse, women's lacrosse, mountain biking, men's and women's powerlifting, men's and women's quidditch, men's roller hockey, men's and women's rugby, salsa, men's and women's soccer, softball, men's and women's swimming, track and field, triathlon, ultimate Frisbee, men's and women's volleyball, men's and women's water polo, and men's and women's wrestling.[154]
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
The Boccardo Gate on the Paseo de San Carlos.
|
| 427 |
-
Traditions
|
| 428 |
-
The old campus bell, which was originally located in a small tower to the right of the main entrance to the campus, was purchased and installed in 1881 at a cost of $1,217. The bell chimed each morning at eight o'clock until the 1906 San Francisco earthquake stilled its voice. When Tower Hall was constructed in 1909, it was specially designed to house the old bell. The bell rang on special occasions until the college obtained new carillon chimes in 1946. The old bell is displayed to this day on the Washington Square quad near Tower Hall.[29]
|
| 429 |
-
|
| 430 |
-
In 1922, the State Teachers College at San Jose adopted the Spartans as the school's official mascot and nickname. Mascots and nicknames prior to 1922 included the Daniels, the Teachers, the Pedagogues, the Normals and the Normalites.
|
| 431 |
-
|
| 432 |
-
In 1925, students debated whether to change the school colors from gold and white to purple and white. Tradition won out, and the students decided to keep the original colors, gold and white. At some point prior to 1929 when the SJSU alma mater was officially adopted, blue was added as an official school color alongside gold and white.[29]
|
| 433 |
-
|
| 434 |
-
According to information published in the old SJSU La Torre yearbook, Spardi Gras was first held in 1929 on George Washington's birthday. Spardi Gras was described in the 1929 edition of La Torre as "[an] event which met with unprecedented participance by the entire student body ... a gala occasion of play, sport, and merrymaking later authorized by the Executive Board as an annual event because of its great success."[155] Spardi Gras was last mentioned in La Torre in 1960.[155]
|
| 435 |
-
|
| 436 |
-
Another longstanding event at SJSU was "Spartan Revelries." According to information published in the 1960 edition of La Torre, Spartan Revelries was an "all-student college musical event written, produced and presented entirely by students."[155] It's unclear when Spartan Revelries began, but some believe it started in 1929 as a grand finale to Spardi Gras. In 1949, an official Revelries board was established to carry out the business and management of each year's show, which had grown into a major annual event requiring the efforts of many students and several months of preparation.[155][29]
|
| 437 |
-
|
| 438 |
-
Sparta Camp was an annual event held between 1953 and 1965.[155] The retreat was hosted by the Associated Students and was held every spring at the Asilomar State Beach. The event was open to all students with an interest in student government, and students had to apply to go. Participants attended workshops and discussion groups on leadership. A similar event known as Freshman Camp was also held at Asilomar every September to help new students get oriented to the campus and the "Spirit of Sparta."[155][29]
|
| 439 |
-
|
| 440 |
-
The chimes heard on the SJSU campus each quarter hour are Westminster chimes, which were a gift from the class of 1947. They ring the same tones as the famous Big Ben chimes in England.[29] Students and alumni show their Spartan pride every Thursday by wearing Spartan blue and gold.[156] Each year during homecoming week, SJSU hosts a series of events leading up to the homecoming football game at CEFCU Stadium. Events include the Campus MovieFest Finale and Fire on the Fountain festival.[156]
|
| 441 |
-
|
| 442 |
-
Alma mater
|
| 443 |
-
"Hail! Spartans, Hail!" is the university's official alma mater. The lyrics were written by Gerald Erwin, a 1933 graduate. Erwin was a music major who also served as the student director of the glee club. The song was officially adopted as the school hymn on February 25, 1929.[155] Whenever the SJSU Alma Mater is played, students are asked to stand, remove their hats and sing along.[29]
|
| 444 |
-
|
| 445 |
-
The university also has a fight song, which is typically played and/or sung at the end of football games and other athletic events including pep rallies.[29]
|
| 446 |
-
|
| 447 |
-
Alumni
|
| 448 |
-
Main article: List of San Jose State University people
|
| 449 |
-
|
| 450 |
-
Lindsey Buckingham (left)
|
| 451 |
-
(Attended '68-'70) and Stevie Nicks (right)
|
| 452 |
-
(Attended '68-'70)
|
| 453 |
-
Musicians best known for Fleetwood Mac
|
| 454 |
-
|
| 455 |
-
Ben Nighthorse Campbell
|
| 456 |
-
(B.A., '57)
|
| 457 |
-
First Native American to be elected to the United States Senate and member of the first US Olympic judo team
|
| 458 |
-
|
| 459 |
-
John Carlos (right)
|
| 460 |
-
(Attended '68-'69)
|
| 461 |
-
and Tommie Smith (center) (B.A., '69)
|
| 462 |
-
Track and field athletes known for a 1968 Protest
|
| 463 |
-
|
| 464 |
-
Dian Fossey
|
| 465 |
-
(B.S., '54)
|
| 466 |
-
Primatologist and conservationist known for studying mountain gorilla groups
|
| 467 |
-
|
| 468 |
-
Lou Henry Hoover
|
| 469 |
-
(DipEd, 1893)
|
| 470 |
-
Philanthropist and former First Lady of the United States
|
| 471 |
-
|
| 472 |
-
Gordon Moore
|
| 473 |
-
(Attended '46-'47)
|
| 474 |
-
Founder of the Intel Corporation and creator of "Moore's law"
|
| 475 |
-
|
| 476 |
-
Gaylord Nelson
|
| 477 |
-
(B.A., '39)
|
| 478 |
-
Governor of Wisconsin and founder of Earth Day
|
| 479 |
-
|
| 480 |
-
Bill Walsh
|
| 481 |
-
(B.A., '55)
|
| 482 |
-
Football coach in the Pro Football Hall of Fame and three-time Super Bowl champion
|
| 483 |
-
About 60% of San Jose State's 275,000 living alumni of record reside in the San Francisco Bay Area. The other 40% are scattered around the globe, with concentrations in Southern California, Seattle, Portland, Philadelphia, Washington, D.C., and New York City.[157]
|
| 484 |
-
|
| 485 |
-
SJSU is consistently listed among the leading suppliers of undergraduate and graduate alumni to Silicon Valley science and technology firms.[158][159][160] In 2015, San José State University was listed as the top feeder school for Apple Inc., which employed over 1,000 SJSU graduates at the time. SJSU ranked 9th on the list of top feeder schools for Facebook.[161]
|
| 486 |
-
|
| 487 |
-
Some of the more notable SJSU alumni in science and engineering include Ray Dolby, founder of Dolby sound systems; Dian Fossey, primatologist and gorilla researcher; Gordon Moore, founder of Intel Corporation and creator of "Moore's law;" and Ed Oates, co-founder of Oracle.[162]
|
| 488 |
-
|
| 489 |
-
Nearly 200 former SJSU students and graduates have founded, co-founded, served or serve as senior executives or officers of public and private companies reporting annual sales between $40 million and $26 billion.[163] This list includes former Intel Corporation CEO, Brian Krzanich,[164] and current Crown Worldwide Group CEO, billionaire James E. Thompson.[165]
|
| 490 |
-
|
| 491 |
-
Notable companies founded by SJSU students and alumni include Dolby Laboratories (1965), Intel Corporation (1968), Specialized Bicycle Components (1974), Oracle Corporation (1977), Seagate Technology (1979) and WhatsApp (2008).[166][167]
|
| 492 |
-
|
| 493 |
-
Musicians Doug Clifford and Stu Cook (Creedence Clearwater Revival), Tom Johnston and Patrick Simmons (the Doobie Brothers), Lindsey Buckingham and Stevie Nicks (Fleetwood Mac) and Paul Kantner (Jefferson Airplane) all attended San Jose State.[168][169][170][171][172]
|
| 494 |
-
|
| 495 |
-
SJSU distinguished alumni also include former First Lady of the United States Lou Henry Hoover; novelists Amy Tan and Jayne Ann Krentz; and fashion designer Jessica McClintock.
|
| 496 |
-
|
| 497 |
-
SJSU alumni Dick Vermeil and Bill Walsh earned a combined four Super Bowl victories as NFL head coaches.[173][174]
|
| 498 |
-
|
| 499 |
-
San Jose State alumnus and 1964 U.S. Open winner Ken Venturi was named Sports Illustrated "Sportsman of the Year" and later inducted into the World Golf Hall of Fame.[175]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/rag.py
DELETED
|
@@ -1,207 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import glob
|
| 3 |
-
from typing import Optional, List, Dict, Any
|
| 4 |
-
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 5 |
-
from langchain_community.document_loaders import DirectoryLoader, TextLoader
|
| 6 |
-
from langchain_huggingface import HuggingFaceEmbeddings
|
| 7 |
-
from langchain_chroma import Chroma
|
| 8 |
-
|
| 9 |
-
DB_NAME = os.getenv("DB_NAME", "career_db")
|
| 10 |
-
DIRECTORY_NAME = os.getenv("DIRECTORY_NAME", "knowledge_base")
|
| 11 |
-
CHROMA_PERSIST_DIRECTORY = os.getenv("CHROMA_PERSIST_DIRECTORY", "./chroma_db")
|
| 12 |
-
TOP_K = int(os.getenv("TOP_K", "25"))
|
| 13 |
-
CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "1000"))
|
| 14 |
-
CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "300"))
|
| 15 |
-
MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
| 16 |
-
|
| 17 |
-
class Retriever:
    """Chroma-backed retriever over the text/markdown files of a folder.

    Construction opens the store persisted at ``db_name`` when that directory
    exists and is non-empty; otherwise, or when ``force_rebuild`` is true, the
    store is built from the documents found in ``directory_name``.

    Args:
        db_name: Directory in which Chroma persists the collection.
        directory_name: Folder searched for ``*.txt``, ``*.md`` and
            ``*.markdown`` files.
        top_k: Default number of chunks requested per query.
        chunk_size: ``chunk_size`` handed to the text splitter.
        chunk_overlap: ``chunk_overlap`` handed to the text splitter.
        model_name: Name of the Hugging Face embedding model.
        force_rebuild: Build from ``directory_name`` even if a store exists.
    """

    # Score threshold applied by every retriever this class hands out.
    SCORE_THRESHOLD = 0.2

    def __init__(
        self,
        db_name: str = DB_NAME,
        directory_name: str = DIRECTORY_NAME,
        top_k: int = TOP_K,
        chunk_size: int = CHUNK_SIZE,
        chunk_overlap: int = CHUNK_OVERLAP,
        model_name: str = MODEL_NAME,
        force_rebuild: bool = False,
    ):
        self.db_name = db_name
        self.directory_name = directory_name
        self.top_k = top_k
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self._embeddings = HuggingFaceEmbeddings(model_name=model_name)
        self.vectorstore = None
        self._retriever = None
        self._init_or_load_db(force_rebuild=force_rebuild)

    # ------------------------------------------------------------------ helpers

    def _open_store(self):
        """Open (or create) the Chroma store persisted at ``self.db_name``."""
        return Chroma(
            persist_directory=self.db_name,
            embedding_function=self._embeddings,
        )

    def _store_exists(self) -> bool:
        """Return True when ``self.db_name`` is a non-empty directory."""
        if not os.path.isdir(self.db_name):
            return False
        # Context manager closes the scandir iterator instead of leaving it
        # open until garbage collection.
        with os.scandir(self.db_name) as entries:
            return any(entries)

    def _make_retriever(self, k: int):
        """Return a score-thresholded retriever that requests ``k`` results."""
        return self.vectorstore.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": k, "score_threshold": self.SCORE_THRESHOLD},
        )

    def _get_documents(self) -> List:
        """Load every ``*.txt`` / ``*.md`` / ``*.markdown`` file as UTF-8 text."""
        docs = []
        for pattern in ("*.txt", "*.md", "*.markdown"):
            loader = DirectoryLoader(
                self.directory_name,
                glob=pattern,
                loader_cls=TextLoader,
                loader_kwargs={"encoding": "utf-8"},
                show_progress=False,
            )
            docs.extend(loader.load())
        return docs

    # ------------------------------------------------------------ store set-up

    def _build_store(self):
        """Split the knowledge-base documents and index them into Chroma.

        When there are no documents (or splitting yields no chunks) the
        persisted store is simply opened, so ``self.vectorstore`` is always
        set afterwards.
        """
        documents = self._get_documents()
        chunks = []
        if documents:
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=self.chunk_size,
                chunk_overlap=self.chunk_overlap,
            )
            chunks = splitter.split_documents(documents)

        if not chunks:
            self.vectorstore = self._open_store()
            return

        # NOTE(review): from_documents() writes into whatever collection is
        # already persisted at db_name, so rebuilding over a populated store
        # presumably accumulates duplicate chunks - confirm, and clear the
        # collection first if a clean rebuild is intended.
        # Persistence is handled automatically when using persist_directory.
        self.vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=self._embeddings,
            persist_directory=self.db_name,
        )

    def _init_or_load_db(self, force_rebuild: bool = False):
        """Build or open the store, then create the default retriever."""
        if force_rebuild or not self._store_exists():
            self._build_store()
        else:
            self.vectorstore = self._open_store()
        self._retriever = self._make_retriever(self.top_k)

    def rebuild(self):
        """Re-index the knowledge base and refresh the default retriever."""
        self._build_store()
        self._retriever = self._make_retriever(self.top_k)

    # ---------------------------------------------------------------- querying

    def get_retriever(self, k: Optional[int] = None):
        """Return a retriever for ``k`` results (default: ``self.top_k``).

        A custom ``k`` now keeps the same score threshold as the default
        retriever; previously it was silently dropped on this path only.
        """
        if k and k != self.top_k:
            return self._make_retriever(k)
        return self._retriever

    def get_relevant_docs(self, message: str, k: Optional[int] = None):
        """Return the documents retrieved for ``message``.

        A truthy ``k`` uses an ad-hoc thresholded retriever; otherwise the
        default retriever is used.
        """
        retriever = self._make_retriever(k) if k else self._retriever
        return retriever.invoke(message)

    def get_relevant_chunks(self, message: str, k: Optional[int] = None):
        """Return only the ``page_content`` of the documents for ``message``."""
        return [doc.page_content for doc in self.get_relevant_docs(message, k=k)]
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
# --- Back-compat free functions expected by controller/app ---
|
| 122 |
-
_GLOBAL_RETRIEVER: Optional[Retriever] = None
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
def get_retriever() -> Any:
    """Return the default retriever of the shared module-level ``Retriever``.

    The shared instance is created with default settings on first use and
    stored in ``_GLOBAL_RETRIEVER`` so later calls reuse it.
    """
    global _GLOBAL_RETRIEVER
    shared = _GLOBAL_RETRIEVER
    if shared is None:
        shared = Retriever()
        _GLOBAL_RETRIEVER = shared
    return shared.get_retriever()
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
def ingest(data_dir: Optional[str] = None) -> str:
    """Rebuild the shared retriever from ``data_dir`` and report the result.

    Falls back to ``DIRECTORY_NAME`` when ``data_dir`` is empty or None, and
    replaces ``_GLOBAL_RETRIEVER`` with a freshly built ``Retriever``.

    Returns:
        A short status string for UI display, including the chunk count
        reported by the underlying collection (0 if it has no ``count``).
    """
    global _GLOBAL_RETRIEVER
    target_dir = data_dir if data_dir else DIRECTORY_NAME
    _GLOBAL_RETRIEVER = Retriever(directory_name=target_dir, force_rebuild=True)

    collection = _GLOBAL_RETRIEVER.vectorstore._collection
    count_fn = getattr(collection, "count", lambda: 0)
    count = count_fn()
    return f"ingested from {target_dir} — chunks in store: {count}"
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
class ChromaRAG:
    """Minimal agent-style RAG wrapper: ingest, retrieve, metadata.

    Args:
        persist_dir: Directory in which Chroma persists the collection.
        kb_dir: Default folder read by ``ingest_documents``.
        model_name: Name of the Hugging Face embedding model.
    """

    def __init__(
        self,
        persist_dir: str = CHROMA_PERSIST_DIRECTORY,
        kb_dir: str = DIRECTORY_NAME,
        model_name: str = MODEL_NAME,
    ):
        self.persist_dir = persist_dir
        self.kb_dir = kb_dir
        self._embeddings = HuggingFaceEmbeddings(model_name=model_name)
        self._vs = None
        self._ensure_vectorstore()

    def _ensure_vectorstore(self):
        """Open the store at ``persist_dir``, creating the directory if needed."""
        populated = False
        if os.path.isdir(self.persist_dir):
            # Context manager closes the scandir iterator deterministically.
            with os.scandir(self.persist_dir) as entries:
                populated = any(entries)
        if not populated:
            os.makedirs(self.persist_dir, exist_ok=True)

        self._vs = Chroma(
            persist_directory=self.persist_dir,
            embedding_function=self._embeddings,
        )
        if populated:
            print(f"[rag] loaded vectorstore at {self.persist_dir}")
        else:
            print(f"[rag] initialized empty vectorstore at {self.persist_dir}")

    def _count(self) -> int:
        """Return the collection's entry count (0 if it exposes no ``count``)."""
        return getattr(self._vs._collection, "count", lambda: 0)()

    def _search(self, query: str, top_k: int) -> List:
        """Return the ``top_k`` documents retrieved for ``query``."""
        return self._vs.as_retriever(search_kwargs={"k": top_k}).invoke(query)

    def ingest_documents(
        self,
        folder_path: Optional[str] = None,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
    ) -> Dict[str, Any]:
        """Split and index the text/markdown files of a folder.

        Args:
            folder_path: Folder to read; defaults to ``self.kb_dir``.
            chunk_size: ``chunk_size`` handed to the text splitter.
            chunk_overlap: ``chunk_overlap`` handed to the text splitter.

        Returns:
            ``{"ingested": <chunks added>, "total": <entries in the store>}``.
        """
        folder = folder_path or self.kb_dir
        docs: List = []
        for pattern in ("*.txt", "*.md", "*.markdown"):
            loader = DirectoryLoader(
                folder,
                glob=pattern,
                loader_cls=TextLoader,
                loader_kwargs={"encoding": "utf-8"},
                show_progress=False,
            )
            docs.extend(loader.load())

        if not docs:
            print(f"[rag] no documents in {folder}")
            return {"ingested": 0, "total": self._count()}

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        chunks = splitter.split_documents(docs)
        if not chunks:
            print("[rag] no chunks produced")
            return {"ingested": 0, "total": self._count()}

        Chroma.from_documents(
            documents=chunks,
            embedding=self._embeddings,
            persist_directory=self.persist_dir,
        )
        # Re-open so self._vs reflects the store that was just written.
        self._ensure_vectorstore()
        total = self._count()
        print(f"[rag] ingested={len(chunks)} total={total}")
        return {"ingested": len(chunks), "total": total}

    def retrieve_context(self, query: str, top_k: int = 3) -> List[str]:
        """Return the ``page_content`` of the ``top_k`` documents for ``query``."""
        return [doc.page_content for doc in self._search(query, top_k)]

    def get_retrieval_metadata(self, query: str, top_k: int = 3) -> Dict[str, Any]:
        """Return ``query`` plus the content and metadata of each retrieved document."""
        results: List[Dict[str, Any]] = [
            {"content": doc.page_content, "metadata": getattr(doc, "metadata", {})}
            for doc in self._search(query, top_k)
        ]
        return {"query": query, "results": results}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/requirements.txt
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
gradio
langchain
langchain-community
langchain-openai
# rag.py imports these integration packages directly
langchain-text-splitters
langchain-huggingface
langchain-chroma
chromadb
sentence-transformers
python-dotenv
requests
# Optional: enables the Google Sheets sink in tools.py (CSV is used when absent)
# gspread
# google-auth
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/ChatBot_with_evaluator_and_notifier/tools.py
DELETED
|
@@ -1,133 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import csv
|
| 3 |
-
import json
|
| 4 |
-
import base64
|
| 5 |
-
from dotenv import load_dotenv
|
| 6 |
-
from datetime import datetime
|
| 7 |
-
import requests
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
try:
|
| 11 |
-
import gspread
|
| 12 |
-
from google.oauth2.service_account import Credentials
|
| 13 |
-
GOOGLE_SHEETS_AVAILABLE = True
|
| 14 |
-
except ImportError:
|
| 15 |
-
GOOGLE_SHEETS_AVAILABLE = False
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
CSV_FILE = "user_interest.csv"
|
| 19 |
-
SHEET_NAME = "UserInterest"
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
def _get_google_credentials():
    """Build service-account credentials from ``GOOGLE_CREDENTIALS_JSON``.

    The environment variable is base64-decoded and parsed as JSON before being
    handed to ``Credentials.from_service_account_info``.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    load_dotenv(override=True)
    scopes = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/drive",
    ]
    encoded = os.getenv("GOOGLE_CREDENTIALS_JSON")
    if not encoded:
        raise RuntimeError("Google credentials not found.")

    service_account_info = json.loads(base64.b64decode(encoded).decode('utf-8'))
    credentials = Credentials.from_service_account_info(service_account_info, scopes=scopes)
    print("[info] Loaded Google credentials from environment.")
    return credentials
|
| 35 |
-
|
| 36 |
-
def _save_to_google_sheets(email, name, notes):
    """Append a timestamped ``[timestamp, email, name, notes]`` row to the
    first worksheet of the spreadsheet named ``SHEET_NAME``."""
    client = gspread.authorize(_get_google_credentials())
    worksheet = client.open(SHEET_NAME).sheet1
    timestamp = datetime.today().strftime('%Y-%m-%d %H:%M')
    worksheet.append_row([timestamp, email, name, notes])
    print(f"[Google Sheets] Recorded: {email}, {name}")
|
| 43 |
-
|
| 44 |
-
def _save_to_csv(email, name, notes):
    """Append a timestamped ``[timestamp, email, name, notes]`` row to ``CSV_FILE``.

    The header row is written first when the file is missing or empty, so a
    pre-created zero-byte file still ends up with column names.
    """
    needs_header = not os.path.isfile(CSV_FILE) or os.path.getsize(CSV_FILE) == 0
    with open(CSV_FILE, mode='a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(["Timestamp", "Email", "Name", "Notes"])
        writer.writerow([datetime.today().strftime('%Y-%m-%d %H:%M'), email, name, notes])
    print(f"[CSV] Recorded: {email}, {name}")
|
| 52 |
-
|
| 53 |
-
def _record_user_details(email, name="Name not provided", notes="Not provided"):
    """Record a user's details in Google Sheets, falling back to the CSV file.

    The CSV fallback is used when gspread could not be imported or when the
    Google Sheets write raises; the reason is printed in either case.

    Returns:
        ``{"recorded": "ok"}``.
    """
    failure = None
    if GOOGLE_SHEETS_AVAILABLE:
        try:
            _save_to_google_sheets(email, name, notes)
        except Exception as exc:
            failure = exc
    else:
        failure = ImportError("gspread not installed.")

    if failure is not None:
        print(f"[Warning] Google Sheets write failed, using CSV. Reason: {failure}")
        _save_to_csv(email, name, notes)

    return {"recorded": "ok"}
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
# --- Minimal Pushover + logging helpers for agent-based RAG ---
|
| 67 |
-
|
| 68 |
-
def send_pushover_notification(message: str, user_details: dict | None = None):
    """Send ``message`` through Pushover when credentials are configured.

    Reads ``PUSHOVER_TOKEN`` and ``PUSHOVER_USER`` from the environment. Any
    truthy entries of ``user_details`` are appended to the message as JSON on
    a new line.

    Returns:
        A status dict: ``{"sent": False, "reason": "missing_creds"}`` when
        credentials are missing, ``{"sent": <bool>, "status_code": <int>}``
        after a request, or ``{"sent": False, "error": <str>}`` on failure.
        Exceptions are never propagated, to keep the app resilient.
    """
    load_dotenv(override=True)
    token = os.getenv("PUSHOVER_TOKEN")
    user = os.getenv("PUSHOVER_USER")

    if not (token and user):
        print("[pushover] disabled (missing PUSHOVER_TOKEN or PUSHOVER_USER)")
        return {"sent": False, "reason": "missing_creds"}

    try:
        body = message
        if user_details:
            # Tolerate a user_details object that is not dict-like.
            try:
                extras = {key: value for key, value in user_details.items() if value}
            except Exception:
                extras = {}
            if extras:
                body = body + "\n" + json.dumps(extras)

        payload = {
            "token": token,
            "user": user,
            "title": "RAG: Unsupported Answer With Empty Context",
            "message": body,
            "priority": 0,
        }
        resp = requests.post("https://api.pushover.net/1/messages.json", data=payload, timeout=10)
        ok = resp.status_code == 200
        print(f"[pushover] status={resp.status_code} ok={ok}")
        return {"sent": ok, "status_code": resp.status_code}
    except Exception as e:
        print(f"[pushover] error: {e}")
        return {"sent": False, "error": str(e)}
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
def collect_user_details(name: str | None = None, email: str | None = None) -> dict:
|
| 108 |
-
return {"name": name or "", "email": email or ""}
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
def log_interaction(query: str, response: str, evaluation: dict, user_details: dict | None = None, csv_path: str = "interactions.csv"):
|
| 112 |
-
try:
|
| 113 |
-
file_exists = os.path.isfile(csv_path)
|
| 114 |
-
with open(csv_path, mode='a', newline='', encoding='utf-8') as f:
|
| 115 |
-
writer = csv.writer(f)
|
| 116 |
-
if not file_exists:
|
| 117 |
-
writer.writerow(["timestamp", "query", "response", "evaluation", "user_details"])
|
| 118 |
-
writer.writerow([
|
| 119 |
-
datetime.today().strftime('%Y-%m-%d %H:%M:%S'),
|
| 120 |
-
query,
|
| 121 |
-
response,
|
| 122 |
-
json.dumps(evaluation, ensure_ascii=False),
|
| 123 |
-
json.dumps(user_details or {}, ensure_ascii=False),
|
| 124 |
-
])
|
| 125 |
-
print(f"[log] wrote interaction to {csv_path}")
|
| 126 |
-
except Exception as e:
|
| 127 |
-
print(f"[log] error: {e}")
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
# Back-compat simple notifier expected by existing controller
|
| 131 |
-
def notify(title: str, message: str):
    """Send ``"<title>: <message>"`` (or just ``message`` when ``title`` is
    empty) via ``send_pushover_notification`` and return its status dict."""
    if title:
        text = f"{title}: {message}"
    else:
        text = message
    return send_pushover_notification(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
community_contributions/Indira_1_lab1.ipynb
DELETED
|
@@ -1,370 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"metadata": {},
|
| 6 |
-
"source": [
|
| 7 |
-
"# Welcome to the start of your adventure in Agentic AI"
|
| 8 |
-
]
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"cell_type": "markdown",
|
| 12 |
-
"metadata": {},
|
| 13 |
-
"source": [
|
| 14 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 15 |
-
" <tr>\n",
|
| 16 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 17 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 18 |
-
" </td>\n",
|
| 19 |
-
" <td>\n",
|
| 20 |
-
" <h2 style=\"color:#ff7800;\">Are you ready for action??</h2>\n",
|
| 21 |
-
" <span style=\"color:#ff7800;\">Have you completed all the setup steps in the <a href=\"../setup/\">setup</a> folder?<br/>\n",
|
| 22 |
-
" Have you read the <a href=\"../README.md\">README</a>? Many common questions are answered here!<br/>\n",
|
| 23 |
-
" Have you checked out the guides in the <a href=\"../guides/01_intro.ipynb\">guides</a> folder?<br/>\n",
|
| 24 |
-
" Well in that case, you're ready!!\n",
|
| 25 |
-
" </span>\n",
|
| 26 |
-
" </td>\n",
|
| 27 |
-
" </tr>\n",
|
| 28 |
-
"</table>"
|
| 29 |
-
]
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"cell_type": "markdown",
|
| 33 |
-
"metadata": {},
|
| 34 |
-
"source": [
|
| 35 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 36 |
-
" <tr>\n",
|
| 37 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 38 |
-
" <img src=\"../assets/tools.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 39 |
-
" </td>\n",
|
| 40 |
-
" <td>\n",
|
| 41 |
-
" <h2 style=\"color:#00bfff;\">This code is a live resource - keep an eye out for my updates</h2>\n",
|
| 42 |
-
" <span style=\"color:#00bfff;\">I push updates regularly. As people ask questions or have problems, I add more examples and improve explanations. As a result, the code below might not be identical to the videos, as I've added more steps and better comments. Consider this like an interactive book that accompanies the lectures.<br/><br/>\n",
|
| 43 |
-
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
| 44 |
-
" </span>\n",
|
| 45 |
-
" </td>\n",
|
| 46 |
-
" </tr>\n",
|
| 47 |
-
"</table>"
|
| 48 |
-
]
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"cell_type": "markdown",
|
| 52 |
-
"metadata": {},
|
| 53 |
-
"source": [
|
| 54 |
-
"### And please do remember to contact me if I can help\n",
|
| 55 |
-
"\n",
|
| 56 |
-
"And I love to connect: https://www.linkedin.com/in/eddonner/\n",
|
| 57 |
-
"\n",
|
| 58 |
-
"\n",
|
| 59 |
-
"### New to Notebooks like this one? Head over to the guides folder!\n",
|
| 60 |
-
"\n",
|
| 61 |
-
"Just to check you've already added the Python and Jupyter extensions to Cursor, if not already installed:\n",
|
| 62 |
-
"- Open extensions (View >> extensions)\n",
|
| 63 |
-
"- Search for python, and when the results show, click on the ms-python one, and Install it if not already installed\n",
|
| 64 |
-
"- Search for jupyter, and when the results show, click on the Microsoft one, and Install it if not already installed \n",
|
| 65 |
-
"Then View >> Explorer to bring back the File Explorer.\n",
|
| 66 |
-
"\n",
|
| 67 |
-
"And then:\n",
|
| 68 |
-
"1. Click where it says \"Select Kernel\" near the top right, and select the option called `.venv (Python 3.12.9)` or similar, which should be the first choice or the most prominent choice. You may need to choose \"Python Environments\" first.\n",
|
| 69 |
-
"2. Click in each \"cell\" below, starting with the cell immediately below this text, and press Shift+Enter to run\n",
|
| 70 |
-
"3. Enjoy!\n",
|
| 71 |
-
"\n",
|
| 72 |
-
"After you click \"Select Kernel\", if there is no option like `.venv (Python 3.12.9)` then please do the following: \n",
|
| 73 |
-
"1. On Mac: From the Cursor menu, choose Settings >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`); \n",
|
| 74 |
-
"On Windows PC: From the File menu, choose Preferences >> VS Code Settings (NOTE: be sure to select `VSCode Settings` not `Cursor Settings`) \n",
|
| 75 |
-
"2. In the Settings search bar, type \"venv\" \n",
|
| 76 |
-
"3. In the field \"Path to folder with a list of Virtual Environments\" put the path to the project root, like C:\\Users\\username\\projects\\agents (on a Windows PC) or /Users/username/projects/agents (on Mac or Linux). \n",
|
| 77 |
-
"And then try again.\n",
|
| 78 |
-
"\n",
|
| 79 |
-
"Having problems with missing Python versions in that list? Have you ever used Anaconda before? It might be interfering. Quit Cursor, bring up a new command line, and make sure that your Anaconda environment is deactivated: \n",
|
| 80 |
-
"`conda deactivate` \n",
|
| 81 |
-
"And if you still have any problems with conda and python versions, it's possible that you will need to run this too: \n",
|
| 82 |
-
"`conda config --set auto_activate_base false` \n",
|
| 83 |
-
"and then from within the Agents directory, you should be able to run `uv python list` and see the Python 3.12 version."
|
| 84 |
-
]
|
| 85 |
-
},
|
| 86 |
-
{
|
| 87 |
-
"cell_type": "code",
|
| 88 |
-
"execution_count": null,
|
| 89 |
-
"metadata": {},
|
| 90 |
-
"outputs": [],
|
| 91 |
-
"source": [
|
| 92 |
-
"# First let's do an import. If you get an Import Error, double check that your Kernel is correct..\n",
|
| 93 |
-
"\n",
|
| 94 |
-
"from dotenv import load_dotenv\n"
|
| 95 |
-
]
|
| 96 |
-
},
|
| 97 |
-
{
|
| 98 |
-
"cell_type": "code",
|
| 99 |
-
"execution_count": null,
|
| 100 |
-
"metadata": {},
|
| 101 |
-
"outputs": [],
|
| 102 |
-
"source": [
|
| 103 |
-
"# Next it's time to load the API keys into environment variables\n",
|
| 104 |
-
"# If this returns false, see the next cell!\n",
|
| 105 |
-
"\n",
|
| 106 |
-
"load_dotenv(override=True)"
|
| 107 |
-
]
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"cell_type": "markdown",
|
| 111 |
-
"metadata": {},
|
| 112 |
-
"source": [
|
| 113 |
-
"### Wait, did that just output `False`??\n",
|
| 114 |
-
"\n",
|
| 115 |
-
"If so, the most common reason is that you didn't save your `.env` file after adding the key! Be sure to have saved.\n",
|
| 116 |
-
"\n",
|
| 117 |
-
"Also, make sure the `.env` file is named precisely `.env` and is in the project root directory (`agents`)\n",
|
| 118 |
-
"\n",
|
| 119 |
-
"By the way, your `.env` file should have a stop symbol next to it in Cursor on the left, and that's actually a good thing: that's Cursor saying to you, \"hey, I realize this is a file filled with secret information, and I'm not going to send it to an external AI to suggest changes, because your keys should not be shown to anyone else.\""
|
| 120 |
-
]
|
| 121 |
-
},
|
| 122 |
-
{
|
| 123 |
-
"cell_type": "markdown",
|
| 124 |
-
"metadata": {},
|
| 125 |
-
"source": [
|
| 126 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 127 |
-
" <tr>\n",
|
| 128 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 129 |
-
" <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 130 |
-
" </td>\n",
|
| 131 |
-
" <td>\n",
|
| 132 |
-
" <h2 style=\"color:#ff7800;\">Final reminders</h2>\n",
|
| 133 |
-
" <span style=\"color:#ff7800;\">1. If you're not confident about Environment Variables or Web Endpoints / APIs, please read Topics 3 and 5 in this <a href=\"../guides/04_technical_foundations.ipynb\">technical foundations guide</a>.<br/>\n",
|
| 134 |
-
" 2. If you want to use AIs other than OpenAI, like Gemini, DeepSeek or Ollama (free), please see the first section in this <a href=\"../guides/09_ai_apis_and_ollama.ipynb\">AI APIs guide</a>.<br/>\n",
|
| 135 |
-
" 3. If you ever get a Name Error in Python, you can always fix it immediately; see the last section of this <a href=\"../guides/06_python_foundations.ipynb\">Python Foundations guide</a> and follow both tutorials and exercises.<br/>\n",
|
| 136 |
-
" </span>\n",
|
| 137 |
-
" </td>\n",
|
| 138 |
-
" </tr>\n",
|
| 139 |
-
"</table>"
|
| 140 |
-
]
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"cell_type": "code",
|
| 144 |
-
"execution_count": null,
|
| 145 |
-
"metadata": {},
|
| 146 |
-
"outputs": [],
|
| 147 |
-
"source": [
|
| 148 |
-
"# Check the key - if you're not using OpenAI, check whichever key you're using! Ollama doesn't need a key.\n",
|
| 149 |
-
"\n",
|
| 150 |
-
"import os\n",
|
| 151 |
-
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
| 152 |
-
"\n",
|
| 153 |
-
"if openai_api_key:\n",
|
| 154 |
-
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
|
| 155 |
-
"else:\n",
|
| 156 |
-
" print(\"OpenAI API Key not set - please head to the troubleshooting guide in the setup folder\")\n",
|
| 157 |
-
" \n"
|
| 158 |
-
]
|
| 159 |
-
},
|
| 160 |
-
{
|
| 161 |
-
"cell_type": "code",
|
| 162 |
-
"execution_count": null,
|
| 163 |
-
"metadata": {},
|
| 164 |
-
"outputs": [],
|
| 165 |
-
"source": [
|
| 166 |
-
"# And now - the all important import statement\n",
|
| 167 |
-
"# If you get an import error - head over to troubleshooting in the Setup folder\n",
|
| 168 |
-
"# Even for other LLM providers like Gemini, you still use this OpenAI import - see Guide 9 for why\n",
|
| 169 |
-
"\n",
|
| 170 |
-
"from openai import OpenAI"
|
| 171 |
-
]
|
| 172 |
-
},
|
| 173 |
-
{
|
| 174 |
-
"cell_type": "code",
|
| 175 |
-
"execution_count": null,
|
| 176 |
-
"metadata": {},
|
| 177 |
-
"outputs": [],
|
| 178 |
-
"source": [
|
| 179 |
-
"# And now we'll create an instance of the OpenAI class\n",
|
| 180 |
-
"# If you're not sure what it means to create an instance of a class - head over to the guides folder (guide 6)!\n",
|
| 181 |
-
"# If you get a NameError - head over to the guides folder (guide 6)to learn about NameErrors - always instantly fixable\n",
|
| 182 |
-
"# If you're not using OpenAI, you just need to slightly modify this - precise instructions are in the AI APIs guide (guide 9)\n",
|
| 183 |
-
"\n",
|
| 184 |
-
"openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')"
|
| 185 |
-
]
|
| 186 |
-
},
|
| 187 |
-
{
|
| 188 |
-
"cell_type": "code",
|
| 189 |
-
"execution_count": null,
|
| 190 |
-
"metadata": {},
|
| 191 |
-
"outputs": [],
|
| 192 |
-
"source": [
|
| 193 |
-
"# Create a list of messages in the familiar OpenAI format\n",
|
| 194 |
-
"\n",
|
| 195 |
-
"messages = [{\"role\": \"user\", \"content\": \"What is 2+2?\"}]"
|
| 196 |
-
]
|
| 197 |
-
},
|
| 198 |
-
{
|
| 199 |
-
"cell_type": "code",
|
| 200 |
-
"execution_count": null,
|
| 201 |
-
"metadata": {},
|
| 202 |
-
"outputs": [],
|
| 203 |
-
"source": [
|
| 204 |
-
"# And now call it! Any problems, head to the troubleshooting guide\n",
|
| 205 |
-
"# This uses llama3.2 running locally via Ollama, so there is no API cost\n",
|
| 206 |
-
"# The APIs guide (guide 9) has exact instructions for using even cheaper or free alternatives to OpenAI\n",
|
| 207 |
-
"# If you get a NameError, head to the guides folder (guide 6) to learn about NameErrors - always instantly fixable\n",
|
| 208 |
-
"\n",
|
| 209 |
-
"response = openai.chat.completions.create(\n",
|
| 210 |
-
" model=\"llama3.2\",\n",
|
| 211 |
-
" messages=messages\n",
|
| 212 |
-
")\n",
|
| 213 |
-
"\n",
|
| 214 |
-
"print(response.choices[0].message.content)\n"
|
| 215 |
-
]
|
| 216 |
-
},
|
| 217 |
-
{
|
| 218 |
-
"cell_type": "code",
|
| 219 |
-
"execution_count": null,
|
| 220 |
-
"metadata": {},
|
| 221 |
-
"outputs": [],
|
| 222 |
-
"source": [
|
| 223 |
-
"# And now - let's ask for a question:\n",
|
| 224 |
-
"\n",
|
| 225 |
-
"question = \"Please propose a hard, challenging question to assess someone's IQ. Respond only with the question.\"\n",
|
| 226 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 227 |
-
]
|
| 228 |
-
},
|
| 229 |
-
{
|
| 230 |
-
"cell_type": "code",
|
| 231 |
-
"execution_count": null,
|
| 232 |
-
"metadata": {},
|
| 233 |
-
"outputs": [],
|
| 234 |
-
"source": [
|
| 235 |
-
"# ask it - this again uses the local llama3.2 model via Ollama\n",
|
| 236 |
-
"\n",
|
| 237 |
-
"response = openai.chat.completions.create(\n",
|
| 238 |
-
" model=\"llama3.2\",\n",
|
| 239 |
-
" messages=messages\n",
|
| 240 |
-
")\n",
|
| 241 |
-
"\n",
|
| 242 |
-
"question = response.choices[0].message.content\n",
|
| 243 |
-
"\n",
|
| 244 |
-
"print(question)\n"
|
| 245 |
-
]
|
| 246 |
-
},
|
| 247 |
-
{
|
| 248 |
-
"cell_type": "code",
|
| 249 |
-
"execution_count": null,
|
| 250 |
-
"metadata": {},
|
| 251 |
-
"outputs": [],
|
| 252 |
-
"source": [
|
| 253 |
-
"# form a new messages list\n",
|
| 254 |
-
"messages = [{\"role\": \"user\", \"content\": question}]\n"
|
| 255 |
-
]
|
| 256 |
-
},
|
| 257 |
-
{
|
| 258 |
-
"cell_type": "code",
|
| 259 |
-
"execution_count": null,
|
| 260 |
-
"metadata": {},
|
| 261 |
-
"outputs": [],
|
| 262 |
-
"source": [
|
| 263 |
-
"# Ask it again\n",
|
| 264 |
-
"\n",
|
| 265 |
-
"response = openai.chat.completions.create(\n",
|
| 266 |
-
" model=\"llama3.2\",\n",
|
| 267 |
-
" messages=messages\n",
|
| 268 |
-
")\n",
|
| 269 |
-
"\n",
|
| 270 |
-
"answer = response.choices[0].message.content\n",
|
| 271 |
-
"print(answer)\n"
|
| 272 |
-
]
|
| 273 |
-
},
|
| 274 |
-
{
|
| 275 |
-
"cell_type": "code",
|
| 276 |
-
"execution_count": null,
|
| 277 |
-
"metadata": {},
|
| 278 |
-
"outputs": [],
|
| 279 |
-
"source": [
|
| 280 |
-
"from IPython.display import Markdown, display\n",
|
| 281 |
-
"\n",
|
| 282 |
-
"display(Markdown(answer))\n",
|
| 283 |
-
"\n"
|
| 284 |
-
]
|
| 285 |
-
},
|
| 286 |
-
{
|
| 287 |
-
"cell_type": "markdown",
|
| 288 |
-
"metadata": {},
|
| 289 |
-
"source": [
|
| 290 |
-
"# Congratulations!\n",
|
| 291 |
-
"\n",
|
| 292 |
-
"That was a small, simple step in the direction of Agentic AI, with your new environment!\n",
|
| 293 |
-
"\n",
|
| 294 |
-
"Next time things get more interesting..."
|
| 295 |
-
]
|
| 296 |
-
},
|
| 297 |
-
{
|
| 298 |
-
"cell_type": "markdown",
|
| 299 |
-
"metadata": {},
|
| 300 |
-
"source": [
|
| 301 |
-
"<table style=\"margin: 0; text-align: left; width:100%\">\n",
|
| 302 |
-
" <tr>\n",
|
| 303 |
-
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
| 304 |
-
" <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
| 305 |
-
" </td>\n",
|
| 306 |
-
" <td>\n",
|
| 307 |
-
" <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
|
| 308 |
-
" <span style=\"color:#ff7800;\">Now try this commercial application:<br/>\n",
|
| 309 |
-
" First ask the LLM to pick a business area that might be worth exploring for an Agentic AI opportunity.<br/>\n",
|
| 310 |
-
" Then ask the LLM to present a pain-point in that industry - something challenging that might be ripe for an Agentic solution.<br/>\n",
|
| 311 |
-
" Finally have a third LLM call propose the Agentic AI solution. <br/>\n",
|
| 312 |
-
" We will cover this at up-coming labs, so don't worry if you're unsure.. just give it a try!\n",
|
| 313 |
-
" </span>\n",
|
| 314 |
-
" </td>\n",
|
| 315 |
-
" </tr>\n",
|
| 316 |
-
"</table>"
|
| 317 |
-
]
|
| 318 |
-
},
|
| 319 |
-
{
|
| 320 |
-
"cell_type": "code",
|
| 321 |
-
"execution_count": null,
|
| 322 |
-
"metadata": {},
|
| 323 |
-
"outputs": [],
|
| 324 |
-
"source": [
|
| 325 |
-
"# First create the messages:\n",
|
| 326 |
-
"\n",
|
| 327 |
-
"messages = [{\"role\": \"user\", \"content\": \"Pick a business area that might be worth exploring for an Agentic AI opportunity.\"}]\n",
|
| 328 |
-
"\n",
|
| 329 |
-
"# Then make the first call:\n",
|
| 330 |
-
"\n",
|
| 331 |
-
"\n",
|
| 332 |
-
"response = openai.chat.completions.create(\n",
|
| 333 |
-
" model = \"llama3.2\",\n",
|
| 334 |
-
" messages=messages\n",
|
| 335 |
-
")\n",
|
| 336 |
-
"\n",
|
| 337 |
-
"# Then read the business idea:\n",
|
| 338 |
-
"\n",
|
| 339 |
-
"business_idea = response.choices[0].message.content\n",
|
| 340 |
-
"print(business_idea)"
|
| 341 |
-
]
|
| 342 |
-
},
|
| 343 |
-
{
|
| 344 |
-
"cell_type": "markdown",
|
| 345 |
-
"metadata": {},
|
| 346 |
-
"source": []
|
| 347 |
-
}
|
| 348 |
-
],
|
| 349 |
-
"metadata": {
|
| 350 |
-
"kernelspec": {
|
| 351 |
-
"display_name": ".venv",
|
| 352 |
-
"language": "python",
|
| 353 |
-
"name": "python3"
|
| 354 |
-
},
|
| 355 |
-
"language_info": {
|
| 356 |
-
"codemirror_mode": {
|
| 357 |
-
"name": "ipython",
|
| 358 |
-
"version": 3
|
| 359 |
-
},
|
| 360 |
-
"file_extension": ".py",
|
| 361 |
-
"mimetype": "text/x-python",
|
| 362 |
-
"name": "python",
|
| 363 |
-
"nbconvert_exporter": "python",
|
| 364 |
-
"pygments_lexer": "ipython3",
|
| 365 |
-
"version": "3.12.11"
|
| 366 |
-
}
|
| 367 |
-
},
|
| 368 |
-
"nbformat": 4,
|
| 369 |
-
"nbformat_minor": 2
|
| 370 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|