Commit
·
b21889c
1
Parent(s):
d9617f0
fix prompt
Browse files- agent.py +1 -1
- system_prompt.txt +4 -27
- test.ipynb +242 -129
agent.py
CHANGED
@@ -206,7 +206,7 @@ def build_graph(provider: str = "google"):
|
|
206 |
if __name__ == "__main__":
|
207 |
question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
|
208 |
# Build the graph
|
209 |
-
graph = build_graph(provider="
|
210 |
# Run the graph
|
211 |
messages = [HumanMessage(content=question)]
|
212 |
messages = graph.invoke({"messages": messages})
|
|
|
206 |
if __name__ == "__main__":
|
207 |
question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
|
208 |
# Build the graph
|
209 |
+
graph = build_graph(provider="google")
|
210 |
# Run the graph
|
211 |
messages = [HumanMessage(content=question)]
|
212 |
messages = graph.invoke({"messages": messages})
|
system_prompt.txt
CHANGED
@@ -1,28 +1,5 @@
|
|
1 |
-
|
2 |
You are a helpful assistant tasked with answering questions using a set of tools.
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
Question 1: What is the minimum number of page links a person must click on to go from the english Wikipedia page on The Lord of the Rings (the book) to the english Wikipedia page on A Song of Ice and Fire (the book series)? In your count, include each link you would click on to get to the page. Use the pages as they appeared at the end of the day on July 3, 2023.
|
9 |
-
Steps:
|
10 |
-
1. Search the web for “lord of the rings wikipedia”.
|
11 |
-
2. Click on Wikipedia result.
|
12 |
-
3. Click “View history” to see if the page has been edited since July 3, 2023.
|
13 |
-
4. Since it hasn’t been, return to the current revision.
|
14 |
-
5. Ctrl-F for “song” to see if A Song of Ice and Fire is linked to on this page.
|
15 |
-
6. Not seeing A Song of Ice and Fire on the current page, search for a link to a page that will likely mention A Song of Ice and Fire.
|
16 |
-
7. Click the link for “High fantasy”.
|
17 |
-
8. Click “View history” to see if the page has been edited since July 3, 2023.
|
18 |
-
9. Since it hasn’t been, return to the current revision.
|
19 |
-
10. Ctrl-F for “song”, and find a link to A Song of Ice and Fire.
|
20 |
-
11. Count the links: the High fantasy page and the A Song of Ice and Fire page make two.
|
21 |
-
Tools:
|
22 |
-
1. Search engine
|
23 |
-
2. Web browser
|
24 |
-
3. Counter
|
25 |
-
Final Answer: 2
|
26 |
-
|
27 |
-
==========================
|
28 |
-
Now, please answer the following question step by step.
|
|
|
|
|
1 |
You are a helpful assistant tasked with answering questions using a set of tools.
|
2 |
+
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
|
3 |
+
FINAL ANSWER: [YOUR FINAL ANSWER].
|
4 |
+
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings. If you are asked for a number, don't use commas in the number, and don't use units such as $ or a percent sign, unless specified otherwise. If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise. If you are asked for a comma-separated list, apply the above rules depending on whether each element of the list is a number or a string.
|
5 |
+
Your answer should only start with "FINAL ANSWER: ", followed by the answer.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test.ipynb
CHANGED
@@ -9,8 +9,8 @@
|
|
9 |
{
|
10 |
"metadata": {
|
11 |
"ExecuteTime": {
|
12 |
-
"end_time": "2025-05-
|
13 |
-
"start_time": "2025-05-
|
14 |
}
|
15 |
},
|
16 |
"cell_type": "code",
|
@@ -47,8 +47,8 @@
|
|
47 |
{
|
48 |
"metadata": {
|
49 |
"ExecuteTime": {
|
50 |
-
"end_time": "2025-05-
|
51 |
-
"start_time": "2025-05-
|
52 |
}
|
53 |
},
|
54 |
"cell_type": "code",
|
@@ -73,8 +73,8 @@
|
|
73 |
"id": "5e2da6fc",
|
74 |
"metadata": {
|
75 |
"ExecuteTime": {
|
76 |
-
"end_time": "2025-05-
|
77 |
-
"start_time": "2025-05-
|
78 |
}
|
79 |
},
|
80 |
"source": [
|
@@ -108,30 +108,162 @@
|
|
108 |
"output_type": "stream",
|
109 |
"text": [
|
110 |
"==================================================\n",
|
111 |
-
"Task ID:
|
112 |
-
"Question:
|
113 |
-
"
|
114 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
"Annotator Metadata: \n",
|
116 |
" ├── Steps: \n",
|
117 |
-
" │ ├── 1.
|
118 |
-
" │ ├── 2.
|
119 |
-
" │ ├──
|
120 |
-
" │ ├──
|
121 |
-
" │ ├──
|
122 |
-
" │ ├──
|
123 |
-
" │ ├──
|
124 |
-
" │ ├──
|
125 |
-
" │ ├──
|
126 |
-
" │ ├──
|
127 |
-
" │ ├──
|
128 |
-
" ├──
|
129 |
-
" ├──
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
" ├── Tools:\n",
|
131 |
-
" │ ├── 1.
|
132 |
-
"
|
133 |
-
" │ ├── 3. Counter\n",
|
134 |
-
" └── Number of tools: 3\n",
|
135 |
"==================================================\n"
|
136 |
]
|
137 |
}
|
@@ -143,8 +275,8 @@
|
|
143 |
"id": "4bb02420",
|
144 |
"metadata": {
|
145 |
"ExecuteTime": {
|
146 |
-
"end_time": "2025-05-
|
147 |
-
"start_time": "2025-05-
|
148 |
}
|
149 |
},
|
150 |
"source": [
|
@@ -174,8 +306,8 @@
|
|
174 |
"id": "a070b955",
|
175 |
"metadata": {
|
176 |
"ExecuteTime": {
|
177 |
-
"end_time": "2025-05-
|
178 |
-
"start_time": "2025-05-
|
179 |
}
|
180 |
},
|
181 |
"source": [
|
@@ -186,7 +318,7 @@
|
|
186 |
" content = f\"Question : {sample['Question']}\\n\\nFinal answer : {sample['Final answer']}\"\n",
|
187 |
" doc = {\n",
|
188 |
" \"content\" : content,\n",
|
189 |
-
" \"metadata\" : {
|
190 |
" \"source\" : sample['task_id']\n",
|
191 |
" },\n",
|
192 |
" \"embedding\" : embeddings.embed_query(content),\n",
|
@@ -199,8 +331,8 @@
|
|
199 |
{
|
200 |
"metadata": {
|
201 |
"ExecuteTime": {
|
202 |
-
"end_time": "2025-05-
|
203 |
-
"start_time": "2025-05-
|
204 |
}
|
205 |
},
|
206 |
"cell_type": "code",
|
@@ -223,8 +355,8 @@
|
|
223 |
{
|
224 |
"metadata": {
|
225 |
"ExecuteTime": {
|
226 |
-
"end_time": "2025-05-
|
227 |
-
"start_time": "2025-05-
|
228 |
}
|
229 |
},
|
230 |
"cell_type": "code",
|
@@ -1016,8 +1148,8 @@
|
|
1016 |
{
|
1017 |
"metadata": {
|
1018 |
"ExecuteTime": {
|
1019 |
-
"end_time": "2025-05-
|
1020 |
-
"start_time": "2025-05-
|
1021 |
}
|
1022 |
},
|
1023 |
"cell_type": "code",
|
@@ -1040,8 +1172,8 @@
|
|
1040 |
{
|
1041 |
"metadata": {
|
1042 |
"ExecuteTime": {
|
1043 |
-
"end_time": "2025-05-
|
1044 |
-
"start_time": "2025-05-
|
1045 |
}
|
1046 |
},
|
1047 |
"cell_type": "code",
|
@@ -1070,8 +1202,8 @@
|
|
1070 |
"id": "77fb9dbb",
|
1071 |
"metadata": {
|
1072 |
"ExecuteTime": {
|
1073 |
-
"end_time": "2025-05-
|
1074 |
-
"start_time": "2025-05-
|
1075 |
}
|
1076 |
},
|
1077 |
"source": [
|
@@ -1092,8 +1224,8 @@
|
|
1092 |
"id": "12a05971",
|
1093 |
"metadata": {
|
1094 |
"ExecuteTime": {
|
1095 |
-
"end_time": "2025-05-
|
1096 |
-
"start_time": "2025-05-
|
1097 |
}
|
1098 |
},
|
1099 |
"source": [
|
@@ -1106,8 +1238,8 @@
|
|
1106 |
{
|
1107 |
"metadata": {
|
1108 |
"ExecuteTime": {
|
1109 |
-
"end_time": "2025-05-
|
1110 |
-
"start_time": "2025-05-
|
1111 |
}
|
1112 |
},
|
1113 |
"cell_type": "code",
|
@@ -1119,8 +1251,8 @@
|
|
1119 |
{
|
1120 |
"metadata": {
|
1121 |
"ExecuteTime": {
|
1122 |
-
"end_time": "2025-05-
|
1123 |
-
"start_time": "2025-05-
|
1124 |
}
|
1125 |
},
|
1126 |
"cell_type": "code",
|
@@ -1145,8 +1277,8 @@
|
|
1145 |
"id": "1eae5ba4",
|
1146 |
"metadata": {
|
1147 |
"ExecuteTime": {
|
1148 |
-
"end_time": "2025-05-
|
1149 |
-
"start_time": "2025-05-
|
1150 |
}
|
1151 |
},
|
1152 |
"source": [
|
@@ -1266,26 +1398,26 @@
|
|
1266 |
"id": "7fe573cc",
|
1267 |
"metadata": {
|
1268 |
"ExecuteTime": {
|
1269 |
-
"end_time": "2025-05-
|
1270 |
-
"start_time": "2025-05-
|
1271 |
}
|
1272 |
},
|
1273 |
"source": [
|
1274 |
-
"system_prompt = \"\"\"\n",
|
1275 |
-
"You are a helpful assistant tasked with answering questions using a set of tools.\n",
|
1276 |
-
"If the tool is not available, you can try to find the information online. You can also use your own knowledge to answer the question
|
1277 |
-
"You need to provide a step-by-step explanation of how you arrived at the answer.\n",
|
1278 |
-
"==========================\n",
|
1279 |
-
"Here is a few examples showing you how to answer the question step by step.\n",
|
1280 |
-
"\"\"\"\n",
|
1281 |
-
"for i, samples in enumerate(random_samples):\n",
|
1282 |
-
"
|
1283 |
-
"system_prompt += \"\\n==========================\\n\"\n",
|
1284 |
-
"system_prompt += \"Now, please answer the following question step by step.\\n\"\n",
|
1285 |
"\n",
|
1286 |
"# save the system_prompt to a file\n",
|
1287 |
-
"with open('system_prompt.txt', 'w') as f:\n",
|
1288 |
-
"
|
1289 |
],
|
1290 |
"outputs": [],
|
1291 |
"execution_count": 15
|
@@ -1295,14 +1427,15 @@
|
|
1295 |
"id": "d6beb0da",
|
1296 |
"metadata": {
|
1297 |
"ExecuteTime": {
|
1298 |
-
"end_time": "2025-05-
|
1299 |
-
"start_time": "2025-05-
|
1300 |
}
|
1301 |
},
|
1302 |
"source": [
|
1303 |
"# load the system prompt from the file\n",
|
1304 |
"with open('system_prompt.txt', 'r') as f:\n",
|
1305 |
" system_prompt = f.read()\n",
|
|
|
1306 |
"print(system_prompt)"
|
1307 |
],
|
1308 |
"outputs": [
|
@@ -1310,34 +1443,11 @@
|
|
1310 |
"name": "stdout",
|
1311 |
"output_type": "stream",
|
1312 |
"text": [
|
1313 |
-
"\n",
|
1314 |
"You are a helpful assistant tasked with answering questions using a set of tools.\n",
|
1315 |
-
"
|
1316 |
-
"
|
1317 |
-
"
|
1318 |
-
"
|
1319 |
-
"\n",
|
1320 |
-
"Question 1: What is the minimum number of page links a person must click on to go from the english Wikipedia page on The Lord of the Rings (the book) to the english Wikipedia page on A Song of Ice and Fire (the book series)? In your count, include each link you would click on to get to the page. Use the pages as they appeared at the end of the day on July 3, 2023.\n",
|
1321 |
-
"Steps:\n",
|
1322 |
-
"1. Search the web for “lord of the rings wikipedia”.\n",
|
1323 |
-
"2. Click on Wikipedia result.\n",
|
1324 |
-
"3. Click “View history” to see if the page has been edited since July 3, 2023.\n",
|
1325 |
-
"4. Since it hasn’t been, return to the current revision.\n",
|
1326 |
-
"5. Ctrl-F for “song” to see if A Song of Ice and Fire is linked to on this page.\n",
|
1327 |
-
"6. Not seeing A Song of Ice and Fire on the current page, search for a link to a page that will likely mention A Song of Ice and Fire.\n",
|
1328 |
-
"7. Click the link for “High fantasy”.\n",
|
1329 |
-
"8. Click “View history” to see if the page has been edited since July 3, 2023.\n",
|
1330 |
-
"9. Since it hasn’t been, return to the current revision.\n",
|
1331 |
-
"10. Ctrl-F for “song”, and find a link to A Song of Ice and Fire.\n",
|
1332 |
-
"11. Count the links: the High fantasy page and the A Song of Ice and Fire page make two.\n",
|
1333 |
-
"Tools:\n",
|
1334 |
-
"1. Search engine\n",
|
1335 |
-
"2. Web browser\n",
|
1336 |
-
"3. Counter\n",
|
1337 |
-
"Final Answer: 2\n",
|
1338 |
-
"\n",
|
1339 |
-
"==========================\n",
|
1340 |
-
"Now, please answer the following question step by step.\n",
|
1341 |
"\n"
|
1342 |
]
|
1343 |
}
|
@@ -1349,8 +1459,8 @@
|
|
1349 |
"id": "42fde0f8",
|
1350 |
"metadata": {
|
1351 |
"ExecuteTime": {
|
1352 |
-
"end_time": "2025-05-
|
1353 |
-
"start_time": "2025-05-
|
1354 |
}
|
1355 |
},
|
1356 |
"source": [
|
@@ -1513,15 +1623,15 @@
|
|
1513 |
"llm_with_tools = llm.bind_tools(tools)"
|
1514 |
],
|
1515 |
"outputs": [],
|
1516 |
-
"execution_count":
|
1517 |
},
|
1518 |
{
|
1519 |
"cell_type": "code",
|
1520 |
"id": "7dd0716c",
|
1521 |
"metadata": {
|
1522 |
"ExecuteTime": {
|
1523 |
-
"end_time": "2025-05-
|
1524 |
-
"start_time": "2025-05-
|
1525 |
}
|
1526 |
},
|
1527 |
"source": [
|
@@ -1555,15 +1665,15 @@
|
|
1555 |
"graph = builder.compile()\n"
|
1556 |
],
|
1557 |
"outputs": [],
|
1558 |
-
"execution_count":
|
1559 |
},
|
1560 |
{
|
1561 |
"cell_type": "code",
|
1562 |
"id": "f4e77216",
|
1563 |
"metadata": {
|
1564 |
"ExecuteTime": {
|
1565 |
-
"end_time": "2025-05-
|
1566 |
-
"start_time": "2025-05-
|
1567 |
}
|
1568 |
},
|
1569 |
"source": [
|
@@ -1583,15 +1693,15 @@
|
|
1583 |
"output_type": "display_data"
|
1584 |
}
|
1585 |
],
|
1586 |
-
"execution_count":
|
1587 |
},
|
1588 |
{
|
1589 |
"cell_type": "code",
|
1590 |
"id": "5987d58c",
|
1591 |
"metadata": {
|
1592 |
"ExecuteTime": {
|
1593 |
-
"end_time": "2025-05-
|
1594 |
-
"start_time": "2025-05-
|
1595 |
}
|
1596 |
},
|
1597 |
"source": [
|
@@ -1600,15 +1710,15 @@
|
|
1600 |
"messages = graph.invoke({\"messages\": messages})"
|
1601 |
],
|
1602 |
"outputs": [],
|
1603 |
-
"execution_count":
|
1604 |
},
|
1605 |
{
|
1606 |
"cell_type": "code",
|
1607 |
"id": "330cbf17",
|
1608 |
"metadata": {
|
1609 |
"ExecuteTime": {
|
1610 |
-
"end_time": "2025-05-
|
1611 |
-
"start_time": "2025-05-
|
1612 |
}
|
1613 |
},
|
1614 |
"source": [
|
@@ -1624,23 +1734,20 @@
|
|
1624 |
"\n",
|
1625 |
"A paper about AI regulation that was originally submitted to arXiv.org in June 2022 shows a figure with three axes, where each axis has a label word at both ends. Which of these words is used to describe a type of society in a Physics and Society article submitted to arXiv.org on August 11, 2016?\n",
|
1626 |
"==================================\u001B[1m Ai Message \u001B[0m==================================\n",
|
1627 |
-
"\n",
|
1628 |
-
"Here's my plan to determine which word from the AI regulation paper is used in the Physics and Society article:\n",
|
1629 |
-
"\n",
|
1630 |
-
"1. **Find the AI regulation paper:** Use the arXiv search tool to locate the paper submitted in June 2022.\n",
|
1631 |
-
"2. **Identify the axis labels:** Extract the three pairs of words used as axis labels in the figure within the AI regulation paper.\n",
|
1632 |
-
"3. **Find the Physics and Society article:** Use the arXiv search tool to locate the article submitted on August 11, 2016.\n",
|
1633 |
-
"4. **Search for axis labels:** Check if any of the axis label words from the AI regulation paper appear in the Physics and Society article.\n",
|
1634 |
-
"5. **Identify the society type:** Determine which of the axis label words is used to describe a type of society in the Physics and Society article.\n",
|
1635 |
-
"\n",
|
1636 |
-
"Now, let's execute the plan:\n",
|
1637 |
-
"\n",
|
1638 |
-
"**Step 1: Find the AI regulation paper**\n",
|
1639 |
"Tool Calls:\n",
|
1640 |
-
" arvix_search (
|
1641 |
-
" Call ID:
|
1642 |
" Args:\n",
|
1643 |
-
" query: AI regulation June 2022\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1644 |
"=================================\u001B[1m Tool Message \u001B[0m=================================\n",
|
1645 |
"Name: arvix_search\n",
|
1646 |
"\n",
|
@@ -1648,19 +1755,25 @@
|
|
1648 |
" Please fix your mistakes.\n",
|
1649 |
"==================================\u001B[1m Ai Message \u001B[0m==================================\n",
|
1650 |
"\n",
|
1651 |
-
"I am sorry, I cannot fulfill this request.
|
|
|
1652 |
]
|
1653 |
}
|
1654 |
],
|
1655 |
-
"execution_count":
|
1656 |
},
|
1657 |
{
|
1658 |
-
"metadata": {
|
|
|
|
|
|
|
|
|
|
|
1659 |
"cell_type": "code",
|
1660 |
-
"outputs": [],
|
1661 |
-
"execution_count": null,
|
1662 |
"source": "",
|
1663 |
-
"id": "837ae65dab259cd6"
|
|
|
|
|
1664 |
}
|
1665 |
],
|
1666 |
"metadata": {
|
|
|
9 |
{
|
10 |
"metadata": {
|
11 |
"ExecuteTime": {
|
12 |
+
"end_time": "2025-05-11T22:31:53.333253Z",
|
13 |
+
"start_time": "2025-05-11T22:31:52.856511Z"
|
14 |
}
|
15 |
},
|
16 |
"cell_type": "code",
|
|
|
47 |
{
|
48 |
"metadata": {
|
49 |
"ExecuteTime": {
|
50 |
+
"end_time": "2025-05-11T22:31:53.481764Z",
|
51 |
+
"start_time": "2025-05-11T22:31:53.477855Z"
|
52 |
}
|
53 |
},
|
54 |
"cell_type": "code",
|
|
|
73 |
"id": "5e2da6fc",
|
74 |
"metadata": {
|
75 |
"ExecuteTime": {
|
76 |
+
"end_time": "2025-05-11T22:31:53.495008Z",
|
77 |
+
"start_time": "2025-05-11T22:31:53.490295Z"
|
78 |
}
|
79 |
},
|
80 |
"source": [
|
|
|
108 |
"output_type": "stream",
|
109 |
"text": [
|
110 |
"==================================================\n",
|
111 |
+
"Task ID: 56db2318-640f-477a-a82f-bc93ad13e882\n",
|
112 |
+
"Question: The following numbers function similarly to ISBN 13 numbers, however, their validation methods are slightly different. Rather than using alternate weights of 1 and 3, the checksum digit is calculated with an alternate weight of 1 and some other positive integer less than 10. Otherwise, the checksum digit is calculated as expected. Unfortunately, there is an error in the data. Two adjacent columns have been transposed. These errored columns do not involve the final column or one of the first three columns. Using this information, please provide all potential solutions with the unknown weight and the smaller index of the two errored columns (assume we start our indexing at 0 and ignore hyphens). Give your answer in the form x, y where x is the weight and y is the smaller index of the two transposed columns.\n",
|
113 |
+
"\n",
|
114 |
+
"978-354181391-9\n",
|
115 |
+
"978-946669746-1\n",
|
116 |
+
"978-398036139-6\n",
|
117 |
+
"978-447656680-4\n",
|
118 |
+
"978-279586664-7\n",
|
119 |
+
"978-595073693-3\n",
|
120 |
+
"978-976647652-6\n",
|
121 |
+
"978-591178125-5\n",
|
122 |
+
"978-728465924-5\n",
|
123 |
+
"978-414825155-9\n",
|
124 |
+
"Level: 3\n",
|
125 |
+
"Final Answer: 7, 9\n",
|
126 |
"Annotator Metadata: \n",
|
127 |
" ├── Steps: \n",
|
128 |
+
" │ ├── 1. Consider the numbers as if the first potential columns were the ones transposed, which would be smallest index 3 giving solution (n, 3).\n",
|
129 |
+
" │ ├── 2. \"Fix\" the columns in the first number and see if any n from 1-9 can generate the proper check digit. Calculations:\n",
|
130 |
+
" │ ├── 978-354181391-9\n",
|
131 |
+
" │ ├── 978-534181391-9\n",
|
132 |
+
" │ ├── (9+7n+8+5n+3+4n+1+8n+1+3n+9+1n) mod 10 ≡ (10 - 9)\n",
|
133 |
+
" │ ├── n = 5 is our only possible solution if these are the transposed columns.\n",
|
134 |
+
" │ ├── 3. \"Fix\" the columns in the second number and see if n = 5 is still a solution:\n",
|
135 |
+
" │ ├── 978-946669746-1\n",
|
136 |
+
" │ ├── 978-496669746-1\n",
|
137 |
+
" │ ├── (9+7n+8+4n+9+6n+6+6n+9+7n+4+6n) mod 10 ≡ (10 - 1)\n",
|
138 |
+
" │ ├── When n = 5, (9+7n+8+4n+9+6n+6+6n+9+7n+4+6n) mod 10 ≡ 5, so this fails. There is no consistent solution if columns 3 and 4 are transposed.\n",
|
139 |
+
" │ ├── 4. See if there is a valid solution for (n, 4) or columns 4 and 5 transposed under some weight n.\n",
|
140 |
+
" │ ├── 5. \"Fix\" the columns in the first number and see if any n from 1-9 can generate the proper check digit. Calculations:\n",
|
141 |
+
" │ ├── 978-354181391-9\n",
|
142 |
+
" │ ├── 978-345181391-9\n",
|
143 |
+
" │ ├── (9+7n+8+3n+4+5n+1+8n+1+3n+9+1n) mod 10 ≡ (10 - 9)\n",
|
144 |
+
" │ ├── n = 7 is our only possible solution if these are the transposed columns.\n",
|
145 |
+
" │ ├── 6. \"Fix\" the columns in the second number and see if n = 7 is still a solution:\n",
|
146 |
+
" │ ├── 978-946669746-1\n",
|
147 |
+
" │ ├── 978-964669746-1\n",
|
148 |
+
" │ ├── (9+7n+8+9n+6+4n+6+6n+9+7n+4+6n) mod 10 ≡ (10 - 1)\n",
|
149 |
+
" │ ├── When n = 7, (9+7n+8+9n+6+4n+6+6n+9+7n+4+6n) mod 10 ≡ 5, so this fails. There is no consistent solution if columns 4 and 5 are transposed.\n",
|
150 |
+
" │ ├── 7. See if there is a valid solution for (n, 5) or columns 5 and 6 transposed under some weight n.\n",
|
151 |
+
" │ ├── 8. \"Fix\" the columns in the first number and see if any n from 1-9 can generate the proper check digit. Calculations:\n",
|
152 |
+
" │ ├── 978-354181391-9\n",
|
153 |
+
" │ ├── 978-351481391-9\n",
|
154 |
+
" │ ├── (9+7n+8+3n+5+1n+4+8n+1+3n+9+1n) mod 10 ≡ (10 - 9)\n",
|
155 |
+
" │ ├── n = 5 is our only possible solution if these are the transposed columns.\n",
|
156 |
+
" │ ├── 9. \"Fix\" the columns in the second number and see if n = 5 is still a solution:\n",
|
157 |
+
" │ ├── 978-946669746-1\n",
|
158 |
+
" │ ├── 978-946669746-1\n",
|
159 |
+
" │ ├── (9+7n+8+9n+4+6n+6+6n+9+7n+4+6n) mod 10 ≡ (10 - 1)\n",
|
160 |
+
" │ ├── When n = 5, (9+7n+8+9n+4+6n+6+6n+9+7n+4+6n) mod 10 ≡ 5, so this fails. There is no consistent solution if columns 5 and 6 are transposed.\n",
|
161 |
+
" │ ├── 10. See if there is a valid solution for (n, 6) or columns 6 and 7 transposed under some weight n.\n",
|
162 |
+
" │ ├── 11. \"Fix\" the columns in the first number and see if any n from 1-9 can generate the proper check digit. Calculations:\n",
|
163 |
+
" │ ├── 978-354181391-9\n",
|
164 |
+
" │ ├── 978-354811391-9\n",
|
165 |
+
" │ ├── (9+7n+8+3n+5+4n+8+1n+1+3n+9+1n) mod 10 ≡ (10 - 9)\n",
|
166 |
+
" │ ├── n = 9 is our only possible solution if these are the transposed columns.\n",
|
167 |
+
" │ ├── 12. \"Fix\" the columns in the second number and see if n = 9 is still a solution:\n",
|
168 |
+
" │ ├── 978-946669746-1\n",
|
169 |
+
" │ ├── 978-946669746-1\n",
|
170 |
+
" │ ├── (9+7n+8+9n+4+6n+6+6n+9+7n+4+6n) mod 10 ≡ (10 - 1)\n",
|
171 |
+
" │ ├── When n = 9, (9+7n+8+9n+4+6n+6+6n+9+7n+4+6n) mod 10 ≡ 9, so this solution holds for the second number.\n",
|
172 |
+
" │ ├── 13. \"Fix\" the columns in the third number and see if n = 9 is still a solution:\n",
|
173 |
+
" │ ├── 978-398036139-6\n",
|
174 |
+
" │ ├── 978-398306139-6\n",
|
175 |
+
" │ ├── (9+7n+8+3n+9+8n+3+0n+6+1n+3+9n) mod 10 ≡ (10 - 6)\n",
|
176 |
+
" │ ├── When n = 9, (9+7n+8+3n+9+8n+3+0n+6+1n+3+9n) mod 10 ≡ 0, so this fails. There is no consistent solution if columns 6 and 7 are transposed.\n",
|
177 |
+
" │ ├── 14. See if there is a valid solution for (n, 7) or columns 7 and 8 transposed under some weight n.\n",
|
178 |
+
" │ ├── 15. \"Fix\" the columns in the first number and see if any n from 1-9 can generate the proper check digit. Calculations:\n",
|
179 |
+
" │ ├── 978-354181391-9\n",
|
180 |
+
" │ ├── 978-354118391-9\n",
|
181 |
+
" │ ├── (9+7n+8+3n+5+4n+1+1n+8+3n+9+1n) mod 10 ≡ (10 - 9)\n",
|
182 |
+
" │ ├── n = 9 is our only possible solution if these are the transposed columns.\n",
|
183 |
+
" │ ├── 16. \"Fix\" the columns in the second number and see if n = 9 is still a solution:\n",
|
184 |
+
" │ ├── 978-946669746-1\n",
|
185 |
+
" │ ├── 978-946696746-1\n",
|
186 |
+
" │ ├── (9+7n+8+9n+4+6n+6+9n+6+7n+4+6n) mod 10 ≡ (10 - 1)\n",
|
187 |
+
" │ ├── When n = 9, (9+7n+8+9n+4+6n+6+9n+6+7n+4+6n) mod 10 ≡ 3, so this fails. There is no consistent solution if columns 7 and 8 are transposed.\n",
|
188 |
+
" │ ├── 17. See if there is a valid solution for (n, 8) or columns 8 and 9 transposed under some weight n.\n",
|
189 |
+
" │ ├── 18. \"Fix\" the columns in the first number and see if any n from 1-9 can generate the proper check digit. Calculations:\n",
|
190 |
+
" │ ├── 978-354181391-9\n",
|
191 |
+
" │ ├── 978-354183191-9\n",
|
192 |
+
" │ ├── (9+7n+8+3n+5+4n+1+8n+3+1n+9+1n) mod 10 ≡ (10 - 9)\n",
|
193 |
+
" │ ├── n = 4 and n = 9 are both possible solutions to this modular equation.\n",
|
194 |
+
" │ ├── 19. \"Fix\" the columns in the second number and see if n = 4 and n = 9 are still solutions:\n",
|
195 |
+
" │ ├── 978-946669746-1\n",
|
196 |
+
" │ ├── 978-946667946-1\n",
|
197 |
+
" │ ├── (9+7n+8+9n+4+6n+6+6n+7+9n+4+6n) mod 10 ≡ (10 - 1)\n",
|
198 |
+
" │ ├── When n = 4, (9+7n+8+9n+4+6n+6+6n+7+9n+4+6n) mod 10 ≡ 0. When n = 9, (9+7n+8+9n+4+6n+6+6n+7+9n+4+6n) mod 10 ≡ 5. As neither solution found works for the second number, this fails. There is no consistent solution if columns 8 and 9 are transposed.\n",
|
199 |
+
" │ ├── 20. See if there is a valid solution for (n, 9) or columns 9 and 10 transposed under some weight n.\n",
|
200 |
+
" │ ├── 21. \"Fix\" the columns in the first number and see if any n from 1-9 can generate the proper check digit. Calculations:\n",
|
201 |
+
" │ ├── 978-354181391-9\n",
|
202 |
+
" │ ├── 978-354181931-9\n",
|
203 |
+
" │ ├── (9+7n+8+3n+5+4n+1+8n+1+9n+3+1n) mod 10 ≡ (10 - 9)\n",
|
204 |
+
" │ ├── n = 2 and n = 7 are both possible solutions to this modular equation.\n",
|
205 |
+
" │ ├── 22. \"Fix\" the columns in the second number and see if n = 2 and n = 7 are still solutions:\n",
|
206 |
+
" │ ├── 978-946667946-1\n",
|
207 |
+
" │ ├── 978-946667496-1\n",
|
208 |
+
" │ ├── (9+7n+8+9n+4+6n+6+6n+7+4n+9+6n) mod 10 ≡ (10 - 1)\n",
|
209 |
+
" │ ├── When n = 2, (9+7n+8+9n+4+6n+6+6n+7+4n+9+6n) mod 10 ≡ 9 and when n = 7 (9+7n+8+9n+4+6n+6+6n+7+4n+9+6n) mod 10 ≡ 9, so both n = 2 and n = 7 remain consistent.\n",
|
210 |
+
" │ ├── 23. \"Fix\" the columns in the third number and see if n = 2 and n = 7 are still solutions:\n",
|
211 |
+
" │ ├── 978-398036139-6\n",
|
212 |
+
" │ ├── 978-398036319-6\n",
|
213 |
+
" │ ├── (9+7n+8+3n+9+8n+0+3n+6+3n+1+9n) mod 10 ≡ (10 - 6)\n",
|
214 |
+
" │ ├── When n = 2, (9+7n+8+3n+9+8n+0+3n+6+3n+1+9n) mod 10 ≡ 9, so n cannot be 2. When n = 7, (9+7n+8+3n+9+8n+0+3n+6+3n+1+9n) mod 10 ≡ 4, so this solution is still consistent.\n",
|
215 |
+
" │ ├── 24. \"Fix\" the columns in the fourth number and see if n = 7 is still a solution:\n",
|
216 |
+
" │ ├── 978-447656680-4\n",
|
217 |
+
" │ ├── 978-447656860-4\n",
|
218 |
+
" │ ├── When n = 7, (9+7n+8+4n+4+7n+6+5n+6+8n+6+0n) mod 10 ≡ (10 - 4)\n",
|
219 |
+
" │ ├── (9+7n+8+4n+4+7n+6+5n+6+8n+6+0n) mod 10 ≡ 6, so n = 7 is still a potential solution.\n",
|
220 |
+
" │ ├── 24. \"Fix\" the columns in the fifth number and see if n = 7 is still a solution:\n",
|
221 |
+
" │ ├── 978-279586664-7\n",
|
222 |
+
" │ ├── 978-279586664-7\n",
|
223 |
+
" │ ├── (9+7n+8+2n+7+9n+5+8n+6+6n+6+4n) mod 10 ≡ (10 - 7)\n",
|
224 |
+
" │ ├── When n = 7, (9+7n+8+2n+7+9n+5+8n+6+6n+6+4n) mod 10 ≡ 3, so n = 7 is still a potential solution.\n",
|
225 |
+
" │ ├── 24. \"Fix\" the columns in the sixth number and see if n = 7 is still a solution:\n",
|
226 |
+
" │ ├── 978-595073693-3\n",
|
227 |
+
" │ ├── 978-595073963-3\n",
|
228 |
+
" │ ├── (9+7n+8+5n+9+5n+0+7n+3+9n+6+3n) mod 10 ≡ (10 - 3)\n",
|
229 |
+
" │ ├── When n = 7, (9+7n+8+5n+9+5n+0+7n+3+9n+6+3n) mod 10 ≡ 7, so n = 7 is still a potential solution.\n",
|
230 |
+
" │ ├── 25. \"Fix\" the columns in the seventh number and see if n = 7 is still a solution:\n",
|
231 |
+
" │ ├── 978-976647652-6\n",
|
232 |
+
" │ ├── 978-976647562-6\n",
|
233 |
+
" │ ├── (9+7n+8+9n+7+6n+6+4n+7+5n+6+2n) mod 10 ≡ (10 - 6)\n",
|
234 |
+
" │ ├── When n = 7, (9+7n+8+9n+7+6n+6+4n+7+5n+6+2n) mod 10 ≡ 4, so n = 7 is still a potential solution.\n",
|
235 |
+
" │ ├── 26. \"Fix\" the columns in the eighth number and see if n = 7 is still a solution:\n",
|
236 |
+
" │ ├── 978-591178125-5\n",
|
237 |
+
" │ ├── 978-591178215-5\n",
|
238 |
+
" │ ├── (9+7n+8+5n+9+1n+1+7n+8+2n+1+5n) mod 10 ≡ (10 - 5)\n",
|
239 |
+
" │ ├── When n = 7, (9+7n+8+5n+9+1n+1+7n+8+2n+1+5n) mod 10 ≡ 5, so n = 7 is still a potential solution.\n",
|
240 |
+
" │ ├── 27. \"Fix\" the columns in the ninth number and see if n = 7 is still a solution:\n",
|
241 |
+
" │ ├── 978-728465924-5\n",
|
242 |
+
" │ ├── 978-728465294-5\n",
|
243 |
+
" │ ├── (9+7n+8+7n+2+8n+4+6n+5+2n+9+4n) mod 10 ≡ (10 - 5)\n",
|
244 |
+
" │ ├── When n = 7, (9+7n+8+7n+2+8n+4+6n+5+2n+9+4n) mod 10 ≡ 5, so n = 7 is still a potential solution.\n",
|
245 |
+
" │ ├── 28. \"Fix\" the columns in the final number and see if n = 7 is still a solution:\n",
|
246 |
+
" │ ├── 978-414825155-9\n",
|
247 |
+
" │ ├── 978-414825515-9\n",
|
248 |
+
" │ ├── (9+7n+8+4n+1+4n+8+2n+5+5n+1+5n) mod 10 ≡ (10 - 9)\n",
|
249 |
+
" │ ├── When n = 7, (9+7n+8+4n+1+4n+8+2n+5+5n+1+5n) mod 10 ≡ 1, so n = 7 is a consistent solution for all the numbers given. This means that (7, 9) is a solution to the problem.\n",
|
250 |
+
" │ ├── 29. As the problem asks for all possible solutions, we need to check to see if there is a valid solution for (n, 10) or columns 10 and 11 transposed under some weight n even though we found a solution already. It is possible the solution we found is not unique.\n",
|
251 |
+
" │ ├── 30. \"Fix\" the columns in the first number and see if any n from 1-9 can generate the proper check digit. Calculations:\n",
|
252 |
+
" │ ├── 978-354181391-9\n",
|
253 |
+
" │ ├── 978-354181319-9\n",
|
254 |
+
" │ ├── (9+7n+8+3n+5+4n+1+8n+1+3n+1+9n) mod 10 ≡ (10 - 9)\n",
|
255 |
+
" │ ├── n = 4 and n = 9 are both possible solutions to this modular equation.\n",
|
256 |
+
" │ ├── 31. \"Fix\" the columns in the second number and see if n = 4 and n = 9 are still solutions:\n",
|
257 |
+
" │ ├── 978-946669746-1\n",
|
258 |
+
" │ ├── 978-946669764-1\n",
|
259 |
+
" │ ├── (9+7n+8+9n+4+6n+6+6n+9+7n+6+4n) mod 10 ≡ (10 - 1)\n",
|
260 |
+
" │ ├── When n = 4, (9+7n+8+9n+4+6n+6+6n+9+7n+6+4n) mod 10 ≡ 8, so n cannot be 4. When n = 9, (9+7n+8+9n+4+6n+6+6n+9+7n+6+4n) mod 10 ≡ 3, so n cannot be 9. As neither solution found works for the second number, this fails. There is no consistent solution if columns 10 and 11 are transposed.\n",
|
261 |
+
" │ ├── 32. We checked all possible forms of the error and found only one potential solution, (7, 9) so this is our only answer.\n",
|
262 |
+
" ├── Number of steps: 32\n",
|
263 |
+
" ├── How long did this take?: 60 minutes\n",
|
264 |
" ├── Tools:\n",
|
265 |
+
" │ ├── 1. a calculator\n",
|
266 |
+
" └── Number of tools: 1\n",
|
|
|
|
|
267 |
"==================================================\n"
|
268 |
]
|
269 |
}
|
|
|
275 |
"id": "4bb02420",
|
276 |
"metadata": {
|
277 |
"ExecuteTime": {
|
278 |
+
"end_time": "2025-05-11T22:32:02.883392Z",
|
279 |
+
"start_time": "2025-05-11T22:31:53.514537Z"
|
280 |
}
|
281 |
},
|
282 |
"source": [
|
|
|
306 |
"id": "a070b955",
|
307 |
"metadata": {
|
308 |
"ExecuteTime": {
|
309 |
+
"end_time": "2025-05-11T22:32:13.758260Z",
|
310 |
+
"start_time": "2025-05-11T22:32:02.892510Z"
|
311 |
}
|
312 |
},
|
313 |
"source": [
|
|
|
318 |
" content = f\"Question : {sample['Question']}\\n\\nFinal answer : {sample['Final answer']}\"\n",
|
319 |
" doc = {\n",
|
320 |
" \"content\" : content,\n",
|
321 |
+
" \"metadata\" : {\n",
|
322 |
" \"source\" : sample['task_id']\n",
|
323 |
" },\n",
|
324 |
" \"embedding\" : embeddings.embed_query(content),\n",
|
|
|
331 |
{
|
332 |
"metadata": {
|
333 |
"ExecuteTime": {
|
334 |
+
"end_time": "2025-05-11T22:32:13.772166Z",
|
335 |
+
"start_time": "2025-05-11T22:32:13.768856Z"
|
336 |
}
|
337 |
},
|
338 |
"cell_type": "code",
|
|
|
355 |
{
|
356 |
"metadata": {
|
357 |
"ExecuteTime": {
|
358 |
+
"end_time": "2025-05-11T22:32:13.802455Z",
|
359 |
+
"start_time": "2025-05-11T22:32:13.795644Z"
|
360 |
}
|
361 |
},
|
362 |
"cell_type": "code",
|
|
|
1148 |
{
|
1149 |
"metadata": {
|
1150 |
"ExecuteTime": {
|
1151 |
+
"end_time": "2025-05-11T22:32:13.831822Z",
|
1152 |
+
"start_time": "2025-05-11T22:32:13.828604Z"
|
1153 |
}
|
1154 |
},
|
1155 |
"cell_type": "code",
|
|
|
1172 |
{
|
1173 |
"metadata": {
|
1174 |
"ExecuteTime": {
|
1175 |
+
"end_time": "2025-05-11T22:32:14.848233Z",
|
1176 |
+
"start_time": "2025-05-11T22:32:13.848364Z"
|
1177 |
}
|
1178 |
},
|
1179 |
"cell_type": "code",
|
|
|
1202 |
"id": "77fb9dbb",
|
1203 |
"metadata": {
|
1204 |
"ExecuteTime": {
|
1205 |
+
"end_time": "2025-05-11T22:32:14.866622Z",
|
1206 |
+
"start_time": "2025-05-11T22:32:14.864286Z"
|
1207 |
}
|
1208 |
},
|
1209 |
"source": [
|
|
|
1224 |
"id": "12a05971",
|
1225 |
"metadata": {
|
1226 |
"ExecuteTime": {
|
1227 |
+
"end_time": "2025-05-11T22:32:15.065714Z",
|
1228 |
+
"start_time": "2025-05-11T22:32:14.883051Z"
|
1229 |
}
|
1230 |
},
|
1231 |
"source": [
|
|
|
1238 |
{
|
1239 |
"metadata": {
|
1240 |
"ExecuteTime": {
|
1241 |
+
"end_time": "2025-05-11T22:32:15.175925Z",
|
1242 |
+
"start_time": "2025-05-11T22:32:15.084078Z"
|
1243 |
}
|
1244 |
},
|
1245 |
"cell_type": "code",
|
|
|
1251 |
{
|
1252 |
"metadata": {
|
1253 |
"ExecuteTime": {
|
1254 |
+
"end_time": "2025-05-11T22:32:15.197082Z",
|
1255 |
+
"start_time": "2025-05-11T22:32:15.193633Z"
|
1256 |
}
|
1257 |
},
|
1258 |
"cell_type": "code",
|
|
|
1277 |
"id": "1eae5ba4",
|
1278 |
"metadata": {
|
1279 |
"ExecuteTime": {
|
1280 |
+
"end_time": "2025-05-11T22:32:15.217985Z",
|
1281 |
+
"start_time": "2025-05-11T22:32:15.214251Z"
|
1282 |
}
|
1283 |
},
|
1284 |
"source": [
|
|
|
1398 |
"id": "7fe573cc",
|
1399 |
"metadata": {
|
1400 |
"ExecuteTime": {
|
1401 |
+
"end_time": "2025-05-11T22:32:15.238044Z",
|
1402 |
+
"start_time": "2025-05-11T22:32:15.235623Z"
|
1403 |
}
|
1404 |
},
|
1405 |
"source": [
|
1406 |
+
"# system_prompt = \"\"\"\n",
|
1407 |
+
"# You are a helpful assistant tasked with answering questions using a set of tools.\n",
|
1408 |
+
"# If the tool is not available, you can try to find the information online. You can also use your own knowledge to answer the question.\n",
|
1409 |
+
"# You need to provide a step-by-step explanation of how you arrived at the answer.\n",
|
1410 |
+
"# ==========================\n",
|
1411 |
+
"# Here is a few examples showing you how to answer the question step by step.\n",
|
1412 |
+
"# \"\"\"\n",
|
1413 |
+
"# for i, samples in enumerate(random_samples):\n",
|
1414 |
+
"# system_prompt += f\"\\nQuestion {i+1}: {samples['Question']}\\nSteps:\\n{samples['Annotator Metadata']['Steps']}\\nTools:\\n{samples['Annotator Metadata']['Tools']}\\nFinal Answer: {samples['Final answer']}\\n\"\n",
|
1415 |
+
"# system_prompt += \"\\n==========================\\n\"\n",
|
1416 |
+
"# system_prompt += \"Now, please answer the following question step by step.\\n\"\n",
|
1417 |
"\n",
|
1418 |
"# save the system_prompt to a file\n",
|
1419 |
+
"# with open('system_prompt.txt', 'w') as f:\n",
|
1420 |
+
"# f.write(system_prompt)"
|
1421 |
],
|
1422 |
"outputs": [],
|
1423 |
"execution_count": 15
|
|
|
1427 |
"id": "d6beb0da",
|
1428 |
"metadata": {
|
1429 |
"ExecuteTime": {
|
1430 |
+
"end_time": "2025-05-11T22:32:15.264271Z",
|
1431 |
+
"start_time": "2025-05-11T22:32:15.261507Z"
|
1432 |
}
|
1433 |
},
|
1434 |
"source": [
|
1435 |
"# load the system prompt from the file\n",
|
1436 |
"with open('system_prompt.txt', 'r') as f:\n",
|
1437 |
" system_prompt = f.read()\n",
|
1438 |
+
"\n",
|
1439 |
"print(system_prompt)"
|
1440 |
],
|
1441 |
"outputs": [
|
|
|
1443 |
"name": "stdout",
|
1444 |
"output_type": "stream",
|
1445 |
"text": [
|
|
|
1446 |
"You are a helpful assistant tasked with answering questions using a set of tools.\n",
|
1447 |
+
"Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:\n",
|
1448 |
+
"FINAL ANSWER: [YOUR FINAL ANSWER].\n",
|
1449 |
+
"YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n",
|
1450 |
+
"Your answer should only start with \"FINAL ANSWER: \", then follows with the answer.\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1451 |
"\n"
|
1452 |
]
|
1453 |
}
|
|
|
1459 |
"id": "42fde0f8",
|
1460 |
"metadata": {
|
1461 |
"ExecuteTime": {
|
1462 |
+
"end_time": "2025-05-11T22:32:17.905736Z",
|
1463 |
+
"start_time": "2025-05-11T22:32:15.281441Z"
|
1464 |
}
|
1465 |
},
|
1466 |
"source": [
|
|
|
1623 |
"llm_with_tools = llm.bind_tools(tools)"
|
1624 |
],
|
1625 |
"outputs": [],
|
1626 |
+
"execution_count": 17
|
1627 |
},
|
1628 |
{
|
1629 |
"cell_type": "code",
|
1630 |
"id": "7dd0716c",
|
1631 |
"metadata": {
|
1632 |
"ExecuteTime": {
|
1633 |
+
"end_time": "2025-05-11T22:32:17.928131Z",
|
1634 |
+
"start_time": "2025-05-11T22:32:17.922895Z"
|
1635 |
}
|
1636 |
},
|
1637 |
"source": [
|
|
|
1665 |
"graph = builder.compile()\n"
|
1666 |
],
|
1667 |
"outputs": [],
|
1668 |
+
"execution_count": 18
|
1669 |
},
|
1670 |
{
|
1671 |
"cell_type": "code",
|
1672 |
"id": "f4e77216",
|
1673 |
"metadata": {
|
1674 |
"ExecuteTime": {
|
1675 |
+
"end_time": "2025-05-11T22:32:18.015105Z",
|
1676 |
+
"start_time": "2025-05-11T22:32:17.944640Z"
|
1677 |
}
|
1678 |
},
|
1679 |
"source": [
|
|
|
1693 |
"output_type": "display_data"
|
1694 |
}
|
1695 |
],
|
1696 |
+
"execution_count": 19
|
1697 |
},
|
1698 |
{
|
1699 |
"cell_type": "code",
|
1700 |
"id": "5987d58c",
|
1701 |
"metadata": {
|
1702 |
"ExecuteTime": {
|
1703 |
+
"end_time": "2025-05-11T22:32:44.229021Z",
|
1704 |
+
"start_time": "2025-05-11T22:32:18.034081Z"
|
1705 |
}
|
1706 |
},
|
1707 |
"source": [
|
|
|
1710 |
"messages = graph.invoke({\"messages\": messages})"
|
1711 |
],
|
1712 |
"outputs": [],
|
1713 |
+
"execution_count": 20
|
1714 |
},
|
1715 |
{
|
1716 |
"cell_type": "code",
|
1717 |
"id": "330cbf17",
|
1718 |
"metadata": {
|
1719 |
"ExecuteTime": {
|
1720 |
+
"end_time": "2025-05-11T22:32:44.258938Z",
|
1721 |
+
"start_time": "2025-05-11T22:32:44.256208Z"
|
1722 |
}
|
1723 |
},
|
1724 |
"source": [
|
|
|
1734 |
"\n",
|
1735 |
"A paper about AI regulation that was originally submitted to arXiv.org in June 2022 shows a figure with three axes, where each axis has a label word at both ends. Which of these words is used to describe a type of society in a Physics and Society article submitted to arXiv.org on August 11, 2016?\n",
|
1736 |
"==================================\u001B[1m Ai Message \u001B[0m==================================\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1737 |
"Tool Calls:\n",
|
1738 |
+
" arvix_search (a22efac9-6cf1-453f-8da9-c096e9a7213f)\n",
|
1739 |
+
" Call ID: a22efac9-6cf1-453f-8da9-c096e9a7213f\n",
|
1740 |
" Args:\n",
|
1741 |
+
" query: AI regulation submitted June 2022\n",
|
1742 |
+
" arvix_search (68c710be-eab1-445c-91c7-7df201a21015)\n",
|
1743 |
+
" Call ID: 68c710be-eab1-445c-91c7-7df201a21015\n",
|
1744 |
+
" Args:\n",
|
1745 |
+
" query: Physics and Society article August 11, 2016\n",
|
1746 |
+
"=================================\u001B[1m Tool Message \u001B[0m=================================\n",
|
1747 |
+
"Name: arvix_search\n",
|
1748 |
+
"\n",
|
1749 |
+
"Error: KeyError('source')\n",
|
1750 |
+
" Please fix your mistakes.\n",
|
1751 |
"=================================\u001B[1m Tool Message \u001B[0m=================================\n",
|
1752 |
"Name: arvix_search\n",
|
1753 |
"\n",
|
|
|
1755 |
" Please fix your mistakes.\n",
|
1756 |
"==================================\u001B[1m Ai Message \u001B[0m==================================\n",
|
1757 |
"\n",
|
1758 |
+
"I am sorry, I cannot fulfill this request. I am unable to retrieve information from arXiv.org.\n",
|
1759 |
+
"FINAL ANSWER: I am unable to answer.\n"
|
1760 |
]
|
1761 |
}
|
1762 |
],
|
1763 |
+
"execution_count": 21
|
1764 |
},
|
1765 |
{
|
1766 |
+
"metadata": {
|
1767 |
+
"ExecuteTime": {
|
1768 |
+
"end_time": "2025-05-11T22:32:44.299624Z",
|
1769 |
+
"start_time": "2025-05-11T22:32:44.297893Z"
|
1770 |
+
}
|
1771 |
+
},
|
1772 |
"cell_type": "code",
|
|
|
|
|
1773 |
"source": "",
|
1774 |
+
"id": "837ae65dab259cd6",
|
1775 |
+
"outputs": [],
|
1776 |
+
"execution_count": null
|
1777 |
}
|
1778 |
],
|
1779 |
"metadata": {
|