anderson-ufrj committed on
Commit
e9b5c2a
·
1 Parent(s): a11fbc5

feat(tools): create dados.gov.br tool for agent integration

Browse files

- Implement DadosGovTool class extending BaseTool
- Support multiple actions: search, analyze, get_dataset, find_spending, find_procurement
- Format responses for agent consumption
- Include usage instructions and examples

Files changed (1) hide show
  1. src/tools/dados_gov_tool.py +332 -0
src/tools/dados_gov_tool.py ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dados.gov.br tool for agent usage.
3
+
4
+ This tool allows agents to search and analyze data from the Brazilian
5
+ Open Data Portal (dados.gov.br) to enhance their investigations.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ from typing import Any, Dict, List, Optional, Union
11
+
12
+ from src.services.dados_gov_service import DadosGovService
13
+ from src.tools.base import BaseTool, ToolResult
14
+ from src.tools.dados_gov_api import DadosGovAPIError
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class DadosGovTool(BaseTool):
    """
    Tool for accessing the Brazilian Open Data Portal (dados.gov.br).

    This tool enables agents to search for government datasets,
    analyze data availability, and find specific types of public data.
    All remote work is delegated to a ``DadosGovService`` instance; this
    class only dispatches on the requested action and reshapes the
    service's results into plain dicts/lists wrapped in a ``ToolResult``.
    """

    name = "dados_gov_search"
    description = (
        "Search and analyze Brazilian government open data from dados.gov.br. "
        "Use this to find datasets about government spending, contracts, "
        "education, health, and other public data."
    )

    def __init__(self):
        """Initialize the tool and create the service it delegates to."""
        super().__init__()
        self.service = DadosGovService()

    async def _execute(
        self,
        query: Optional[str] = None,
        action: str = "search",
        topic: Optional[str] = None,
        organization: Optional[str] = None,
        dataset_id: Optional[str] = None,
        year: Optional[int] = None,
        state: Optional[str] = None,
        limit: int = 10,
        **kwargs
    ) -> ToolResult:
        """
        Execute a dados.gov.br operation selected by ``action``.

        Args:
            query: Comma-separated search keywords (used by ``search``).
            action: One of ``search``, ``analyze``, ``get_dataset``,
                ``find_spending`` or ``find_procurement``.
            topic: Topic to analyze, e.g. "educação" (required by ``analyze``).
            organization: Organization filter (``search``, ``find_procurement``).
            dataset_id: Dataset identifier (required by ``get_dataset``).
            year: Year filter (``find_spending``).
            state: State filter (``find_spending``).
            limit: Maximum number of datasets to return.
            **kwargs: Accepted for call compatibility; not used here.

        Returns:
            A ``ToolResult``. Missing required arguments, unknown actions,
            ``DadosGovAPIError`` and any other exception raised while
            handling the action are all reported as ``success=False`` with
            an ``error`` message instead of being raised.
        """
        try:
            if action == "search":
                return await self._search_datasets(
                    query=query,
                    organization=organization,
                    limit=limit,
                )

            elif action == "analyze":
                if not topic:
                    return ToolResult(
                        success=False,
                        error="Topic is required for analysis action",
                    )
                return await self._analyze_topic(topic)

            elif action == "get_dataset":
                if not dataset_id:
                    return ToolResult(
                        success=False,
                        error="Dataset ID is required for get_dataset action",
                    )
                return await self._get_dataset(dataset_id)

            elif action == "find_spending":
                return await self._find_spending_data(
                    year=year,
                    state=state,
                    limit=limit,
                )

            elif action == "find_procurement":
                return await self._find_procurement_data(
                    organization=organization,
                    limit=limit,
                )

            else:
                return ToolResult(
                    success=False,
                    error=f"Unknown action: {action}. Valid actions are: search, analyze, get_dataset, find_spending, find_procurement",
                )

        except DadosGovAPIError as e:
            logger.error("dados.gov.br API error: %s", e)
            return ToolResult(
                success=False,
                error=f"API error: {str(e)}",
            )
        except Exception as e:
            # Boundary handler: keep the traceback in the log, because the
            # caller only ever sees the stringified message.
            logger.exception("Unexpected error in dados.gov.br tool: %s", e)
            return ToolResult(
                success=False,
                error=f"Unexpected error: {str(e)}",
            )
        finally:
            # NOTE(review): the service created in __init__ is closed after
            # every call. If DadosGovService.close() is terminal (e.g. it
            # shuts down an HTTP client), this tool instance cannot serve a
            # second call -- confirm against the service implementation.
            await self.service.close()

    @staticmethod
    def _shorten(text: Optional[str], max_chars: int = 200) -> Optional[str]:
        """Return ``text`` cut to ``max_chars`` plus "..." when it is longer.

        Empty or ``None`` text is returned unchanged.
        """
        if text and len(text) > max_chars:
            return text[:max_chars] + "..."
        return text

    @staticmethod
    def _organization_title(dataset: Any) -> str:
        """Return the dataset's organization title, or "Unknown" if unset."""
        return dataset.organization.title if dataset.organization else "Unknown"

    def _summarize_datasets(
        self,
        datasets: Any,
        limit: Optional[int],
    ) -> List[Dict[str, Any]]:
        """Build the compact per-dataset summaries used by the find_* actions.

        At most ``limit`` datasets are summarized. A negative ``limit`` is
        treated as zero (a raw negative slice bound would instead drop items
        from the end); ``None`` means no cap.
        """
        shown = datasets if limit is None else datasets[: max(limit, 0)]
        summaries = []
        for dataset in shown:
            resources = dataset.resources or []
            summaries.append({
                "id": dataset.id,
                "title": dataset.title,
                "organization": self._organization_title(dataset),
                "description": self._shorten(dataset.notes),
                "resources_count": len(resources),
                # Sorted so the same dataset always yields the same output.
                "formats": sorted({res.format for res in resources if res.format}),
            })
        return summaries

    async def _search_datasets(
        self,
        query: Optional[str],
        organization: Optional[str],
        limit: int,
    ) -> ToolResult:
        """Search for datasets.

        ``query`` is split on commas into stripped, non-empty keywords; when
        none remain, ``None`` is passed to the service as the keyword filter.
        Each hit is reduced to id, title, organization, a shortened
        description, its first three resources and its tag names.
        """
        keywords = []
        if query:
            # Split query into keywords
            keywords = [k.strip() for k in query.split(",") if k.strip()]

        result = await self.service.search_transparency_datasets(
            keywords=keywords or None,
            organization=organization,
            limit=limit,
        )

        # Format results for agent consumption
        datasets = [
            {
                "id": dataset.id,
                "title": dataset.title,
                "organization": self._organization_title(dataset),
                "description": self._shorten(dataset.notes),
                "resources": [
                    {
                        "name": res.name,
                        "format": res.format,
                        "url": res.url,
                    }
                    # Limit to first 3 resources
                    for res in (dataset.resources or [])[:3]
                ],
                "tags": [tag.name for tag in (dataset.tags or [])],
            }
            for dataset in result.results
        ]

        return ToolResult(
            success=True,
            data={
                "total_results": result.count,
                "datasets": datasets,
                "query": query,
                "organization": organization,
            },
        )

    async def _analyze_topic(self, topic: str) -> ToolResult:
        """Analyze data availability for a topic.

        Condenses the mapping returned by the service's
        ``analyze_data_availability``. A key missing from that mapping
        raises ``KeyError`` here.
        """
        analysis = await self.service.analyze_data_availability(topic)
        coverage = analysis["geographic_coverage"]

        # Summarize key findings
        summary = {
            "topic": topic,
            "total_datasets": analysis["total_datasets"],
            # First five entries in the mapping's own iteration order.
            "top_organizations": dict(list(analysis["organizations"].items())[:5]),
            "available_formats": list(analysis["formats"].keys()),
            "years_covered": analysis["years_covered"],
            "coverage": {
                "federal": f"{coverage['federal']} datasets",
                "state": f"{coverage['state']} datasets",
                "municipal": f"{coverage['municipal']} datasets",
            },
            "update_frequency": analysis["update_frequency"],
        }

        return ToolResult(
            success=True,
            data=summary,
        )

    async def _get_dataset(self, dataset_id: str) -> ToolResult:
        """Get detailed dataset information, including every resource.

        Unlike the list-style actions, the description is not shortened and
        all resources are included. Timestamps are emitted as ISO-8601
        strings, or ``None`` when unset.
        """
        dataset = await self.service.get_dataset_with_resources(dataset_id)

        def iso(moment: Any) -> Optional[str]:
            """Return ``moment.isoformat()``, or ``None`` for a falsy value."""
            return moment.isoformat() if moment else None

        # Format dataset details
        details = {
            "id": dataset.id,
            "title": dataset.title,
            "organization": self._organization_title(dataset),
            "description": dataset.notes,
            "license": dataset.license_id,
            "author": dataset.author,
            "maintainer": dataset.maintainer,
            "created": iso(dataset.metadata_created),
            "modified": iso(dataset.metadata_modified),
            "resources": [
                {
                    "id": res.id,
                    "name": res.name,
                    "description": res.description,
                    "format": res.format,
                    "url": res.url,
                    "size": res.size,
                    "last_modified": iso(res.last_modified),
                }
                for res in (dataset.resources or [])
            ],
            "tags": [tag.name for tag in (dataset.tags or [])],
        }

        return ToolResult(
            success=True,
            data=details,
        )

    async def _find_spending_data(
        self,
        year: Optional[int],
        state: Optional[str],
        limit: int,
    ) -> ToolResult:
        """Find government spending datasets.

        ``total_found`` counts everything the service returned; ``datasets``
        holds summaries of at most ``limit`` of them.
        """
        datasets = await self.service.find_government_spending_data(
            year=year,
            state=state,
        )

        return ToolResult(
            success=True,
            data={
                "total_found": len(datasets),
                "datasets": self._summarize_datasets(datasets, limit),
                "filters": {
                    "year": year,
                    "state": state,
                },
            },
        )

    async def _find_procurement_data(
        self,
        organization: Optional[str],
        limit: int,
    ) -> ToolResult:
        """Find procurement/contract datasets.

        ``total_found`` counts everything the service returned; ``datasets``
        holds summaries of at most ``limit`` of them.
        """
        datasets = await self.service.find_procurement_data(
            organization=organization,
        )

        return ToolResult(
            success=True,
            data={
                "total_found": len(datasets),
                "datasets": self._summarize_datasets(datasets, limit),
                "organization_filter": organization,
            },
        )

    def get_usage_instructions(self) -> str:
        """Get usage instructions for agents as a plain-text guide."""
        return """
        Dados.gov.br Tool Usage:

        1. Search datasets:
           - action: "search"
           - query: "educação básica, censo escolar"
           - organization: "inep" (optional)
           - limit: 10

        2. Analyze topic availability:
           - action: "analyze"
           - topic: "saúde"

        3. Get dataset details:
           - action: "get_dataset"
           - dataset_id: "dataset-uuid-here"

        4. Find government spending data:
           - action: "find_spending"
           - year: 2023 (optional)
           - state: "SP" (optional)

        5. Find procurement/contract data:
           - action: "find_procurement"
           - organization: "ministério-da-saúde" (optional)

        Examples:
        - To find education datasets: {"action": "search", "query": "educação"}
        - To analyze health data availability: {"action": "analyze", "topic": "saúde"}
        - To find 2023 spending in São Paulo: {"action": "find_spending", "year": 2023, "state": "SP"}
        """