""" Module: api.routes.reports Description: Report generation endpoints for creating natural language reports Author: Anderson H. Silva Date: 2025-01-24 License: Proprietary - All rights reserved """ import asyncio from datetime import datetime from typing import Dict, List, Optional, Any from uuid import uuid4 from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks, Query, Response from fastapi.responses import HTMLResponse, FileResponse from pydantic import BaseModel, Field as PydanticField, validator from src.core import json_utils from src.core import get_logger from src.agents.tiradentes import ReporterAgent from src.agents import AgentContext from src.api.middleware.authentication import get_current_user logger = get_logger(__name__) router = APIRouter() class ReportRequest(BaseModel): """Request model for report generation.""" report_type: str = PydanticField(description="Type of report to generate") title: str = PydanticField(description="Report title") data_sources: List[str] = PydanticField(description="Data sources to include") investigation_ids: List[str] = PydanticField(default=[], description="Investigation IDs to include") analysis_ids: List[str] = PydanticField(default=[], description="Analysis IDs to include") time_range: Dict[str, str] = PydanticField(description="Time range for the report") output_format: str = PydanticField(default="markdown", description="Output format") include_visualizations: bool = PydanticField(default=True, description="Include charts and graphs") include_raw_data: bool = PydanticField(default=False, description="Include raw data appendix") target_audience: str = PydanticField(default="general", description="Target audience") @validator('report_type') def validate_report_type(cls, v): """Validate report type.""" allowed_types = [ 'executive_summary', 'detailed_analysis', 'investigation_report', 'transparency_dashboard', 'comparative_analysis', 'audit_report' ] if v not in allowed_types: raise ValueError(f'Report type must be one of: {allowed_types}') return v @validator('output_format') def validate_output_format(cls, v): """Validate output format.""" allowed_formats = ['markdown', 'html', 'json', 'pdf'] if v not in allowed_formats: raise ValueError(f'Output format must be one of: {allowed_formats}') return v @validator('target_audience') def validate_target_audience(cls, v): """Validate target audience.""" allowed_audiences = ['general', 'technical', 'executive', 'journalist', 'researcher'] if v not in allowed_audiences: raise ValueError(f'Target audience must be one of: {allowed_audiences}') return v class ReportResponse(BaseModel): """Response model for generated reports.""" report_id: str title: str report_type: str output_format: str generated_at: datetime word_count: int status: str content: str metadata: Dict[str, Any] download_url: Optional[str] = None class ReportStatus(BaseModel): """Report generation status.""" report_id: str status: str progress: float current_phase: str estimated_completion: Optional[datetime] = None error_message: Optional[str] = None # In-memory storage for report tracking _active_reports: Dict[str, Dict[str, Any]] = {} @router.post("/generate", response_model=Dict[str, str]) async def generate_report( request: ReportRequest, background_tasks: BackgroundTasks, current_user: Dict[str, Any] = Depends(get_current_user) ): """ Generate a new report. Creates and queues a report generation task that will create natural language reports from investigations and analyses. """ report_id = str(uuid4()) # Store report metadata _active_reports[report_id] = { "id": report_id, "status": "started", "title": request.title, "report_type": request.report_type, "output_format": request.output_format, "target_audience": request.target_audience, "data_sources": request.data_sources, "investigation_ids": request.investigation_ids, "analysis_ids": request.analysis_ids, "time_range": request.time_range, "user_id": current_user.get("user_id"), "started_at": datetime.utcnow(), "progress": 0.0, "current_phase": "initializing", "content": "", "metadata": {}, "word_count": 0, } # Start report generation in background background_tasks.add_task( _generate_report, report_id, request ) logger.info( "report_generation_started", report_id=report_id, report_type=request.report_type, title=request.title, user_id=current_user.get("user_id"), ) return { "report_id": report_id, "status": "started", "message": "Report generation queued for processing" } @router.get("/templates", response_model=List[Dict[str, Any]]) async def get_report_templates(): """ Get available report templates. Returns a list of predefined report templates with descriptions and required parameters. """ templates = [ { "type": "executive_summary", "name": "Relatório Executivo", "description": "Resumo executivo com principais achados e recomendações", "target_audience": "executive", "sections": ["resumo", "principais_achados", "recomendacoes", "proximos_passos"], "estimated_pages": "2-4", }, { "type": "detailed_analysis", "name": "Análise Detalhada", "description": "Relatório técnico com análise aprofundada de dados", "target_audience": "technical", "sections": ["metodologia", "analise_dados", "descobertas", "conclusoes", "anexos"], "estimated_pages": "10-20", }, { "type": "investigation_report", "name": "Relatório de Investigação", "description": "Relatório focado em anomalias e irregularidades encontradas", "target_audience": "journalist", "sections": ["contexto", "metodologia", "anomalias", "evidencias", "recomendacoes"], "estimated_pages": "5-15", }, { "type": "transparency_dashboard", "name": "Dashboard de Transparência", "description": "Visão geral interativa dos dados de transparência", "target_audience": "general", "sections": ["metricas_principais", "graficos", "tendencias", "destaques"], "estimated_pages": "1-3", }, { "type": "comparative_analysis", "name": "Análise Comparativa", "description": "Comparação entre diferentes períodos ou organizações", "target_audience": "researcher", "sections": ["baseline", "comparacao", "diferencas", "fatores", "insights"], "estimated_pages": "8-12", }, { "type": "audit_report", "name": "Relatório de Auditoria", "description": "Relatório formal para auditores e órgãos de controle", "target_audience": "technical", "sections": ["escopo", "metodologia", "achados", "riscos", "recomendacoes", "resposta_gestao"], "estimated_pages": "15-30", } ] return templates @router.get("/{report_id}/status", response_model=ReportStatus) async def get_report_status( report_id: str, current_user: Dict[str, Any] = Depends(get_current_user) ): """ Get the current status of a report generation. Returns progress information and current phase. """ if report_id not in _active_reports: raise HTTPException(status_code=404, detail="Report not found") report = _active_reports[report_id] # Check user authorization if report["user_id"] != current_user.get("user_id"): raise HTTPException(status_code=403, detail="Access denied") return ReportStatus( report_id=report_id, status=report["status"], progress=report["progress"], current_phase=report["current_phase"], estimated_completion=report.get("estimated_completion"), error_message=report.get("error_message"), ) @router.get("/{report_id}", response_model=ReportResponse) async def get_report( report_id: str, current_user: Dict[str, Any] = Depends(get_current_user) ): """ Get a generated report. Returns the complete report content and metadata. """ if report_id not in _active_reports: raise HTTPException(status_code=404, detail="Report not found") report = _active_reports[report_id] # Check user authorization if report["user_id"] != current_user.get("user_id"): raise HTTPException(status_code=403, detail="Access denied") if report["status"] not in ["completed", "failed"]: raise HTTPException(status_code=409, detail="Report not yet completed") return ReportResponse( report_id=report_id, title=report["title"], report_type=report["report_type"], output_format=report["output_format"], generated_at=report.get("completed_at", report["started_at"]), word_count=report["word_count"], status=report["status"], content=report["content"], metadata=report["metadata"], download_url=f"/api/v1/reports/{report_id}/download" if report["status"] == "completed" else None ) @router.get("/{report_id}/download") async def download_report( report_id: str, format: str = Query("html", description="Download format"), current_user: Dict[str, Any] = Depends(get_current_user) ): """ Download a report in the specified format. Returns the report as a downloadable file. """ if report_id not in _active_reports: raise HTTPException(status_code=404, detail="Report not found") report = _active_reports[report_id] # Check user authorization if report["user_id"] != current_user.get("user_id"): raise HTTPException(status_code=403, detail="Access denied") if report["status"] != "completed": raise HTTPException(status_code=409, detail="Report not yet completed") content = report["content"] title = report["title"].replace(" ", "_") if format == "html": # Convert markdown to HTML if needed if report["output_format"] == "markdown": # TODO: Implement markdown to HTML conversion html_content = f"

{report['title']}

{content}
" else: html_content = content return HTMLResponse( content=html_content, headers={ "Content-Disposition": f"attachment; filename={title}.html" } ) elif format == "markdown": return Response( content=content, media_type="text/markdown", headers={ "Content-Disposition": f"attachment; filename={title}.md" } ) elif format == "json": json_content = { "report": report, "content": content, "metadata": report["metadata"] } return Response( content=json_utils.dumps(json_content, indent=2, ensure_ascii=False), media_type="application/json", headers={ "Content-Disposition": f"attachment; filename={title}.json" } ) elif format == "pdf": # Check if content is base64 encoded PDF import base64 try: # If content is already a base64 PDF, decode it if report["output_format"] == "pdf": pdf_bytes = base64.b64decode(content) else: # Convert markdown/html content to PDF from src.services.export_service import export_service pdf_bytes = await export_service.generate_pdf( content=content, title=report["title"], metadata=report["metadata"], format_type="report" ) return Response( content=pdf_bytes, media_type="application/pdf", headers={ "Content-Disposition": f"attachment; filename={title}.pdf" } ) except Exception as e: logger.error("pdf_download_error", error=str(e), report_id=report_id) raise HTTPException(status_code=500, detail="Failed to generate PDF") else: raise HTTPException(status_code=400, detail="Unsupported format") @router.get("/", response_model=List[Dict[str, Any]]) async def list_reports( report_type: Optional[str] = Query(None, description="Filter by report type"), status: Optional[str] = Query(None, description="Filter by status"), limit: int = Query(10, ge=1, le=100, description="Number of reports to return"), current_user: Dict[str, Any] = Depends(get_current_user) ): """ List user's reports. Returns a list of reports owned by the current user. """ user_id = current_user.get("user_id") # Filter reports by user user_reports = [ report for report in _active_reports.values() if report["user_id"] == user_id ] # Filter by report type if provided if report_type: user_reports = [report for report in user_reports if report["report_type"] == report_type] # Filter by status if provided if status: user_reports = [report for report in user_reports if report["status"] == status] # Sort by start time (newest first) user_reports.sort(key=lambda x: x["started_at"], reverse=True) # Apply limit user_reports = user_reports[:limit] return [ { "report_id": report["id"], "title": report["title"], "report_type": report["report_type"], "output_format": report["output_format"], "status": report["status"], "progress": report["progress"], "word_count": report["word_count"], "started_at": report["started_at"], "completed_at": report.get("completed_at"), } for report in user_reports ] @router.delete("/{report_id}") async def delete_report( report_id: str, current_user: Dict[str, Any] = Depends(get_current_user) ): """ Delete a report. Removes the report from storage. """ if report_id not in _active_reports: raise HTTPException(status_code=404, detail="Report not found") report = _active_reports[report_id] # Check user authorization if report["user_id"] != current_user.get("user_id"): raise HTTPException(status_code=403, detail="Access denied") # Remove report del _active_reports[report_id] logger.info( "report_deleted", report_id=report_id, user_id=current_user.get("user_id"), ) return {"message": "Report deleted successfully"} async def _generate_report(report_id: str, request: ReportRequest): """ Generate the report in the background. This function runs the actual report generation using ReporterAgent. """ report = _active_reports[report_id] try: # Update status report["status"] = "running" report["current_phase"] = "data_collection" report["progress"] = 0.1 # Create agent context context = AgentContext( conversation_id=report_id, user_id=report["user_id"], session_data={"report_type": request.report_type} ) # Initialize ReporterAgent reporter = ReporterAgent() report["current_phase"] = "content_generation" report["progress"] = 0.3 # Create report request for Tiradentes from src.agents.tiradentes import ReportRequest as TiradentesReportRequest, ReportType, ReportFormat # Map report type report_type_map = { "executive_summary": ReportType.EXECUTIVE_SUMMARY, "detailed_analysis": ReportType.ANALYSIS_REPORT, "investigation_report": ReportType.INVESTIGATION_REPORT, "transparency_dashboard": ReportType.COMBINED_REPORT, "comparative_analysis": ReportType.TREND_ANALYSIS, "audit_report": ReportType.INVESTIGATION_REPORT, } # Map format format_map = { "markdown": ReportFormat.MARKDOWN, "html": ReportFormat.HTML, "json": ReportFormat.JSON, "pdf": ReportFormat.PDF, } tiradentes_request = TiradentesReportRequest( report_type=report_type_map.get(request.report_type, ReportType.INVESTIGATION_REPORT), format=format_map.get(request.output_format, ReportFormat.MARKDOWN), target_audience=request.target_audience, language="pt-BR", ) # Process with Tiradentes from src.agents import AgentMessage message = AgentMessage( agent_id=reporter.agent_id, content={ "request": tiradentes_request, "investigation_ids": request.investigation_ids, "analysis_ids": request.analysis_ids, "data_sources": request.data_sources, "time_range": request.time_range, }, requires_response=True ) result = await reporter.process(message, context) content = result.data.get("report_content", "") report["current_phase"] = "formatting" report["progress"] = 0.7 # Content is already formatted by Tiradentes based on the format requested formatted_content = content report["current_phase"] = "finalization" report["progress"] = 0.9 # Calculate word count word_count = len(formatted_content.split()) # Generate metadata metadata = { "sections_generated": content.count("#"), "data_sources_used": len(request.data_sources), "investigations_included": len(request.investigation_ids), "analyses_included": len(request.analysis_ids), "target_audience": request.target_audience, "generation_method": "ai_assisted", } # Store final results report["content"] = formatted_content report["word_count"] = word_count report["metadata"] = metadata # Mark as completed report["status"] = "completed" report["completed_at"] = datetime.utcnow() report["progress"] = 1.0 report["current_phase"] = "completed" logger.info( "report_generated", report_id=report_id, report_type=request.report_type, word_count=word_count, ) except Exception as e: logger.error( "report_generation_failed", report_id=report_id, error=str(e), ) report["status"] = "failed" report["completed_at"] = datetime.utcnow() report["current_phase"] = "failed" report["error_message"] = str(e)