Spaces:

rm-lht
/

lightrag

Configuration error

App Files Files Community

yangdx commited on Mar 31

Commit

028ff3b

1 Parent(s): ba7b5d4

feat(api): Add Pydantic models for all endpoints in document_routes.py

Browse files

Files changed (1) hide show

lightrag/api/routers/document_routes.py +221 -35

lightrag/api/routers/document_routes.py CHANGED Viewed

@@ -10,7 +10,7 @@ import traceback
 import pipmaster as pm
 from datetime import datetime
 from pathlib import Path
-from typing import Dict, List, Optional, Any
 from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile
 from pydantic import BaseModel, Field, field_validator
@@ -30,7 +30,37 @@ router = APIRouter(
 temp_prefix = "__tmp__"
 class InsertTextRequest(BaseModel):
     text: str = Field(
         min_length=1,
         description="The text to insert",
@@ -41,8 +71,21 @@ class InsertTextRequest(BaseModel):
     def strip_after(cls, text: str) -> str:
         return text.strip()
 class InsertTextsRequest(BaseModel):
     texts: list[str] = Field(
         min_length=1,
         description="The texts to insert",
@@ -53,30 +96,116 @@ class InsertTextsRequest(BaseModel):
     def strip_after(cls, texts: list[str]) -> list[str]:
         return [text.strip() for text in texts]
 class InsertResponse(BaseModel):
-    status: str = Field(description="Status of the operation")
     message: str = Field(description="Message describing the operation result")
 class ClearDocumentsResponse(BaseModel):
-    status: str = Field(
-        description="Status of the clear operation: success/partial_success/busy/fail"
     )
     message: str = Field(description="Message describing the operation result")
 class ClearCacheRequest(BaseModel):
-    modes: Optional[List[str]] = Field(
         default=None,
-        description="Modes of cache to clear. Options: ['default', 'naive', 'local', 'global', 'hybrid', 'mix']. If None, clears all cache.",
     )
 class ClearCacheResponse(BaseModel):
-    status: str = Field(description="Status of the clear operation: success/fail")
     message: str = Field(description="Message describing the operation result")
 class DocStatusResponse(BaseModel):
     @staticmethod
@@ -87,34 +216,82 @@ class DocStatusResponse(BaseModel):
             return dt
         return dt.isoformat()
-    """Response model for document status
     Attributes:
-        id: Document identifier
-        content_summary: Summary of document content
-        content_length: Length of document content
-        status: Current processing status
-        created_at: Creation timestamp (ISO format string)
-        updated_at: Last update timestamp (ISO format string)
-        chunks_count: Number of chunks (optional)
-        error: Error message if any (optional)
-        metadata: Additional metadata (optional)
     """
-    id: str
-    content_summary: str
-    content_length: int
-    status: DocStatus
-    created_at: str
-    updated_at: str
-    chunks_count: Optional[int] = None
-    error: Optional[str] = None
-    metadata: Optional[dict[str, Any]] = None
-    file_path: str
-class DocsStatusesResponse(BaseModel):
-    statuses: Dict[DocStatus, List[DocStatusResponse]] = {}
 class PipelineStatusResponse(BaseModel):
@@ -529,7 +706,9 @@ def create_document_routes(
     # Create combined auth dependency for document routes
     combined_auth = get_combined_auth_dependency(api_key)
-    @router.post("/scan", dependencies=[Depends(combined_auth)])
     async def scan_for_new_documents(background_tasks: BackgroundTasks):
         """
         Trigger the scanning process for new documents.
@@ -539,13 +718,18 @@ def create_document_routes(
         that fact.
         Returns:
-            dict: A dictionary containing the scanning status
         """
         # Start the scanning process in the background
         background_tasks.add_task(run_scanning_process, rag, doc_manager)
-        return {"status": "scanning_started"}
-    @router.post("/upload", dependencies=[Depends(combined_auth)])
     async def upload_to_input_dir(
         background_tasks: BackgroundTasks, file: UploadFile = File(...)
     ):
@@ -1016,7 +1200,9 @@ def create_document_routes(
             logger.error(traceback.format_exc())
             raise HTTPException(status_code=500, detail=str(e))
-    @router.get("", dependencies=[Depends(combined_auth)])
     async def documents() -> DocsStatusesResponse:
         """
         Get the status of all documents in the system.

 import pipmaster as pm
 from datetime import datetime
 from pathlib import Path
+from typing import Dict, List, Optional, Any, Literal
 from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile
 from pydantic import BaseModel, Field, field_validator
 temp_prefix = "__tmp__"
+class ScanResponse(BaseModel):
+    """Response model for document scanning operation
+    Attributes:
+        status: Status of the scanning operation
+        message: Optional message with additional details
+    """
+    status: Literal["scanning_started"] = Field(
+        description="Status of the scanning operation"
+    )
+    message: Optional[str] = Field(
+        default=None, description="Additional details about the scanning operation"
+    )
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "status": "scanning_started",
+                "message": "Scanning process has been initiated in the background",
+            }
+        }
 class InsertTextRequest(BaseModel):
+    """Request model for inserting a single text document
+    Attributes:
+        text: The text content to be inserted into the RAG system
+    """
     text: str = Field(
         min_length=1,
         description="The text to insert",
     def strip_after(cls, text: str) -> str:
         return text.strip()
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "text": "This is a sample text to be inserted into the RAG system."
+            }
+        }
 class InsertTextsRequest(BaseModel):
+    """Request model for inserting multiple text documents
+    Attributes:
+        texts: List of text contents to be inserted into the RAG system
+    """
     texts: list[str] = Field(
         min_length=1,
         description="The texts to insert",
     def strip_after(cls, texts: list[str]) -> list[str]:
         return [text.strip() for text in texts]
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "texts": [
+                    "This is the first text to be inserted.",
+                    "This is the second text to be inserted.",
+                ]
+            }
+        }
 class InsertResponse(BaseModel):
+    """Response model for document insertion operations
+    Attributes:
+        status: Status of the operation (success, duplicated, partial_success, failure)
+        message: Detailed message describing the operation result
+    """
+    status: Literal["success", "duplicated", "partial_success", "failure"] = Field(
+        description="Status of the operation"
+    )
     message: str = Field(description="Message describing the operation result")
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "status": "success",
+                "message": "File 'document.pdf' uploaded successfully. Processing will continue in background.",
+            }
+        }
 class ClearDocumentsResponse(BaseModel):
+    """Response model for document clearing operation
+    Attributes:
+        status: Status of the clear operation
+        message: Detailed message describing the operation result
+    """
+    status: Literal["success", "partial_success", "busy", "fail"] = Field(
+        description="Status of the clear operation"
     )
     message: str = Field(description="Message describing the operation result")
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "status": "success",
+                "message": "All documents cleared successfully. Deleted 15 files.",
+            }
+        }
 class ClearCacheRequest(BaseModel):
+    """Request model for clearing cache
+    Attributes:
+        modes: Optional list of cache modes to clear
+    """
+    modes: Optional[
+        List[Literal["default", "naive", "local", "global", "hybrid", "mix"]]
+    ] = Field(
         default=None,
+        description="Modes of cache to clear. If None, clears all cache.",
     )
+    class Config:
+        json_schema_extra = {"example": {"modes": ["default", "naive"]}}
 class ClearCacheResponse(BaseModel):
+    """Response model for cache clearing operation
+    Attributes:
+        status: Status of the clear operation
+        message: Detailed message describing the operation result
+    """
+    status: Literal["success", "fail"] = Field(
+        description="Status of the clear operation"
+    )
     message: str = Field(description="Message describing the operation result")
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "status": "success",
+                "message": "Successfully cleared cache for modes: ['default', 'naive']",
+            }
+        }
+"""Response model for document status
+Attributes:
+    id: Document identifier
+    content_summary: Summary of document content
+    content_length: Length of document content
+    status: Current processing status
+    created_at: Creation timestamp (ISO format string)
+    updated_at: Last update timestamp (ISO format string)
+    chunks_count: Number of chunks (optional)
+    error: Error message if any (optional)
+    metadata: Additional metadata (optional)
+    file_path: Path to the document file
+"""
 class DocStatusResponse(BaseModel):
     @staticmethod
             return dt
         return dt.isoformat()
+    id: str = Field(description="Document identifier")
+    content_summary: str = Field(description="Summary of document content")
+    content_length: int = Field(description="Length of document content in characters")
+    status: DocStatus = Field(description="Current processing status")
+    created_at: str = Field(description="Creation timestamp (ISO format string)")
+    updated_at: str = Field(description="Last update timestamp (ISO format string)")
+    chunks_count: Optional[int] = Field(
+        default=None, description="Number of chunks the document was split into"
+    )
+    error: Optional[str] = Field(
+        default=None, description="Error message if processing failed"
+    )
+    metadata: Optional[dict[str, Any]] = Field(
+        default=None, description="Additional metadata about the document"
+    )
+    file_path: str = Field(description="Path to the document file")
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "id": "doc_123456",
+                "content_summary": "Research paper on machine learning",
+                "content_length": 15240,
+                "status": "PROCESSED",
+                "created_at": "2025-03-31T12:34:56",
+                "updated_at": "2025-03-31T12:35:30",
+                "chunks_count": 12,
+                "error": None,
+                "metadata": {"author": "John Doe", "year": 2025},
+                "file_path": "research_paper.pdf",
+            }
+        }
+class DocsStatusesResponse(BaseModel):
+    """Response model for document statuses
     Attributes:
+        statuses: Dictionary mapping document status to lists of document status responses
     """
+    statuses: Dict[DocStatus, List[DocStatusResponse]] = Field(
+        default_factory=dict,
+        description="Dictionary mapping document status to lists of document status responses",
+    )
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "statuses": {
+                    "PENDING": [
+                        {
+                            "id": "doc_123",
+                            "content_summary": "Pending document",
+                            "content_length": 5000,
+                            "status": "PENDING",
+                            "created_at": "2025-03-31T10:00:00",
+                            "updated_at": "2025-03-31T10:00:00",
+                            "file_path": "pending_doc.pdf",
+                        }
+                    ],
+                    "PROCESSED": [
+                        {
+                            "id": "doc_456",
+                            "content_summary": "Processed document",
+                            "content_length": 8000,
+                            "status": "PROCESSED",
+                            "created_at": "2025-03-31T09:00:00",
+                            "updated_at": "2025-03-31T09:05:00",
+                            "chunks_count": 8,
+                            "file_path": "processed_doc.pdf",
+                        }
+                    ],
+                }
+            }
+        }
 class PipelineStatusResponse(BaseModel):
     # Create combined auth dependency for document routes
     combined_auth = get_combined_auth_dependency(api_key)
+    @router.post(
+        "/scan", response_model=ScanResponse, dependencies=[Depends(combined_auth)]
+    )
     async def scan_for_new_documents(background_tasks: BackgroundTasks):
         """
         Trigger the scanning process for new documents.
         that fact.
         Returns:
+            ScanResponse: A response object containing the scanning status
         """
         # Start the scanning process in the background
         background_tasks.add_task(run_scanning_process, rag, doc_manager)
+        return ScanResponse(
+            status="scanning_started",
+            message="Scanning process has been initiated in the background",
+        )
+    @router.post(
+        "/upload", response_model=InsertResponse, dependencies=[Depends(combined_auth)]
+    )
     async def upload_to_input_dir(
         background_tasks: BackgroundTasks, file: UploadFile = File(...)
     ):
             logger.error(traceback.format_exc())
             raise HTTPException(status_code=500, detail=str(e))
+    @router.get(
+        "", response_model=DocsStatusesResponse, dependencies=[Depends(combined_auth)]
+    )
     async def documents() -> DocsStatusesResponse:
         """
         Get the status of all documents in the system.