Merge branch 'main' into improve-property-tooltip
Browse files
lightrag/api/routers/document_routes.py
CHANGED
@@ -99,6 +99,37 @@ class DocsStatusesResponse(BaseModel):
|
|
99 |
statuses: Dict[DocStatus, List[DocStatusResponse]] = {}
|
100 |
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
class DocumentManager:
|
103 |
def __init__(
|
104 |
self,
|
@@ -247,7 +278,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|
247 |
if global_args["main_args"].document_loading_engine == "DOCLING":
|
248 |
if not pm.is_installed("docling"): # type: ignore
|
249 |
pm.install("docling")
|
250 |
-
from docling.document_converter import DocumentConverter
|
251 |
|
252 |
converter = DocumentConverter()
|
253 |
result = converter.convert(file_path)
|
@@ -266,7 +297,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|
266 |
if global_args["main_args"].document_loading_engine == "DOCLING":
|
267 |
if not pm.is_installed("docling"): # type: ignore
|
268 |
pm.install("docling")
|
269 |
-
from docling.document_converter import DocumentConverter
|
270 |
|
271 |
converter = DocumentConverter()
|
272 |
result = converter.convert(file_path)
|
@@ -286,7 +317,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|
286 |
if global_args["main_args"].document_loading_engine == "DOCLING":
|
287 |
if not pm.is_installed("docling"): # type: ignore
|
288 |
pm.install("docling")
|
289 |
-
from docling.document_converter import DocumentConverter
|
290 |
|
291 |
converter = DocumentConverter()
|
292 |
result = converter.convert(file_path)
|
@@ -307,7 +338,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|
307 |
if global_args["main_args"].document_loading_engine == "DOCLING":
|
308 |
if not pm.is_installed("docling"): # type: ignore
|
309 |
pm.install("docling")
|
310 |
-
from docling.document_converter import DocumentConverter
|
311 |
|
312 |
converter = DocumentConverter()
|
313 |
result = converter.convert(file_path)
|
@@ -718,17 +749,33 @@ def create_document_routes(
|
|
718 |
logger.error(traceback.format_exc())
|
719 |
raise HTTPException(status_code=500, detail=str(e))
|
720 |
|
721 |
-
@router.get(
|
722 |
-
|
|
|
|
|
|
|
|
|
723 |
"""
|
724 |
Get the current status of the document indexing pipeline.
|
725 |
|
726 |
This endpoint returns information about the current state of the document processing pipeline,
|
727 |
-
including
|
728 |
-
are being processed, how many batches there are, and which batch is currently being processed.
|
729 |
|
730 |
Returns:
|
731 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
732 |
"""
|
733 |
try:
|
734 |
from lightrag.kg.shared_storage import get_namespace_data
|
@@ -746,7 +793,7 @@ def create_document_routes(
|
|
746 |
if status_dict.get("job_start"):
|
747 |
status_dict["job_start"] = str(status_dict["job_start"])
|
748 |
|
749 |
-
return status_dict
|
750 |
except Exception as e:
|
751 |
logger.error(f"Error getting pipeline status: {str(e)}")
|
752 |
logger.error(traceback.format_exc())
|
|
|
99 |
statuses: Dict[DocStatus, List[DocStatusResponse]] = {}
|
100 |
|
101 |
|
102 |
+
class PipelineStatusResponse(BaseModel):
|
103 |
+
"""Response model for pipeline status
|
104 |
+
|
105 |
+
Attributes:
|
106 |
+
autoscanned: Whether auto-scan has started
|
107 |
+
busy: Whether the pipeline is currently busy
|
108 |
+
job_name: Current job name (e.g., indexing files/indexing texts)
|
109 |
+
job_start: Job start time as ISO format string (optional)
|
110 |
+
docs: Total number of documents to be indexed
|
111 |
+
batchs: Number of batches for processing documents
|
112 |
+
cur_batch: Current processing batch
|
113 |
+
request_pending: Flag for pending request for processing
|
114 |
+
latest_message: Latest message from pipeline processing
|
115 |
+
history_messages: List of history messages
|
116 |
+
"""
|
117 |
+
|
118 |
+
autoscanned: bool = False
|
119 |
+
busy: bool = False
|
120 |
+
job_name: str = "Default Job"
|
121 |
+
job_start: Optional[str] = None
|
122 |
+
docs: int = 0
|
123 |
+
batchs: int = 0
|
124 |
+
cur_batch: int = 0
|
125 |
+
request_pending: bool = False
|
126 |
+
latest_message: str = ""
|
127 |
+
history_messages: Optional[List[str]] = None
|
128 |
+
|
129 |
+
class Config:
|
130 |
+
extra = "allow" # Allow additional fields from the pipeline status
|
131 |
+
|
132 |
+
|
133 |
class DocumentManager:
|
134 |
def __init__(
|
135 |
self,
|
|
|
278 |
if global_args["main_args"].document_loading_engine == "DOCLING":
|
279 |
if not pm.is_installed("docling"): # type: ignore
|
280 |
pm.install("docling")
|
281 |
+
from docling.document_converter import DocumentConverter # type: ignore
|
282 |
|
283 |
converter = DocumentConverter()
|
284 |
result = converter.convert(file_path)
|
|
|
297 |
if global_args["main_args"].document_loading_engine == "DOCLING":
|
298 |
if not pm.is_installed("docling"): # type: ignore
|
299 |
pm.install("docling")
|
300 |
+
from docling.document_converter import DocumentConverter # type: ignore
|
301 |
|
302 |
converter = DocumentConverter()
|
303 |
result = converter.convert(file_path)
|
|
|
317 |
if global_args["main_args"].document_loading_engine == "DOCLING":
|
318 |
if not pm.is_installed("docling"): # type: ignore
|
319 |
pm.install("docling")
|
320 |
+
from docling.document_converter import DocumentConverter # type: ignore
|
321 |
|
322 |
converter = DocumentConverter()
|
323 |
result = converter.convert(file_path)
|
|
|
338 |
if global_args["main_args"].document_loading_engine == "DOCLING":
|
339 |
if not pm.is_installed("docling"): # type: ignore
|
340 |
pm.install("docling")
|
341 |
+
from docling.document_converter import DocumentConverter # type: ignore
|
342 |
|
343 |
converter = DocumentConverter()
|
344 |
result = converter.convert(file_path)
|
|
|
749 |
logger.error(traceback.format_exc())
|
750 |
raise HTTPException(status_code=500, detail=str(e))
|
751 |
|
752 |
+
@router.get(
|
753 |
+
"/pipeline_status",
|
754 |
+
dependencies=[Depends(optional_api_key)],
|
755 |
+
response_model=PipelineStatusResponse,
|
756 |
+
)
|
757 |
+
async def get_pipeline_status() -> PipelineStatusResponse:
|
758 |
"""
|
759 |
Get the current status of the document indexing pipeline.
|
760 |
|
761 |
This endpoint returns information about the current state of the document processing pipeline,
|
762 |
+
including the processing status, progress information, and history messages.
|
|
|
763 |
|
764 |
Returns:
|
765 |
+
PipelineStatusResponse: A response object containing:
|
766 |
+
- autoscanned (bool): Whether auto-scan has started
|
767 |
+
- busy (bool): Whether the pipeline is currently busy
|
768 |
+
- job_name (str): Current job name (e.g., indexing files/indexing texts)
|
769 |
+
- job_start (str, optional): Job start time as ISO format string
|
770 |
+
- docs (int): Total number of documents to be indexed
|
771 |
+
- batchs (int): Number of batches for processing documents
|
772 |
+
- cur_batch (int): Current processing batch
|
773 |
+
- request_pending (bool): Flag for pending request for processing
|
774 |
+
- latest_message (str): Latest message from pipeline processing
|
775 |
+
- history_messages (List[str], optional): List of history messages
|
776 |
+
|
777 |
+
Raises:
|
778 |
+
HTTPException: If an error occurs while retrieving pipeline status (500)
|
779 |
"""
|
780 |
try:
|
781 |
from lightrag.kg.shared_storage import get_namespace_data
|
|
|
793 |
if status_dict.get("job_start"):
|
794 |
status_dict["job_start"] = str(status_dict["job_start"])
|
795 |
|
796 |
+
return PipelineStatusResponse(**status_dict)
|
797 |
except Exception as e:
|
798 |
logger.error(f"Error getting pipeline status: {str(e)}")
|
799 |
logger.error(traceback.format_exc())
|