yangdx commited on
Commit
f56f204
·
2 Parent(s): cb7e133 567ec93

Merge branch 'main' into improve-property-tooltip

Browse files
lightrag/api/routers/document_routes.py CHANGED
@@ -99,6 +99,37 @@ class DocsStatusesResponse(BaseModel):
99
  statuses: Dict[DocStatus, List[DocStatusResponse]] = {}
100
 
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  class DocumentManager:
103
  def __init__(
104
  self,
@@ -247,7 +278,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
247
  if global_args["main_args"].document_loading_engine == "DOCLING":
248
  if not pm.is_installed("docling"): # type: ignore
249
  pm.install("docling")
250
- from docling.document_converter import DocumentConverter
251
 
252
  converter = DocumentConverter()
253
  result = converter.convert(file_path)
@@ -266,7 +297,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
266
  if global_args["main_args"].document_loading_engine == "DOCLING":
267
  if not pm.is_installed("docling"): # type: ignore
268
  pm.install("docling")
269
- from docling.document_converter import DocumentConverter
270
 
271
  converter = DocumentConverter()
272
  result = converter.convert(file_path)
@@ -286,7 +317,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
286
  if global_args["main_args"].document_loading_engine == "DOCLING":
287
  if not pm.is_installed("docling"): # type: ignore
288
  pm.install("docling")
289
- from docling.document_converter import DocumentConverter
290
 
291
  converter = DocumentConverter()
292
  result = converter.convert(file_path)
@@ -307,7 +338,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
307
  if global_args["main_args"].document_loading_engine == "DOCLING":
308
  if not pm.is_installed("docling"): # type: ignore
309
  pm.install("docling")
310
- from docling.document_converter import DocumentConverter
311
 
312
  converter = DocumentConverter()
313
  result = converter.convert(file_path)
@@ -718,17 +749,33 @@ def create_document_routes(
718
  logger.error(traceback.format_exc())
719
  raise HTTPException(status_code=500, detail=str(e))
720
 
721
- @router.get("/pipeline_status", dependencies=[Depends(optional_api_key)])
722
- async def get_pipeline_status():
 
 
 
 
723
  """
724
  Get the current status of the document indexing pipeline.
725
 
726
  This endpoint returns information about the current state of the document processing pipeline,
727
- including whether it's busy, the current job name, when it started, how many documents
728
- are being processed, how many batches there are, and which batch is currently being processed.
729
 
730
  Returns:
731
- dict: A dictionary containing the pipeline status information
 
 
 
 
 
 
 
 
 
 
 
 
 
732
  """
733
  try:
734
  from lightrag.kg.shared_storage import get_namespace_data
@@ -746,7 +793,7 @@ def create_document_routes(
746
  if status_dict.get("job_start"):
747
  status_dict["job_start"] = str(status_dict["job_start"])
748
 
749
- return status_dict
750
  except Exception as e:
751
  logger.error(f"Error getting pipeline status: {str(e)}")
752
  logger.error(traceback.format_exc())
 
99
  statuses: Dict[DocStatus, List[DocStatusResponse]] = {}
100
 
101
 
102
+ class PipelineStatusResponse(BaseModel):
103
+ """Response model for pipeline status
104
+
105
+ Attributes:
106
+ autoscanned: Whether auto-scan has started
107
+ busy: Whether the pipeline is currently busy
108
+ job_name: Current job name (e.g., indexing files/indexing texts)
109
+ job_start: Job start time as ISO format string (optional)
110
+ docs: Total number of documents to be indexed
111
+ batchs: Number of batches for processing documents
112
+ cur_batch: Current processing batch
113
+ request_pending: Flag for pending request for processing
114
+ latest_message: Latest message from pipeline processing
115
+ history_messages: List of history messages
116
+ """
117
+
118
+ autoscanned: bool = False
119
+ busy: bool = False
120
+ job_name: str = "Default Job"
121
+ job_start: Optional[str] = None
122
+ docs: int = 0
123
+ batchs: int = 0
124
+ cur_batch: int = 0
125
+ request_pending: bool = False
126
+ latest_message: str = ""
127
+ history_messages: Optional[List[str]] = None
128
+
129
+ class Config:
130
+ extra = "allow" # Allow additional fields from the pipeline status
131
+
132
+
133
  class DocumentManager:
134
  def __init__(
135
  self,
 
278
  if global_args["main_args"].document_loading_engine == "DOCLING":
279
  if not pm.is_installed("docling"): # type: ignore
280
  pm.install("docling")
281
+ from docling.document_converter import DocumentConverter # type: ignore
282
 
283
  converter = DocumentConverter()
284
  result = converter.convert(file_path)
 
297
  if global_args["main_args"].document_loading_engine == "DOCLING":
298
  if not pm.is_installed("docling"): # type: ignore
299
  pm.install("docling")
300
+ from docling.document_converter import DocumentConverter # type: ignore
301
 
302
  converter = DocumentConverter()
303
  result = converter.convert(file_path)
 
317
  if global_args["main_args"].document_loading_engine == "DOCLING":
318
  if not pm.is_installed("docling"): # type: ignore
319
  pm.install("docling")
320
+ from docling.document_converter import DocumentConverter # type: ignore
321
 
322
  converter = DocumentConverter()
323
  result = converter.convert(file_path)
 
338
  if global_args["main_args"].document_loading_engine == "DOCLING":
339
  if not pm.is_installed("docling"): # type: ignore
340
  pm.install("docling")
341
+ from docling.document_converter import DocumentConverter # type: ignore
342
 
343
  converter = DocumentConverter()
344
  result = converter.convert(file_path)
 
749
  logger.error(traceback.format_exc())
750
  raise HTTPException(status_code=500, detail=str(e))
751
 
752
+ @router.get(
753
+ "/pipeline_status",
754
+ dependencies=[Depends(optional_api_key)],
755
+ response_model=PipelineStatusResponse,
756
+ )
757
+ async def get_pipeline_status() -> PipelineStatusResponse:
758
  """
759
  Get the current status of the document indexing pipeline.
760
 
761
  This endpoint returns information about the current state of the document processing pipeline,
762
+ including the processing status, progress information, and history messages.
 
763
 
764
  Returns:
765
+ PipelineStatusResponse: A response object containing:
766
+ - autoscanned (bool): Whether auto-scan has started
767
+ - busy (bool): Whether the pipeline is currently busy
768
+ - job_name (str): Current job name (e.g., indexing files/indexing texts)
769
+ - job_start (str, optional): Job start time as ISO format string
770
+ - docs (int): Total number of documents to be indexed
771
+ - batchs (int): Number of batches for processing documents
772
+ - cur_batch (int): Current processing batch
773
+ - request_pending (bool): Flag for pending request for processing
774
+ - latest_message (str): Latest message from pipeline processing
775
+ - history_messages (List[str], optional): List of history messages
776
+
777
+ Raises:
778
+ HTTPException: If an error occurs while retrieving pipeline status (500)
779
  """
780
  try:
781
  from lightrag.kg.shared_storage import get_namespace_data
 
793
  if status_dict.get("job_start"):
794
  status_dict["job_start"] = str(status_dict["job_start"])
795
 
796
+ return PipelineStatusResponse(**status_dict)
797
  except Exception as e:
798
  logger.error(f"Error getting pipeline status: {str(e)}")
799
  logger.error(traceback.format_exc())