ArnoChen commited on
Commit
01c2d11
·
1 Parent(s): 023166e

implement endpoint to retrieve document statuses

Browse files
lightrag/api/lightrag_server.py CHANGED
@@ -19,6 +19,7 @@ from lightrag import LightRAG, QueryParam
19
  from lightrag.types import GPTKeywordExtractionFormat
20
  from lightrag.api import __api_version__
21
  from lightrag.utils import EmbeddingFunc
 
22
  from enum import Enum
23
  from pathlib import Path
24
  import shutil
@@ -693,6 +694,22 @@ class InsertResponse(BaseModel):
693
  message: str
694
 
695
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
  def QueryRequestToQueryParams(request: QueryRequest):
697
  param = QueryParam(mode=request.mode, stream=request.stream)
698
  if request.only_need_context is not None:
@@ -1728,20 +1745,57 @@ def create_app(args):
1728
  app.include_router(ollama_api.router, prefix="/api")
1729
 
1730
  @app.get("/documents", dependencies=[Depends(optional_api_key)])
1731
- async def documents():
1732
- """Get current system status"""
1733
- return doc_manager.indexed_files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1734
 
1735
  @app.get("/health", dependencies=[Depends(optional_api_key)])
1736
  async def get_status():
1737
  """Get current system status"""
1738
- files = doc_manager.scan_directory()
1739
  return {
1740
  "status": "healthy",
1741
  "working_directory": str(args.working_dir),
1742
  "input_directory": str(args.input_dir),
1743
- "indexed_files": [str(f) for f in files],
1744
- "indexed_files_count": len(files),
1745
  "configuration": {
1746
  # LLM configuration binding/host address (if applicable)/model (if applicable)
1747
  "llm_binding": args.llm_binding,
 
19
  from lightrag.types import GPTKeywordExtractionFormat
20
  from lightrag.api import __api_version__
21
  from lightrag.utils import EmbeddingFunc
22
+ from lightrag.base import DocStatus, DocProcessingStatus
23
  from enum import Enum
24
  from pathlib import Path
25
  import shutil
 
694
  message: str
695
 
696
 
697
+ class DocStatusResponse(BaseModel):
698
+ id: str
699
+ content_summary: str
700
+ content_length: int
701
+ status: DocStatus
702
+ created_at: str
703
+ updated_at: str
704
+ chunks_count: Optional[int] = None
705
+ error: Optional[str] = None
706
+ metadata: Optional[dict[str, Any]] = None
707
+
708
+
709
+ class DocsStatusesResponse(BaseModel):
710
+ statuses: Dict[DocStatus, List[DocStatusResponse]] = {}
711
+
712
+
713
  def QueryRequestToQueryParams(request: QueryRequest):
714
  param = QueryParam(mode=request.mode, stream=request.stream)
715
  if request.only_need_context is not None:
 
1745
  app.include_router(ollama_api.router, prefix="/api")
1746
 
1747
  @app.get("/documents", dependencies=[Depends(optional_api_key)])
1748
+ async def documents() -> DocsStatusesResponse:
1749
+ """
1750
+ Get documents statuses
1751
+
1752
+ Returns:
1753
+ DocsStatusesResponse: A response object containing the status, message, and the number of indexed documents.
1754
+ """
1755
+ try:
1756
+ statuses = (
1757
+ DocStatus.PENDING,
1758
+ DocStatus.PROCESSING,
1759
+ DocStatus.PROCESSED,
1760
+ DocStatus.FAILED,
1761
+ )
1762
+
1763
+ tasks = [rag.get_docs_by_status(status) for status in statuses]
1764
+ results: List[Dict[str, DocProcessingStatus]] = await asyncio.gather(*tasks)
1765
+
1766
+ response = DocsStatusesResponse()
1767
+
1768
+ for idx, result in enumerate(results):
1769
+ status = statuses[idx]
1770
+ for doc_id, doc_status in result.items():
1771
+ if status not in response.statuses:
1772
+ response.statuses[status] = []
1773
+ response.statuses[status].append(
1774
+ DocStatusResponse(
1775
+ id=doc_id,
1776
+ content_summary=doc_status.content_summary,
1777
+ content_length=doc_status.content_length,
1778
+ status=doc_status.status,
1779
+ created_at=doc_status.created_at,
1780
+ updated_at=doc_status.updated_at,
1781
+ chunks_count=doc_status.chunks_count,
1782
+ error=doc_status.error,
1783
+ metadata=doc_status.metadata,
1784
+ )
1785
+ )
1786
+ return response
1787
+ except Exception as e:
1788
+ logging.error(f"Error GET /documents: {str(e)}")
1789
+ logging.error(traceback.format_exc())
1790
+ raise HTTPException(status_code=500, detail=str(e))
1791
 
1792
  @app.get("/health", dependencies=[Depends(optional_api_key)])
1793
  async def get_status():
1794
  """Get current system status"""
 
1795
  return {
1796
  "status": "healthy",
1797
  "working_directory": str(args.working_dir),
1798
  "input_directory": str(args.input_dir),
 
 
1799
  "configuration": {
1800
  # LLM configuration binding/host address (if applicable)/model (if applicable)
1801
  "llm_binding": args.llm_binding,
lightrag/lightrag.py CHANGED
@@ -1254,6 +1254,16 @@ class LightRAG:
1254
  """
1255
  return await self.doc_status.get_status_counts()
1256
 
 
 
 
 
 
 
 
 
 
 
1257
  async def adelete_by_doc_id(self, doc_id: str) -> None:
1258
  """Delete a document and all its related data
1259
 
 
1254
  """
1255
  return await self.doc_status.get_status_counts()
1256
 
1257
+ async def get_docs_by_status(
1258
+ self, status: DocStatus
1259
+ ) -> dict[str, DocProcessingStatus]:
1260
+ """Get documents by status
1261
+
1262
+ Returns:
1263
+ Dict with document id is keys and document status is values
1264
+ """
1265
+ return await self.doc_status.get_docs_by_status(status)
1266
+
1267
  async def adelete_by_doc_id(self, doc_id: str) -> None:
1268
  """Delete a document and all its related data
1269