ArnoChen
commited on
Commit
·
01c2d11
1
Parent(s):
023166e
implement endpoint to retrieve document statuses
Browse files- lightrag/api/lightrag_server.py +60 -6
- lightrag/lightrag.py +10 -0
lightrag/api/lightrag_server.py
CHANGED
@@ -19,6 +19,7 @@ from lightrag import LightRAG, QueryParam
|
|
19 |
from lightrag.types import GPTKeywordExtractionFormat
|
20 |
from lightrag.api import __api_version__
|
21 |
from lightrag.utils import EmbeddingFunc
|
|
|
22 |
from enum import Enum
|
23 |
from pathlib import Path
|
24 |
import shutil
|
@@ -693,6 +694,22 @@ class InsertResponse(BaseModel):
|
|
693 |
message: str
|
694 |
|
695 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
696 |
def QueryRequestToQueryParams(request: QueryRequest):
|
697 |
param = QueryParam(mode=request.mode, stream=request.stream)
|
698 |
if request.only_need_context is not None:
|
@@ -1728,20 +1745,57 @@ def create_app(args):
|
|
1728 |
app.include_router(ollama_api.router, prefix="/api")
|
1729 |
|
1730 |
@app.get("/documents", dependencies=[Depends(optional_api_key)])
|
1731 |
-
async def documents():
|
1732 |
-
"""
|
1733 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1734 |
|
1735 |
@app.get("/health", dependencies=[Depends(optional_api_key)])
|
1736 |
async def get_status():
|
1737 |
"""Get current system status"""
|
1738 |
-
files = doc_manager.scan_directory()
|
1739 |
return {
|
1740 |
"status": "healthy",
|
1741 |
"working_directory": str(args.working_dir),
|
1742 |
"input_directory": str(args.input_dir),
|
1743 |
-
"indexed_files": [str(f) for f in files],
|
1744 |
-
"indexed_files_count": len(files),
|
1745 |
"configuration": {
|
1746 |
# LLM configuration binding/host address (if applicable)/model (if applicable)
|
1747 |
"llm_binding": args.llm_binding,
|
|
|
19 |
from lightrag.types import GPTKeywordExtractionFormat
|
20 |
from lightrag.api import __api_version__
|
21 |
from lightrag.utils import EmbeddingFunc
|
22 |
+
from lightrag.base import DocStatus, DocProcessingStatus
|
23 |
from enum import Enum
|
24 |
from pathlib import Path
|
25 |
import shutil
|
|
|
694 |
message: str
|
695 |
|
696 |
|
697 |
+
class DocStatusResponse(BaseModel):
|
698 |
+
id: str
|
699 |
+
content_summary: str
|
700 |
+
content_length: int
|
701 |
+
status: DocStatus
|
702 |
+
created_at: str
|
703 |
+
updated_at: str
|
704 |
+
chunks_count: Optional[int] = None
|
705 |
+
error: Optional[str] = None
|
706 |
+
metadata: Optional[dict[str, Any]] = None
|
707 |
+
|
708 |
+
|
709 |
+
class DocsStatusesResponse(BaseModel):
|
710 |
+
statuses: Dict[DocStatus, List[DocStatusResponse]] = {}
|
711 |
+
|
712 |
+
|
713 |
def QueryRequestToQueryParams(request: QueryRequest):
|
714 |
param = QueryParam(mode=request.mode, stream=request.stream)
|
715 |
if request.only_need_context is not None:
|
|
|
1745 |
app.include_router(ollama_api.router, prefix="/api")
|
1746 |
|
1747 |
@app.get("/documents", dependencies=[Depends(optional_api_key)])
|
1748 |
+
async def documents() -> DocsStatusesResponse:
|
1749 |
+
"""
|
1750 |
+
Get documents statuses
|
1751 |
+
|
1752 |
+
Returns:
|
1753 |
+
DocsStatusesResponse: A response object containing the status, message, and the number of indexed documents.
|
1754 |
+
"""
|
1755 |
+
try:
|
1756 |
+
statuses = (
|
1757 |
+
DocStatus.PENDING,
|
1758 |
+
DocStatus.PROCESSING,
|
1759 |
+
DocStatus.PROCESSED,
|
1760 |
+
DocStatus.FAILED,
|
1761 |
+
)
|
1762 |
+
|
1763 |
+
tasks = [rag.get_docs_by_status(status) for status in statuses]
|
1764 |
+
results: List[Dict[str, DocProcessingStatus]] = await asyncio.gather(*tasks)
|
1765 |
+
|
1766 |
+
response = DocsStatusesResponse()
|
1767 |
+
|
1768 |
+
for idx, result in enumerate(results):
|
1769 |
+
status = statuses[idx]
|
1770 |
+
for doc_id, doc_status in result.items():
|
1771 |
+
if status not in response.statuses:
|
1772 |
+
response.statuses[status] = []
|
1773 |
+
response.statuses[status].append(
|
1774 |
+
DocStatusResponse(
|
1775 |
+
id=doc_id,
|
1776 |
+
content_summary=doc_status.content_summary,
|
1777 |
+
content_length=doc_status.content_length,
|
1778 |
+
status=doc_status.status,
|
1779 |
+
created_at=doc_status.created_at,
|
1780 |
+
updated_at=doc_status.updated_at,
|
1781 |
+
chunks_count=doc_status.chunks_count,
|
1782 |
+
error=doc_status.error,
|
1783 |
+
metadata=doc_status.metadata,
|
1784 |
+
)
|
1785 |
+
)
|
1786 |
+
return response
|
1787 |
+
except Exception as e:
|
1788 |
+
logging.error(f"Error GET /documents: {str(e)}")
|
1789 |
+
logging.error(traceback.format_exc())
|
1790 |
+
raise HTTPException(status_code=500, detail=str(e))
|
1791 |
|
1792 |
@app.get("/health", dependencies=[Depends(optional_api_key)])
|
1793 |
async def get_status():
|
1794 |
"""Get current system status"""
|
|
|
1795 |
return {
|
1796 |
"status": "healthy",
|
1797 |
"working_directory": str(args.working_dir),
|
1798 |
"input_directory": str(args.input_dir),
|
|
|
|
|
1799 |
"configuration": {
|
1800 |
# LLM configuration binding/host address (if applicable)/model (if applicable)
|
1801 |
"llm_binding": args.llm_binding,
|
lightrag/lightrag.py
CHANGED
@@ -1254,6 +1254,16 @@ class LightRAG:
|
|
1254 |
"""
|
1255 |
return await self.doc_status.get_status_counts()
|
1256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1257 |
async def adelete_by_doc_id(self, doc_id: str) -> None:
|
1258 |
"""Delete a document and all its related data
|
1259 |
|
|
|
1254 |
"""
|
1255 |
return await self.doc_status.get_status_counts()
|
1256 |
|
1257 |
+
async def get_docs_by_status(
|
1258 |
+
self, status: DocStatus
|
1259 |
+
) -> dict[str, DocProcessingStatus]:
|
1260 |
+
"""Get documents by status
|
1261 |
+
|
1262 |
+
Returns:
|
1263 |
+
Dict with document id is keys and document status is values
|
1264 |
+
"""
|
1265 |
+
return await self.doc_status.get_docs_by_status(status)
|
1266 |
+
|
1267 |
async def adelete_by_doc_id(self, doc_id: str) -> None:
|
1268 |
"""Delete a document and all its related data
|
1269 |
|