Fix linting
Browse files
- lightrag/api/routers/document_routes.py +3 -7
- lightrag/lightrag.py +54 -24
lightrag/api/routers/document_routes.py
CHANGED
@@ -841,7 +841,7 @@ def create_document_routes(
|
|
841 |
except Exception as e:
|
842 |
logger.error(f"Error checking doc_id existence: {str(e)}")
|
843 |
return False
|
844 |
-
|
845 |
# Create combined auth dependency for document routes
|
846 |
combined_auth = get_combined_auth_dependency(api_key)
|
847 |
|
@@ -1323,7 +1323,6 @@ def create_document_routes(
|
|
1323 |
dependencies=[Depends(combined_auth)],
|
1324 |
summary="Delete a document and all its associated data by its ID.",
|
1325 |
)
|
1326 |
-
|
1327 |
async def delete_document(
|
1328 |
delete_request: DeleteDocRequest,
|
1329 |
background_tasks: BackgroundTasks,
|
@@ -1355,11 +1354,8 @@ def create_document_routes(
|
|
1355 |
# Check if doc_id exists first - return error immediately if not found
|
1356 |
doc_id = delete_request.doc_id
|
1357 |
if not await check_doc_id_exists(doc_id):
|
1358 |
-
raise HTTPException(
|
1359 |
-
|
1360 |
-
detail=f"Document {doc_id} not found."
|
1361 |
-
)
|
1362 |
-
|
1363 |
# The rag object is initialized from the server startup args,
|
1364 |
# so we can access its properties here.
|
1365 |
if not rag.enable_llm_cache_for_entity_extract:
|
|
|
841 |
except Exception as e:
|
842 |
logger.error(f"Error checking doc_id existence: {str(e)}")
|
843 |
return False
|
844 |
+
|
845 |
# Create combined auth dependency for document routes
|
846 |
combined_auth = get_combined_auth_dependency(api_key)
|
847 |
|
|
|
1323 |
dependencies=[Depends(combined_auth)],
|
1324 |
summary="Delete a document and all its associated data by its ID.",
|
1325 |
)
|
|
|
1326 |
async def delete_document(
|
1327 |
delete_request: DeleteDocRequest,
|
1328 |
background_tasks: BackgroundTasks,
|
|
|
1354 |
# Check if doc_id exists first - return error immediately if not found
|
1355 |
doc_id = delete_request.doc_id
|
1356 |
if not await check_doc_id_exists(doc_id):
|
1357 |
+
raise HTTPException(status_code=404, detail=f"Document {doc_id} not found.")
|
1358 |
+
|
|
|
|
|
|
|
1359 |
# The rag object is initialized from the server startup args,
|
1360 |
# so we can access its properties here.
|
1361 |
if not rag.enable_llm_cache_for_entity_extract:
|
lightrag/lightrag.py
CHANGED
@@ -1705,10 +1705,10 @@ class LightRAG:
|
|
1705 |
"""
|
1706 |
deletion_operations_started = False
|
1707 |
original_exception = None
|
1708 |
-
|
1709 |
try:
|
1710 |
logger.info(f"Starting deletion process for document {doc_id}")
|
1711 |
-
|
1712 |
# 1. Get the document status and related data
|
1713 |
doc_status_data = await self.doc_status.get_by_id(doc_id)
|
1714 |
if not doc_status_data:
|
@@ -1731,7 +1731,9 @@ class LightRAG:
|
|
1731 |
if isinstance(chunk_data, dict)
|
1732 |
and chunk_data.get("full_doc_id") == doc_id
|
1733 |
}
|
1734 |
-
logger.info(
|
|
|
|
|
1735 |
except Exception as e:
|
1736 |
logger.error(f"Failed to retrieve chunks for document {doc_id}: {e}")
|
1737 |
raise Exception(f"Failed to retrieve document chunks: {e}") from e
|
@@ -1746,9 +1748,11 @@ class LightRAG:
|
|
1746 |
await self.doc_status.delete([doc_id])
|
1747 |
logger.info(f"Deleted document {doc_id} with no associated chunks")
|
1748 |
except Exception as e:
|
1749 |
-
logger.error(
|
|
|
|
|
1750 |
raise Exception(f"Failed to delete document entry: {e}") from e
|
1751 |
-
|
1752 |
return DeletionResult(
|
1753 |
status="success",
|
1754 |
doc_id=doc_id,
|
@@ -1773,7 +1777,9 @@ class LightRAG:
|
|
1773 |
async with graph_db_lock:
|
1774 |
try:
|
1775 |
# Get all affected nodes and edges in batch
|
1776 |
-
logger.info(
|
|
|
|
|
1777 |
affected_nodes = (
|
1778 |
await self.chunk_entity_relation_graph.get_nodes_by_chunk_ids(
|
1779 |
list(chunk_ids)
|
@@ -1784,7 +1790,9 @@ class LightRAG:
|
|
1784 |
list(chunk_ids)
|
1785 |
)
|
1786 |
)
|
1787 |
-
logger.info(
|
|
|
|
|
1788 |
except Exception as e:
|
1789 |
logger.error(f"Failed to analyze affected graph elements: {e}")
|
1790 |
raise Exception(f"Failed to analyze graph dependencies: {e}") from e
|
@@ -1822,11 +1830,13 @@ class LightRAG:
|
|
1822 |
relationships_to_delete.add(edge_tuple)
|
1823 |
elif remaining_sources != sources:
|
1824 |
relationships_to_rebuild[edge_tuple] = remaining_sources
|
1825 |
-
|
1826 |
-
logger.info(
|
1827 |
-
|
1828 |
-
|
1829 |
-
|
|
|
|
|
1830 |
except Exception as e:
|
1831 |
logger.error(f"Failed to process graph analysis results: {e}")
|
1832 |
raise Exception(f"Failed to process graph dependencies: {e}") from e
|
@@ -1837,7 +1847,9 @@ class LightRAG:
|
|
1837 |
logger.info(f"Deleting {len(chunk_ids)} chunks from storage")
|
1838 |
await self.chunks_vdb.delete(chunk_ids)
|
1839 |
await self.text_chunks.delete(chunk_ids)
|
1840 |
-
logger.info(
|
|
|
|
|
1841 |
except Exception as e:
|
1842 |
logger.error(f"Failed to delete chunks: {e}")
|
1843 |
raise Exception(f"Failed to delete document chunks: {e}") from e
|
@@ -1857,7 +1869,9 @@ class LightRAG:
|
|
1857 |
await self.chunk_entity_relation_graph.remove_nodes(
|
1858 |
list(entities_to_delete)
|
1859 |
)
|
1860 |
-
logger.info(
|
|
|
|
|
1861 |
except Exception as e:
|
1862 |
logger.error(f"Failed to delete entities: {e}")
|
1863 |
raise Exception(f"Failed to delete entities: {e}") from e
|
@@ -1865,7 +1879,9 @@ class LightRAG:
|
|
1865 |
# 7. Delete relationships that have no remaining sources
|
1866 |
if relationships_to_delete:
|
1867 |
try:
|
1868 |
-
logger.info(
|
|
|
|
|
1869 |
# Delete from vector database
|
1870 |
rel_ids_to_delete = []
|
1871 |
for src, tgt in relationships_to_delete:
|
@@ -1881,7 +1897,9 @@ class LightRAG:
|
|
1881 |
await self.chunk_entity_relation_graph.remove_edges(
|
1882 |
list(relationships_to_delete)
|
1883 |
)
|
1884 |
-
logger.info(
|
|
|
|
|
1885 |
except Exception as e:
|
1886 |
logger.error(f"Failed to delete relationships: {e}")
|
1887 |
raise Exception(f"Failed to delete relationships: {e}") from e
|
@@ -1889,7 +1907,9 @@ class LightRAG:
|
|
1889 |
# 8. Rebuild entities and relationships from remaining chunks
|
1890 |
if entities_to_rebuild or relationships_to_rebuild:
|
1891 |
try:
|
1892 |
-
logger.info(
|
|
|
|
|
1893 |
await _rebuild_knowledge_from_chunks(
|
1894 |
entities_to_rebuild=entities_to_rebuild,
|
1895 |
relationships_to_rebuild=relationships_to_rebuild,
|
@@ -1900,10 +1920,14 @@ class LightRAG:
|
|
1900 |
llm_response_cache=self.llm_response_cache,
|
1901 |
global_config=asdict(self),
|
1902 |
)
|
1903 |
-
logger.info(
|
|
|
|
|
1904 |
except Exception as e:
|
1905 |
logger.error(f"Failed to rebuild knowledge from chunks: {e}")
|
1906 |
-
raise Exception(
|
|
|
|
|
1907 |
|
1908 |
# 9. Delete original document and status
|
1909 |
try:
|
@@ -1935,19 +1959,23 @@ class LightRAG:
|
|
1935 |
message=error_message,
|
1936 |
status_code=500,
|
1937 |
)
|
1938 |
-
|
1939 |
finally:
|
1940 |
# ALWAYS ensure persistence if any deletion operations were started
|
1941 |
if deletion_operations_started:
|
1942 |
try:
|
1943 |
-
logger.info(
|
|
|
|
|
1944 |
await self._insert_done()
|
1945 |
-
logger.info(
|
|
|
|
|
1946 |
except Exception as persistence_error:
|
1947 |
persistence_error_msg = f"Failed to persist data after deletion attempt for {doc_id}: {persistence_error}"
|
1948 |
logger.error(persistence_error_msg)
|
1949 |
logger.error(traceback.format_exc())
|
1950 |
-
|
1951 |
# If there was no original exception, this persistence error becomes the main error
|
1952 |
if original_exception is None:
|
1953 |
return DeletionResult(
|
@@ -1959,7 +1987,9 @@ class LightRAG:
|
|
1959 |
# If there was an original exception, log the persistence error but don't override the original error
|
1960 |
# The original error result was already returned in the except block
|
1961 |
else:
|
1962 |
-
logger.debug(
|
|
|
|
|
1963 |
|
1964 |
async def adelete_by_entity(self, entity_name: str) -> DeletionResult:
|
1965 |
"""Asynchronously delete an entity and all its relationships.
|
|
|
1705 |
"""
|
1706 |
deletion_operations_started = False
|
1707 |
original_exception = None
|
1708 |
+
|
1709 |
try:
|
1710 |
logger.info(f"Starting deletion process for document {doc_id}")
|
1711 |
+
|
1712 |
# 1. Get the document status and related data
|
1713 |
doc_status_data = await self.doc_status.get_by_id(doc_id)
|
1714 |
if not doc_status_data:
|
|
|
1731 |
if isinstance(chunk_data, dict)
|
1732 |
and chunk_data.get("full_doc_id") == doc_id
|
1733 |
}
|
1734 |
+
logger.info(
|
1735 |
+
f"Retrieved {len(all_chunks)} total chunks, {len(related_chunks)} related to document {doc_id}"
|
1736 |
+
)
|
1737 |
except Exception as e:
|
1738 |
logger.error(f"Failed to retrieve chunks for document {doc_id}: {e}")
|
1739 |
raise Exception(f"Failed to retrieve document chunks: {e}") from e
|
|
|
1748 |
await self.doc_status.delete([doc_id])
|
1749 |
logger.info(f"Deleted document {doc_id} with no associated chunks")
|
1750 |
except Exception as e:
|
1751 |
+
logger.error(
|
1752 |
+
f"Failed to delete document {doc_id} with no chunks: {e}"
|
1753 |
+
)
|
1754 |
raise Exception(f"Failed to delete document entry: {e}") from e
|
1755 |
+
|
1756 |
return DeletionResult(
|
1757 |
status="success",
|
1758 |
doc_id=doc_id,
|
|
|
1777 |
async with graph_db_lock:
|
1778 |
try:
|
1779 |
# Get all affected nodes and edges in batch
|
1780 |
+
logger.info(
|
1781 |
+
f"Analyzing affected entities and relationships for {len(chunk_ids)} chunks"
|
1782 |
+
)
|
1783 |
affected_nodes = (
|
1784 |
await self.chunk_entity_relation_graph.get_nodes_by_chunk_ids(
|
1785 |
list(chunk_ids)
|
|
|
1790 |
list(chunk_ids)
|
1791 |
)
|
1792 |
)
|
1793 |
+
logger.info(
|
1794 |
+
f"Found {len(affected_nodes)} affected nodes and {len(affected_edges)} affected edges"
|
1795 |
+
)
|
1796 |
except Exception as e:
|
1797 |
logger.error(f"Failed to analyze affected graph elements: {e}")
|
1798 |
raise Exception(f"Failed to analyze graph dependencies: {e}") from e
|
|
|
1830 |
relationships_to_delete.add(edge_tuple)
|
1831 |
elif remaining_sources != sources:
|
1832 |
relationships_to_rebuild[edge_tuple] = remaining_sources
|
1833 |
+
|
1834 |
+
logger.info(
|
1835 |
+
f"Analysis complete: {len(entities_to_delete)} entities to delete, "
|
1836 |
+
f"{len(entities_to_rebuild)} entities to rebuild, "
|
1837 |
+
f"{len(relationships_to_delete)} relationships to delete, "
|
1838 |
+
f"{len(relationships_to_rebuild)} relationships to rebuild"
|
1839 |
+
)
|
1840 |
except Exception as e:
|
1841 |
logger.error(f"Failed to process graph analysis results: {e}")
|
1842 |
raise Exception(f"Failed to process graph dependencies: {e}") from e
|
|
|
1847 |
logger.info(f"Deleting {len(chunk_ids)} chunks from storage")
|
1848 |
await self.chunks_vdb.delete(chunk_ids)
|
1849 |
await self.text_chunks.delete(chunk_ids)
|
1850 |
+
logger.info(
|
1851 |
+
f"Successfully deleted {len(chunk_ids)} chunks from storage"
|
1852 |
+
)
|
1853 |
except Exception as e:
|
1854 |
logger.error(f"Failed to delete chunks: {e}")
|
1855 |
raise Exception(f"Failed to delete document chunks: {e}") from e
|
|
|
1869 |
await self.chunk_entity_relation_graph.remove_nodes(
|
1870 |
list(entities_to_delete)
|
1871 |
)
|
1872 |
+
logger.info(
|
1873 |
+
f"Successfully deleted {len(entities_to_delete)} entities"
|
1874 |
+
)
|
1875 |
except Exception as e:
|
1876 |
logger.error(f"Failed to delete entities: {e}")
|
1877 |
raise Exception(f"Failed to delete entities: {e}") from e
|
|
|
1879 |
# 7. Delete relationships that have no remaining sources
|
1880 |
if relationships_to_delete:
|
1881 |
try:
|
1882 |
+
logger.info(
|
1883 |
+
f"Deleting {len(relationships_to_delete)} relationships"
|
1884 |
+
)
|
1885 |
# Delete from vector database
|
1886 |
rel_ids_to_delete = []
|
1887 |
for src, tgt in relationships_to_delete:
|
|
|
1897 |
await self.chunk_entity_relation_graph.remove_edges(
|
1898 |
list(relationships_to_delete)
|
1899 |
)
|
1900 |
+
logger.info(
|
1901 |
+
f"Successfully deleted {len(relationships_to_delete)} relationships"
|
1902 |
+
)
|
1903 |
except Exception as e:
|
1904 |
logger.error(f"Failed to delete relationships: {e}")
|
1905 |
raise Exception(f"Failed to delete relationships: {e}") from e
|
|
|
1907 |
# 8. Rebuild entities and relationships from remaining chunks
|
1908 |
if entities_to_rebuild or relationships_to_rebuild:
|
1909 |
try:
|
1910 |
+
logger.info(
|
1911 |
+
f"Rebuilding {len(entities_to_rebuild)} entities and {len(relationships_to_rebuild)} relationships"
|
1912 |
+
)
|
1913 |
await _rebuild_knowledge_from_chunks(
|
1914 |
entities_to_rebuild=entities_to_rebuild,
|
1915 |
relationships_to_rebuild=relationships_to_rebuild,
|
|
|
1920 |
llm_response_cache=self.llm_response_cache,
|
1921 |
global_config=asdict(self),
|
1922 |
)
|
1923 |
+
logger.info(
|
1924 |
+
f"Successfully rebuilt {len(entities_to_rebuild)} entities and {len(relationships_to_rebuild)} relationships"
|
1925 |
+
)
|
1926 |
except Exception as e:
|
1927 |
logger.error(f"Failed to rebuild knowledge from chunks: {e}")
|
1928 |
+
raise Exception(
|
1929 |
+
f"Failed to rebuild knowledge graph: {e}"
|
1930 |
+
) from e
|
1931 |
|
1932 |
# 9. Delete original document and status
|
1933 |
try:
|
|
|
1959 |
message=error_message,
|
1960 |
status_code=500,
|
1961 |
)
|
1962 |
+
|
1963 |
finally:
|
1964 |
# ALWAYS ensure persistence if any deletion operations were started
|
1965 |
if deletion_operations_started:
|
1966 |
try:
|
1967 |
+
logger.info(
|
1968 |
+
f"Ensuring data persistence for document {doc_id} deletion"
|
1969 |
+
)
|
1970 |
await self._insert_done()
|
1971 |
+
logger.info(
|
1972 |
+
f"Data persistence completed successfully for document {doc_id} deletion"
|
1973 |
+
)
|
1974 |
except Exception as persistence_error:
|
1975 |
persistence_error_msg = f"Failed to persist data after deletion attempt for {doc_id}: {persistence_error}"
|
1976 |
logger.error(persistence_error_msg)
|
1977 |
logger.error(traceback.format_exc())
|
1978 |
+
|
1979 |
# If there was no original exception, this persistence error becomes the main error
|
1980 |
if original_exception is None:
|
1981 |
return DeletionResult(
|
|
|
1987 |
# If there was an original exception, log the persistence error but don't override the original error
|
1988 |
# The original error result was already returned in the except block
|
1989 |
else:
|
1990 |
+
logger.debug(
|
1991 |
+
f"No deletion operations were started for document {doc_id}, skipping persistence"
|
1992 |
+
)
|
1993 |
|
1994 |
async def adelete_by_entity(self, entity_name: str) -> DeletionResult:
|
1995 |
"""Asynchronously delete an entity and all its relationships.
|