LarFii committed on
Commit 5289003 · 1 Parent(s): 0bee9e3

fix linting errors

Files changed (2)
  1. lightrag/lightrag.py +92 -53
  2. lightrag/storage.py +13 -6
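The changes below are mechanical lint/format fixes: logger calls and list comprehensions that exceed the line-length limit are wrapped, single-quoted keys become double-quoted, and whitespace-only blank lines are cleaned up. A minimal sketch of the dominant pattern, using illustrative values (the literal value of GRAPH_FIELD_SEP is assumed here):

import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
GRAPH_FIELD_SEP = "<SEP>"  # separator constant; value assumed for illustration

node = "ENTITY_A"
data = {"source_id": "chunk-1<SEP>chunk-2"}
new_source_id = "chunk-2"

# Before (flagged by the linter: line too long, single-quoted key):
#   logger.debug(f"Entity {node} will be updated with new source_id: {new_source_id}")
#   sources = set(data['source_id'].split(GRAPH_FIELD_SEP))

# After (the style applied throughout this commit):
logger.debug(
    f"Entity {node} will be updated with new source_id: {new_source_id}"
)
sources = set(data["source_id"].split(GRAPH_FIELD_SEP))
print(sources)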
lightrag/lightrag.py CHANGED
@@ -731,30 +731,34 @@ class LightRAG:
             if not doc_status:
                 logger.warning(f"Document {doc_id} not found")
                 return
-
+
             logger.debug(f"Starting deletion for document {doc_id}")
-
+
             # 2. Get all related chunks
-            chunks = await self.text_chunks.filter(lambda x: x.get("full_doc_id") == doc_id)
+            chunks = await self.text_chunks.filter(
+                lambda x: x.get("full_doc_id") == doc_id
+            )
             chunk_ids = list(chunks.keys())
             logger.debug(f"Found {len(chunk_ids)} chunks to delete")
-
+
             # 3. Before deleting, check the related entities and relationships for these chunks
             for chunk_id in chunk_ids:
                 # Check entities
                 entities = [
-                    dp for dp in self.entities_vdb.client_storage["data"]
+                    dp
+                    for dp in self.entities_vdb.client_storage["data"]
                     if dp.get("source_id") == chunk_id
                 ]
                 logger.debug(f"Chunk {chunk_id} has {len(entities)} related entities")
-
+
                 # Check relationships
                 relations = [
-                    dp for dp in self.relationships_vdb.client_storage["data"]
+                    dp
+                    for dp in self.relationships_vdb.client_storage["data"]
                    if dp.get("source_id") == chunk_id
                 ]
                 logger.debug(f"Chunk {chunk_id} has {len(relations)} related relations")
-
+
                 # Continue with the original deletion process...

             # 4. Delete chunks from vector database
@@ -775,31 +779,39 @@ class LightRAG:

             # Process entities
             for node, data in nodes:
-                if 'source_id' in data:
+                if "source_id" in data:
                     # Split source_id using GRAPH_FIELD_SEP
-                    sources = set(data['source_id'].split(GRAPH_FIELD_SEP))
+                    sources = set(data["source_id"].split(GRAPH_FIELD_SEP))
                     sources.difference_update(chunk_ids)
                     if not sources:
                         entities_to_delete.add(node)
-                        logger.debug(f"Entity {node} marked for deletion - no remaining sources")
+                        logger.debug(
+                            f"Entity {node} marked for deletion - no remaining sources"
+                        )
                     else:
                         new_source_id = GRAPH_FIELD_SEP.join(sources)
                         entities_to_update[node] = new_source_id
-                        logger.debug(f"Entity {node} will be updated with new source_id: {new_source_id}")
+                        logger.debug(
+                            f"Entity {node} will be updated with new source_id: {new_source_id}"
+                        )

             # Process relationships
             for src, tgt, data in edges:
-                if 'source_id' in data:
+                if "source_id" in data:
                     # Split source_id using GRAPH_FIELD_SEP
-                    sources = set(data['source_id'].split(GRAPH_FIELD_SEP))
+                    sources = set(data["source_id"].split(GRAPH_FIELD_SEP))
                     sources.difference_update(chunk_ids)
                     if not sources:
                         relationships_to_delete.add((src, tgt))
-                        logger.debug(f"Relationship {src}-{tgt} marked for deletion - no remaining sources")
+                        logger.debug(
+                            f"Relationship {src}-{tgt} marked for deletion - no remaining sources"
+                        )
                     else:
                         new_source_id = GRAPH_FIELD_SEP.join(sources)
                         relationships_to_update[(src, tgt)] = new_source_id
-                        logger.debug(f"Relationship {src}-{tgt} will be updated with new source_id: {new_source_id}")
+                        logger.debug(
+                            f"Relationship {src}-{tgt} will be updated with new source_id: {new_source_id}"
+                        )

             # Delete entities
             if entities_to_delete:
@@ -812,9 +824,11 @@ class LightRAG:
             # Update entities
             for entity, new_source_id in entities_to_update.items():
                 node_data = self.chunk_entity_relation_graph._graph.nodes[entity]
-                node_data['source_id'] = new_source_id
+                node_data["source_id"] = new_source_id
                 await self.chunk_entity_relation_graph.upsert_node(entity, node_data)
-                logger.debug(f"Updated entity {entity} with new source_id: {new_source_id}")
+                logger.debug(
+                    f"Updated entity {entity} with new source_id: {new_source_id}"
+                )

             # Delete relationships
             if relationships_to_delete:
@@ -823,15 +837,21 @@ class LightRAG:
                     rel_id_1 = compute_mdhash_id(tgt + src, prefix="rel-")
                     await self.relationships_vdb.delete([rel_id_0, rel_id_1])
                     logger.debug(f"Deleted relationship {src}-{tgt} from vector DB")
-                self.chunk_entity_relation_graph.remove_edges(list(relationships_to_delete))
-                logger.debug(f"Deleted {len(relationships_to_delete)} relationships from graph")
+                self.chunk_entity_relation_graph.remove_edges(
+                    list(relationships_to_delete)
+                )
+                logger.debug(
+                    f"Deleted {len(relationships_to_delete)} relationships from graph"
+                )

             # Update relationships
             for (src, tgt), new_source_id in relationships_to_update.items():
                 edge_data = self.chunk_entity_relation_graph._graph.edges[src, tgt]
-                edge_data['source_id'] = new_source_id
+                edge_data["source_id"] = new_source_id
                 await self.chunk_entity_relation_graph.upsert_edge(src, tgt, edge_data)
-                logger.debug(f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}")
+                logger.debug(
+                    f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}"
+                )

             # 6. Delete original document and status
             await self.full_docs.delete([doc_id])
@@ -851,31 +871,39 @@ class LightRAG:
                 # Verify if the document has been deleted
                 if await self.full_docs.get_by_id(doc_id):
                     logger.error(f"Document {doc_id} still exists in full_docs")
-
+
                 # Verify if chunks have been deleted
                 remaining_chunks = await self.text_chunks.filter(
                     lambda x: x.get("full_doc_id") == doc_id
                 )
                 if remaining_chunks:
                     logger.error(f"Found {len(remaining_chunks)} remaining chunks")
-
+
                 # Verify entities and relationships
                 for chunk_id in chunk_ids:
                     # Check entities
                     entities_with_chunk = [
-                        dp for dp in self.entities_vdb.client_storage["data"]
-                        if chunk_id in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
+                        dp
+                        for dp in self.entities_vdb.client_storage["data"]
+                        if chunk_id
+                        in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
                     ]
                     if entities_with_chunk:
-                        logger.error(f"Found {len(entities_with_chunk)} entities still referencing chunk {chunk_id}")
-
+                        logger.error(
+                            f"Found {len(entities_with_chunk)} entities still referencing chunk {chunk_id}"
+                        )
+
                     # Check relationships
                     relations_with_chunk = [
-                        dp for dp in self.relationships_vdb.client_storage["data"]
-                        if chunk_id in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
+                        dp
+                        for dp in self.relationships_vdb.client_storage["data"]
+                        if chunk_id
+                        in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
                     ]
                     if relations_with_chunk:
-                        logger.error(f"Found {len(relations_with_chunk)} relations still referencing chunk {chunk_id}")
+                        logger.error(
+                            f"Found {len(relations_with_chunk)} relations still referencing chunk {chunk_id}"
+                        )

             await verify_deletion()

@@ -886,13 +914,15 @@ class LightRAG:
         """Synchronous version of adelete"""
         return asyncio.run(self.adelete_by_doc_id(doc_id))

-    async def get_entity_info(self, entity_name: str, include_vector_data: bool = False):
+    async def get_entity_info(
+        self, entity_name: str, include_vector_data: bool = False
+    ):
         """Get detailed information of an entity
-
+
         Args:
             entity_name: Entity name (no need for quotes)
             include_vector_data: Whether to include data from the vector database
-
+
         Returns:
             dict: A dictionary containing entity information, including:
                 - entity_name: Entity name
@@ -901,47 +931,50 @@ class LightRAG:
                 - vector_data: (optional) Data from the vector database
         """
         entity_name = f'"{entity_name.upper()}"'
-
+
         # Get information from the graph
         node_data = await self.chunk_entity_relation_graph.get_node(entity_name)
-        source_id = node_data.get('source_id') if node_data else None
-
+        source_id = node_data.get("source_id") if node_data else None
+
         result = {
             "entity_name": entity_name,
             "source_id": source_id,
             "graph_data": node_data,
         }
-
+
         # Optional: Get vector database information
         if include_vector_data:
             entity_id = compute_mdhash_id(entity_name, prefix="ent-")
             vector_data = self.entities_vdb._client.get([entity_id])
             result["vector_data"] = vector_data[0] if vector_data else None
-
+
         return result

     def get_entity_info_sync(self, entity_name: str, include_vector_data: bool = False):
         """Synchronous version of getting entity information
-
+
         Args:
             entity_name: Entity name (no need for quotes)
             include_vector_data: Whether to include data from the vector database
         """
         try:
             import tracemalloc
+
             tracemalloc.start()
             return asyncio.run(self.get_entity_info(entity_name, include_vector_data))
         finally:
             tracemalloc.stop()

-    async def get_relation_info(self, src_entity: str, tgt_entity: str, include_vector_data: bool = False):
+    async def get_relation_info(
+        self, src_entity: str, tgt_entity: str, include_vector_data: bool = False
+    ):
         """Get detailed information of a relationship
-
+
         Args:
             src_entity: Source entity name (no need for quotes)
             tgt_entity: Target entity name (no need for quotes)
             include_vector_data: Whether to include data from the vector database
-
+
         Returns:
             dict: A dictionary containing relationship information, including:
                 - src_entity: Source entity name
@@ -952,29 +985,33 @@ class LightRAG:
         """
         src_entity = f'"{src_entity.upper()}"'
         tgt_entity = f'"{tgt_entity.upper()}"'
-
+
         # Get information from the graph
-        edge_data = await self.chunk_entity_relation_graph.get_edge(src_entity, tgt_entity)
-        source_id = edge_data.get('source_id') if edge_data else None
-
+        edge_data = await self.chunk_entity_relation_graph.get_edge(
+            src_entity, tgt_entity
+        )
+        source_id = edge_data.get("source_id") if edge_data else None
+
         result = {
             "src_entity": src_entity,
             "tgt_entity": tgt_entity,
             "source_id": source_id,
             "graph_data": edge_data,
         }
-
+
         # Optional: Get vector database information
         if include_vector_data:
             rel_id = compute_mdhash_id(src_entity + tgt_entity, prefix="rel-")
             vector_data = self.relationships_vdb._client.get([rel_id])
             result["vector_data"] = vector_data[0] if vector_data else None
-
+
         return result

-    def get_relation_info_sync(self, src_entity: str, tgt_entity: str, include_vector_data: bool = False):
+    def get_relation_info_sync(
+        self, src_entity: str, tgt_entity: str, include_vector_data: bool = False
+    ):
         """Synchronous version of getting relationship information
-
+
         Args:
             src_entity: Source entity name (no need for quotes)
             tgt_entity: Target entity name (no need for quotes)
@@ -982,8 +1019,10 @@ class LightRAG:
         """
         try:
             import tracemalloc
+
             tracemalloc.start()
-            return asyncio.run(self.get_relation_info(src_entity, tgt_entity, include_vector_data))
+            return asyncio.run(
+                self.get_relation_info(src_entity, tgt_entity, include_vector_data)
+            )
         finally:
             tracemalloc.stop()
-
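For orientation, a hedged usage sketch of the LightRAG methods touched above; the constructor argument, entity names, and the document id are illustrative placeholders, not values taken from this commit:

import asyncio
from lightrag import LightRAG

rag = LightRAG(working_dir="./rag_storage")  # assumed minimal setup

# Delete a document together with its chunks and any graph entries
# whose only remaining source was one of those chunks.
asyncio.run(rag.adelete_by_doc_id("doc-xxxxxxxx"))  # placeholder doc id

# Inspect an entity / relationship; names are upper-cased and quoted internally,
# so plain names can be passed in.
entity_info = rag.get_entity_info_sync("Tesla", include_vector_data=True)
relation_info = rag.get_relation_info_sync("Tesla", "Elon Musk")
print(entity_info["source_id"], relation_info["graph_data"])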
lightrag/storage.py CHANGED
@@ -188,14 +188,18 @@ class NanoVectorDBStorage(BaseVectorStorage):
         """
         try:
             self._client.delete(ids)
-            logger.info(f"Successfully deleted {len(ids)} vectors from {self.namespace}")
+            logger.info(
+                f"Successfully deleted {len(ids)} vectors from {self.namespace}"
+            )
         except Exception as e:
             logger.error(f"Error while deleting vectors from {self.namespace}: {e}")

     async def delete_entity(self, entity_name: str):
         try:
             entity_id = compute_mdhash_id(entity_name, prefix="ent-")
-            logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
+            logger.debug(
+                f"Attempting to delete entity {entity_name} with ID {entity_id}"
+            )
             # Check if the entity exists
             if self._client.get([entity_id]):
                 await self.delete([entity_id])
@@ -208,15 +212,18 @@ class NanoVectorDBStorage(BaseVectorStorage):
     async def delete_entity_relation(self, entity_name: str):
         try:
             relations = [
-                dp for dp in self.client_storage["data"]
+                dp
+                for dp in self.client_storage["data"]
                 if dp["src_id"] == entity_name or dp["tgt_id"] == entity_name
             ]
             logger.debug(f"Found {len(relations)} relations for entity {entity_name}")
             ids_to_delete = [relation["__id__"] for relation in relations]
-
+
             if ids_to_delete:
                 await self.delete(ids_to_delete)
-                logger.debug(f"Deleted {len(ids_to_delete)} relations for {entity_name}")
+                logger.debug(
+                    f"Deleted {len(ids_to_delete)} relations for {entity_name}"
+                )
             else:
                 logger.debug(f"No relations found for entity {entity_name}")
         except Exception as e:
@@ -446,4 +453,4 @@ class JsonDocStatusStorage(DocStatusStorage):
         """Delete document status by IDs"""
         for doc_id in doc_ids:
             self._data.pop(doc_id, None)
-        await self.index_done_callback()
+        await self.index_done_callback()
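The NanoVectorDBStorage helpers above are normally driven by LightRAG's own deletion path; below is a small sketch of calling them through an existing instance. The quoted, upper-cased name format mirrors what lightrag.py uses and is assumed to be what the vector stores expect:

import asyncio

async def drop_entity(rag, name: str) -> None:
    entity_name = f'"{name.upper()}"'  # same normalization as in lightrag.py above
    # Remove the entity vector, then any relation vectors that reference it.
    await rag.entities_vdb.delete_entity(entity_name)
    await rag.relationships_vdb.delete_entity_relation(entity_name)

# asyncio.run(drop_entity(rag, "Tesla"))  # 'rag' is a LightRAG instance as in the earlier sketch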