LarFii committed on
Commit
607159b
·
1 Parent(s): f020a9c

Add a feature that allows modifying nodes and relationships.

Browse files
Files changed (2) hide show
  1. README.md +76 -0
  2. lightrag/lightrag.py +479 -13
README.md CHANGED
@@ -750,6 +750,82 @@ rag.delete_by_entity("Project Gutenberg")
750
  rag.delete_by_doc_id("doc_id")
751
  ```
752
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
753
  ## Cache
754
 
755
  <details>
 
750
  rag.delete_by_doc_id("doc_id")
751
  ```
752
 
753
+ ## Edit Entities and Relations
754
+
755
+ LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
756
+
757
+ ### Create Entities and Relations
758
+
759
+ ```python
760
+ import asyncio
761
+ from lightrag import LightRAG
762
+
763
+ # Initialize LightRAG
764
+ rag = LightRAG(
765
+ working_dir="your_working_dir",
766
+ embedding_func=your_embedding_function,
767
+ llm_model_func=your_llm_function
768
+ )
769
+
770
+ # Create new entity
771
+ entity = rag.create_entity("Google", {
772
+ "description": "Google is a multinational technology company specializing in internet-related services and products.",
773
+ "entity_type": "company"
774
+ })
775
+
776
+ # Create another entity
777
+ product = rag.create_entity("Gmail", {
778
+ "description": "Gmail is an email service developed by Google.",
779
+ "entity_type": "product"
780
+ })
781
+
782
+ # Create relation between entities
783
+ relation = rag.create_relation("Google", "Gmail", {
784
+ "description": "Google develops and operates Gmail.",
785
+ "keywords": "develops operates service",
786
+ "weight": 2.0
787
+ })
788
+ ```
789
+
790
+ ### Edit Entities and Relations
791
+
792
+ ```python
793
+ # Edit an existing entity
794
+ updated_entity = rag.edit_entity("Google", {
795
+ "description": "Google is a subsidiary of Alphabet Inc., founded in 1998.",
796
+ "entity_type": "tech_company"
797
+ })
798
+
799
+ # Rename an entity (with all its relationships properly migrated)
800
+ renamed_entity = rag.edit_entity("Gmail", {
801
+ "entity_name": "Google Mail",
802
+ "description": "Google Mail (formerly Gmail) is an email service."
803
+ })
804
+
805
+ # Edit a relation between entities
806
+ updated_relation = rag.edit_relation("Google", "Google Mail", {
807
+ "description": "Google created and maintains Google Mail service.",
808
+ "keywords": "creates maintains email service",
809
+ "weight": 3.0
810
+ })
811
+ ```
812
+
813
+ All operations are available in both synchronous and asynchronous versions. The asynchronous versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`).
814
+
815
+ #### Entity Operations
816
+
817
+ - **create_entity**: Creates a new entity with specified attributes
818
+ - **edit_entity**: Updates an existing entity's attributes or renames it
819
+ - **delete_entity**: Removes an entity and all its relationships
820
+
821
+ #### Relation Operations
822
+
823
+ - **create_relation**: Creates a new relation between existing entities
824
+ - **edit_relation**: Updates an existing relation's attributes
825
+ - **delete_relation**: Removes a relation between entities
826
+
827
+ These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent.
828
+
829
  ## Cache
830
 
831
  <details>
lightrag/lightrag.py CHANGED
@@ -967,7 +967,7 @@ class LightRAG:
967
  # Insert chunks into vector storage
968
  all_chunks_data: dict[str, dict[str, str]] = {}
969
  chunk_to_source_map: dict[str, str] = {}
970
- for chunk_data in custom_kg.get("chunks", {}):
971
  chunk_content = self.clean_text(chunk_data["content"])
972
  source_id = chunk_data["source_id"]
973
  tokens = len(
@@ -997,9 +997,10 @@ class LightRAG:
997
  update_storage = True
998
 
999
  if all_chunks_data:
1000
- await self.chunks_vdb.upsert(all_chunks_data)
1001
- if all_chunks_data:
1002
- await self.text_chunks.upsert(all_chunks_data)
 
1003
 
1004
  # Insert entities into knowledge graph
1005
  all_entities_data: list[dict[str, str]] = []
@@ -1007,7 +1008,6 @@ class LightRAG:
1007
  entity_name = entity_data["entity_name"]
1008
  entity_type = entity_data.get("entity_type", "UNKNOWN")
1009
  description = entity_data.get("description", "No description provided")
1010
- # source_id = entity_data["source_id"]
1011
  source_chunk_id = entity_data.get("source_id", "UNKNOWN")
1012
  source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
1013
 
@@ -1039,7 +1039,6 @@ class LightRAG:
1039
  description = relationship_data["description"]
1040
  keywords = relationship_data["keywords"]
1041
  weight = relationship_data.get("weight", 1.0)
1042
- # source_id = relationship_data["source_id"]
1043
  source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
1044
  source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
1045
 
@@ -1079,34 +1078,43 @@ class LightRAG:
1079
  "tgt_id": tgt_id,
1080
  "description": description,
1081
  "keywords": keywords,
 
 
1082
  }
1083
  all_relationships_data.append(edge_data)
1084
  update_storage = True
1085
 
1086
- # Insert entities into vector storage if needed
1087
  data_for_vdb = {
1088
  compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
1089
- "content": dp["entity_name"] + dp["description"],
1090
  "entity_name": dp["entity_name"],
 
 
 
1091
  }
1092
  for dp in all_entities_data
1093
  }
1094
  await self.entities_vdb.upsert(data_for_vdb)
1095
 
1096
- # Insert relationships into vector storage if needed
1097
  data_for_vdb = {
1098
  compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
1099
  "src_id": dp["src_id"],
1100
  "tgt_id": dp["tgt_id"],
1101
- "content": dp["keywords"]
1102
- + dp["src_id"]
1103
- + dp["tgt_id"]
1104
- + dp["description"],
 
1105
  }
1106
  for dp in all_relationships_data
1107
  }
1108
  await self.relationships_vdb.upsert(data_for_vdb)
1109
 
 
 
 
1110
  finally:
1111
  if update_storage:
1112
  await self._insert_done()
@@ -1759,3 +1767,461 @@ class LightRAG:
1759
  def clear_cache(self, modes: list[str] | None = None) -> None:
1760
  """Synchronous version of aclear_cache."""
1761
  return always_get_an_event_loop().run_until_complete(self.aclear_cache(modes))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
967
  # Insert chunks into vector storage
968
  all_chunks_data: dict[str, dict[str, str]] = {}
969
  chunk_to_source_map: dict[str, str] = {}
970
+ for chunk_data in custom_kg.get("chunks", []):
971
  chunk_content = self.clean_text(chunk_data["content"])
972
  source_id = chunk_data["source_id"]
973
  tokens = len(
 
997
  update_storage = True
998
 
999
  if all_chunks_data:
1000
+ await asyncio.gather(
1001
+ self.chunks_vdb.upsert(all_chunks_data),
1002
+ self.text_chunks.upsert(all_chunks_data),
1003
+ )
1004
 
1005
  # Insert entities into knowledge graph
1006
  all_entities_data: list[dict[str, str]] = []
 
1008
  entity_name = entity_data["entity_name"]
1009
  entity_type = entity_data.get("entity_type", "UNKNOWN")
1010
  description = entity_data.get("description", "No description provided")
 
1011
  source_chunk_id = entity_data.get("source_id", "UNKNOWN")
1012
  source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
1013
 
 
1039
  description = relationship_data["description"]
1040
  keywords = relationship_data["keywords"]
1041
  weight = relationship_data.get("weight", 1.0)
 
1042
  source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
1043
  source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
1044
 
 
1078
  "tgt_id": tgt_id,
1079
  "description": description,
1080
  "keywords": keywords,
1081
+ "source_id": source_id,
1082
+ "weight": weight,
1083
  }
1084
  all_relationships_data.append(edge_data)
1085
  update_storage = True
1086
 
1087
+ # Insert entities into vector storage with consistent format
1088
  data_for_vdb = {
1089
  compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
1090
+ "content": dp["entity_name"] + "\n" + dp["description"],
1091
  "entity_name": dp["entity_name"],
1092
+ "source_id": dp["source_id"],
1093
+ "description": dp["description"],
1094
+ "entity_type": dp["entity_type"],
1095
  }
1096
  for dp in all_entities_data
1097
  }
1098
  await self.entities_vdb.upsert(data_for_vdb)
1099
 
1100
+ # Insert relationships into vector storage with consistent format
1101
  data_for_vdb = {
1102
  compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
1103
  "src_id": dp["src_id"],
1104
  "tgt_id": dp["tgt_id"],
1105
+ "source_id": dp["source_id"],
1106
+ "content": f"{dp['keywords']}\t{dp['src_id']}\n{dp['tgt_id']}\n{dp['description']}",
1107
+ "keywords": dp["keywords"],
1108
+ "description": dp["description"],
1109
+ "weight": dp["weight"],
1110
  }
1111
  for dp in all_relationships_data
1112
  }
1113
  await self.relationships_vdb.upsert(data_for_vdb)
1114
 
1115
+ except Exception as e:
1116
+ logger.error(f"Error in ainsert_custom_kg: {e}")
1117
+ raise
1118
  finally:
1119
  if update_storage:
1120
  await self._insert_done()
 
1767
  def clear_cache(self, modes: list[str] | None = None) -> None:
1768
  """Synchronous version of aclear_cache."""
1769
  return always_get_an_event_loop().run_until_complete(self.aclear_cache(modes))
1770
+
1771
+ async def aedit_entity(
1772
+ self, entity_name: str, updated_data: dict[str, str], allow_rename: bool = True
1773
+ ) -> dict[str, Any]:
1774
+ """Asynchronously edit entity information.
1775
+
1776
+ Updates entity information in the knowledge graph and re-embeds the entity in the vector database.
1777
+
1778
+ Args:
1779
+ entity_name: Name of the entity to edit
1780
+ updated_data: Dictionary containing updated attributes, e.g. {"description": "new description", "entity_type": "new type"}
1781
+ allow_rename: Whether to allow entity renaming, defaults to True
1782
+
1783
+ Returns:
1784
+ Dictionary containing updated entity information
1785
+ """
1786
+ try:
1787
+ # 1. Get current entity information
1788
+ node_data = await self.chunk_entity_relation_graph.get_node(entity_name)
1789
+ if not node_data:
1790
+ raise ValueError(f"Entity '{entity_name}' does not exist")
1791
+
1792
+ # Check if entity is being renamed
1793
+ new_entity_name = updated_data.get("entity_name", entity_name)
1794
+ is_renaming = new_entity_name != entity_name
1795
+
1796
+ # If renaming, check if new name already exists
1797
+ if is_renaming:
1798
+ if not allow_rename:
1799
+ raise ValueError(
1800
+ "Entity renaming is not allowed. Set allow_rename=True to enable this feature"
1801
+ )
1802
+
1803
+ existing_node = await self.chunk_entity_relation_graph.get_node(
1804
+ new_entity_name
1805
+ )
1806
+ if existing_node:
1807
+ raise ValueError(
1808
+ f"Entity name '{new_entity_name}' already exists, cannot rename"
1809
+ )
1810
+
1811
+ # 2. Update entity information in the graph
1812
+ new_node_data = {**node_data, **updated_data}
1813
+ if "entity_name" in new_node_data:
1814
+ del new_node_data[
1815
+ "entity_name"
1816
+ ] # Node data should not contain entity_name field
1817
+
1818
+ # If renaming entity
1819
+ if is_renaming:
1820
+ logger.info(f"Renaming entity '{entity_name}' to '{new_entity_name}'")
1821
+
1822
+ # Create new entity
1823
+ await self.chunk_entity_relation_graph.upsert_node(
1824
+ new_entity_name, new_node_data
1825
+ )
1826
+
1827
+ # Get all edges related to the original entity
1828
+ edges = await self.chunk_entity_relation_graph.get_node_edges(
1829
+ entity_name
1830
+ )
1831
+ if edges:
1832
+ # Recreate edges for the new entity
1833
+ for source, target in edges:
1834
+ edge_data = await self.chunk_entity_relation_graph.get_edge(
1835
+ source, target
1836
+ )
1837
+ if edge_data:
1838
+ if source == entity_name:
1839
+ await self.chunk_entity_relation_graph.upsert_edge(
1840
+ new_entity_name, target, edge_data
1841
+ )
1842
+ else: # target == entity_name
1843
+ await self.chunk_entity_relation_graph.upsert_edge(
1844
+ source, new_entity_name, edge_data
1845
+ )
1846
+
1847
+ # Delete old entity
1848
+ await self.chunk_entity_relation_graph.delete_node(entity_name)
1849
+
1850
+ # Delete old entity record from vector database
1851
+ old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
1852
+ await self.entities_vdb.delete([old_entity_id])
1853
+
1854
+ # Update working entity name to new name
1855
+ entity_name = new_entity_name
1856
+ else:
1857
+ # If not renaming, directly update node data
1858
+ await self.chunk_entity_relation_graph.upsert_node(
1859
+ entity_name, new_node_data
1860
+ )
1861
+
1862
+ # 3. Recalculate entity's vector representation and update vector database
1863
+ description = new_node_data.get("description", "")
1864
+ source_id = new_node_data.get("source_id", "")
1865
+ entity_type = new_node_data.get("entity_type", "")
1866
+ content = entity_name + "\n" + description
1867
+
1868
+ # Calculate entity ID
1869
+ entity_id = compute_mdhash_id(entity_name, prefix="ent-")
1870
+
1871
+ # Prepare data for vector database update
1872
+ entity_data = {
1873
+ entity_id: {
1874
+ "content": content,
1875
+ "entity_name": entity_name,
1876
+ "source_id": source_id,
1877
+ "description": description,
1878
+ "entity_type": entity_type,
1879
+ }
1880
+ }
1881
+
1882
+ # Update vector database
1883
+ await self.entities_vdb.upsert(entity_data)
1884
+
1885
+ # 4. Save changes
1886
+ await self._edit_entity_done()
1887
+
1888
+ logger.info(f"Entity '{entity_name}' successfully updated")
1889
+ return await self.get_entity_info(entity_name, include_vector_data=True)
1890
+ except Exception as e:
1891
+ logger.error(f"Error while editing entity '{entity_name}': {e}")
1892
+ raise
1893
+
1894
+ def edit_entity(
1895
+ self, entity_name: str, updated_data: dict[str, str], allow_rename: bool = True
1896
+ ) -> dict[str, Any]:
1897
+ """Synchronously edit entity information.
1898
+
1899
+ Updates entity information in the knowledge graph and re-embeds the entity in the vector database.
1900
+
1901
+ Args:
1902
+ entity_name: Name of the entity to edit
1903
+ updated_data: Dictionary containing updated attributes, e.g. {"description": "new description", "entity_type": "new type"}
1904
+ allow_rename: Whether to allow entity renaming, defaults to True
1905
+
1906
+ Returns:
1907
+ Dictionary containing updated entity information
1908
+ """
1909
+ loop = always_get_an_event_loop()
1910
+ return loop.run_until_complete(
1911
+ self.aedit_entity(entity_name, updated_data, allow_rename)
1912
+ )
1913
+
1914
+ async def _edit_entity_done(self) -> None:
1915
+ """Callback after entity editing is complete, ensures updates are persisted"""
1916
+ await asyncio.gather(
1917
+ *[
1918
+ cast(StorageNameSpace, storage_inst).index_done_callback()
1919
+ for storage_inst in [ # type: ignore
1920
+ self.entities_vdb,
1921
+ self.chunk_entity_relation_graph,
1922
+ ]
1923
+ ]
1924
+ )
1925
+
1926
+ async def aedit_relation(
1927
+ self, source_entity: str, target_entity: str, updated_data: dict[str, Any]
1928
+ ) -> dict[str, Any]:
1929
+ """Asynchronously edit relation information.
1930
+
1931
+ Updates relation (edge) information in the knowledge graph and re-embeds the relation in the vector database.
1932
+
1933
+ Args:
1934
+ source_entity: Name of the source entity
1935
+ target_entity: Name of the target entity
1936
+ updated_data: Dictionary containing updated attributes, e.g. {"description": "new description", "keywords": "new keywords"}
1937
+
1938
+ Returns:
1939
+ Dictionary containing updated relation information
1940
+ """
1941
+ try:
1942
+ # 1. Get current relation information
1943
+ edge_data = await self.chunk_entity_relation_graph.get_edge(
1944
+ source_entity, target_entity
1945
+ )
1946
+ if not edge_data:
1947
+ raise ValueError(
1948
+ f"Relation from '{source_entity}' to '{target_entity}' does not exist"
1949
+ )
1950
+
1951
+ # 2. Update relation information in the graph
1952
+ new_edge_data = {**edge_data, **updated_data}
1953
+ await self.chunk_entity_relation_graph.upsert_edge(
1954
+ source_entity, target_entity, new_edge_data
1955
+ )
1956
+
1957
+ # 3. Recalculate relation's vector representation and update vector database
1958
+ description = new_edge_data.get("description", "")
1959
+ keywords = new_edge_data.get("keywords", "")
1960
+ source_id = new_edge_data.get("source_id", "")
1961
+ weight = float(new_edge_data.get("weight", 1.0))
1962
+
1963
+ # Create content for embedding
1964
+ content = f"{keywords}\t{source_entity}\n{target_entity}\n{description}"
1965
+
1966
+ # Calculate relation ID
1967
+ relation_id = compute_mdhash_id(
1968
+ source_entity + target_entity, prefix="rel-"
1969
+ )
1970
+
1971
+ # Prepare data for vector database update
1972
+ relation_data = {
1973
+ relation_id: {
1974
+ "content": content,
1975
+ "src_id": source_entity,
1976
+ "tgt_id": target_entity,
1977
+ "source_id": source_id,
1978
+ "description": description,
1979
+ "keywords": keywords,
1980
+ "weight": weight,
1981
+ }
1982
+ }
1983
+
1984
+ # Update vector database
1985
+ await self.relationships_vdb.upsert(relation_data)
1986
+
1987
+ # 4. Save changes
1988
+ await self._edit_relation_done()
1989
+
1990
+ logger.info(
1991
+ f"Relation from '{source_entity}' to '{target_entity}' successfully updated"
1992
+ )
1993
+ return await self.get_relation_info(
1994
+ source_entity, target_entity, include_vector_data=True
1995
+ )
1996
+ except Exception as e:
1997
+ logger.error(
1998
+ f"Error while editing relation from '{source_entity}' to '{target_entity}': {e}"
1999
+ )
2000
+ raise
2001
+
2002
+ def edit_relation(
2003
+ self, source_entity: str, target_entity: str, updated_data: dict[str, Any]
2004
+ ) -> dict[str, Any]:
2005
+ """Synchronously edit relation information.
2006
+
2007
+ Updates relation (edge) information in the knowledge graph and re-embeds the relation in the vector database.
2008
+
2009
+ Args:
2010
+ source_entity: Name of the source entity
2011
+ target_entity: Name of the target entity
2012
+ updated_data: Dictionary containing updated attributes, e.g. {"description": "new description", "keywords": "keywords"}
2013
+
2014
+ Returns:
2015
+ Dictionary containing updated relation information
2016
+ """
2017
+ loop = always_get_an_event_loop()
2018
+ return loop.run_until_complete(
2019
+ self.aedit_relation(source_entity, target_entity, updated_data)
2020
+ )
2021
+
2022
+ async def _edit_relation_done(self) -> None:
2023
+ """Callback after relation editing is complete, ensures updates are persisted"""
2024
+ await asyncio.gather(
2025
+ *[
2026
+ cast(StorageNameSpace, storage_inst).index_done_callback()
2027
+ for storage_inst in [ # type: ignore
2028
+ self.relationships_vdb,
2029
+ self.chunk_entity_relation_graph,
2030
+ ]
2031
+ ]
2032
+ )
2033
+
2034
+ async def acreate_entity(
2035
+ self, entity_name: str, entity_data: dict[str, Any]
2036
+ ) -> dict[str, Any]:
2037
+ """Asynchronously create a new entity.
2038
+
2039
+ Creates a new entity in the knowledge graph and adds it to the vector database.
2040
+
2041
+ Args:
2042
+ entity_name: Name of the new entity
2043
+ entity_data: Dictionary containing entity attributes, e.g. {"description": "description", "entity_type": "type"}
2044
+
2045
+ Returns:
2046
+ Dictionary containing created entity information
2047
+ """
2048
+ try:
2049
+ # Check if entity already exists
2050
+ existing_node = await self.chunk_entity_relation_graph.get_node(entity_name)
2051
+ if existing_node:
2052
+ raise ValueError(f"Entity '{entity_name}' already exists")
2053
+
2054
+ # Prepare node data with defaults if missing
2055
+ node_data = {
2056
+ "entity_type": entity_data.get("entity_type", "UNKNOWN"),
2057
+ "description": entity_data.get("description", ""),
2058
+ "source_id": entity_data.get("source_id", "manual"),
2059
+ }
2060
+
2061
+ # Add entity to knowledge graph
2062
+ await self.chunk_entity_relation_graph.upsert_node(entity_name, node_data)
2063
+
2064
+ # Prepare content for entity
2065
+ description = node_data.get("description", "")
2066
+ source_id = node_data.get("source_id", "")
2067
+ entity_type = node_data.get("entity_type", "")
2068
+ content = entity_name + "\n" + description
2069
+
2070
+ # Calculate entity ID
2071
+ entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2072
+
2073
+ # Prepare data for vector database update
2074
+ entity_data_for_vdb = {
2075
+ entity_id: {
2076
+ "content": content,
2077
+ "entity_name": entity_name,
2078
+ "source_id": source_id,
2079
+ "description": description,
2080
+ "entity_type": entity_type,
2081
+ }
2082
+ }
2083
+
2084
+ # Update vector database
2085
+ await self.entities_vdb.upsert(entity_data_for_vdb)
2086
+
2087
+ # Save changes
2088
+ await self._edit_entity_done()
2089
+
2090
+ logger.info(f"Entity '{entity_name}' successfully created")
2091
+ return await self.get_entity_info(entity_name, include_vector_data=True)
2092
+ except Exception as e:
2093
+ logger.error(f"Error while creating entity '{entity_name}': {e}")
2094
+ raise
2095
+
2096
+ def create_entity(
2097
+ self, entity_name: str, entity_data: dict[str, Any]
2098
+ ) -> dict[str, Any]:
2099
+ """Synchronously create a new entity.
2100
+
2101
+ Creates a new entity in the knowledge graph and adds it to the vector database.
2102
+
2103
+ Args:
2104
+ entity_name: Name of the new entity
2105
+ entity_data: Dictionary containing entity attributes, e.g. {"description": "description", "entity_type": "type"}
2106
+
2107
+ Returns:
2108
+ Dictionary containing created entity information
2109
+ """
2110
+ loop = always_get_an_event_loop()
2111
+ return loop.run_until_complete(self.acreate_entity(entity_name, entity_data))
2112
+
2113
+ async def acreate_relation(
2114
+ self, source_entity: str, target_entity: str, relation_data: dict[str, Any]
2115
+ ) -> dict[str, Any]:
2116
+ """Asynchronously create a new relation between entities.
2117
+
2118
+ Creates a new relation (edge) in the knowledge graph and adds it to the vector database.
2119
+
2120
+ Args:
2121
+ source_entity: Name of the source entity
2122
+ target_entity: Name of the target entity
2123
+ relation_data: Dictionary containing relation attributes, e.g. {"description": "description", "keywords": "keywords"}
2124
+
2125
+ Returns:
2126
+ Dictionary containing created relation information
2127
+ """
2128
+ try:
2129
+ # Check if both entities exist
2130
+ source_exists = await self.chunk_entity_relation_graph.has_node(
2131
+ source_entity
2132
+ )
2133
+ target_exists = await self.chunk_entity_relation_graph.has_node(
2134
+ target_entity
2135
+ )
2136
+
2137
+ if not source_exists:
2138
+ raise ValueError(f"Source entity '{source_entity}' does not exist")
2139
+ if not target_exists:
2140
+ raise ValueError(f"Target entity '{target_entity}' does not exist")
2141
+
2142
+ # Check if relation already exists
2143
+ existing_edge = await self.chunk_entity_relation_graph.get_edge(
2144
+ source_entity, target_entity
2145
+ )
2146
+ if existing_edge:
2147
+ raise ValueError(
2148
+ f"Relation from '{source_entity}' to '{target_entity}' already exists"
2149
+ )
2150
+
2151
+ # Prepare edge data with defaults if missing
2152
+ edge_data = {
2153
+ "description": relation_data.get("description", ""),
2154
+ "keywords": relation_data.get("keywords", ""),
2155
+ "source_id": relation_data.get("source_id", "manual"),
2156
+ "weight": float(relation_data.get("weight", 1.0)),
2157
+ }
2158
+
2159
+ # Add relation to knowledge graph
2160
+ await self.chunk_entity_relation_graph.upsert_edge(
2161
+ source_entity, target_entity, edge_data
2162
+ )
2163
+
2164
+ # Prepare content for embedding
2165
+ description = edge_data.get("description", "")
2166
+ keywords = edge_data.get("keywords", "")
2167
+ source_id = edge_data.get("source_id", "")
2168
+ weight = edge_data.get("weight", 1.0)
2169
+
2170
+ # Create content for embedding
2171
+ content = f"{keywords}\t{source_entity}\n{target_entity}\n{description}"
2172
+
2173
+ # Calculate relation ID
2174
+ relation_id = compute_mdhash_id(
2175
+ source_entity + target_entity, prefix="rel-"
2176
+ )
2177
+
2178
+ # Prepare data for vector database update
2179
+ relation_data_for_vdb = {
2180
+ relation_id: {
2181
+ "content": content,
2182
+ "src_id": source_entity,
2183
+ "tgt_id": target_entity,
2184
+ "source_id": source_id,
2185
+ "description": description,
2186
+ "keywords": keywords,
2187
+ "weight": weight,
2188
+ }
2189
+ }
2190
+
2191
+ # Update vector database
2192
+ await self.relationships_vdb.upsert(relation_data_for_vdb)
2193
+
2194
+ # Save changes
2195
+ await self._edit_relation_done()
2196
+
2197
+ logger.info(
2198
+ f"Relation from '{source_entity}' to '{target_entity}' successfully created"
2199
+ )
2200
+ return await self.get_relation_info(
2201
+ source_entity, target_entity, include_vector_data=True
2202
+ )
2203
+ except Exception as e:
2204
+ logger.error(
2205
+ f"Error while creating relation from '{source_entity}' to '{target_entity}': {e}"
2206
+ )
2207
+ raise
2208
+
2209
+ def create_relation(
2210
+ self, source_entity: str, target_entity: str, relation_data: dict[str, Any]
2211
+ ) -> dict[str, Any]:
2212
+ """Synchronously create a new relation between entities.
2213
+
2214
+ Creates a new relation (edge) in the knowledge graph and adds it to the vector database.
2215
+
2216
+ Args:
2217
+ source_entity: Name of the source entity
2218
+ target_entity: Name of the target entity
2219
+ relation_data: Dictionary containing relation attributes, e.g. {"description": "description", "keywords": "keywords"}
2220
+
2221
+ Returns:
2222
+ Dictionary containing created relation information
2223
+ """
2224
+ loop = always_get_an_event_loop()
2225
+ return loop.run_until_complete(
2226
+ self.acreate_relation(source_entity, target_entity, relation_data)
2227
+ )