Spaces:

rm-lht
/

lightrag

Configuration error

App Files Community

jin committed on Nov 12, 2024

Commit

ce83057

2 Parent(s): 1298c66 776de33

add Oracle support

Browse files

Files changed (7) hide show

.gitignore +1 -0
README.md +20 -2
lightrag/__init__.py +1 -1
lightrag/base.py +1 -1
lightrag/kg/oracle_impl.py +16 -5
lightrag/lightrag.py +31 -0
lightrag/storage.py +56 -1

.gitignore CHANGED Viewed

@@ -11,3 +11,4 @@ neo4jWorkDir/
 ignore_this.txt
 .venv/
 *.ignore.*

 ignore_this.txt
 .venv/
 *.ignore.*
+.ruff_cache/

README.md CHANGED Viewed

@@ -8,7 +8,7 @@
         <a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
         <a href='https://youtu.be/oageL-1I0GE'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
         <a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/arXiv-2410.05779-b31b1b'></a>
-        <a href='https://discord.gg/rdE8YVPm'><img src='https://discordapp.com/api/guilds/1296348098003734629/widget.png?style=shield'></a>
     </p>
      <p>
           <img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
@@ -22,7 +22,8 @@ This repository hosts the code of LightRAG. The structure of this code is based
 </div>
 ## 🎉 News
-- [x] [2024.11.11]🎯📢You can [use Oracle Database 23ai for all storage types (kv/vector/graph)](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_oracle_demo.py) now.
 - [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
 - [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
 - [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
@@ -319,6 +320,23 @@ with open("./newText.txt") as f:
     rag.insert(f.read())
 ```
 ### Multi-file Type Support
 The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.

         <a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
         <a href='https://youtu.be/oageL-1I0GE'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
         <a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/arXiv-2410.05779-b31b1b'></a>
+        <a href='https://discord.gg/yF2MmDJyGJ'><img src='https://discordapp.com/api/guilds/1296348098003734629/widget.png?style=shield'></a>
     </p>
      <p>
           <img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
 </div>
 ## 🎉 News
+- [x] [2024.11.12]🎯📢You can [use Oracle Database 23ai for all storage types (kv/vector/graph)](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_oracle_demo.py) now.
+- [x] [2024.11.11]🎯📢LightRAG now supports [deleting entities by their names](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete-entity).
 - [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
 - [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
 - [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
     rag.insert(f.read())
 ```
+### Delete Entity
+```python
+#  Delete Entity: Deleting entities by their names
+rag = LightRAG(
+     working_dir=WORKING_DIR,
+     llm_model_func=llm_model_func,
+     embedding_func=EmbeddingFunc(
+          embedding_dim=embedding_dimension,
+          max_token_size=8192,
+          func=embedding_func,
+     ),
+)
+rag.delete_by_entity("Project Gutenberg")
+```
 ### Multi-file Type Support
 The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.

lightrag/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
-__version__ = "0.0.9"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"

 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
+__version__ = "1.0.0"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"

lightrag/base.py CHANGED Viewed

@@ -118,7 +118,7 @@ class BaseGraphStorage(StorageNameSpace):
     ):
         raise NotImplementedError
-    async def clustering(self, algorithm: str):
         raise NotImplementedError
     async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:

     ):
         raise NotImplementedError
+    async def delete_node(self, node_id: str):
         raise NotImplementedError
     async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:

lightrag/kg/oracle_impl.py CHANGED Viewed

@@ -592,7 +592,9 @@ TABLES = {
                     workspace varchar(1024),
                     doc_name varchar(1024),
                     content CLOB,
-                    meta JSON
                     )"""},
     "LIGHTRAG_DOC_CHUNKS":
@@ -603,7 +605,9 @@ TABLES = {
                     chunk_order_index NUMBER,
                     tokens NUMBER,
                     content CLOB,
-                    content_vector VECTOR
                     )"""},
     "LIGHTRAG_GRAPH_NODES":
@@ -615,7 +619,9 @@ TABLES = {
                     description CLOB,
                     source_chunk_id varchar(256),
                     content CLOB,
-                    content_vector VECTOR
                     )"""},
     "LIGHTRAG_GRAPH_EDGES":
           {"ddl":"""CREATE TABLE LIGHTRAG_GRAPH_EDGES (
@@ -628,13 +634,18 @@ TABLES = {
                     description CLOB,
                     source_chunk_id varchar(256),
                     content CLOB,
-                    content_vector VECTOR
                     )"""},
     "LIGHTRAG_LLM_CACHE":
           {"ddl":"""CREATE TABLE LIGHTRAG_LLM_CACHE (
                     id varchar(256) PRIMARY KEY,
                     return clob,
-                    model varchar(1024)
                     )"""},
     "LIGHTRAG_GRAPH":

                     workspace varchar(1024),
                     doc_name varchar(1024),
                     content CLOB,
+                    meta JSON,
+                    createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    updatetime TIMESTAMP DEFAULT NULL
                     )"""},
     "LIGHTRAG_DOC_CHUNKS":
                     chunk_order_index NUMBER,
                     tokens NUMBER,
                     content CLOB,
+                    content_vector VECTOR,
+                    createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    updatetime TIMESTAMP DEFAULT NULL
                     )"""},
     "LIGHTRAG_GRAPH_NODES":
                     description CLOB,
                     source_chunk_id varchar(256),
                     content CLOB,
+                    content_vector VECTOR,
+                    createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    updatetime TIMESTAMP DEFAULT NULL
                     )"""},
     "LIGHTRAG_GRAPH_EDGES":
           {"ddl":"""CREATE TABLE LIGHTRAG_GRAPH_EDGES (
                     description CLOB,
                     source_chunk_id varchar(256),
                     content CLOB,
+                    content_vector VECTOR,
+                    createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    updatetime TIMESTAMP DEFAULT NULL
                     )"""},
     "LIGHTRAG_LLM_CACHE":
           {"ddl":"""CREATE TABLE LIGHTRAG_LLM_CACHE (
                     id varchar(256) PRIMARY KEY,
+                    send clob,
                     return clob,
+                    model varchar(1024),
+                    createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    updatetime TIMESTAMP DEFAULT NULL
                     )"""},
     "LIGHTRAG_GRAPH":

lightrag/lightrag.py CHANGED Viewed

@@ -351,3 +351,34 @@ class LightRAG:
                 continue
             tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
         await asyncio.gather(*tasks)

                 continue
             tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
         await asyncio.gather(*tasks)
+    def delete_by_entity(self, entity_name: str):
+        loop = always_get_an_event_loop()
+        return loop.run_until_complete(self.adelete_by_entity(entity_name))
+    async def adelete_by_entity(self, entity_name: str):
+        entity_name = f'"{entity_name.upper()}"'
+        try:
+            await self.entities_vdb.delete_entity(entity_name)
+            await self.relationships_vdb.delete_relation(entity_name)
+            await self.chunk_entity_relation_graph.delete_node(entity_name)
+            logger.info(
+                f"Entity '{entity_name}' and its relationships have been deleted."
+            )
+            await self._delete_by_entity_done()
+        except Exception as e:
+            logger.error(f"Error while deleting entity '{entity_name}': {e}")
+    async def _delete_by_entity_done(self):
+        tasks = []
+        for storage_inst in [
+            self.entities_vdb,
+            self.relationships_vdb,
+            self.chunk_entity_relation_graph,
+        ]:
+            if storage_inst is None:
+                continue
+            tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
+        await asyncio.gather(*tasks)

lightrag/storage.py CHANGED Viewed

@@ -7,7 +7,13 @@ import networkx as nx
 import numpy as np
 from nano_vectordb import NanoVectorDB
-from .utils import load_json, logger, write_json
 from .base import (
     BaseGraphStorage,
     BaseKVStorage,
@@ -111,6 +117,43 @@ class NanoVectorDBStorage(BaseVectorStorage):
         ]
         return results
     async def index_done_callback(self):
         self._client.save()
@@ -228,6 +271,18 @@ class NetworkXStorage(BaseGraphStorage):
     ):
         self._graph.add_edge(source_node_id, target_node_id, **edge_data)
     async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
         if algorithm not in self._node_embed_algorithms:
             raise ValueError(f"Node embedding algorithm {algorithm} not supported")

 import numpy as np
 from nano_vectordb import NanoVectorDB
+from .utils import (
+    logger,
+    load_json,
+    write_json,
+    compute_mdhash_id,
+)
 from .base import (
     BaseGraphStorage,
     BaseKVStorage,
         ]
         return results
+    @property
+    def client_storage(self):
+        return getattr(self._client, "_NanoVectorDB__storage")
+    async def delete_entity(self, entity_name: str):
+        try:
+            entity_id = [compute_mdhash_id(entity_name, prefix="ent-")]
+            if self._client.get(entity_id):
+                self._client.delete(entity_id)
+                logger.info(f"Entity {entity_name} have been deleted.")
+            else:
+                logger.info(f"No entity found with name {entity_name}.")
+        except Exception as e:
+            logger.error(f"Error while deleting entity {entity_name}: {e}")
+    async def delete_relation(self, entity_name: str):
+        try:
+            relations = [
+                dp
+                for dp in self.client_storage["data"]
+                if dp["src_id"] == entity_name or dp["tgt_id"] == entity_name
+            ]
+            ids_to_delete = [relation["__id__"] for relation in relations]
+            if ids_to_delete:
+                self._client.delete(ids_to_delete)
+                logger.info(
+                    f"All relations related to entity {entity_name} have been deleted."
+                )
+            else:
+                logger.info(f"No relations found for entity {entity_name}.")
+        except Exception as e:
+            logger.error(
+                f"Error while deleting relations for entity {entity_name}: {e}"
+            )
     async def index_done_callback(self):
         self._client.save()
     ):
         self._graph.add_edge(source_node_id, target_node_id, **edge_data)
+    async def delete_node(self, node_id: str):
+        """
+        Delete a node from the graph based on the specified node_id.
+        :param node_id: The node_id to delete
+        """
+        if self._graph.has_node(node_id):
+            self._graph.remove_node(node_id)
+            logger.info(f"Node {node_id} deleted from the graph.")
+        else:
+            logger.warning(f"Node {node_id} not found in the graph for deletion.")
     async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
         if algorithm not in self._node_embed_algorithms:
             raise ValueError(f"Node embedding algorithm {algorithm} not supported")