jin committed on
Commit
ce83057
·
2 Parent(s): 1298c66 776de33

add Oracle support

Browse files
.gitignore CHANGED
@@ -11,3 +11,4 @@ neo4jWorkDir/
11
  ignore_this.txt
12
  .venv/
13
  *.ignore.*
 
 
11
  ignore_this.txt
12
  .venv/
13
  *.ignore.*
14
+ .ruff_cache/
README.md CHANGED
@@ -8,7 +8,7 @@
8
  <a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
9
  <a href='https://youtu.be/oageL-1I0GE'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
10
  <a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/arXiv-2410.05779-b31b1b'></a>
11
- <a href='https://discord.gg/rdE8YVPm'><img src='https://discordapp.com/api/guilds/1296348098003734629/widget.png?style=shield'></a>
12
  </p>
13
  <p>
14
  <img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
@@ -22,7 +22,8 @@ This repository hosts the code of LightRAG. The structure of this code is based
22
  </div>
23
 
24
  ## 🎉 News
25
- - [x] [2024.11.11]🎯📢You can [use Oracle Database 23ai for all storage types (kv/vector/graph)](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_oracle_demo.py) now.
 
26
  - [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
27
  - [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
28
  - [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
@@ -319,6 +320,23 @@ with open("./newText.txt") as f:
319
  rag.insert(f.read())
320
  ```
321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  ### Multi-file Type Support
323
 
324
  The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.
 
8
  <a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
9
  <a href='https://youtu.be/oageL-1I0GE'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
10
  <a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/arXiv-2410.05779-b31b1b'></a>
11
+ <a href='https://discord.gg/yF2MmDJyGJ'><img src='https://discordapp.com/api/guilds/1296348098003734629/widget.png?style=shield'></a>
12
  </p>
13
  <p>
14
  <img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
 
22
  </div>
23
 
24
  ## 🎉 News
25
+ - [x] [2024.11.12]🎯📢You can [use Oracle Database 23ai for all storage types (kv/vector/graph)](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_oracle_demo.py) now.
26
+ - [x] [2024.11.11]🎯📢LightRAG now supports [deleting entities by their names](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete-entity).
27
  - [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
28
  - [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
29
  - [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
 
320
  rag.insert(f.read())
321
  ```
322
 
323
+ ### Delete Entity
324
+
325
+ ```python
326
+ # Delete Entity: Deleting entities by their names
327
+ rag = LightRAG(
328
+ working_dir=WORKING_DIR,
329
+ llm_model_func=llm_model_func,
330
+ embedding_func=EmbeddingFunc(
331
+ embedding_dim=embedding_dimension,
332
+ max_token_size=8192,
333
+ func=embedding_func,
334
+ ),
335
+ )
336
+
337
+ rag.delete_by_entity("Project Gutenberg")
338
+ ```
339
+
340
  ### Multi-file Type Support
341
 
342
  The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.
lightrag/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
2
 
3
- __version__ = "0.0.9"
4
  __author__ = "Zirui Guo"
5
  __url__ = "https://github.com/HKUDS/LightRAG"
 
1
  from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
2
 
3
+ __version__ = "1.0.0"
4
  __author__ = "Zirui Guo"
5
  __url__ = "https://github.com/HKUDS/LightRAG"
lightrag/base.py CHANGED
@@ -118,7 +118,7 @@ class BaseGraphStorage(StorageNameSpace):
118
  ):
119
  raise NotImplementedError
120
 
121
- async def clustering(self, algorithm: str):
122
  raise NotImplementedError
123
 
124
  async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
 
118
  ):
119
  raise NotImplementedError
120
 
121
+ async def delete_node(self, node_id: str):
122
  raise NotImplementedError
123
 
124
  async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
lightrag/kg/oracle_impl.py CHANGED
@@ -592,7 +592,9 @@ TABLES = {
592
  workspace varchar(1024),
593
  doc_name varchar(1024),
594
  content CLOB,
595
- meta JSON
 
 
596
  )"""},
597
 
598
  "LIGHTRAG_DOC_CHUNKS":
@@ -603,7 +605,9 @@ TABLES = {
603
  chunk_order_index NUMBER,
604
  tokens NUMBER,
605
  content CLOB,
606
- content_vector VECTOR
 
 
607
  )"""},
608
 
609
  "LIGHTRAG_GRAPH_NODES":
@@ -615,7 +619,9 @@ TABLES = {
615
  description CLOB,
616
  source_chunk_id varchar(256),
617
  content CLOB,
618
- content_vector VECTOR
 
 
619
  )"""},
620
  "LIGHTRAG_GRAPH_EDGES":
621
  {"ddl":"""CREATE TABLE LIGHTRAG_GRAPH_EDGES (
@@ -628,13 +634,18 @@ TABLES = {
628
  description CLOB,
629
  source_chunk_id varchar(256),
630
  content CLOB,
631
- content_vector VECTOR
 
 
632
  )"""},
633
  "LIGHTRAG_LLM_CACHE":
634
  {"ddl":"""CREATE TABLE LIGHTRAG_LLM_CACHE (
635
  id varchar(256) PRIMARY KEY,
 
636
  return clob,
637
- model varchar(1024)
 
 
638
  )"""},
639
 
640
  "LIGHTRAG_GRAPH":
 
592
  workspace varchar(1024),
593
  doc_name varchar(1024),
594
  content CLOB,
595
+ meta JSON,
596
+ createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
597
+ updatetime TIMESTAMP DEFAULT NULL
598
  )"""},
599
 
600
  "LIGHTRAG_DOC_CHUNKS":
 
605
  chunk_order_index NUMBER,
606
  tokens NUMBER,
607
  content CLOB,
608
+ content_vector VECTOR,
609
+ createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
610
+ updatetime TIMESTAMP DEFAULT NULL
611
  )"""},
612
 
613
  "LIGHTRAG_GRAPH_NODES":
 
619
  description CLOB,
620
  source_chunk_id varchar(256),
621
  content CLOB,
622
+ content_vector VECTOR,
623
+ createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
624
+ updatetime TIMESTAMP DEFAULT NULL
625
  )"""},
626
  "LIGHTRAG_GRAPH_EDGES":
627
  {"ddl":"""CREATE TABLE LIGHTRAG_GRAPH_EDGES (
 
634
  description CLOB,
635
  source_chunk_id varchar(256),
636
  content CLOB,
637
+ content_vector VECTOR,
638
+ createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
639
+ updatetime TIMESTAMP DEFAULT NULL
640
  )"""},
641
  "LIGHTRAG_LLM_CACHE":
642
  {"ddl":"""CREATE TABLE LIGHTRAG_LLM_CACHE (
643
  id varchar(256) PRIMARY KEY,
644
+ send clob,
645
  return clob,
646
+ model varchar(1024),
647
+ createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
648
+ updatetime TIMESTAMP DEFAULT NULL
649
  )"""},
650
 
651
  "LIGHTRAG_GRAPH":
lightrag/lightrag.py CHANGED
@@ -351,3 +351,34 @@ class LightRAG:
351
  continue
352
  tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
353
  await asyncio.gather(*tasks)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  continue
352
  tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
353
  await asyncio.gather(*tasks)
354
+
355
+ def delete_by_entity(self, entity_name: str):
356
+ loop = always_get_an_event_loop()
357
+ return loop.run_until_complete(self.adelete_by_entity(entity_name))
358
+
359
+ async def adelete_by_entity(self, entity_name: str):
360
+ entity_name = f'"{entity_name.upper()}"'
361
+
362
+ try:
363
+ await self.entities_vdb.delete_entity(entity_name)
364
+ await self.relationships_vdb.delete_relation(entity_name)
365
+ await self.chunk_entity_relation_graph.delete_node(entity_name)
366
+
367
+ logger.info(
368
+ f"Entity '{entity_name}' and its relationships have been deleted."
369
+ )
370
+ await self._delete_by_entity_done()
371
+ except Exception as e:
372
+ logger.error(f"Error while deleting entity '{entity_name}': {e}")
373
+
374
+ async def _delete_by_entity_done(self):
375
+ tasks = []
376
+ for storage_inst in [
377
+ self.entities_vdb,
378
+ self.relationships_vdb,
379
+ self.chunk_entity_relation_graph,
380
+ ]:
381
+ if storage_inst is None:
382
+ continue
383
+ tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
384
+ await asyncio.gather(*tasks)
lightrag/storage.py CHANGED
@@ -7,7 +7,13 @@ import networkx as nx
7
  import numpy as np
8
  from nano_vectordb import NanoVectorDB
9
 
10
- from .utils import load_json, logger, write_json
 
 
 
 
 
 
11
  from .base import (
12
  BaseGraphStorage,
13
  BaseKVStorage,
@@ -111,6 +117,43 @@ class NanoVectorDBStorage(BaseVectorStorage):
111
  ]
112
  return results
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  async def index_done_callback(self):
115
  self._client.save()
116
 
@@ -228,6 +271,18 @@ class NetworkXStorage(BaseGraphStorage):
228
  ):
229
  self._graph.add_edge(source_node_id, target_node_id, **edge_data)
230
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
232
  if algorithm not in self._node_embed_algorithms:
233
  raise ValueError(f"Node embedding algorithm {algorithm} not supported")
 
7
  import numpy as np
8
  from nano_vectordb import NanoVectorDB
9
 
10
+ from .utils import (
11
+ logger,
12
+ load_json,
13
+ write_json,
14
+ compute_mdhash_id,
15
+ )
16
+
17
  from .base import (
18
  BaseGraphStorage,
19
  BaseKVStorage,
 
117
  ]
118
  return results
119
 
120
+ @property
121
+ def client_storage(self):
122
+ return getattr(self._client, "_NanoVectorDB__storage")
123
+
124
+ async def delete_entity(self, entity_name: str):
125
+ try:
126
+ entity_id = [compute_mdhash_id(entity_name, prefix="ent-")]
127
+
128
+ if self._client.get(entity_id):
129
+ self._client.delete(entity_id)
130
+ logger.info(f"Entity {entity_name} have been deleted.")
131
+ else:
132
+ logger.info(f"No entity found with name {entity_name}.")
133
+ except Exception as e:
134
+ logger.error(f"Error while deleting entity {entity_name}: {e}")
135
+
136
+ async def delete_relation(self, entity_name: str):
137
+ try:
138
+ relations = [
139
+ dp
140
+ for dp in self.client_storage["data"]
141
+ if dp["src_id"] == entity_name or dp["tgt_id"] == entity_name
142
+ ]
143
+ ids_to_delete = [relation["__id__"] for relation in relations]
144
+
145
+ if ids_to_delete:
146
+ self._client.delete(ids_to_delete)
147
+ logger.info(
148
+ f"All relations related to entity {entity_name} have been deleted."
149
+ )
150
+ else:
151
+ logger.info(f"No relations found for entity {entity_name}.")
152
+ except Exception as e:
153
+ logger.error(
154
+ f"Error while deleting relations for entity {entity_name}: {e}"
155
+ )
156
+
157
  async def index_done_callback(self):
158
  self._client.save()
159
 
 
271
  ):
272
  self._graph.add_edge(source_node_id, target_node_id, **edge_data)
273
 
274
+ async def delete_node(self, node_id: str):
275
+ """
276
+ Delete a node from the graph based on the specified node_id.
277
+
278
+ :param node_id: The node_id to delete
279
+ """
280
+ if self._graph.has_node(node_id):
281
+ self._graph.remove_node(node_id)
282
+ logger.info(f"Node {node_id} deleted from the graph.")
283
+ else:
284
+ logger.warning(f"Node {node_id} not found in the graph for deletion.")
285
+
286
  async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
287
  if algorithm not in self._node_embed_algorithms:
288
  raise ValueError(f"Node embedding algorithm {algorithm} not supported")