add Oracle support
Browse files- .gitignore +1 -0
- README.md +20 -2
- lightrag/__init__.py +1 -1
- lightrag/base.py +1 -1
- lightrag/kg/oracle_impl.py +16 -5
- lightrag/lightrag.py +31 -0
- lightrag/storage.py +56 -1
.gitignore
CHANGED
@@ -11,3 +11,4 @@ neo4jWorkDir/
|
|
11 |
ignore_this.txt
|
12 |
.venv/
|
13 |
*.ignore.*
|
|
|
|
11 |
ignore_this.txt
|
12 |
.venv/
|
13 |
*.ignore.*
|
14 |
+
.ruff_cache/
|
README.md
CHANGED
@@ -8,7 +8,7 @@
|
|
8 |
<a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
|
9 |
<a href='https://youtu.be/oageL-1I0GE'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
|
10 |
<a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/arXiv-2410.05779-b31b1b'></a>
|
11 |
-
<a href='https://discord.gg/
|
12 |
</p>
|
13 |
<p>
|
14 |
<img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
|
@@ -22,7 +22,8 @@ This repository hosts the code of LightRAG. The structure of this code is based
|
|
22 |
</div>
|
23 |
|
24 |
## 🎉 News
|
25 |
-
- [x] [2024.11.
|
|
|
26 |
- [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
|
27 |
- [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
|
28 |
- [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
|
@@ -319,6 +320,23 @@ with open("./newText.txt") as f:
|
|
319 |
rag.insert(f.read())
|
320 |
```
|
321 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
### Multi-file Type Support
|
323 |
|
324 |
The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.
|
|
|
8 |
<a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
|
9 |
<a href='https://youtu.be/oageL-1I0GE'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
|
10 |
<a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/arXiv-2410.05779-b31b1b'></a>
|
11 |
+
<a href='https://discord.gg/yF2MmDJyGJ'><img src='https://discordapp.com/api/guilds/1296348098003734629/widget.png?style=shield'></a>
|
12 |
</p>
|
13 |
<p>
|
14 |
<img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
|
|
|
22 |
</div>
|
23 |
|
24 |
## 🎉 News
|
25 |
+
- [x] [2024.11.12]🎯📢You can [use Oracle Database 23ai for all storage types (kv/vector/graph)](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_oracle_demo.py) now.
|
26 |
+
- [x] [2024.11.11]🎯📢LightRAG now supports [deleting entities by their names](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete-entity).
|
27 |
- [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
|
28 |
- [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
|
29 |
- [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
|
|
|
320 |
rag.insert(f.read())
|
321 |
```
|
322 |
|
323 |
+
### Delete Entity
|
324 |
+
|
325 |
+
```python
|
326 |
+
# Delete Entity: Deleting entities by their names
|
327 |
+
rag = LightRAG(
|
328 |
+
working_dir=WORKING_DIR,
|
329 |
+
llm_model_func=llm_model_func,
|
330 |
+
embedding_func=EmbeddingFunc(
|
331 |
+
embedding_dim=embedding_dimension,
|
332 |
+
max_token_size=8192,
|
333 |
+
func=embedding_func,
|
334 |
+
),
|
335 |
+
)
|
336 |
+
|
337 |
+
rag.delete_by_entity("Project Gutenberg")
|
338 |
+
```
|
339 |
+
|
340 |
### Multi-file Type Support
|
341 |
|
342 |
The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.
|
lightrag/__init__.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
2 |
|
3 |
-
__version__ = "0.0
|
4 |
__author__ = "Zirui Guo"
|
5 |
__url__ = "https://github.com/HKUDS/LightRAG"
|
|
|
1 |
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
2 |
|
3 |
+
__version__ = "1.0.0"
|
4 |
__author__ = "Zirui Guo"
|
5 |
__url__ = "https://github.com/HKUDS/LightRAG"
|
lightrag/base.py
CHANGED
@@ -118,7 +118,7 @@ class BaseGraphStorage(StorageNameSpace):
|
|
118 |
):
|
119 |
raise NotImplementedError
|
120 |
|
121 |
-
async def
|
122 |
raise NotImplementedError
|
123 |
|
124 |
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
|
|
118 |
):
|
119 |
raise NotImplementedError
|
120 |
|
121 |
+
async def delete_node(self, node_id: str):
    """Delete the node identified by ``node_id``.

    This implementation does nothing except raise ``NotImplementedError``.

    :param node_id: identifier of the node to delete
    :raises NotImplementedError: always
    """
    raise NotImplementedError
|
123 |
|
124 |
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
lightrag/kg/oracle_impl.py
CHANGED
@@ -592,7 +592,9 @@ TABLES = {
|
|
592 |
workspace varchar(1024),
|
593 |
doc_name varchar(1024),
|
594 |
content CLOB,
|
595 |
-
meta JSON
|
|
|
|
|
596 |
)"""},
|
597 |
|
598 |
"LIGHTRAG_DOC_CHUNKS":
|
@@ -603,7 +605,9 @@ TABLES = {
|
|
603 |
chunk_order_index NUMBER,
|
604 |
tokens NUMBER,
|
605 |
content CLOB,
|
606 |
-
content_vector VECTOR
|
|
|
|
|
607 |
)"""},
|
608 |
|
609 |
"LIGHTRAG_GRAPH_NODES":
|
@@ -615,7 +619,9 @@ TABLES = {
|
|
615 |
description CLOB,
|
616 |
source_chunk_id varchar(256),
|
617 |
content CLOB,
|
618 |
-
content_vector VECTOR
|
|
|
|
|
619 |
)"""},
|
620 |
"LIGHTRAG_GRAPH_EDGES":
|
621 |
{"ddl":"""CREATE TABLE LIGHTRAG_GRAPH_EDGES (
|
@@ -628,13 +634,18 @@ TABLES = {
|
|
628 |
description CLOB,
|
629 |
source_chunk_id varchar(256),
|
630 |
content CLOB,
|
631 |
-
content_vector VECTOR
|
|
|
|
|
632 |
)"""},
|
633 |
"LIGHTRAG_LLM_CACHE":
|
634 |
{"ddl":"""CREATE TABLE LIGHTRAG_LLM_CACHE (
|
635 |
id varchar(256) PRIMARY KEY,
|
|
|
636 |
return clob,
|
637 |
-
model varchar(1024)
|
|
|
|
|
638 |
)"""},
|
639 |
|
640 |
"LIGHTRAG_GRAPH":
|
|
|
592 |
workspace varchar(1024),
|
593 |
doc_name varchar(1024),
|
594 |
content CLOB,
|
595 |
+
meta JSON,
|
596 |
+
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
597 |
+
updatetime TIMESTAMP DEFAULT NULL
|
598 |
)"""},
|
599 |
|
600 |
"LIGHTRAG_DOC_CHUNKS":
|
|
|
605 |
chunk_order_index NUMBER,
|
606 |
tokens NUMBER,
|
607 |
content CLOB,
|
608 |
+
content_vector VECTOR,
|
609 |
+
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
610 |
+
updatetime TIMESTAMP DEFAULT NULL
|
611 |
)"""},
|
612 |
|
613 |
"LIGHTRAG_GRAPH_NODES":
|
|
|
619 |
description CLOB,
|
620 |
source_chunk_id varchar(256),
|
621 |
content CLOB,
|
622 |
+
content_vector VECTOR,
|
623 |
+
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
624 |
+
updatetime TIMESTAMP DEFAULT NULL
|
625 |
)"""},
|
626 |
"LIGHTRAG_GRAPH_EDGES":
|
627 |
{"ddl":"""CREATE TABLE LIGHTRAG_GRAPH_EDGES (
|
|
|
634 |
description CLOB,
|
635 |
source_chunk_id varchar(256),
|
636 |
content CLOB,
|
637 |
+
content_vector VECTOR,
|
638 |
+
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
639 |
+
updatetime TIMESTAMP DEFAULT NULL
|
640 |
)"""},
|
641 |
"LIGHTRAG_LLM_CACHE":
|
642 |
{"ddl":"""CREATE TABLE LIGHTRAG_LLM_CACHE (
|
643 |
id varchar(256) PRIMARY KEY,
|
644 |
+
send clob,
|
645 |
return clob,
|
646 |
+
model varchar(1024),
|
647 |
+
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
648 |
+
updatetime TIMESTAMP DEFAULT NULL
|
649 |
)"""},
|
650 |
|
651 |
"LIGHTRAG_GRAPH":
|
lightrag/lightrag.py
CHANGED
@@ -351,3 +351,34 @@ class LightRAG:
|
|
351 |
continue
|
352 |
tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
|
353 |
await asyncio.gather(*tasks)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
continue
|
352 |
tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
|
353 |
await asyncio.gather(*tasks)
|
354 |
+
|
355 |
+
def delete_by_entity(self, entity_name: str):
    """Blocking wrapper around ``adelete_by_entity``.

    Runs the coroutine to completion on the loop returned by
    ``always_get_an_event_loop()`` and returns whatever it returns.

    :param entity_name: name of the entity to delete
    """
    return always_get_an_event_loop().run_until_complete(
        self.adelete_by_entity(entity_name)
    )
|
358 |
+
|
359 |
+
async def adelete_by_entity(self, entity_name: str):
    """Delete an entity from the vector stores and the graph.

    The name is upper-cased and wrapped in double quotes before use.
    Deletion is awaited on ``entities_vdb``, ``relationships_vdb`` and
    ``chunk_entity_relation_graph`` in that order, followed by
    ``_delete_by_entity_done``. Any exception raised along the way is
    logged and suppressed, so the caller never sees it.

    :param entity_name: name of the entity to delete
    """
    entity_name = '"{}"'.format(entity_name.upper())

    try:
        # Steps stay strictly sequential; a failure skips every later step.
        await self.entities_vdb.delete_entity(entity_name)
        await self.relationships_vdb.delete_relation(entity_name)
        await self.chunk_entity_relation_graph.delete_node(entity_name)

        logger.info(
            f"Entity '{entity_name}' and its relationships have been deleted."
        )
        await self._delete_by_entity_done()
    except Exception as exc:
        logger.error(f"Error while deleting entity '{entity_name}': {exc}")
|
373 |
+
|
374 |
+
async def _delete_by_entity_done(self):
    """Run ``index_done_callback`` on every storage touched by a deletion.

    Covers ``entities_vdb``, ``relationships_vdb`` and
    ``chunk_entity_relation_graph``; any of them that is ``None`` is
    skipped. The callbacks are awaited together via ``asyncio.gather``.
    """
    touched_storages = (
        self.entities_vdb,
        self.relationships_vdb,
        self.chunk_entity_relation_graph,
    )
    callbacks = [
        cast(StorageNameSpace, storage).index_done_callback()
        for storage in touched_storages
        if storage is not None
    ]
    await asyncio.gather(*callbacks)
|
lightrag/storage.py
CHANGED
@@ -7,7 +7,13 @@ import networkx as nx
|
|
7 |
import numpy as np
|
8 |
from nano_vectordb import NanoVectorDB
|
9 |
|
10 |
-
from .utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
from .base import (
|
12 |
BaseGraphStorage,
|
13 |
BaseKVStorage,
|
@@ -111,6 +117,43 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
111 |
]
|
112 |
return results
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
async def index_done_callback(self):
    """Call ``save()`` on the underlying client (``self._client``)."""
    client = self._client
    client.save()
|
116 |
|
@@ -228,6 +271,18 @@ class NetworkXStorage(BaseGraphStorage):
|
|
228 |
):
|
229 |
self._graph.add_edge(source_node_id, target_node_id, **edge_data)
|
230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
232 |
if algorithm not in self._node_embed_algorithms:
|
233 |
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
|
|
|
7 |
import numpy as np
|
8 |
from nano_vectordb import NanoVectorDB
|
9 |
|
10 |
+
from .utils import (
|
11 |
+
logger,
|
12 |
+
load_json,
|
13 |
+
write_json,
|
14 |
+
compute_mdhash_id,
|
15 |
+
)
|
16 |
+
|
17 |
from .base import (
|
18 |
BaseGraphStorage,
|
19 |
BaseKVStorage,
|
|
|
117 |
]
|
118 |
return results
|
119 |
|
120 |
+
@property
def client_storage(self):
    """Return the ``_NanoVectorDB__storage`` attribute of ``self._client``.

    The attribute is looked up by its literal, already-mangled name, so
    this works from outside the class that defines it.
    """
    mangled_attr = "_NanoVectorDB__storage"
    return getattr(self._client, mangled_attr)
|
123 |
+
|
124 |
+
async def delete_entity(self, entity_name: str):
    """Delete the vector record for one entity, if it exists.

    The record id is derived from the name with
    ``compute_mdhash_id(entity_name, prefix="ent-")``. The outcome
    (deleted / not found / error) is only logged; exceptions are
    suppressed and nothing is returned.

    :param entity_name: entity name used to derive the record id
    """
    try:
        # The client is called with a list of ids, hence the one-element list.
        record_ids = [compute_mdhash_id(entity_name, prefix="ent-")]

        if not self._client.get(record_ids):
            logger.info(f"No entity found with name {entity_name}.")
            return

        self._client.delete(record_ids)
        logger.info(f"Entity {entity_name} have been deleted.")
    except Exception as exc:
        logger.error(f"Error while deleting entity {entity_name}: {exc}")
|
135 |
+
|
136 |
+
async def delete_relation(self, entity_name: str):
    """Delete every stored relation that has the entity as an endpoint.

    Scans ``self.client_storage["data"]`` for records whose ``src_id`` or
    ``tgt_id`` equals ``entity_name`` and deletes them by ``__id__``.
    The outcome is only logged; exceptions are suppressed and nothing is
    returned.

    :param entity_name: entity name compared against ``src_id``/``tgt_id``
    """
    try:
        # Phase 1: collect matching records (tgt_id is only read when
        # src_id does not already match).
        matching_records = []
        for record in self.client_storage["data"]:
            if record["src_id"] == entity_name or record["tgt_id"] == entity_name:
                matching_records.append(record)

        # Phase 2: extract ids only after the full scan has succeeded.
        ids_to_delete = [record["__id__"] for record in matching_records]

        if not ids_to_delete:
            logger.info(f"No relations found for entity {entity_name}.")
            return

        self._client.delete(ids_to_delete)
        logger.info(
            f"All relations related to entity {entity_name} have been deleted."
        )
    except Exception as exc:
        logger.error(
            f"Error while deleting relations for entity {entity_name}: {exc}"
        )
|
156 |
+
|
157 |
async def index_done_callback(self):
    """Call ``save()`` on the underlying client (``self._client``)."""
    client = self._client
    client.save()
|
159 |
|
|
|
271 |
):
|
272 |
self._graph.add_edge(source_node_id, target_node_id, **edge_data)
|
273 |
|
274 |
+
async def delete_node(self, node_id: str):
    """
    Remove the node with the given id from the graph.

    A missing node is not an error: a warning is logged and the graph is
    left unchanged.

    :param node_id: The node_id to delete
    """
    if not self._graph.has_node(node_id):
        logger.warning(f"Node {node_id} not found in the graph for deletion.")
        return

    self._graph.remove_node(node_id)
    logger.info(f"Node {node_id} deleted from the graph.")
|
285 |
+
|
286 |
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
287 |
if algorithm not in self._node_embed_algorithms:
|
288 |
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
|