Ken Wiltshire
commited on
Commit
·
45c89eb
1
Parent(s):
836c74a
index to neo4j working and tested. check queires next.
Browse files- .gitignore +4 -1
- get_all_edges_nx.py +34 -0
- graph_chunk_entity_relation.gefx +0 -0
- lightrag/kg/neo4j.py +82 -70
- lightrag/lightrag.py +1 -1
- lightrag/storage.py +1 -1
- neo4jWorkDir/kv_store_llm_response_cache.json +0 -0
- neo4jWorkDir/lightrag.log +0 -0
- neo4jWorkDir/vdb_chunks.json +0 -0
- neo4jWorkDir/vdb_entities.json +0 -0
- neo4jWorkDir/vdb_relationships.json +0 -0
- test.py +21 -2
- testkg.py +1 -2
.gitignore
CHANGED
@@ -5,4 +5,7 @@ book.txt
|
|
5 |
lightrag-dev/
|
6 |
.idea/
|
7 |
dist/
|
8 |
-
env/
|
|
|
|
|
|
|
|
5 |
lightrag-dev/
|
6 |
.idea/
|
7 |
dist/
|
8 |
+
env/
|
9 |
+
local_neo4jWorkDir/
|
10 |
+
local_neo4jWorkDir.bak/
|
11 |
+
neo4jWorkDir/
|
get_all_edges_nx.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import networkx as nx
|
2 |
+
|
3 |
+
G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
|
4 |
+
|
5 |
+
def get_all_edges_and_nodes(G):
|
6 |
+
# Get all edges and their properties
|
7 |
+
edges_with_properties = []
|
8 |
+
for u, v, data in G.edges(data=True):
|
9 |
+
edges_with_properties.append({
|
10 |
+
'start': u,
|
11 |
+
'end': v,
|
12 |
+
'label': data.get('label', ''), # Assuming 'label' is used for edge type
|
13 |
+
'properties': data,
|
14 |
+
'start_node_properties': G.nodes[u],
|
15 |
+
'end_node_properties': G.nodes[v]
|
16 |
+
})
|
17 |
+
|
18 |
+
return edges_with_properties
|
19 |
+
|
20 |
+
# Example usage
|
21 |
+
if __name__ == "__main__":
|
22 |
+
# Assume G is your NetworkX graph loaded from Neo4j
|
23 |
+
|
24 |
+
all_edges = get_all_edges_and_nodes(G)
|
25 |
+
|
26 |
+
# Print all edges and node properties
|
27 |
+
for edge in all_edges:
|
28 |
+
print(f"Edge Label: {edge['label']}")
|
29 |
+
print(f"Edge Properties: {edge['properties']}")
|
30 |
+
print(f"Start Node: {edge['start']}")
|
31 |
+
print(f"Start Node Properties: {edge['start_node_properties']}")
|
32 |
+
print(f"End Node: {edge['end']}")
|
33 |
+
print(f"End Node Properties: {edge['end_node_properties']}")
|
34 |
+
print("---")
|
graph_chunk_entity_relation.gefx
ADDED
The diff for this file is too large to render.
See raw diff
|
|
lightrag/kg/neo4j.py
CHANGED
@@ -4,24 +4,42 @@ import os
|
|
4 |
from dataclasses import dataclass
|
5 |
from typing import Any, Union, cast
|
6 |
import numpy as np
|
7 |
-
from nano_vectordb import NanoVectorDB
|
8 |
import inspect
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
|
13 |
-
# import package.common.utils as utils
|
14 |
|
15 |
|
16 |
-
from lightrag.utils import load_json, logger, write_json
|
17 |
-
from ..base import (
|
18 |
-
BaseGraphStorage
|
19 |
-
)
|
20 |
-
from neo4j import GraphDatabase
|
21 |
# Replace with your actual URI, username, and password
|
|
|
22 |
URI = "neo4j://localhost:7687"
|
23 |
USERNAME = "neo4j"
|
24 |
-
PASSWORD = "
|
|
|
|
|
|
|
|
|
|
|
25 |
# Create a driver object
|
26 |
|
27 |
|
@@ -33,7 +51,7 @@ class GraphStorage(BaseGraphStorage):
|
|
33 |
|
34 |
def __post_init__(self):
|
35 |
# self._graph = preloaded_graph or nx.Graph()
|
36 |
-
self._driver = GraphDatabase.driver(
|
37 |
self._node_embed_algorithms = {
|
38 |
"node2vec": self._node2vec_embed,
|
39 |
}
|
@@ -129,13 +147,11 @@ class GraphStorage(BaseGraphStorage):
|
|
129 |
|
130 |
# degree = session.read_transaction(get_edge_degree, 1, 2)
|
131 |
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
|
132 |
-
|
133 |
entity_name_label_target = tgt_id.strip('\"')
|
134 |
with self._driver.session() as session:
|
135 |
-
query = """MATCH (n1:`{
|
136 |
-
RETURN count(r) AS degree"""
|
137 |
-
entity_name_label_target=entity_name_label_target)
|
138 |
-
|
139 |
result = session.run(query)
|
140 |
record = result.single()
|
141 |
logger.info(
|
@@ -144,7 +160,7 @@ class GraphStorage(BaseGraphStorage):
|
|
144 |
return record["degree"]
|
145 |
|
146 |
async def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
|
147 |
-
|
148 |
entity_name_label_target = target_node_id.strip('\"')
|
149 |
"""
|
150 |
Find all edges between nodes of two given labels
|
@@ -156,28 +172,25 @@ class GraphStorage(BaseGraphStorage):
|
|
156 |
Returns:
|
157 |
list: List of all relationships/edges found
|
158 |
"""
|
159 |
-
with self._driver.session()
|
160 |
query = f"""
|
161 |
-
MATCH (
|
162 |
-
RETURN r
|
163 |
-
|
|
|
164 |
|
165 |
-
result = session.run(query)
|
166 |
-
|
|
|
|
|
167 |
logger.info(
|
168 |
-
f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{
|
169 |
-
)
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
|
174 |
|
175 |
-
|
176 |
-
async def get_node_edges(self, source_node_id: str):
|
177 |
-
if self._graph.has_node(source_node_id):
|
178 |
-
return list(self._graph.edges(source_node_id))
|
179 |
-
return None
|
180 |
-
|
181 |
async def get_node_edges(self, source_node_id: str):
|
182 |
node_label = source_node_id.strip('\"')
|
183 |
|
@@ -208,8 +221,8 @@ class GraphStorage(BaseGraphStorage):
|
|
208 |
target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
|
209 |
|
210 |
if source_label and target_label:
|
211 |
-
print (f"appending: {
|
212 |
-
edges.append(
|
213 |
|
214 |
return edges
|
215 |
|
@@ -218,57 +231,54 @@ class GraphStorage(BaseGraphStorage):
|
|
218 |
return edges
|
219 |
|
220 |
|
221 |
-
# try:
|
222 |
-
# with self._driver.session() as session:
|
223 |
-
# if self.has_node(node_label):
|
224 |
-
# edges = session.read_transaction(fetch_edges,node_label)
|
225 |
-
# return list(edges)
|
226 |
-
# return edges
|
227 |
-
# finally:
|
228 |
-
# print ("consider closign driver here")
|
229 |
-
# # driver.close()
|
230 |
|
231 |
-
from typing import List, Tuple
|
232 |
-
async def get_node_connections(driver: GraphDatabase.driver, label: str) -> List[Tuple[str, str]]:
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
|
|
245 |
|
246 |
-
|
247 |
-
|
248 |
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
|
|
|
|
|
|
253 |
|
254 |
-
|
255 |
-
|
|
|
256 |
|
257 |
|
258 |
|
259 |
|
260 |
|
261 |
#upsert_node
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
async def upsert_node(self, node_id: str, node_data: dict[str, str]):
|
263 |
label = node_id.strip('\"')
|
264 |
properties = node_data
|
265 |
"""
|
266 |
Upsert a node with the given label and properties within a transaction.
|
267 |
-
If a node with the same label exists, it will:
|
268 |
-
- Update existing properties with new values
|
269 |
-
- Add new properties that don't exist
|
270 |
-
If no node exists, creates a new node with all properties.
|
271 |
-
|
272 |
Args:
|
273 |
label: The node label to search for and apply
|
274 |
properties: Dictionary of node properties
|
@@ -355,7 +365,7 @@ class GraphStorage(BaseGraphStorage):
|
|
355 |
|
356 |
result = tx.run(query, properties=edge_properties)
|
357 |
logger.info(
|
358 |
-
f'{inspect.currentframe().f_code.co_name}:query:{query}:
|
359 |
)
|
360 |
return result.single()
|
361 |
|
@@ -369,6 +379,8 @@ class GraphStorage(BaseGraphStorage):
|
|
369 |
# return result
|
370 |
|
371 |
async def _node2vec_embed(self):
|
|
|
|
|
372 |
# async def _node2vec_embed(self):
|
373 |
with self._driver.session() as session:
|
374 |
#Define the Cypher query
|
|
|
4 |
from dataclasses import dataclass
|
5 |
from typing import Any, Union, cast
|
6 |
import numpy as np
|
|
|
7 |
import inspect
|
8 |
+
# import package.common.utils as utils
|
9 |
+
from lightrag.utils import load_json, logger, write_json
|
10 |
+
from ..base import (
|
11 |
+
BaseGraphStorage
|
12 |
+
)
|
13 |
+
from neo4j import GraphDatabase, exceptions as neo4jExceptions
|
14 |
|
15 |
|
16 |
+
from tenacity import (
|
17 |
+
retry,
|
18 |
+
stop_after_attempt,
|
19 |
+
wait_exponential,
|
20 |
+
retry_if_exception_type,
|
21 |
+
)
|
22 |
+
|
23 |
+
|
24 |
+
|
25 |
+
|
26 |
+
|
27 |
+
# @TODO: catch and retry "ERROR:neo4j.io:Failed to write data to connection ResolvedIPv4Address"
|
28 |
+
# during indexing.
|
29 |
|
30 |
|
|
|
31 |
|
32 |
|
|
|
|
|
|
|
|
|
|
|
33 |
# Replace with your actual URI, username, and password
|
34 |
+
#local
|
35 |
URI = "neo4j://localhost:7687"
|
36 |
USERNAME = "neo4j"
|
37 |
+
PASSWORD = "password"
|
38 |
+
|
39 |
+
#aura
|
40 |
+
# URI = "neo4j+s://91fbae6c.databases.neo4j.io"
|
41 |
+
# USERNAME = "neo4j"
|
42 |
+
# PASSWORD = "KWKPXfXcClDbUlmDdGgIQhU5mL1N4E_2CJp2BDFbEbw"
|
43 |
# Create a driver object
|
44 |
|
45 |
|
|
|
51 |
|
52 |
def __post_init__(self):
|
53 |
# self._graph = preloaded_graph or nx.Graph()
|
54 |
+
self._driver = GraphDatabase.driver(URI, auth=(USERNAME, PASSWORD))
|
55 |
self._node_embed_algorithms = {
|
56 |
"node2vec": self._node2vec_embed,
|
57 |
}
|
|
|
147 |
|
148 |
# degree = session.read_transaction(get_edge_degree, 1, 2)
|
149 |
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
|
150 |
+
entity_name_label_source = src_id.strip('\"')
|
151 |
entity_name_label_target = tgt_id.strip('\"')
|
152 |
with self._driver.session() as session:
|
153 |
+
query = f"""MATCH (n1:`{entity_name_label_source}`)-[r]-(n2:`{entity_name_label_target}`)
|
154 |
+
RETURN count(r) AS degree"""
|
|
|
|
|
155 |
result = session.run(query)
|
156 |
record = result.single()
|
157 |
logger.info(
|
|
|
160 |
return record["degree"]
|
161 |
|
162 |
async def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
|
163 |
+
entity_name_label_source = source_node_id.strip('\"')
|
164 |
entity_name_label_target = target_node_id.strip('\"')
|
165 |
"""
|
166 |
Find all edges between nodes of two given labels
|
|
|
172 |
Returns:
|
173 |
list: List of all relationships/edges found
|
174 |
"""
|
175 |
+
with self._driver.session() as session:
|
176 |
query = f"""
|
177 |
+
MATCH (start:`{entity_name_label_source}`)-[r]->(end:`{entity_name_label_target}`)
|
178 |
+
RETURN properties(r) as edge_properties
|
179 |
+
LIMIT 1
|
180 |
+
""".format(entity_name_label_source=entity_name_label_source, entity_name_label_target=entity_name_label_target)
|
181 |
|
182 |
+
result = session.run(query)
|
183 |
+
record = result.single()
|
184 |
+
if record:
|
185 |
+
result = dict(record["edge_properties"])
|
186 |
logger.info(
|
187 |
+
f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{result}'
|
188 |
+
)
|
189 |
+
return result
|
190 |
+
else:
|
191 |
+
return None
|
192 |
|
193 |
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
async def get_node_edges(self, source_node_id: str):
|
195 |
node_label = source_node_id.strip('\"')
|
196 |
|
|
|
221 |
target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
|
222 |
|
223 |
if source_label and target_label:
|
224 |
+
print (f"appending: {(source_label, target_label)}")
|
225 |
+
edges.append((source_label, target_label))
|
226 |
|
227 |
return edges
|
228 |
|
|
|
231 |
return edges
|
232 |
|
233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
|
235 |
+
# from typing import List, Tuple
|
236 |
+
# async def get_node_connections(driver: GraphDatabase.driver, label: str) -> List[Tuple[str, str]]:
|
237 |
+
# def get_connections_for_node(tx):
|
238 |
+
# query = f"""
|
239 |
+
# MATCH (n:`{label}`)
|
240 |
+
# OPTIONAL MATCH (n)-[r]-(connected)
|
241 |
+
# RETURN n, r, connected
|
242 |
+
# """
|
243 |
+
# results = tx.run(query)
|
244 |
|
245 |
+
|
246 |
+
# connections = []
|
247 |
+
# for record in results:
|
248 |
+
# source_node = record['n']
|
249 |
+
# connected_node = record['connected']
|
250 |
|
251 |
+
# source_label = list(source_node.labels)[0] if source_node.labels else None
|
252 |
+
# target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
|
253 |
|
254 |
+
# if source_label and target_label:
|
255 |
+
# connections.append((source_label, target_label))
|
256 |
+
|
257 |
+
# logger.info(
|
258 |
+
# f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{connections}'
|
259 |
+
# )
|
260 |
+
# return connections
|
261 |
|
262 |
+
# with driver.session() as session:
|
263 |
+
|
264 |
+
# return session.read_transaction(get_connections_for_node)
|
265 |
|
266 |
|
267 |
|
268 |
|
269 |
|
270 |
#upsert_node
|
271 |
+
|
272 |
+
@retry(
|
273 |
+
stop=stop_after_attempt(3),
|
274 |
+
wait=wait_exponential(multiplier=1, min=4, max=10),
|
275 |
+
retry=retry_if_exception_type((neo4jExceptions.ServiceUnavailable, neo4jExceptions.TransientError, neo4jExceptions.WriteServiceUnavailable)),
|
276 |
+
)
|
277 |
async def upsert_node(self, node_id: str, node_data: dict[str, str]):
|
278 |
label = node_id.strip('\"')
|
279 |
properties = node_data
|
280 |
"""
|
281 |
Upsert a node with the given label and properties within a transaction.
|
|
|
|
|
|
|
|
|
|
|
282 |
Args:
|
283 |
label: The node label to search for and apply
|
284 |
properties: Dictionary of node properties
|
|
|
365 |
|
366 |
result = tx.run(query, properties=edge_properties)
|
367 |
logger.info(
|
368 |
+
f'{inspect.currentframe().f_code.co_name}:query:{query}:edge_properties:{edge_properties}'
|
369 |
)
|
370 |
return result.single()
|
371 |
|
|
|
379 |
# return result
|
380 |
|
381 |
async def _node2vec_embed(self):
|
382 |
+
print ("this is never called. checking to be sure.")
|
383 |
+
|
384 |
# async def _node2vec_embed(self):
|
385 |
with self._driver.session() as session:
|
386 |
#Define the Cypher query
|
lightrag/lightrag.py
CHANGED
@@ -102,8 +102,8 @@ class LightRAG:
|
|
102 |
|
103 |
# module = importlib.import_module('kg.neo4j')
|
104 |
# Neo4JStorage = getattr(module, 'GraphStorage')
|
105 |
-
|
106 |
if True==True:
|
|
|
107 |
graph_storage_cls: Type[BaseGraphStorage] = Neo4JStorage
|
108 |
else:
|
109 |
graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
|
|
|
102 |
|
103 |
# module = importlib.import_module('kg.neo4j')
|
104 |
# Neo4JStorage = getattr(module, 'GraphStorage')
|
|
|
105 |
if True==True:
|
106 |
+
print ("using KG")
|
107 |
graph_storage_cls: Type[BaseGraphStorage] = Neo4JStorage
|
108 |
else:
|
109 |
graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
|
lightrag/storage.py
CHANGED
@@ -235,7 +235,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
235 |
|
236 |
async def _node2vec_embed(self):
|
237 |
from graspologic import embed
|
238 |
-
|
239 |
embeddings, nodes = embed.node2vec_embed(
|
240 |
self._graph,
|
241 |
**self.global_config["node2vec_params"],
|
|
|
235 |
|
236 |
async def _node2vec_embed(self):
|
237 |
from graspologic import embed
|
238 |
+
print ("is this ever called?")
|
239 |
embeddings, nodes = embed.node2vec_embed(
|
240 |
self._graph,
|
241 |
**self.global_config["node2vec_params"],
|
neo4jWorkDir/kv_store_llm_response_cache.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/lightrag.log
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/vdb_chunks.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/vdb_entities.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/vdb_relationships.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
test.py
CHANGED
@@ -1,15 +1,34 @@
|
|
1 |
import os
|
2 |
from lightrag import LightRAG, QueryParam
|
3 |
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
|
4 |
-
|
5 |
#########
|
6 |
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
|
7 |
# import nest_asyncio
|
8 |
# nest_asyncio.apply()
|
9 |
#########
|
10 |
|
11 |
-
WORKING_DIR = "./
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
|
|
13 |
|
14 |
if not os.path.exists(WORKING_DIR):
|
15 |
os.mkdir(WORKING_DIR)
|
|
|
1 |
import os
|
2 |
from lightrag import LightRAG, QueryParam
|
3 |
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
|
4 |
+
from pprint import pprint
|
5 |
#########
|
6 |
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
|
7 |
# import nest_asyncio
|
8 |
# nest_asyncio.apply()
|
9 |
#########
|
10 |
|
11 |
+
WORKING_DIR = "./dickensTestEmbedcall"
|
12 |
+
|
13 |
+
|
14 |
+
# G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
|
15 |
+
# nx.write_gexf(G, "graph_chunk_entity_relation.gefx")
|
16 |
+
|
17 |
+
import networkx as nx
|
18 |
+
from networkx_query import search_nodes, search_edges
|
19 |
+
G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
|
20 |
+
query = {} # Empty query matches all nodes
|
21 |
+
result = search_nodes(G, query)
|
22 |
+
|
23 |
+
# Extract node IDs from the result
|
24 |
+
node_ids = sorted([node for node in result])
|
25 |
+
|
26 |
+
print("All node IDs in the graph:")
|
27 |
+
pprint(node_ids)
|
28 |
+
raise Exception
|
29 |
+
|
30 |
|
31 |
+
# raise Exception
|
32 |
|
33 |
if not os.path.exists(WORKING_DIR):
|
34 |
os.mkdir(WORKING_DIR)
|
testkg.py
CHANGED
@@ -8,8 +8,7 @@ from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
|
|
8 |
# nest_asyncio.apply()
|
9 |
#########
|
10 |
|
11 |
-
WORKING_DIR = "./
|
12 |
-
|
13 |
|
14 |
if not os.path.exists(WORKING_DIR):
|
15 |
os.mkdir(WORKING_DIR)
|
|
|
8 |
# nest_asyncio.apply()
|
9 |
#########
|
10 |
|
11 |
+
WORKING_DIR = "./local_neo4jWorkDir"
|
|
|
12 |
|
13 |
if not os.path.exists(WORKING_DIR):
|
14 |
os.mkdir(WORKING_DIR)
|