Ken Wiltshire commited on
Commit
45c89eb
·
1 Parent(s): 836c74a

index to neo4j working and tested. check queires next.

Browse files
.gitignore CHANGED
@@ -5,4 +5,7 @@ book.txt
5
  lightrag-dev/
6
  .idea/
7
  dist/
8
- env/
 
 
 
 
5
  lightrag-dev/
6
  .idea/
7
  dist/
8
+ env/
9
+ local_neo4jWorkDir/
10
+ local_neo4jWorkDir.bak/
11
+ neo4jWorkDir/
get_all_edges_nx.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import networkx as nx
2
+
3
+ G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
4
+
5
+ def get_all_edges_and_nodes(G):
6
+ # Get all edges and their properties
7
+ edges_with_properties = []
8
+ for u, v, data in G.edges(data=True):
9
+ edges_with_properties.append({
10
+ 'start': u,
11
+ 'end': v,
12
+ 'label': data.get('label', ''), # Assuming 'label' is used for edge type
13
+ 'properties': data,
14
+ 'start_node_properties': G.nodes[u],
15
+ 'end_node_properties': G.nodes[v]
16
+ })
17
+
18
+ return edges_with_properties
19
+
20
+ # Example usage
21
+ if __name__ == "__main__":
22
+ # Assume G is your NetworkX graph loaded from Neo4j
23
+
24
+ all_edges = get_all_edges_and_nodes(G)
25
+
26
+ # Print all edges and node properties
27
+ for edge in all_edges:
28
+ print(f"Edge Label: {edge['label']}")
29
+ print(f"Edge Properties: {edge['properties']}")
30
+ print(f"Start Node: {edge['start']}")
31
+ print(f"Start Node Properties: {edge['start_node_properties']}")
32
+ print(f"End Node: {edge['end']}")
33
+ print(f"End Node Properties: {edge['end_node_properties']}")
34
+ print("---")
graph_chunk_entity_relation.gefx ADDED
The diff for this file is too large to render. See raw diff
 
lightrag/kg/neo4j.py CHANGED
@@ -4,24 +4,42 @@ import os
4
  from dataclasses import dataclass
5
  from typing import Any, Union, cast
6
  import numpy as np
7
- from nano_vectordb import NanoVectorDB
8
  import inspect
 
 
 
 
 
 
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
- # import package.common.utils as utils
14
 
15
 
16
- from lightrag.utils import load_json, logger, write_json
17
- from ..base import (
18
- BaseGraphStorage
19
- )
20
- from neo4j import GraphDatabase
21
  # Replace with your actual URI, username, and password
 
22
  URI = "neo4j://localhost:7687"
23
  USERNAME = "neo4j"
24
- PASSWORD = "your_password"
 
 
 
 
 
25
  # Create a driver object
26
 
27
 
@@ -33,7 +51,7 @@ class GraphStorage(BaseGraphStorage):
33
 
34
  def __post_init__(self):
35
  # self._graph = preloaded_graph or nx.Graph()
36
- self._driver = GraphDatabase.driver("neo4j+s://91fbae6c.databases.neo4j.io", auth=("neo4j", "KWKPXfXcClDbUlmDdGgIQhU5mL1N4E_2CJp2BDFbEbw"))
37
  self._node_embed_algorithms = {
38
  "node2vec": self._node2vec_embed,
39
  }
@@ -129,13 +147,11 @@ class GraphStorage(BaseGraphStorage):
129
 
130
  # degree = session.read_transaction(get_edge_degree, 1, 2)
131
  async def edge_degree(self, src_id: str, tgt_id: str) -> int:
132
- entity_name__label_source = src_id.strip('\"')
133
  entity_name_label_target = tgt_id.strip('\"')
134
  with self._driver.session() as session:
135
- query = """MATCH (n1:`{node_label1}`)-[r]-(n2:`{node_label2}`)
136
- RETURN count(r) AS degree""".format(entity_name__label_source=entity_name__label_source,
137
- entity_name_label_target=entity_name_label_target)
138
-
139
  result = session.run(query)
140
  record = result.single()
141
  logger.info(
@@ -144,7 +160,7 @@ class GraphStorage(BaseGraphStorage):
144
  return record["degree"]
145
 
146
  async def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
147
- entity_name__label_source = source_node_id.strip('\"')
148
  entity_name_label_target = target_node_id.strip('\"')
149
  """
150
  Find all edges between nodes of two given labels
@@ -156,28 +172,25 @@ class GraphStorage(BaseGraphStorage):
156
  Returns:
157
  list: List of all relationships/edges found
158
  """
159
- with self._driver.session() as session:
160
  query = f"""
161
- MATCH (source:`{entity_name__label_source}`)-[r]-(target:`{entity_name_label_target}`)
162
- RETURN r
163
- """
 
164
 
165
- result = session.run(query)
166
- for logrecord in result:
 
 
167
  logger.info(
168
- f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{logrecord["r"]}'
169
- )
170
-
171
-
172
- return [record["r"] for record in result]
173
 
174
 
175
-
176
- async def get_node_edges(self, source_node_id: str):
177
- if self._graph.has_node(source_node_id):
178
- return list(self._graph.edges(source_node_id))
179
- return None
180
-
181
  async def get_node_edges(self, source_node_id: str):
182
  node_label = source_node_id.strip('\"')
183
 
@@ -208,8 +221,8 @@ class GraphStorage(BaseGraphStorage):
208
  target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
209
 
210
  if source_label and target_label:
211
- print (f"appending: {[source_label, target_label]}")
212
- edges.append([source_label, target_label])
213
 
214
  return edges
215
 
@@ -218,57 +231,54 @@ class GraphStorage(BaseGraphStorage):
218
  return edges
219
 
220
 
221
- # try:
222
- # with self._driver.session() as session:
223
- # if self.has_node(node_label):
224
- # edges = session.read_transaction(fetch_edges,node_label)
225
- # return list(edges)
226
- # return edges
227
- # finally:
228
- # print ("consider closign driver here")
229
- # # driver.close()
230
 
231
- from typing import List, Tuple
232
- async def get_node_connections(driver: GraphDatabase.driver, label: str) -> List[Tuple[str, str]]:
233
- def run_query(tx):
234
- query = f"""
235
- MATCH (n:`{label}`)
236
- OPTIONAL MATCH (n)-[r]-(connected)
237
- RETURN n, r, connected
238
- """
239
- results = tx.run(query)
240
 
241
- connections = []
242
- for record in results:
243
- source_node = record['n']
244
- connected_node = record['connected']
 
245
 
246
- source_label = list(source_node.labels)[0] if source_node.labels else None
247
- target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
248
 
249
- if source_label and target_label:
250
- connections.append((source_label, target_label))
251
-
252
- return connections
 
 
 
253
 
254
- with driver.session() as session:
255
- return session.read_transaction(run_query)
 
256
 
257
 
258
 
259
 
260
 
261
  #upsert_node
 
 
 
 
 
 
262
  async def upsert_node(self, node_id: str, node_data: dict[str, str]):
263
  label = node_id.strip('\"')
264
  properties = node_data
265
  """
266
  Upsert a node with the given label and properties within a transaction.
267
- If a node with the same label exists, it will:
268
- - Update existing properties with new values
269
- - Add new properties that don't exist
270
- If no node exists, creates a new node with all properties.
271
-
272
  Args:
273
  label: The node label to search for and apply
274
  properties: Dictionary of node properties
@@ -355,7 +365,7 @@ class GraphStorage(BaseGraphStorage):
355
 
356
  result = tx.run(query, properties=edge_properties)
357
  logger.info(
358
- f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{None}'
359
  )
360
  return result.single()
361
 
@@ -369,6 +379,8 @@ class GraphStorage(BaseGraphStorage):
369
  # return result
370
 
371
  async def _node2vec_embed(self):
 
 
372
  # async def _node2vec_embed(self):
373
  with self._driver.session() as session:
374
  #Define the Cypher query
 
4
  from dataclasses import dataclass
5
  from typing import Any, Union, cast
6
  import numpy as np
 
7
  import inspect
8
+ # import package.common.utils as utils
9
+ from lightrag.utils import load_json, logger, write_json
10
+ from ..base import (
11
+ BaseGraphStorage
12
+ )
13
+ from neo4j import GraphDatabase, exceptions as neo4jExceptions
14
 
15
 
16
+ from tenacity import (
17
+ retry,
18
+ stop_after_attempt,
19
+ wait_exponential,
20
+ retry_if_exception_type,
21
+ )
22
+
23
+
24
+
25
+
26
+
27
+ # @TODO: catch and retry "ERROR:neo4j.io:Failed to write data to connection ResolvedIPv4Address"
28
+ # during indexing.
29
 
30
 
 
31
 
32
 
 
 
 
 
 
33
  # Replace with your actual URI, username, and password
34
+ #local
35
  URI = "neo4j://localhost:7687"
36
  USERNAME = "neo4j"
37
+ PASSWORD = "password"
38
+
39
+ #aura
40
+ # URI = "neo4j+s://91fbae6c.databases.neo4j.io"
41
+ # USERNAME = "neo4j"
42
+ # PASSWORD = "KWKPXfXcClDbUlmDdGgIQhU5mL1N4E_2CJp2BDFbEbw"
43
  # Create a driver object
44
 
45
 
 
51
 
52
  def __post_init__(self):
53
  # self._graph = preloaded_graph or nx.Graph()
54
+ self._driver = GraphDatabase.driver(URI, auth=(USERNAME, PASSWORD))
55
  self._node_embed_algorithms = {
56
  "node2vec": self._node2vec_embed,
57
  }
 
147
 
148
  # degree = session.read_transaction(get_edge_degree, 1, 2)
149
  async def edge_degree(self, src_id: str, tgt_id: str) -> int:
150
+ entity_name_label_source = src_id.strip('\"')
151
  entity_name_label_target = tgt_id.strip('\"')
152
  with self._driver.session() as session:
153
+ query = f"""MATCH (n1:`{entity_name_label_source}`)-[r]-(n2:`{entity_name_label_target}`)
154
+ RETURN count(r) AS degree"""
 
 
155
  result = session.run(query)
156
  record = result.single()
157
  logger.info(
 
160
  return record["degree"]
161
 
162
  async def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
163
+ entity_name_label_source = source_node_id.strip('\"')
164
  entity_name_label_target = target_node_id.strip('\"')
165
  """
166
  Find all edges between nodes of two given labels
 
172
  Returns:
173
  list: List of all relationships/edges found
174
  """
175
+ with self._driver.session() as session:
176
  query = f"""
177
+ MATCH (start:`{entity_name_label_source}`)-[r]->(end:`{entity_name_label_target}`)
178
+ RETURN properties(r) as edge_properties
179
+ LIMIT 1
180
+ """.format(entity_name_label_source=entity_name_label_source, entity_name_label_target=entity_name_label_target)
181
 
182
+ result = session.run(query)
183
+ record = result.single()
184
+ if record:
185
+ result = dict(record["edge_properties"])
186
  logger.info(
187
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{result}'
188
+ )
189
+ return result
190
+ else:
191
+ return None
192
 
193
 
 
 
 
 
 
 
194
  async def get_node_edges(self, source_node_id: str):
195
  node_label = source_node_id.strip('\"')
196
 
 
221
  target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
222
 
223
  if source_label and target_label:
224
+ print (f"appending: {(source_label, target_label)}")
225
+ edges.append((source_label, target_label))
226
 
227
  return edges
228
 
 
231
  return edges
232
 
233
 
 
 
 
 
 
 
 
 
 
234
 
235
+ # from typing import List, Tuple
236
+ # async def get_node_connections(driver: GraphDatabase.driver, label: str) -> List[Tuple[str, str]]:
237
+ # def get_connections_for_node(tx):
238
+ # query = f"""
239
+ # MATCH (n:`{label}`)
240
+ # OPTIONAL MATCH (n)-[r]-(connected)
241
+ # RETURN n, r, connected
242
+ # """
243
+ # results = tx.run(query)
244
 
245
+
246
+ # connections = []
247
+ # for record in results:
248
+ # source_node = record['n']
249
+ # connected_node = record['connected']
250
 
251
+ # source_label = list(source_node.labels)[0] if source_node.labels else None
252
+ # target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
253
 
254
+ # if source_label and target_label:
255
+ # connections.append((source_label, target_label))
256
+
257
+ # logger.info(
258
+ # f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{connections}'
259
+ # )
260
+ # return connections
261
 
262
+ # with driver.session() as session:
263
+
264
+ # return session.read_transaction(get_connections_for_node)
265
 
266
 
267
 
268
 
269
 
270
  #upsert_node
271
+
272
+ @retry(
273
+ stop=stop_after_attempt(3),
274
+ wait=wait_exponential(multiplier=1, min=4, max=10),
275
+ retry=retry_if_exception_type((neo4jExceptions.ServiceUnavailable, neo4jExceptions.TransientError, neo4jExceptions.WriteServiceUnavailable)),
276
+ )
277
  async def upsert_node(self, node_id: str, node_data: dict[str, str]):
278
  label = node_id.strip('\"')
279
  properties = node_data
280
  """
281
  Upsert a node with the given label and properties within a transaction.
 
 
 
 
 
282
  Args:
283
  label: The node label to search for and apply
284
  properties: Dictionary of node properties
 
365
 
366
  result = tx.run(query, properties=edge_properties)
367
  logger.info(
368
+ f'{inspect.currentframe().f_code.co_name}:query:{query}:edge_properties:{edge_properties}'
369
  )
370
  return result.single()
371
 
 
379
  # return result
380
 
381
  async def _node2vec_embed(self):
382
+ print ("this is never called. checking to be sure.")
383
+
384
  # async def _node2vec_embed(self):
385
  with self._driver.session() as session:
386
  #Define the Cypher query
lightrag/lightrag.py CHANGED
@@ -102,8 +102,8 @@ class LightRAG:
102
 
103
  # module = importlib.import_module('kg.neo4j')
104
  # Neo4JStorage = getattr(module, 'GraphStorage')
105
-
106
  if True==True:
 
107
  graph_storage_cls: Type[BaseGraphStorage] = Neo4JStorage
108
  else:
109
  graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
 
102
 
103
  # module = importlib.import_module('kg.neo4j')
104
  # Neo4JStorage = getattr(module, 'GraphStorage')
 
105
  if True==True:
106
+ print ("using KG")
107
  graph_storage_cls: Type[BaseGraphStorage] = Neo4JStorage
108
  else:
109
  graph_storage_cls: Type[BaseGraphStorage] = NetworkXStorage
lightrag/storage.py CHANGED
@@ -235,7 +235,7 @@ class NetworkXStorage(BaseGraphStorage):
235
 
236
  async def _node2vec_embed(self):
237
  from graspologic import embed
238
-
239
  embeddings, nodes = embed.node2vec_embed(
240
  self._graph,
241
  **self.global_config["node2vec_params"],
 
235
 
236
  async def _node2vec_embed(self):
237
  from graspologic import embed
238
+ print ("is this ever called?")
239
  embeddings, nodes = embed.node2vec_embed(
240
  self._graph,
241
  **self.global_config["node2vec_params"],
neo4jWorkDir/kv_store_llm_response_cache.json CHANGED
The diff for this file is too large to render. See raw diff
 
neo4jWorkDir/lightrag.log CHANGED
The diff for this file is too large to render. See raw diff
 
neo4jWorkDir/vdb_chunks.json CHANGED
The diff for this file is too large to render. See raw diff
 
neo4jWorkDir/vdb_entities.json CHANGED
The diff for this file is too large to render. See raw diff
 
neo4jWorkDir/vdb_relationships.json CHANGED
The diff for this file is too large to render. See raw diff
 
test.py CHANGED
@@ -1,15 +1,34 @@
1
  import os
2
  from lightrag import LightRAG, QueryParam
3
  from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
4
-
5
  #########
6
  # Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
7
  # import nest_asyncio
8
  # nest_asyncio.apply()
9
  #########
10
 
11
- WORKING_DIR = "./dickens"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
 
13
 
14
  if not os.path.exists(WORKING_DIR):
15
  os.mkdir(WORKING_DIR)
 
1
  import os
2
  from lightrag import LightRAG, QueryParam
3
  from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
4
+ from pprint import pprint
5
  #########
6
  # Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
7
  # import nest_asyncio
8
  # nest_asyncio.apply()
9
  #########
10
 
11
+ WORKING_DIR = "./dickensTestEmbedcall"
12
+
13
+
14
+ # G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
15
+ # nx.write_gexf(G, "graph_chunk_entity_relation.gefx")
16
+
17
+ import networkx as nx
18
+ from networkx_query import search_nodes, search_edges
19
+ G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
20
+ query = {} # Empty query matches all nodes
21
+ result = search_nodes(G, query)
22
+
23
+ # Extract node IDs from the result
24
+ node_ids = sorted([node for node in result])
25
+
26
+ print("All node IDs in the graph:")
27
+ pprint(node_ids)
28
+ raise Exception
29
+
30
 
31
+ # raise Exception
32
 
33
  if not os.path.exists(WORKING_DIR):
34
  os.mkdir(WORKING_DIR)
testkg.py CHANGED
@@ -8,8 +8,7 @@ from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
8
  # nest_asyncio.apply()
9
  #########
10
 
11
- WORKING_DIR = "./neo4jWorkDir"
12
-
13
 
14
  if not os.path.exists(WORKING_DIR):
15
  os.mkdir(WORKING_DIR)
 
8
  # nest_asyncio.apply()
9
  #########
10
 
11
+ WORKING_DIR = "./local_neo4jWorkDir"
 
12
 
13
  if not os.path.exists(WORKING_DIR):
14
  os.mkdir(WORKING_DIR)