Ken Wiltshire committed
Commit 14321eb · 1 Parent(s): 5675139

cleaning code for pull

README.md CHANGED
@@ -155,6 +155,37 @@ rag = LightRAG(
 ```
 </details>
 
+<details>
+<summary> Using Neo4J for Storage </summary>
+
+* For production-level scenarios you will most likely want to leverage an enterprise solution
+for KG storage. Export the connection settings for your Neo4J instance:
+
+```bash
+export NEO4J_URI="neo4j://localhost:7687"
+export NEO4J_USERNAME="neo4j"
+export NEO4J_PASSWORD="password"
+```
+
+When you launch the project, be sure to override the default KG (NetworkX)
+by specifying kg="Neo4JStorage":
+
+```python
+# Note: default settings use NetworkX.
+# Initialize LightRAG with the Neo4J implementation.
+WORKING_DIR = "./local_neo4jWorkDir"
+
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=gpt_4o_mini_complete,  # use the gpt_4o_mini_complete LLM model
+    kg="Neo4JStorage",  # <-- override the KG default
+    log_level="DEBUG",  # <-- override the log_level default
+)
+```
+
+See test_neo4j.py for a working example.
+</details>
+
 <details>
 <summary> Using Ollama Models </summary>
 
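Before launching with kg="Neo4JStorage", it can help to confirm that the exported credentials actually reach a live Neo4J instance. A minimal sketch, assuming the official neo4j Python driver is installed (pip install neo4j); this helper is not part of LightRAG itself:

```python
# Connectivity check for the NEO4J_* variables exported above.
import os

from neo4j import GraphDatabase

driver = GraphDatabase.driver(
    os.environ["NEO4J_URI"],  # e.g. neo4j://localhost:7687
    auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
)
driver.verify_connectivity()  # raises if the URI or credentials are wrong
driver.close()
```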
lightrag/kg/__init__.py CHANGED
@@ -1,27 +1,3 @@
-print ("init package vars here. ......")
-# from .neo4j import GraphStorage as Neo4JStorage
+# print ("init package vars here. ......")
 
 
-# import sys
-# import importlib
-# # Specify the path to the directory containing the module
-# # Add the directory to the system path
-# module_dir = '/Users/kenwiltshire/documents/dev/LightRag/lightrag/kg'
-# sys.path.append(module_dir)
-# # Specify the module name
-# module_name = 'neo4j'
-# # Import the module
-# spec = importlib.util.spec_from_file_location(module_name, f'{module_dir}/{module_name}.py')
-
-# Neo4JStorage = importlib.util.module_from_spec(spec)
-# spec.loader.exec_module(Neo4JStorage)
-
-
-
-# Relative imports are still possible by adding a leading period to the module name when using the from ... import form:
-
-# # Import names from pkg.string
-# from .string import name1, name2
-# # Import pkg.string
-# from . import string
-
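The scaffolding removed here experimented with loading the Neo4J storage module via importlib path manipulation; the first commented-out line already points at the simpler route. A minimal sketch of that relative import, assuming lightrag/kg/neo4j.py defines a GraphStorage class as the comment suggests:

```python
# lightrag/kg/__init__.py -- the relative import the removed comments sketch.
# Hypothetical: assumes lightrag/kg/neo4j.py exposes GraphStorage.
from .neo4j import GraphStorage as Neo4JStorage
```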
lightrag/lightrag.py CHANGED
@@ -120,9 +120,6 @@ class LightRAG:
     addon_params: dict = field(default_factory=dict)
     convert_response_to_json_func: callable = convert_response_to_json
 
-    # def get_configured_KG(self):
-    #     return self.kg
-
     def __post_init__(self):
         log_file = os.path.join(self.working_dir, "lightrag.log")
         set_logger(log_file)
@@ -133,7 +130,7 @@ class LightRAG:
         _print_config = ",\n ".join([f"{k} = {v}" for k, v in asdict(self).items()])
         logger.debug(f"LightRAG init with param:\n {_print_config}\n")
 
-        #should move all storage setup here to leverage initial start params attached to self.
+        #@TODO: should move all storage setup here to leverage initial start params attached to self.
         self.graph_storage_cls: Type[BaseGraphStorage] = self._get_storage_class()[self.kg]
 
         if not os.path.exists(self.working_dir):
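The @TODO line sits next to the lookup that resolves the kg config string into a storage backend. A hedged sketch of what _get_storage_class presumably returns (the key names mirror the NetworkX default and the Neo4JStorage override described in the README; the real mapping may differ):

```python
# Sketch only -- not the implementation from this commit.
# _get_storage_class() appears to return a dict keyed by the `kg` string,
# so self._get_storage_class()[self.kg] selects the backend class.
def _get_storage_class(self) -> dict:
    return {
        "NetworkXStorage": NetworkXStorage,  # default KG backend
        "Neo4JStorage": Neo4JStorage,        # enterprise backend from this PR
    }
```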
lightrag/llm.py CHANGED
@@ -72,9 +72,7 @@ async def openai_complete_if_cache(
 
 @retry(
     stop=stop_after_attempt(3),
-    #kw_
-    wait=wait_exponential(multiplier=1, min=10, max=60),
-    # wait=wait_exponential(multiplier=1, min=4, max=10),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
     retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
 )
 async def azure_openai_complete_if_cache(model,
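This consolidates three experimental wait settings into a single tenacity policy: waits start at 4 s and grow exponentially to a 10 s cap, and stop_after_attempt(3) abandons the call after the third try. A standalone illustration of that behavior (real tenacity API; the flaky function is made up):

```python
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
)
def flaky_call() -> str:
    # Stand-in for an Azure OpenAI request hitting a transient error.
    raise TimeoutError("simulated transient failure")

try:
    flaky_call()
except Exception as exc:  # tenacity raises RetryError once attempts run out
    print(f"gave up after 3 attempts: {exc}")
```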
lightrag/operate.py CHANGED
@@ -908,7 +908,6 @@ async def hybrid_query(
         .strip()
     )
     result = "{" + result.split("{")[1].split("}")[0] + "}"
-
     keywords_data = json.loads(result)
     hl_keywords = keywords_data.get("high_level_keywords", [])
     ll_keywords = keywords_data.get("low_level_keywords", [])
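The line above the removed blank does the JSON recovery for hybrid_query's keyword extraction. A standalone walk-through of that brace-trimming idiom (illustrative data, not from the repo):

```python
import json

raw = 'Sure! {"high_level_keywords": ["themes"], "low_level_keywords": ["plot"]} Hope that helps.'

# Keep only the text between the first "{" and the first "}" after it.
trimmed = "{" + raw.split("{")[1].split("}")[0] + "}"

keywords = json.loads(trimmed)
print(keywords["high_level_keywords"])  # ['themes']

# Caveat: split("}")[0] stops at the FIRST closing brace, so any nested
# object in the model's reply would be truncated and json.loads would fail.
```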
lightrag/storage.py CHANGED
@@ -95,7 +95,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
         embeddings = np.concatenate(embeddings_list)
         for i, d in enumerate(list_data):
             d["__vector__"] = embeddings[i]
-        print (f"Upserting to vector: {list_data}")
         results = self._client.upsert(datas=list_data)
         return results
 
@@ -110,7 +109,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
         results = [
             {**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
         ]
-        print (f"vector db results {results} for query {query}")
         return results
 
     async def index_done_callback(self):
@@ -235,9 +233,11 @@ class NetworkXStorage(BaseGraphStorage):
             raise ValueError(f"Node embedding algorithm {algorithm} not supported")
         return await self._node_embed_algorithms[algorithm]()
 
+
+    #@TODO: NOT USED
     async def _node2vec_embed(self):
         from graspologic import embed
-        print ("is this ever called?")
+
         embeddings, nodes = embed.node2vec_embed(
             self._graph,
             **self.global_config["node2vec_params"],
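For context on the new #@TODO: NOT USED marker: _node2vec_embed wraps graspologic's node2vec embedding. A hedged sketch of the underlying call, assuming graspologic and networkx are installed (the parameter values are illustrative stand-ins for whatever node2vec_params carries):

```python
import networkx as nx
from graspologic import embed

graph = nx.karate_club_graph()

# node2vec_embed returns (embeddings, node_labels), matching the
# `embeddings, nodes = ...` unpacking in NetworkXStorage._node2vec_embed.
embeddings, nodes = embed.node2vec_embed(
    graph,
    dimensions=128,
    walk_length=40,
    num_walks=10,
)
print(embeddings.shape)  # one 128-dimensional vector per node
```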
neo4jWorkDir/kv_store_full_docs.json DELETED
neo4jWorkDir/kv_store_llm_response_cache.json DELETED
neo4jWorkDir/kv_store_text_chunks.json DELETED
neo4jWorkDir/lightrag.log DELETED
neo4jWorkDir/vdb_chunks.json DELETED
neo4jWorkDir/vdb_entities.json DELETED
neo4jWorkDir/vdb_relationships.json DELETED
(diffs for these deleted working-directory files are too large to render)
testkg.py → test_neo4j.py RENAMED
@@ -17,12 +17,12 @@ rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
     kg="Neo4JStorage",
-    log_level="INFO"
+    log_level="DEBUG"
     # llm_model_func=gpt_4o_complete  # Optionally, use a stronger model
 )
 
-# with open("./book.txt") as f:
-#     rag.insert(f.read())
+with open("./book.txt") as f:
+    rag.insert(f.read())
 
 # Perform naive search
 print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
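The re-enabled insert plus the naive query make this a live end-to-end test. Since operate.py's hybrid_query is also touched in this commit, a natural extension (hypothetical, reusing the rag and QueryParam objects from the script above) is to sweep the other query modes:

```python
# Hypothetical follow-up for test_neo4j.py; assumes the modes LightRAG
# exposes via QueryParam (naive shown above; local/global/hybrid are the
# graph-aware paths, hybrid being the one edited in operate.py).
for mode in ["naive", "local", "global", "hybrid"]:
    print(f"--- {mode} ---")
    print(rag.query("What are the top themes in this story?", param=QueryParam(mode=mode)))
```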