Ken Wiltshire
committed on
Commit
·
14321eb
1
Parent(s):
5675139
cleaning code for pull
Browse files- README.md +30 -0
- lightrag/kg/__init__.py +1 -25
- lightrag/lightrag.py +1 -4
- lightrag/llm.py +1 -3
- lightrag/operate.py +0 -1
- lightrag/storage.py +3 -3
- neo4jWorkDir/kv_store_full_docs.json +0 -0
- neo4jWorkDir/kv_store_llm_response_cache.json +0 -0
- neo4jWorkDir/kv_store_text_chunks.json +0 -0
- neo4jWorkDir/lightrag.log +0 -0
- neo4jWorkDir/vdb_chunks.json +0 -0
- neo4jWorkDir/vdb_entities.json +0 -0
- neo4jWorkDir/vdb_relationships.json +0 -0
- testkg.py → test_neo4j.py +3 -3
README.md
CHANGED
@@ -155,6 +155,36 @@ rag = LightRAG(
|
|
155 |
```
|
156 |
</details>
|
157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
<details>
|
159 |
<summary> Using Ollama Models </summary>
|
160 |
|
|
|
155 |
```
|
156 |
</details>
|
157 |
|
158 |
+
|
159 |
+
<details>
|
160 |
+
<summary> Using Neo4J for Storage </summary>
|
161 |
+
|
162 |
+
* For production-level scenarios you will most likely want to leverage an enterprise solution
|
163 |
+
for KG storage.
|
164 |
+
```python
|
165 |
+
export NEO4J_URI="neo4j://localhost:7687"
|
166 |
+
export NEO4J_USERNAME="neo4j"
|
167 |
+
export NEO4J_PASSWORD="password"
|
168 |
+
|
169 |
+
When you launch the project, be sure to override the default KG: NetworkX
|
170 |
+
by specifying kg="Neo4JStorage".
|
171 |
+
|
172 |
+
# Note: Default settings use NetworkX
|
173 |
+
# Initialize LightRAG with the Neo4J implementation.
|
174 |
+
WORKING_DIR = "./local_neo4jWorkDir"
|
175 |
+
|
176 |
+
rag = LightRAG(
|
177 |
+
working_dir=WORKING_DIR,
|
178 |
+
llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
|
179 |
+
kg="Neo4JStorage", #<-----------override KG default
|
180 |
+
log_level="DEBUG" #<-----------override log_level default
|
181 |
+
)
|
182 |
+
```
|
183 |
+
See test_neo4j.py for a working example.
|
184 |
+
</details>
|
185 |
+
|
186 |
+
|
187 |
+
|
188 |
<details>
|
189 |
<summary> Using Ollama Models </summary>
|
190 |
|
lightrag/kg/__init__.py
CHANGED
@@ -1,27 +1,3 @@
|
|
1 |
-
print ("init package vars here. ......")
|
2 |
-
# from .neo4j import GraphStorage as Neo4JStorage
|
3 |
|
4 |
|
5 |
-
# import sys
|
6 |
-
# import importlib
|
7 |
-
# # Specify the path to the directory containing the module
|
8 |
-
# # Add the directory to the system path
|
9 |
-
# module_dir = '/Users/kenwiltshire/documents/dev/LightRag/lightrag/kg'
|
10 |
-
# sys.path.append(module_dir)
|
11 |
-
# # Specify the module name
|
12 |
-
# module_name = 'neo4j'
|
13 |
-
# # Import the module
|
14 |
-
# spec = importlib.util.spec_from_file_location(module_name, f'{module_dir}/{module_name}.py')
|
15 |
-
|
16 |
-
# Neo4JStorage = importlib.util.module_from_spec(spec)
|
17 |
-
# spec.loader.exec_module(Neo4JStorage)
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
# Relative imports are still possible by adding a leading period to the module name when using the from ... import form:
|
22 |
-
|
23 |
-
# # Import names from pkg.string
|
24 |
-
# from .string import name1, name2
|
25 |
-
# # Import pkg.string
|
26 |
-
# from . import string
|
27 |
-
|
|
|
1 |
+
# print ("init package vars here. ......")
|
|
|
2 |
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
lightrag/lightrag.py
CHANGED
@@ -120,9 +120,6 @@ class LightRAG:
|
|
120 |
addon_params: dict = field(default_factory=dict)
|
121 |
convert_response_to_json_func: callable = convert_response_to_json
|
122 |
|
123 |
-
# def get_configured_KG(self):
|
124 |
-
# return self.kg
|
125 |
-
|
126 |
def __post_init__(self):
|
127 |
log_file = os.path.join(self.working_dir, "lightrag.log")
|
128 |
set_logger(log_file)
|
@@ -133,7 +130,7 @@ class LightRAG:
|
|
133 |
_print_config = ",\n ".join([f"{k} = {v}" for k, v in asdict(self).items()])
|
134 |
logger.debug(f"LightRAG init with param:\n {_print_config}\n")
|
135 |
|
136 |
-
|
137 |
self.graph_storage_cls: Type[BaseGraphStorage] = self._get_storage_class()[self.kg]
|
138 |
|
139 |
if not os.path.exists(self.working_dir):
|
|
|
120 |
addon_params: dict = field(default_factory=dict)
|
121 |
convert_response_to_json_func: callable = convert_response_to_json
|
122 |
|
|
|
|
|
|
|
123 |
def __post_init__(self):
|
124 |
log_file = os.path.join(self.working_dir, "lightrag.log")
|
125 |
set_logger(log_file)
|
|
|
130 |
_print_config = ",\n ".join([f"{k} = {v}" for k, v in asdict(self).items()])
|
131 |
logger.debug(f"LightRAG init with param:\n {_print_config}\n")
|
132 |
|
133 |
+
#@TODO: should move all storage setup here to leverage initial start params attached to self.
|
134 |
self.graph_storage_cls: Type[BaseGraphStorage] = self._get_storage_class()[self.kg]
|
135 |
|
136 |
if not os.path.exists(self.working_dir):
|
lightrag/llm.py
CHANGED
@@ -72,9 +72,7 @@ async def openai_complete_if_cache(
|
|
72 |
|
73 |
@retry(
|
74 |
stop=stop_after_attempt(3),
|
75 |
-
|
76 |
-
wait=wait_exponential(multiplier=1, min=10, max=60),
|
77 |
-
# wait=wait_exponential(multiplier=1, min=4, max=10),
|
78 |
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
|
79 |
)
|
80 |
async def azure_openai_complete_if_cache(model,
|
|
|
72 |
|
73 |
@retry(
|
74 |
stop=stop_after_attempt(3),
|
75 |
+
wait=wait_exponential(multiplier=1, min=4, max=10),
|
|
|
|
|
76 |
retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
|
77 |
)
|
78 |
async def azure_openai_complete_if_cache(model,
|
lightrag/operate.py
CHANGED
@@ -908,7 +908,6 @@ async def hybrid_query(
|
|
908 |
.strip()
|
909 |
)
|
910 |
result = "{" + result.split("{")[1].split("}")[0] + "}"
|
911 |
-
|
912 |
keywords_data = json.loads(result)
|
913 |
hl_keywords = keywords_data.get("high_level_keywords", [])
|
914 |
ll_keywords = keywords_data.get("low_level_keywords", [])
|
|
|
908 |
.strip()
|
909 |
)
|
910 |
result = "{" + result.split("{")[1].split("}")[0] + "}"
|
|
|
911 |
keywords_data = json.loads(result)
|
912 |
hl_keywords = keywords_data.get("high_level_keywords", [])
|
913 |
ll_keywords = keywords_data.get("low_level_keywords", [])
|
lightrag/storage.py
CHANGED
@@ -95,7 +95,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
95 |
embeddings = np.concatenate(embeddings_list)
|
96 |
for i, d in enumerate(list_data):
|
97 |
d["__vector__"] = embeddings[i]
|
98 |
-
print (f"Upserting to vector: {list_data}")
|
99 |
results = self._client.upsert(datas=list_data)
|
100 |
return results
|
101 |
|
@@ -110,7 +109,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
110 |
results = [
|
111 |
{**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
|
112 |
]
|
113 |
-
print (f"vector db results {results} for query {query}")
|
114 |
return results
|
115 |
|
116 |
async def index_done_callback(self):
|
@@ -235,9 +233,11 @@ class NetworkXStorage(BaseGraphStorage):
|
|
235 |
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
|
236 |
return await self._node_embed_algorithms[algorithm]()
|
237 |
|
|
|
|
|
238 |
async def _node2vec_embed(self):
|
239 |
from graspologic import embed
|
240 |
-
|
241 |
embeddings, nodes = embed.node2vec_embed(
|
242 |
self._graph,
|
243 |
**self.global_config["node2vec_params"],
|
|
|
95 |
embeddings = np.concatenate(embeddings_list)
|
96 |
for i, d in enumerate(list_data):
|
97 |
d["__vector__"] = embeddings[i]
|
|
|
98 |
results = self._client.upsert(datas=list_data)
|
99 |
return results
|
100 |
|
|
|
109 |
results = [
|
110 |
{**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
|
111 |
]
|
|
|
112 |
return results
|
113 |
|
114 |
async def index_done_callback(self):
|
|
|
233 |
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
|
234 |
return await self._node_embed_algorithms[algorithm]()
|
235 |
|
236 |
+
|
237 |
+
#@TODO: NOT USED
|
238 |
async def _node2vec_embed(self):
|
239 |
from graspologic import embed
|
240 |
+
|
241 |
embeddings, nodes = embed.node2vec_embed(
|
242 |
self._graph,
|
243 |
**self.global_config["node2vec_params"],
|
neo4jWorkDir/kv_store_full_docs.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/kv_store_llm_response_cache.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/kv_store_text_chunks.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/lightrag.log
DELETED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/vdb_chunks.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/vdb_entities.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
neo4jWorkDir/vdb_relationships.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
testkg.py → test_neo4j.py
RENAMED
@@ -17,12 +17,12 @@ rag = LightRAG(
|
|
17 |
working_dir=WORKING_DIR,
|
18 |
llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
|
19 |
kg="Neo4JStorage",
|
20 |
-
log_level="
|
21 |
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
22 |
)
|
23 |
|
24 |
-
|
25 |
-
|
26 |
|
27 |
# Perform naive search
|
28 |
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
|
|
|
17 |
working_dir=WORKING_DIR,
|
18 |
llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
|
19 |
kg="Neo4JStorage",
|
20 |
+
log_level="DEBUG"
|
21 |
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
22 |
)
|
23 |
|
24 |
+
with open("./book.txt") as f:
|
25 |
+
rag.insert(f.read())
|
26 |
|
27 |
# Perform naive search
|
28 |
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
|