Larfii
committed on
Commit
·
38e1d84
1
Parent(s):
a8a6171
update
Browse files
- LICENSE +21 -0
- README.md +5 -0
- examples/insert.py +0 -1
- examples/query.py +1 -2
- lightrag/__init__.py +5 -1
- lightrag/__pycache__/__init__.cpython-310.pyc +0 -0
- lightrag/__pycache__/base.cpython-310.pyc +0 -0
- lightrag/__pycache__/llm.cpython-310.pyc +0 -0
- lightrag/operate.py +11 -14
- requirements.txt +8 -0
- setup.py +39 -0
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2024 Gustavo Ye
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1 +1,6 @@
|
|
1 |
# LightRAG
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# LightRAG
|
2 |
+
|
3 |
+
## Citation
|
4 |
+
## Acknowledgement
|
5 |
+
|
6 |
+
The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
|
examples/insert.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import os
|
2 |
import sys
|
3 |
-
sys.path.append('xxx/xxx/LightRAG')
|
4 |
|
5 |
from lightrag import LightRAG
|
6 |
|
|
|
1 |
import os
|
2 |
import sys
|
|
|
3 |
|
4 |
from lightrag import LightRAG
|
5 |
|
examples/query.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import os
|
2 |
import sys
|
3 |
-
sys.path.append('xxx/xxx/LightRAG')
|
4 |
|
5 |
from lightrag import LightRAG, QueryParam
|
6 |
|
@@ -13,5 +12,5 @@ rag = LightRAG(working_dir=WORKING_DIR)
|
|
13 |
mode = 'global'
|
14 |
query_param = QueryParam(mode=mode)
|
15 |
|
16 |
-
result
|
17 |
print(result)
|
|
|
1 |
import os
|
2 |
import sys
|
|
|
3 |
|
4 |
from lightrag import LightRAG, QueryParam
|
5 |
|
|
|
12 |
mode = 'global'
|
13 |
query_param = QueryParam(mode=mode)
|
14 |
|
15 |
+
result = rag.query("", param=query_param)
|
16 |
print(result)
|
lightrag/__init__.py
CHANGED
@@ -1 +1,5 @@
|
|
1 |
-
from .lightrag import LightRAG, QueryParam
|
|
|
|
|
|
|
|
|
|
1 |
+
from .lightrag import LightRAG, QueryParam
|
2 |
+
|
3 |
+
__version__ = "0.0.1"
|
4 |
+
__author__ = "Zirui Guo"
|
5 |
+
__url__ = "https://github.com/HKUDS/GraphEdit"
|
lightrag/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/lightrag/__pycache__/__init__.cpython-310.pyc and b/lightrag/__pycache__/__init__.cpython-310.pyc differ
|
|
lightrag/__pycache__/base.cpython-310.pyc
CHANGED
Binary files a/lightrag/__pycache__/base.cpython-310.pyc and b/lightrag/__pycache__/base.cpython-310.pyc differ
|
|
lightrag/__pycache__/llm.cpython-310.pyc
CHANGED
Binary files a/lightrag/__pycache__/llm.cpython-310.pyc and b/lightrag/__pycache__/llm.cpython-310.pyc differ
|
|
lightrag/operate.py
CHANGED
@@ -176,7 +176,6 @@ async def _merge_edges_then_upsert(
|
|
176 |
already_weights = []
|
177 |
already_source_ids = []
|
178 |
already_description = []
|
179 |
-
##################
|
180 |
already_keywords = []
|
181 |
|
182 |
if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
|
@@ -186,7 +185,6 @@ async def _merge_edges_then_upsert(
|
|
186 |
split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
|
187 |
)
|
188 |
already_description.append(already_edge["description"])
|
189 |
-
############
|
190 |
already_keywords.extend(
|
191 |
split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
|
192 |
)
|
@@ -195,7 +193,6 @@ async def _merge_edges_then_upsert(
|
|
195 |
description = GRAPH_FIELD_SEP.join(
|
196 |
sorted(set([dp["description"] for dp in edges_data] + already_description))
|
197 |
)
|
198 |
-
##########
|
199 |
keywords = GRAPH_FIELD_SEP.join(
|
200 |
sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
|
201 |
)
|
@@ -403,7 +400,7 @@ async def local_query(
|
|
403 |
except json.JSONDecodeError as e:
|
404 |
# Handle parsing error
|
405 |
print(f"JSON parsing error: {e}")
|
406 |
-
return PROMPTS["fail_response"]
|
407 |
|
408 |
context = await _build_local_query_context(
|
409 |
keywords,
|
@@ -415,7 +412,7 @@ async def local_query(
|
|
415 |
if query_param.only_need_context:
|
416 |
return context
|
417 |
if context is None:
|
418 |
-
return PROMPTS["fail_response"]
|
419 |
sys_prompt_temp = PROMPTS["rag_response"]
|
420 |
sys_prompt = sys_prompt_temp.format(
|
421 |
context_data=context, response_type=query_param.response_type
|
@@ -424,7 +421,7 @@ async def local_query(
|
|
424 |
query,
|
425 |
system_prompt=sys_prompt,
|
426 |
)
|
427 |
-
return response
|
428 |
|
429 |
async def _build_local_query_context(
|
430 |
query,
|
@@ -622,7 +619,7 @@ async def global_query(
|
|
622 |
except json.JSONDecodeError as e:
|
623 |
# Handle parsing error
|
624 |
print(f"JSON parsing error: {e}")
|
625 |
-
return PROMPTS["fail_response"]
|
626 |
|
627 |
context = await _build_global_query_context(
|
628 |
keywords,
|
@@ -636,7 +633,7 @@ async def global_query(
|
|
636 |
if query_param.only_need_context:
|
637 |
return context
|
638 |
if context is None:
|
639 |
-
return PROMPTS["fail_response"]
|
640 |
|
641 |
sys_prompt_temp = PROMPTS["rag_response"]
|
642 |
sys_prompt = sys_prompt_temp.format(
|
@@ -646,7 +643,7 @@ async def global_query(
|
|
646 |
query,
|
647 |
system_prompt=sys_prompt,
|
648 |
)
|
649 |
-
return
|
650 |
|
651 |
async def _build_global_query_context(
|
652 |
keywords,
|
@@ -836,7 +833,7 @@ async def hybird_query(
|
|
836 |
except json.JSONDecodeError as e:
|
837 |
# Handle parsing error
|
838 |
print(f"JSON parsing error: {e}")
|
839 |
-
return PROMPTS["fail_response"]
|
840 |
|
841 |
low_level_context = await _build_local_query_context(
|
842 |
ll_keywords,
|
@@ -860,7 +857,7 @@ async def hybird_query(
|
|
860 |
if query_param.only_need_context:
|
861 |
return context
|
862 |
if context is None:
|
863 |
-
return PROMPTS["fail_response"]
|
864 |
|
865 |
sys_prompt_temp = PROMPTS["rag_response"]
|
866 |
sys_prompt = sys_prompt_temp.format(
|
@@ -870,7 +867,7 @@ async def hybird_query(
|
|
870 |
query,
|
871 |
system_prompt=sys_prompt,
|
872 |
)
|
873 |
-
return
|
874 |
|
875 |
def combine_contexts(high_level_context, low_level_context):
|
876 |
# Function to extract entities, relationships, and sources from context strings
|
@@ -922,14 +919,14 @@ async def naive_query(
|
|
922 |
use_model_func = global_config["llm_model_func"]
|
923 |
results = await chunks_vdb.query(query, top_k=query_param.top_k)
|
924 |
if not len(results):
|
925 |
-
return PROMPTS["fail_response"]
|
926 |
chunks_ids = [r["id"] for r in results]
|
927 |
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
928 |
|
929 |
maybe_trun_chunks = truncate_list_by_token_size(
|
930 |
chunks,
|
931 |
key=lambda x: x["content"],
|
932 |
-
max_token_size=query_param.
|
933 |
)
|
934 |
logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
|
935 |
section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
|
|
|
176 |
already_weights = []
|
177 |
already_source_ids = []
|
178 |
already_description = []
|
|
|
179 |
already_keywords = []
|
180 |
|
181 |
if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
|
|
|
185 |
split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
|
186 |
)
|
187 |
already_description.append(already_edge["description"])
|
|
|
188 |
already_keywords.extend(
|
189 |
split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
|
190 |
)
|
|
|
193 |
description = GRAPH_FIELD_SEP.join(
|
194 |
sorted(set([dp["description"] for dp in edges_data] + already_description))
|
195 |
)
|
|
|
196 |
keywords = GRAPH_FIELD_SEP.join(
|
197 |
sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
|
198 |
)
|
|
|
400 |
except json.JSONDecodeError as e:
|
401 |
# Handle parsing error
|
402 |
print(f"JSON parsing error: {e}")
|
403 |
+
return PROMPTS["fail_response"]
|
404 |
|
405 |
context = await _build_local_query_context(
|
406 |
keywords,
|
|
|
412 |
if query_param.only_need_context:
|
413 |
return context
|
414 |
if context is None:
|
415 |
+
return PROMPTS["fail_response"]
|
416 |
sys_prompt_temp = PROMPTS["rag_response"]
|
417 |
sys_prompt = sys_prompt_temp.format(
|
418 |
context_data=context, response_type=query_param.response_type
|
|
|
421 |
query,
|
422 |
system_prompt=sys_prompt,
|
423 |
)
|
424 |
+
return response
|
425 |
|
426 |
async def _build_local_query_context(
|
427 |
query,
|
|
|
619 |
except json.JSONDecodeError as e:
|
620 |
# Handle parsing error
|
621 |
print(f"JSON parsing error: {e}")
|
622 |
+
return PROMPTS["fail_response"]
|
623 |
|
624 |
context = await _build_global_query_context(
|
625 |
keywords,
|
|
|
633 |
if query_param.only_need_context:
|
634 |
return context
|
635 |
if context is None:
|
636 |
+
return PROMPTS["fail_response"]
|
637 |
|
638 |
sys_prompt_temp = PROMPTS["rag_response"]
|
639 |
sys_prompt = sys_prompt_temp.format(
|
|
|
643 |
query,
|
644 |
system_prompt=sys_prompt,
|
645 |
)
|
646 |
+
return response
|
647 |
|
648 |
async def _build_global_query_context(
|
649 |
keywords,
|
|
|
833 |
except json.JSONDecodeError as e:
|
834 |
# Handle parsing error
|
835 |
print(f"JSON parsing error: {e}")
|
836 |
+
return PROMPTS["fail_response"]
|
837 |
|
838 |
low_level_context = await _build_local_query_context(
|
839 |
ll_keywords,
|
|
|
857 |
if query_param.only_need_context:
|
858 |
return context
|
859 |
if context is None:
|
860 |
+
return PROMPTS["fail_response"]
|
861 |
|
862 |
sys_prompt_temp = PROMPTS["rag_response"]
|
863 |
sys_prompt = sys_prompt_temp.format(
|
|
|
867 |
query,
|
868 |
system_prompt=sys_prompt,
|
869 |
)
|
870 |
+
return response
|
871 |
|
872 |
def combine_contexts(high_level_context, low_level_context):
|
873 |
# Function to extract entities, relationships, and sources from context strings
|
|
|
919 |
use_model_func = global_config["llm_model_func"]
|
920 |
results = await chunks_vdb.query(query, top_k=query_param.top_k)
|
921 |
if not len(results):
|
922 |
+
return PROMPTS["fail_response"]
|
923 |
chunks_ids = [r["id"] for r in results]
|
924 |
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
925 |
|
926 |
maybe_trun_chunks = truncate_list_by_token_size(
|
927 |
chunks,
|
928 |
key=lambda x: x["content"],
|
929 |
+
max_token_size=query_param.max_token_for_text_unit,
|
930 |
)
|
931 |
logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
|
932 |
section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
tiktoken
|
3 |
+
networkx
|
4 |
+
graspologic
|
5 |
+
nano-vectordb
|
6 |
+
hnswlib
|
7 |
+
xxhash
|
8 |
+
tenacity
|
setup.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import setuptools
|
2 |
+
|
3 |
+
with open("README.md", "r") as fh:
|
4 |
+
long_description = fh.read()
|
5 |
+
|
6 |
+
|
7 |
+
vars2find = ["__author__", "__version__", "__url__"]
|
8 |
+
vars2readme = {}
|
9 |
+
with open("./lightrag/__init__.py") as f:
|
10 |
+
for line in f.readlines():
|
11 |
+
for v in vars2find:
|
12 |
+
if line.startswith(v):
|
13 |
+
line = line.replace(" ", "").replace('"', "").replace("'", "").strip()
|
14 |
+
vars2readme[v] = line.split("=")[1]
|
15 |
+
|
16 |
+
deps = []
|
17 |
+
with open("./requirements.txt") as f:
|
18 |
+
for line in f.readlines():
|
19 |
+
if not line.strip():
|
20 |
+
continue
|
21 |
+
deps.append(line.strip())
|
22 |
+
|
23 |
+
setuptools.setup(
|
24 |
+
name="lightrag",
|
25 |
+
url=vars2readme["__url__"],
|
26 |
+
version=vars2readme["__version__"],
|
27 |
+
author=vars2readme["__author__"],
|
28 |
+
description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
|
29 |
+
long_description=long_description,
|
30 |
+
long_description_content_type="text/markdown",
|
31 |
+
packages=["lightrag"],
|
32 |
+
classifiers=[
|
33 |
+
"Programming Language :: Python :: 3",
|
34 |
+
"License :: OSI Approved :: MIT License",
|
35 |
+
"Operating System :: OS Independent",
|
36 |
+
],
|
37 |
+
python_requires=">=3.9",
|
38 |
+
install_requires=deps,
|
39 |
+
)
|