Larfii committed on
Commit ·
38e1d84
1
Parent(s): a8a6171
update
Browse files
- LICENSE +21 -0
- README.md +5 -0
- examples/insert.py +0 -1
- examples/query.py +1 -2
- lightrag/__init__.py +5 -1
- lightrag/__pycache__/__init__.cpython-310.pyc +0 -0
- lightrag/__pycache__/base.cpython-310.pyc +0 -0
- lightrag/__pycache__/llm.cpython-310.pyc +0 -0
- lightrag/operate.py +11 -14
- requirements.txt +8 -0
- setup.py +39 -0
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2024 Gustavo Ye
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
CHANGED
|
@@ -1 +1,6 @@
|
|
| 1 |
# LightRAG
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# LightRAG
|
| 2 |
+
|
| 3 |
+
## Citation
|
| 4 |
+
## Acknowledgement
|
| 5 |
+
|
| 6 |
+
The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
|
examples/insert.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
-
sys.path.append('xxx/xxx/LightRAG')
|
| 4 |
|
| 5 |
from lightrag import LightRAG
|
| 6 |
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
|
|
|
| 3 |
|
| 4 |
from lightrag import LightRAG
|
| 5 |
|
examples/query.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
-
sys.path.append('xxx/xxx/LightRAG')
|
| 4 |
|
| 5 |
from lightrag import LightRAG, QueryParam
|
| 6 |
|
|
@@ -13,5 +12,5 @@ rag = LightRAG(working_dir=WORKING_DIR)
|
|
| 13 |
mode = 'global'
|
| 14 |
query_param = QueryParam(mode=mode)
|
| 15 |
|
| 16 |
-
result
|
| 17 |
print(result)
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
|
|
|
| 3 |
|
| 4 |
from lightrag import LightRAG, QueryParam
|
| 5 |
|
|
|
|
| 12 |
mode = 'global'
|
| 13 |
query_param = QueryParam(mode=mode)
|
| 14 |
|
| 15 |
+
result = rag.query("", param=query_param)
|
| 16 |
print(result)
|
lightrag/__init__.py
CHANGED
|
@@ -1 +1,5 @@
|
|
| 1 |
-
from .lightrag import LightRAG, QueryParam
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .lightrag import LightRAG, QueryParam

# Package metadata. setup.py reads these three assignments as plain text
# (it does not import the package), so keep each one on a single line of
# the form `__name__ = "value"`.
__version__ = "0.0.1"
__author__ = "Zirui Guo"
# Project home page, published by setup.py as the package URL.
# NOTE(review): this previously pointed at HKUDS/GraphEdit, which looks like
# a copy-paste from another project; confirm the LightRAG repository URL.
__url__ = "https://github.com/HKUDS/LightRAG"
|
lightrag/__pycache__/__init__.cpython-310.pyc
CHANGED
|
Binary files a/lightrag/__pycache__/__init__.cpython-310.pyc and b/lightrag/__pycache__/__init__.cpython-310.pyc differ
|
|
|
lightrag/__pycache__/base.cpython-310.pyc
CHANGED
|
Binary files a/lightrag/__pycache__/base.cpython-310.pyc and b/lightrag/__pycache__/base.cpython-310.pyc differ
|
|
|
lightrag/__pycache__/llm.cpython-310.pyc
CHANGED
|
Binary files a/lightrag/__pycache__/llm.cpython-310.pyc and b/lightrag/__pycache__/llm.cpython-310.pyc differ
|
|
|
lightrag/operate.py
CHANGED
|
@@ -176,7 +176,6 @@ async def _merge_edges_then_upsert(
|
|
| 176 |
already_weights = []
|
| 177 |
already_source_ids = []
|
| 178 |
already_description = []
|
| 179 |
-
##################
|
| 180 |
already_keywords = []
|
| 181 |
|
| 182 |
if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
|
|
@@ -186,7 +185,6 @@ async def _merge_edges_then_upsert(
|
|
| 186 |
split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
|
| 187 |
)
|
| 188 |
already_description.append(already_edge["description"])
|
| 189 |
-
############
|
| 190 |
already_keywords.extend(
|
| 191 |
split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
|
| 192 |
)
|
|
@@ -195,7 +193,6 @@ async def _merge_edges_then_upsert(
|
|
| 195 |
description = GRAPH_FIELD_SEP.join(
|
| 196 |
sorted(set([dp["description"] for dp in edges_data] + already_description))
|
| 197 |
)
|
| 198 |
-
##########
|
| 199 |
keywords = GRAPH_FIELD_SEP.join(
|
| 200 |
sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
|
| 201 |
)
|
|
@@ -403,7 +400,7 @@ async def local_query(
|
|
| 403 |
except json.JSONDecodeError as e:
|
| 404 |
# Handle parsing error
|
| 405 |
print(f"JSON parsing error: {e}")
|
| 406 |
-
return PROMPTS["fail_response"]
|
| 407 |
|
| 408 |
context = await _build_local_query_context(
|
| 409 |
keywords,
|
|
@@ -415,7 +412,7 @@ async def local_query(
|
|
| 415 |
if query_param.only_need_context:
|
| 416 |
return context
|
| 417 |
if context is None:
|
| 418 |
-
return PROMPTS["fail_response"]
|
| 419 |
sys_prompt_temp = PROMPTS["rag_response"]
|
| 420 |
sys_prompt = sys_prompt_temp.format(
|
| 421 |
context_data=context, response_type=query_param.response_type
|
|
@@ -424,7 +421,7 @@ async def local_query(
|
|
| 424 |
query,
|
| 425 |
system_prompt=sys_prompt,
|
| 426 |
)
|
| 427 |
-
return response
|
| 428 |
|
| 429 |
async def _build_local_query_context(
|
| 430 |
query,
|
|
@@ -622,7 +619,7 @@ async def global_query(
|
|
| 622 |
except json.JSONDecodeError as e:
|
| 623 |
# Handle parsing error
|
| 624 |
print(f"JSON parsing error: {e}")
|
| 625 |
-
return PROMPTS["fail_response"]
|
| 626 |
|
| 627 |
context = await _build_global_query_context(
|
| 628 |
keywords,
|
|
@@ -636,7 +633,7 @@ async def global_query(
|
|
| 636 |
if query_param.only_need_context:
|
| 637 |
return context
|
| 638 |
if context is None:
|
| 639 |
-
return PROMPTS["fail_response"]
|
| 640 |
|
| 641 |
sys_prompt_temp = PROMPTS["rag_response"]
|
| 642 |
sys_prompt = sys_prompt_temp.format(
|
|
@@ -646,7 +643,7 @@ async def global_query(
|
|
| 646 |
query,
|
| 647 |
system_prompt=sys_prompt,
|
| 648 |
)
|
| 649 |
-
return
|
| 650 |
|
| 651 |
async def _build_global_query_context(
|
| 652 |
keywords,
|
|
@@ -836,7 +833,7 @@ async def hybird_query(
|
|
| 836 |
except json.JSONDecodeError as e:
|
| 837 |
# Handle parsing error
|
| 838 |
print(f"JSON parsing error: {e}")
|
| 839 |
-
return PROMPTS["fail_response"]
|
| 840 |
|
| 841 |
low_level_context = await _build_local_query_context(
|
| 842 |
ll_keywords,
|
|
@@ -860,7 +857,7 @@ async def hybird_query(
|
|
| 860 |
if query_param.only_need_context:
|
| 861 |
return context
|
| 862 |
if context is None:
|
| 863 |
-
return PROMPTS["fail_response"]
|
| 864 |
|
| 865 |
sys_prompt_temp = PROMPTS["rag_response"]
|
| 866 |
sys_prompt = sys_prompt_temp.format(
|
|
@@ -870,7 +867,7 @@ async def hybird_query(
|
|
| 870 |
query,
|
| 871 |
system_prompt=sys_prompt,
|
| 872 |
)
|
| 873 |
-
return
|
| 874 |
|
| 875 |
def combine_contexts(high_level_context, low_level_context):
|
| 876 |
# Function to extract entities, relationships, and sources from context strings
|
|
@@ -922,14 +919,14 @@ async def naive_query(
|
|
| 922 |
use_model_func = global_config["llm_model_func"]
|
| 923 |
results = await chunks_vdb.query(query, top_k=query_param.top_k)
|
| 924 |
if not len(results):
|
| 925 |
-
return PROMPTS["fail_response"]
|
| 926 |
chunks_ids = [r["id"] for r in results]
|
| 927 |
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
| 928 |
|
| 929 |
maybe_trun_chunks = truncate_list_by_token_size(
|
| 930 |
chunks,
|
| 931 |
key=lambda x: x["content"],
|
| 932 |
-
max_token_size=query_param.
|
| 933 |
)
|
| 934 |
logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
|
| 935 |
section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
|
|
|
|
| 176 |
already_weights = []
|
| 177 |
already_source_ids = []
|
| 178 |
already_description = []
|
|
|
|
| 179 |
already_keywords = []
|
| 180 |
|
| 181 |
if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
|
|
|
|
| 185 |
split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
|
| 186 |
)
|
| 187 |
already_description.append(already_edge["description"])
|
|
|
|
| 188 |
already_keywords.extend(
|
| 189 |
split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
|
| 190 |
)
|
|
|
|
| 193 |
description = GRAPH_FIELD_SEP.join(
|
| 194 |
sorted(set([dp["description"] for dp in edges_data] + already_description))
|
| 195 |
)
|
|
|
|
| 196 |
keywords = GRAPH_FIELD_SEP.join(
|
| 197 |
sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
|
| 198 |
)
|
|
|
|
| 400 |
except json.JSONDecodeError as e:
|
| 401 |
# Handle parsing error
|
| 402 |
print(f"JSON parsing error: {e}")
|
| 403 |
+
return PROMPTS["fail_response"]
|
| 404 |
|
| 405 |
context = await _build_local_query_context(
|
| 406 |
keywords,
|
|
|
|
| 412 |
if query_param.only_need_context:
|
| 413 |
return context
|
| 414 |
if context is None:
|
| 415 |
+
return PROMPTS["fail_response"]
|
| 416 |
sys_prompt_temp = PROMPTS["rag_response"]
|
| 417 |
sys_prompt = sys_prompt_temp.format(
|
| 418 |
context_data=context, response_type=query_param.response_type
|
|
|
|
| 421 |
query,
|
| 422 |
system_prompt=sys_prompt,
|
| 423 |
)
|
| 424 |
+
return response
|
| 425 |
|
| 426 |
async def _build_local_query_context(
|
| 427 |
query,
|
|
|
|
| 619 |
except json.JSONDecodeError as e:
|
| 620 |
# Handle parsing error
|
| 621 |
print(f"JSON parsing error: {e}")
|
| 622 |
+
return PROMPTS["fail_response"]
|
| 623 |
|
| 624 |
context = await _build_global_query_context(
|
| 625 |
keywords,
|
|
|
|
| 633 |
if query_param.only_need_context:
|
| 634 |
return context
|
| 635 |
if context is None:
|
| 636 |
+
return PROMPTS["fail_response"]
|
| 637 |
|
| 638 |
sys_prompt_temp = PROMPTS["rag_response"]
|
| 639 |
sys_prompt = sys_prompt_temp.format(
|
|
|
|
| 643 |
query,
|
| 644 |
system_prompt=sys_prompt,
|
| 645 |
)
|
| 646 |
+
return response
|
| 647 |
|
| 648 |
async def _build_global_query_context(
|
| 649 |
keywords,
|
|
|
|
| 833 |
except json.JSONDecodeError as e:
|
| 834 |
# Handle parsing error
|
| 835 |
print(f"JSON parsing error: {e}")
|
| 836 |
+
return PROMPTS["fail_response"]
|
| 837 |
|
| 838 |
low_level_context = await _build_local_query_context(
|
| 839 |
ll_keywords,
|
|
|
|
| 857 |
if query_param.only_need_context:
|
| 858 |
return context
|
| 859 |
if context is None:
|
| 860 |
+
return PROMPTS["fail_response"]
|
| 861 |
|
| 862 |
sys_prompt_temp = PROMPTS["rag_response"]
|
| 863 |
sys_prompt = sys_prompt_temp.format(
|
|
|
|
| 867 |
query,
|
| 868 |
system_prompt=sys_prompt,
|
| 869 |
)
|
| 870 |
+
return response
|
| 871 |
|
| 872 |
def combine_contexts(high_level_context, low_level_context):
|
| 873 |
# Function to extract entities, relationships, and sources from context strings
|
|
|
|
| 919 |
use_model_func = global_config["llm_model_func"]
|
| 920 |
results = await chunks_vdb.query(query, top_k=query_param.top_k)
|
| 921 |
if not len(results):
|
| 922 |
+
return PROMPTS["fail_response"]
|
| 923 |
chunks_ids = [r["id"] for r in results]
|
| 924 |
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
| 925 |
|
| 926 |
maybe_trun_chunks = truncate_list_by_token_size(
|
| 927 |
chunks,
|
| 928 |
key=lambda x: x["content"],
|
| 929 |
+
max_token_size=query_param.max_token_for_text_unit,
|
| 930 |
)
|
| 931 |
logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
|
| 932 |
section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai
|
| 2 |
+
tiktoken
|
| 3 |
+
networkx
|
| 4 |
+
graspologic
|
| 5 |
+
nano-vectordb
|
| 6 |
+
hnswlib
|
| 7 |
+
xxhash
|
| 8 |
+
tenacity
|
setup.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Packaging script for the ``lightrag`` distribution.

Reads the long description from ``README.md``, the package metadata
(``__author__``, ``__version__``, ``__url__``) from ``lightrag/__init__.py``
and the install requirements from ``requirements.txt``, then passes them to
``setuptools.setup``.
"""
import setuptools

# Decode explicitly as UTF-8 so the build does not depend on the platform's
# default locale encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()


# Metadata is parsed as text rather than imported, so building the package
# does not require its runtime dependencies to be installed.
vars2find = ["__author__", "__version__", "__url__"]
vars2readme = {}
with open("./lightrag/__init__.py", encoding="utf-8") as f:
    for line in f:
        # Split on the first "=" only, so a value that itself contains "="
        # (for example a URL with a query string) is kept whole.
        name, sep, value = line.partition("=")
        name = name.strip()
        if sep and name in vars2find:
            # Trim only the surrounding whitespace and quotes. Removing every
            # space would mangle multi-word values such as an author's
            # "First Last" name.
            vars2readme[name] = value.strip().strip("\"'")

# One requirement per line; blank lines and "#" comment lines are skipped
# because they are not valid requirement specifiers.
with open("./requirements.txt", encoding="utf-8") as f:
    deps = [
        line.strip()
        for line in f
        if line.strip() and not line.lstrip().startswith("#")
    ]

setuptools.setup(
    name="lightrag",
    url=vars2readme["__url__"],
    version=vars2readme["__version__"],
    author=vars2readme["__author__"],
    description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=["lightrag"],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.9",
    install_requires=deps,
)
|