Spaces:

rm-lht
/

lightrag

Configuration error

App Files Files Community

Larfii committed on Oct 8, 2024

Commit

38e1d84

1 Parent(s): a8a6171

update

Browse files

Files changed (11) hide show

LICENSE +21 -0
README.md +5 -0
examples/insert.py +0 -1
examples/query.py +1 -2
lightrag/__init__.py +5 -1
lightrag/__pycache__/__init__.cpython-310.pyc +0 -0
lightrag/__pycache__/base.cpython-310.pyc +0 -0
lightrag/__pycache__/llm.cpython-310.pyc +0 -0
lightrag/operate.py +11 -14
requirements.txt +8 -0
setup.py +39 -0

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2024 Gustavo Ye
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

	@@ -1 +1,6 @@
1	# LightRAG

 # LightRAG
+## Citation
+## Acknowledgement
+The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).

examples/insert.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import os
 import sys
-sys.path.append('xxx/xxx/LightRAG')
 from lightrag import LightRAG

 import os
 import sys
 from lightrag import LightRAG

examples/query.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import os
 import sys
-sys.path.append('xxx/xxx/LightRAG')
 from lightrag import LightRAG, QueryParam
@@ -13,5 +12,5 @@ rag = LightRAG(working_dir=WORKING_DIR)
 mode = 'global'
 query_param = QueryParam(mode=mode)
-result, _ = rag.query("", param=query_param)
 print(result)

 import os
 import sys
 from lightrag import LightRAG, QueryParam
 mode = 'global'
 query_param = QueryParam(mode=mode)
+result = rag.query("", param=query_param)
 print(result)

lightrag/__init__.py CHANGED Viewed

	@@ -1 +1,5 @@
1	- from .lightrag import LightRAG, QueryParam

+from .lightrag import LightRAG, QueryParam
+
+# Package metadata. setup.py reads these assignments by scanning this file
+# line by line, so keep each one on a single line starting at column 0.
+__version__ = "0.0.1"
+__author__ = "Zirui Guo"
+# Project home page; previously pointed at the unrelated GraphEdit repository.
+__url__ = "https://github.com/HKUDS/LightRAG"

lightrag/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/lightrag/__pycache__/__init__.cpython-310.pyc and b/lightrag/__pycache__/__init__.cpython-310.pyc differ

lightrag/__pycache__/base.cpython-310.pyc CHANGED Viewed

Binary files a/lightrag/__pycache__/base.cpython-310.pyc and b/lightrag/__pycache__/base.cpython-310.pyc differ

lightrag/__pycache__/llm.cpython-310.pyc CHANGED Viewed

Binary files a/lightrag/__pycache__/llm.cpython-310.pyc and b/lightrag/__pycache__/llm.cpython-310.pyc differ

lightrag/operate.py CHANGED Viewed

@@ -176,7 +176,6 @@ async def _merge_edges_then_upsert(
     already_weights = []
     already_source_ids = []
     already_description = []
-    ##################
     already_keywords = []
     if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
@@ -186,7 +185,6 @@ async def _merge_edges_then_upsert(
             split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
         )
         already_description.append(already_edge["description"])
-        ############
         already_keywords.extend(
             split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
         )
@@ -195,7 +193,6 @@ async def _merge_edges_then_upsert(
     description = GRAPH_FIELD_SEP.join(
         sorted(set([dp["description"] for dp in edges_data] + already_description))
     )
-    ##########
     keywords = GRAPH_FIELD_SEP.join(
         sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
     )
@@ -403,7 +400,7 @@ async def local_query(
     except json.JSONDecodeError as e:
         # Handle parsing error
         print(f"JSON parsing error: {e}")
-        return PROMPTS["fail_response"], "None"
     context = await _build_local_query_context(
         keywords,
@@ -415,7 +412,7 @@ async def local_query(
     if query_param.only_need_context:
         return context
     if context is None:
-        return PROMPTS["fail_response"], "None"
     sys_prompt_temp = PROMPTS["rag_response"]
     sys_prompt = sys_prompt_temp.format(
         context_data=context, response_type=query_param.response_type
@@ -424,7 +421,7 @@ async def local_query(
         query,
         system_prompt=sys_prompt,
     )
-    return response, context
 async def _build_local_query_context(
     query,
@@ -622,7 +619,7 @@ async def global_query(
     except json.JSONDecodeError as e:
         # Handle parsing error
         print(f"JSON parsing error: {e}")
-        return PROMPTS["fail_response"], "None"
     context = await _build_global_query_context(
         keywords,
@@ -636,7 +633,7 @@ async def global_query(
     if query_param.only_need_context:
         return context
     if context is None:
-        return PROMPTS["fail_response"], "None"
     sys_prompt_temp = PROMPTS["rag_response"]
     sys_prompt = sys_prompt_temp.format(
@@ -646,7 +643,7 @@ async def global_query(
         query,
         system_prompt=sys_prompt,
     )
-    return (response, context)
 async def _build_global_query_context(
     keywords,
@@ -836,7 +833,7 @@ async def hybird_query(
     except json.JSONDecodeError as e:
         # Handle parsing error
         print(f"JSON parsing error: {e}")
-        return PROMPTS["fail_response"], "None"
     low_level_context = await _build_local_query_context(
         ll_keywords,
@@ -860,7 +857,7 @@ async def hybird_query(
     if query_param.only_need_context:
         return context
     if context is None:
-        return PROMPTS["fail_response"], "None"
     sys_prompt_temp = PROMPTS["rag_response"]
     sys_prompt = sys_prompt_temp.format(
@@ -870,7 +867,7 @@ async def hybird_query(
         query,
         system_prompt=sys_prompt,
     )
-    return (response, context)
 def combine_contexts(high_level_context, low_level_context):
     # Function to extract entities, relationships, and sources from context strings
@@ -922,14 +919,14 @@ async def naive_query(
     use_model_func = global_config["llm_model_func"]
     results = await chunks_vdb.query(query, top_k=query_param.top_k)
     if not len(results):
-        return PROMPTS["fail_response"], "None"
     chunks_ids = [r["id"] for r in results]
     chunks = await text_chunks_db.get_by_ids(chunks_ids)
     maybe_trun_chunks = truncate_list_by_token_size(
         chunks,
         key=lambda x: x["content"],
-        max_token_size=query_param.naive_max_token_for_text_unit,
     )
     logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
     section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])

     already_weights = []
     already_source_ids = []
     already_description = []
     already_keywords = []
     if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
             split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
         )
         already_description.append(already_edge["description"])
         already_keywords.extend(
             split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
         )
     description = GRAPH_FIELD_SEP.join(
         sorted(set([dp["description"] for dp in edges_data] + already_description))
     )
     keywords = GRAPH_FIELD_SEP.join(
         sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
     )
     except json.JSONDecodeError as e:
         # Handle parsing error
         print(f"JSON parsing error: {e}")
+        return PROMPTS["fail_response"]
     context = await _build_local_query_context(
         keywords,
     if query_param.only_need_context:
         return context
     if context is None:
+        return PROMPTS["fail_response"]
     sys_prompt_temp = PROMPTS["rag_response"]
     sys_prompt = sys_prompt_temp.format(
         context_data=context, response_type=query_param.response_type
         query,
         system_prompt=sys_prompt,
     )
+    return response
 async def _build_local_query_context(
     query,
     except json.JSONDecodeError as e:
         # Handle parsing error
         print(f"JSON parsing error: {e}")
+        return PROMPTS["fail_response"]
     context = await _build_global_query_context(
         keywords,
     if query_param.only_need_context:
         return context
     if context is None:
+        return PROMPTS["fail_response"]
     sys_prompt_temp = PROMPTS["rag_response"]
     sys_prompt = sys_prompt_temp.format(
         query,
         system_prompt=sys_prompt,
     )
+    return response
 async def _build_global_query_context(
     keywords,
     except json.JSONDecodeError as e:
         # Handle parsing error
         print(f"JSON parsing error: {e}")
+        return PROMPTS["fail_response"]
     low_level_context = await _build_local_query_context(
         ll_keywords,
     if query_param.only_need_context:
         return context
     if context is None:
+        return PROMPTS["fail_response"]
     sys_prompt_temp = PROMPTS["rag_response"]
     sys_prompt = sys_prompt_temp.format(
         query,
         system_prompt=sys_prompt,
     )
+    return response
 def combine_contexts(high_level_context, low_level_context):
     # Function to extract entities, relationships, and sources from context strings
     use_model_func = global_config["llm_model_func"]
     results = await chunks_vdb.query(query, top_k=query_param.top_k)
     if not len(results):
+        return PROMPTS["fail_response"]
     chunks_ids = [r["id"] for r in results]
     chunks = await text_chunks_db.get_by_ids(chunks_ids)
     maybe_trun_chunks = truncate_list_by_token_size(
         chunks,
         key=lambda x: x["content"],
+        max_token_size=query_param.max_token_for_text_unit,
     )
     logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
     section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+openai
+tiktoken
+networkx
+graspologic
+nano-vectordb
+hnswlib
+xxhash
+tenacity

setup.py ADDED Viewed

	@@ -0,0 +1,39 @@

+"""Packaging script for the ``lightrag`` distribution.
+
+Reads the long description from ``README.md``, the author/version/URL
+metadata from ``lightrag/__init__.py`` (parsed as text, not imported), and
+the install requirements from ``requirements.txt``, then hands everything
+to ``setuptools.setup``.
+"""
+import setuptools
+
+# Explicit encoding so the result does not depend on the platform default.
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+# Dunder assignments to pull out of the package's __init__.py.
+vars2find = ["__author__", "__version__", "__url__"]
+vars2readme = {}
+with open("./lightrag/__init__.py", encoding="utf-8") as f:
+    for line in f:
+        for v in vars2find:
+            if line.startswith(v):
+                # Split on the first "=" only, then trim surrounding
+                # whitespace and quotes. Stripping every space (as before)
+                # turned "Zirui Guo" into "ZiruiGuo", and splitting on all
+                # "=" would truncate a value that itself contains "=".
+                _, _, value = line.partition("=")
+                vars2readme[v] = value.strip().strip("\"'")
+
+# One requirement per non-blank line of requirements.txt.
+with open("./requirements.txt", encoding="utf-8") as f:
+    deps = [line.strip() for line in f if line.strip()]
+
+setuptools.setup(
+    name="lightrag",
+    url=vars2readme["__url__"],
+    version=vars2readme["__version__"],
+    author=vars2readme["__author__"],
+    description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    packages=["lightrag"],
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires=">=3.9",
+    install_requires=deps,
+)