Larfii committed on
Commit
38e1d84
·
1 Parent(s): a8a6171
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Gustavo Ye
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1 +1,6 @@
1
  # LightRAG
 
 
 
 
 
 
1
  # LightRAG
2
+
3
+ ## Citation
4
+ ## Acknowledgement
5
+
6
+ The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
examples/insert.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
  import sys
3
- sys.path.append('xxx/xxx/LightRAG')
4
 
5
  from lightrag import LightRAG
6
 
 
1
  import os
2
  import sys
 
3
 
4
  from lightrag import LightRAG
5
 
examples/query.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
  import sys
3
- sys.path.append('xxx/xxx/LightRAG')
4
 
5
  from lightrag import LightRAG, QueryParam
6
 
@@ -13,5 +12,5 @@ rag = LightRAG(working_dir=WORKING_DIR)
13
  mode = 'global'
14
  query_param = QueryParam(mode=mode)
15
 
16
- result, _ = rag.query("", param=query_param)
17
  print(result)
 
1
  import os
2
  import sys
 
3
 
4
  from lightrag import LightRAG, QueryParam
5
 
 
12
  mode = 'global'
13
  query_param = QueryParam(mode=mode)
14
 
15
+ result = rag.query("", param=query_param)
16
  print(result)
lightrag/__init__.py CHANGED
@@ -1 +1,5 @@
1
- from .lightrag import LightRAG, QueryParam
 
 
 
 
 
1
+ from .lightrag import LightRAG, QueryParam
2
+
3
+ __version__ = "0.0.1"
4
+ __author__ = "Zirui Guo"
5
+ __url__ = "https://github.com/HKUDS/GraphEdit"
lightrag/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/lightrag/__pycache__/__init__.cpython-310.pyc and b/lightrag/__pycache__/__init__.cpython-310.pyc differ
 
lightrag/__pycache__/base.cpython-310.pyc CHANGED
Binary files a/lightrag/__pycache__/base.cpython-310.pyc and b/lightrag/__pycache__/base.cpython-310.pyc differ
 
lightrag/__pycache__/llm.cpython-310.pyc CHANGED
Binary files a/lightrag/__pycache__/llm.cpython-310.pyc and b/lightrag/__pycache__/llm.cpython-310.pyc differ
 
lightrag/operate.py CHANGED
@@ -176,7 +176,6 @@ async def _merge_edges_then_upsert(
176
  already_weights = []
177
  already_source_ids = []
178
  already_description = []
179
- ##################
180
  already_keywords = []
181
 
182
  if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
@@ -186,7 +185,6 @@ async def _merge_edges_then_upsert(
186
  split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
187
  )
188
  already_description.append(already_edge["description"])
189
- ############
190
  already_keywords.extend(
191
  split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
192
  )
@@ -195,7 +193,6 @@ async def _merge_edges_then_upsert(
195
  description = GRAPH_FIELD_SEP.join(
196
  sorted(set([dp["description"] for dp in edges_data] + already_description))
197
  )
198
- ##########
199
  keywords = GRAPH_FIELD_SEP.join(
200
  sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
201
  )
@@ -403,7 +400,7 @@ async def local_query(
403
  except json.JSONDecodeError as e:
404
  # Handle parsing error
405
  print(f"JSON parsing error: {e}")
406
- return PROMPTS["fail_response"], "None"
407
 
408
  context = await _build_local_query_context(
409
  keywords,
@@ -415,7 +412,7 @@ async def local_query(
415
  if query_param.only_need_context:
416
  return context
417
  if context is None:
418
- return PROMPTS["fail_response"], "None"
419
  sys_prompt_temp = PROMPTS["rag_response"]
420
  sys_prompt = sys_prompt_temp.format(
421
  context_data=context, response_type=query_param.response_type
@@ -424,7 +421,7 @@ async def local_query(
424
  query,
425
  system_prompt=sys_prompt,
426
  )
427
- return response, context
428
 
429
  async def _build_local_query_context(
430
  query,
@@ -622,7 +619,7 @@ async def global_query(
622
  except json.JSONDecodeError as e:
623
  # Handle parsing error
624
  print(f"JSON parsing error: {e}")
625
- return PROMPTS["fail_response"], "None"
626
 
627
  context = await _build_global_query_context(
628
  keywords,
@@ -636,7 +633,7 @@ async def global_query(
636
  if query_param.only_need_context:
637
  return context
638
  if context is None:
639
- return PROMPTS["fail_response"], "None"
640
 
641
  sys_prompt_temp = PROMPTS["rag_response"]
642
  sys_prompt = sys_prompt_temp.format(
@@ -646,7 +643,7 @@ async def global_query(
646
  query,
647
  system_prompt=sys_prompt,
648
  )
649
- return (response, context)
650
 
651
  async def _build_global_query_context(
652
  keywords,
@@ -836,7 +833,7 @@ async def hybird_query(
836
  except json.JSONDecodeError as e:
837
  # Handle parsing error
838
  print(f"JSON parsing error: {e}")
839
- return PROMPTS["fail_response"], "None"
840
 
841
  low_level_context = await _build_local_query_context(
842
  ll_keywords,
@@ -860,7 +857,7 @@ async def hybird_query(
860
  if query_param.only_need_context:
861
  return context
862
  if context is None:
863
- return PROMPTS["fail_response"], "None"
864
 
865
  sys_prompt_temp = PROMPTS["rag_response"]
866
  sys_prompt = sys_prompt_temp.format(
@@ -870,7 +867,7 @@ async def hybird_query(
870
  query,
871
  system_prompt=sys_prompt,
872
  )
873
- return (response, context)
874
 
875
  def combine_contexts(high_level_context, low_level_context):
876
  # Function to extract entities, relationships, and sources from context strings
@@ -922,14 +919,14 @@ async def naive_query(
922
  use_model_func = global_config["llm_model_func"]
923
  results = await chunks_vdb.query(query, top_k=query_param.top_k)
924
  if not len(results):
925
- return PROMPTS["fail_response"], "None"
926
  chunks_ids = [r["id"] for r in results]
927
  chunks = await text_chunks_db.get_by_ids(chunks_ids)
928
 
929
  maybe_trun_chunks = truncate_list_by_token_size(
930
  chunks,
931
  key=lambda x: x["content"],
932
- max_token_size=query_param.naive_max_token_for_text_unit,
933
  )
934
  logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
935
  section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
 
176
  already_weights = []
177
  already_source_ids = []
178
  already_description = []
 
179
  already_keywords = []
180
 
181
  if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
 
185
  split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
186
  )
187
  already_description.append(already_edge["description"])
 
188
  already_keywords.extend(
189
  split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
190
  )
 
193
  description = GRAPH_FIELD_SEP.join(
194
  sorted(set([dp["description"] for dp in edges_data] + already_description))
195
  )
 
196
  keywords = GRAPH_FIELD_SEP.join(
197
  sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
198
  )
 
400
  except json.JSONDecodeError as e:
401
  # Handle parsing error
402
  print(f"JSON parsing error: {e}")
403
+ return PROMPTS["fail_response"]
404
 
405
  context = await _build_local_query_context(
406
  keywords,
 
412
  if query_param.only_need_context:
413
  return context
414
  if context is None:
415
+ return PROMPTS["fail_response"]
416
  sys_prompt_temp = PROMPTS["rag_response"]
417
  sys_prompt = sys_prompt_temp.format(
418
  context_data=context, response_type=query_param.response_type
 
421
  query,
422
  system_prompt=sys_prompt,
423
  )
424
+ return response
425
 
426
  async def _build_local_query_context(
427
  query,
 
619
  except json.JSONDecodeError as e:
620
  # Handle parsing error
621
  print(f"JSON parsing error: {e}")
622
+ return PROMPTS["fail_response"]
623
 
624
  context = await _build_global_query_context(
625
  keywords,
 
633
  if query_param.only_need_context:
634
  return context
635
  if context is None:
636
+ return PROMPTS["fail_response"]
637
 
638
  sys_prompt_temp = PROMPTS["rag_response"]
639
  sys_prompt = sys_prompt_temp.format(
 
643
  query,
644
  system_prompt=sys_prompt,
645
  )
646
+ return response
647
 
648
  async def _build_global_query_context(
649
  keywords,
 
833
  except json.JSONDecodeError as e:
834
  # Handle parsing error
835
  print(f"JSON parsing error: {e}")
836
+ return PROMPTS["fail_response"]
837
 
838
  low_level_context = await _build_local_query_context(
839
  ll_keywords,
 
857
  if query_param.only_need_context:
858
  return context
859
  if context is None:
860
+ return PROMPTS["fail_response"]
861
 
862
  sys_prompt_temp = PROMPTS["rag_response"]
863
  sys_prompt = sys_prompt_temp.format(
 
867
  query,
868
  system_prompt=sys_prompt,
869
  )
870
+ return response
871
 
872
  def combine_contexts(high_level_context, low_level_context):
873
  # Function to extract entities, relationships, and sources from context strings
 
919
  use_model_func = global_config["llm_model_func"]
920
  results = await chunks_vdb.query(query, top_k=query_param.top_k)
921
  if not len(results):
922
+ return PROMPTS["fail_response"]
923
  chunks_ids = [r["id"] for r in results]
924
  chunks = await text_chunks_db.get_by_ids(chunks_ids)
925
 
926
  maybe_trun_chunks = truncate_list_by_token_size(
927
  chunks,
928
  key=lambda x: x["content"],
929
+ max_token_size=query_param.max_token_for_text_unit,
930
  )
931
  logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
932
  section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ openai
2
+ tiktoken
3
+ networkx
4
+ graspologic
5
+ nano-vectordb
6
+ hnswlib
7
+ xxhash
8
+ tenacity
setup.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import setuptools
2
+
3
+ with open("README.md", "r") as fh:
4
+ long_description = fh.read()
5
+
6
+
7
+ vars2find = ["__author__", "__version__", "__url__"]
8
+ vars2readme = {}
9
+ with open("./lightrag/__init__.py") as f:
10
+ for line in f.readlines():
11
+ for v in vars2find:
12
+ if line.startswith(v):
13
+ line = line.replace(" ", "").replace('"', "").replace("'", "").strip()
14
+ vars2readme[v] = line.split("=")[1]
15
+
16
+ deps = []
17
+ with open("./requirements.txt") as f:
18
+ for line in f.readlines():
19
+ if not line.strip():
20
+ continue
21
+ deps.append(line.strip())
22
+
23
+ setuptools.setup(
24
+ name="lightrag",
25
+ url=vars2readme["__url__"],
26
+ version=vars2readme["__version__"],
27
+ author=vars2readme["__author__"],
28
+ description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
29
+ long_description=long_description,
30
+ long_description_content_type="text/markdown",
31
+ packages=["lightrag"],
32
+ classifiers=[
33
+ "Programming Language :: Python :: 3",
34
+ "License :: OSI Approved :: MIT License",
35
+ "Operating System :: OS Independent",
36
+ ],
37
+ python_requires=">=3.9",
38
+ install_requires=deps,
39
+ )