jin committed on
Commit
f54356b
·
1 Parent(s): d5299f4
Files changed (5) hide show
  1. .gitignore +1 -0
  2. lightrag/base.py +1 -1
  3. lightrag/operate.py +7 -3
  4. lightrag/prompt.py +1 -1
  5. test.py +1 -1
.gitignore CHANGED
@@ -10,3 +10,4 @@ local_neo4jWorkDir/
10
  neo4jWorkDir/
11
  ignore_this.txt
12
  .venv/
 
 
10
  neo4jWorkDir/
11
  ignore_this.txt
12
  .venv/
13
+ *.ignore.*
lightrag/base.py CHANGED
@@ -84,7 +84,7 @@ class BaseKVStorage(Generic[T], StorageNameSpace):
84
 
85
  @dataclass
86
  class BaseGraphStorage(StorageNameSpace):
87
- embedding_func: EmbeddingFunc
88
  async def has_node(self, node_id: str) -> bool:
89
  raise NotImplementedError
90
 
 
84
 
85
  @dataclass
86
  class BaseGraphStorage(StorageNameSpace):
87
+ embedding_func: EmbeddingFunc = None
88
  async def has_node(self, node_id: str) -> bool:
89
  raise NotImplementedError
90
 
lightrag/operate.py CHANGED
@@ -16,6 +16,7 @@ from .utils import (
16
  split_string_by_multi_markers,
17
  truncate_list_by_token_size,
18
  process_combine_contexts,
 
19
  )
20
  from .base import (
21
  BaseGraphStorage,
@@ -403,9 +404,10 @@ async def local_query(
403
  kw_prompt_temp = PROMPTS["keywords_extraction"]
404
  kw_prompt = kw_prompt_temp.format(query=query)
405
  result = await use_model_func(kw_prompt)
 
406
 
407
  try:
408
- keywords_data = json.loads(result)
409
  keywords = keywords_data.get("low_level_keywords", [])
410
  keywords = ", ".join(keywords)
411
  except json.JSONDecodeError:
@@ -670,9 +672,10 @@ async def global_query(
670
  kw_prompt_temp = PROMPTS["keywords_extraction"]
671
  kw_prompt = kw_prompt_temp.format(query=query)
672
  result = await use_model_func(kw_prompt)
 
673
 
674
  try:
675
- keywords_data = json.loads(result)
676
  keywords = keywords_data.get("high_level_keywords", [])
677
  keywords = ", ".join(keywords)
678
  except json.JSONDecodeError:
@@ -911,8 +914,9 @@ async def hybrid_query(
911
  kw_prompt = kw_prompt_temp.format(query=query)
912
 
913
  result = await use_model_func(kw_prompt)
 
914
  try:
915
- keywords_data = json.loads(result)
916
  hl_keywords = keywords_data.get("high_level_keywords", [])
917
  ll_keywords = keywords_data.get("low_level_keywords", [])
918
  hl_keywords = ", ".join(hl_keywords)
 
16
  split_string_by_multi_markers,
17
  truncate_list_by_token_size,
18
  process_combine_contexts,
19
+ locate_json_string_body_from_string
20
  )
21
  from .base import (
22
  BaseGraphStorage,
 
404
  kw_prompt_temp = PROMPTS["keywords_extraction"]
405
  kw_prompt = kw_prompt_temp.format(query=query)
406
  result = await use_model_func(kw_prompt)
407
+ json_text = locate_json_string_body_from_string(result)
408
 
409
  try:
410
+ keywords_data = json.loads(json_text)
411
  keywords = keywords_data.get("low_level_keywords", [])
412
  keywords = ", ".join(keywords)
413
  except json.JSONDecodeError:
 
672
  kw_prompt_temp = PROMPTS["keywords_extraction"]
673
  kw_prompt = kw_prompt_temp.format(query=query)
674
  result = await use_model_func(kw_prompt)
675
+ json_text = locate_json_string_body_from_string(result)
676
 
677
  try:
678
+ keywords_data = json.loads(json_text)
679
  keywords = keywords_data.get("high_level_keywords", [])
680
  keywords = ", ".join(keywords)
681
  except json.JSONDecodeError:
 
914
  kw_prompt = kw_prompt_temp.format(query=query)
915
 
916
  result = await use_model_func(kw_prompt)
917
+ json_text = locate_json_string_body_from_string(result)
918
  try:
919
+ keywords_data = json.loads(json_text)
920
  hl_keywords = keywords_data.get("high_level_keywords", [])
921
  ll_keywords = keywords_data.get("low_level_keywords", [])
922
  hl_keywords = ", ".join(hl_keywords)
lightrag/prompt.py CHANGED
@@ -14,7 +14,7 @@ Given a text document that is potentially relevant to this activity and a list o
14
 
15
  -Steps-
16
  1. Identify all entities. For each identified entity, extract the following information:
17
- - entity_name: Name of the entity, capitalized
18
  - entity_type: One of the following types: [{entity_types}]
19
  - entity_description: Comprehensive description of the entity's attributes and activities
20
  Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>
 
14
 
15
  -Steps-
16
  1. Identify all entities. For each identified entity, extract the following information:
17
+ - entity_name: Name of the entity, use same language as input text. If English, capitalized the name.
18
  - entity_type: One of the following types: [{entity_types}]
19
  - entity_description: Comprehensive description of the entity's attributes and activities
20
  Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>
test.py CHANGED
@@ -18,7 +18,7 @@ rag = LightRAG(
18
  # llm_model_func=gpt_4o_complete # Optionally, use a stronger model
19
  )
20
 
21
- with open("./book.txt") as f:
22
  rag.insert(f.read())
23
 
24
  # Perform naive search
 
18
  # llm_model_func=gpt_4o_complete # Optionally, use a stronger model
19
  )
20
 
21
+ with open("./dickens/book.txt", "r", encoding="utf-8") as f:
22
  rag.insert(f.read())
23
 
24
  # Perform naive search