jin
committed on
Commit
·
f54356b
1
Parent(s):
d5299f4
fix bug
Browse files- .gitignore +1 -0
- lightrag/base.py +1 -1
- lightrag/operate.py +7 -3
- lightrag/prompt.py +1 -1
- test.py +1 -1
.gitignore
CHANGED
@@ -10,3 +10,4 @@ local_neo4jWorkDir/
|
|
10 |
neo4jWorkDir/
|
11 |
ignore_this.txt
|
12 |
.venv/
|
|
|
|
10 |
neo4jWorkDir/
|
11 |
ignore_this.txt
|
12 |
.venv/
|
13 |
+
*.ignore.*
|
lightrag/base.py
CHANGED
@@ -84,7 +84,7 @@ class BaseKVStorage(Generic[T], StorageNameSpace):
|
|
84 |
|
85 |
@dataclass
|
86 |
class BaseGraphStorage(StorageNameSpace):
|
87 |
-
embedding_func: EmbeddingFunc
|
88 |
async def has_node(self, node_id: str) -> bool:
|
89 |
raise NotImplementedError
|
90 |
|
|
|
84 |
|
85 |
@dataclass
|
86 |
class BaseGraphStorage(StorageNameSpace):
|
87 |
+
embedding_func: EmbeddingFunc = None
|
88 |
async def has_node(self, node_id: str) -> bool:
|
89 |
raise NotImplementedError
|
90 |
|
lightrag/operate.py
CHANGED
@@ -16,6 +16,7 @@ from .utils import (
|
|
16 |
split_string_by_multi_markers,
|
17 |
truncate_list_by_token_size,
|
18 |
process_combine_contexts,
|
|
|
19 |
)
|
20 |
from .base import (
|
21 |
BaseGraphStorage,
|
@@ -403,9 +404,10 @@ async def local_query(
|
|
403 |
kw_prompt_temp = PROMPTS["keywords_extraction"]
|
404 |
kw_prompt = kw_prompt_temp.format(query=query)
|
405 |
result = await use_model_func(kw_prompt)
|
|
|
406 |
|
407 |
try:
|
408 |
-
keywords_data = json.loads(
|
409 |
keywords = keywords_data.get("low_level_keywords", [])
|
410 |
keywords = ", ".join(keywords)
|
411 |
except json.JSONDecodeError:
|
@@ -670,9 +672,10 @@ async def global_query(
|
|
670 |
kw_prompt_temp = PROMPTS["keywords_extraction"]
|
671 |
kw_prompt = kw_prompt_temp.format(query=query)
|
672 |
result = await use_model_func(kw_prompt)
|
|
|
673 |
|
674 |
try:
|
675 |
-
keywords_data = json.loads(
|
676 |
keywords = keywords_data.get("high_level_keywords", [])
|
677 |
keywords = ", ".join(keywords)
|
678 |
except json.JSONDecodeError:
|
@@ -911,8 +914,9 @@ async def hybrid_query(
|
|
911 |
kw_prompt = kw_prompt_temp.format(query=query)
|
912 |
|
913 |
result = await use_model_func(kw_prompt)
|
|
|
914 |
try:
|
915 |
-
keywords_data = json.loads(
|
916 |
hl_keywords = keywords_data.get("high_level_keywords", [])
|
917 |
ll_keywords = keywords_data.get("low_level_keywords", [])
|
918 |
hl_keywords = ", ".join(hl_keywords)
|
|
|
16 |
split_string_by_multi_markers,
|
17 |
truncate_list_by_token_size,
|
18 |
process_combine_contexts,
|
19 |
+
locate_json_string_body_from_string
|
20 |
)
|
21 |
from .base import (
|
22 |
BaseGraphStorage,
|
|
|
404 |
kw_prompt_temp = PROMPTS["keywords_extraction"]
|
405 |
kw_prompt = kw_prompt_temp.format(query=query)
|
406 |
result = await use_model_func(kw_prompt)
|
407 |
+
json_text = locate_json_string_body_from_string(result)
|
408 |
|
409 |
try:
|
410 |
+
keywords_data = json.loads(json_text)
|
411 |
keywords = keywords_data.get("low_level_keywords", [])
|
412 |
keywords = ", ".join(keywords)
|
413 |
except json.JSONDecodeError:
|
|
|
672 |
kw_prompt_temp = PROMPTS["keywords_extraction"]
|
673 |
kw_prompt = kw_prompt_temp.format(query=query)
|
674 |
result = await use_model_func(kw_prompt)
|
675 |
+
json_text = locate_json_string_body_from_string(result)
|
676 |
|
677 |
try:
|
678 |
+
keywords_data = json.loads(json_text)
|
679 |
keywords = keywords_data.get("high_level_keywords", [])
|
680 |
keywords = ", ".join(keywords)
|
681 |
except json.JSONDecodeError:
|
|
|
914 |
kw_prompt = kw_prompt_temp.format(query=query)
|
915 |
|
916 |
result = await use_model_func(kw_prompt)
|
917 |
+
json_text = locate_json_string_body_from_string(result)
|
918 |
try:
|
919 |
+
keywords_data = json.loads(json_text)
|
920 |
hl_keywords = keywords_data.get("high_level_keywords", [])
|
921 |
ll_keywords = keywords_data.get("low_level_keywords", [])
|
922 |
hl_keywords = ", ".join(hl_keywords)
|
lightrag/prompt.py
CHANGED
@@ -14,7 +14,7 @@ Given a text document that is potentially relevant to this activity and a list o
|
|
14 |
|
15 |
-Steps-
|
16 |
1. Identify all entities. For each identified entity, extract the following information:
|
17 |
-
- entity_name: Name of the entity, capitalized
|
18 |
- entity_type: One of the following types: [{entity_types}]
|
19 |
- entity_description: Comprehensive description of the entity's attributes and activities
|
20 |
Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>
|
|
|
14 |
|
15 |
-Steps-
|
16 |
1. Identify all entities. For each identified entity, extract the following information:
|
17 |
+
- entity_name: Name of the entity, use same language as input text. If English, capitalized the name.
|
18 |
- entity_type: One of the following types: [{entity_types}]
|
19 |
- entity_description: Comprehensive description of the entity's attributes and activities
|
20 |
Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>
|
test.py
CHANGED
@@ -18,7 +18,7 @@ rag = LightRAG(
|
|
18 |
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
19 |
)
|
20 |
|
21 |
-
with open("./book.txt") as f:
|
22 |
rag.insert(f.read())
|
23 |
|
24 |
# Perform naive search
|
|
|
18 |
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
19 |
)
|
20 |
|
21 |
+
with open("./dickens/book.txt", "r", encoding="utf-8") as f:
|
22 |
rag.insert(f.read())
|
23 |
|
24 |
# Perform naive search
|