jin
committed on
Commit
·
f54356b
1
Parent(s):
d5299f4
fix bug
Browse files- .gitignore +1 -0
- lightrag/base.py +1 -1
- lightrag/operate.py +7 -3
- lightrag/prompt.py +1 -1
- test.py +1 -1
.gitignore
CHANGED
|
@@ -10,3 +10,4 @@ local_neo4jWorkDir/
|
|
| 10 |
neo4jWorkDir/
|
| 11 |
ignore_this.txt
|
| 12 |
.venv/
|
|
|
|
|
|
| 10 |
neo4jWorkDir/
|
| 11 |
ignore_this.txt
|
| 12 |
.venv/
|
| 13 |
+
*.ignore.*
|
lightrag/base.py
CHANGED
|
@@ -84,7 +84,7 @@ class BaseKVStorage(Generic[T], StorageNameSpace):
|
|
| 84 |
|
| 85 |
@dataclass
|
| 86 |
class BaseGraphStorage(StorageNameSpace):
|
| 87 |
-
embedding_func: EmbeddingFunc
|
| 88 |
async def has_node(self, node_id: str) -> bool:
|
| 89 |
raise NotImplementedError
|
| 90 |
|
|
|
|
| 84 |
|
| 85 |
@dataclass
|
| 86 |
class BaseGraphStorage(StorageNameSpace):
|
| 87 |
+
embedding_func: EmbeddingFunc = None
|
| 88 |
async def has_node(self, node_id: str) -> bool:
|
| 89 |
raise NotImplementedError
|
| 90 |
|
lightrag/operate.py
CHANGED
|
@@ -16,6 +16,7 @@ from .utils import (
|
|
| 16 |
split_string_by_multi_markers,
|
| 17 |
truncate_list_by_token_size,
|
| 18 |
process_combine_contexts,
|
|
|
|
| 19 |
)
|
| 20 |
from .base import (
|
| 21 |
BaseGraphStorage,
|
|
@@ -403,9 +404,10 @@ async def local_query(
|
|
| 403 |
kw_prompt_temp = PROMPTS["keywords_extraction"]
|
| 404 |
kw_prompt = kw_prompt_temp.format(query=query)
|
| 405 |
result = await use_model_func(kw_prompt)
|
|
|
|
| 406 |
|
| 407 |
try:
|
| 408 |
-
keywords_data = json.loads(
|
| 409 |
keywords = keywords_data.get("low_level_keywords", [])
|
| 410 |
keywords = ", ".join(keywords)
|
| 411 |
except json.JSONDecodeError:
|
|
@@ -670,9 +672,10 @@ async def global_query(
|
|
| 670 |
kw_prompt_temp = PROMPTS["keywords_extraction"]
|
| 671 |
kw_prompt = kw_prompt_temp.format(query=query)
|
| 672 |
result = await use_model_func(kw_prompt)
|
|
|
|
| 673 |
|
| 674 |
try:
|
| 675 |
-
keywords_data = json.loads(
|
| 676 |
keywords = keywords_data.get("high_level_keywords", [])
|
| 677 |
keywords = ", ".join(keywords)
|
| 678 |
except json.JSONDecodeError:
|
|
@@ -911,8 +914,9 @@ async def hybrid_query(
|
|
| 911 |
kw_prompt = kw_prompt_temp.format(query=query)
|
| 912 |
|
| 913 |
result = await use_model_func(kw_prompt)
|
|
|
|
| 914 |
try:
|
| 915 |
-
keywords_data = json.loads(
|
| 916 |
hl_keywords = keywords_data.get("high_level_keywords", [])
|
| 917 |
ll_keywords = keywords_data.get("low_level_keywords", [])
|
| 918 |
hl_keywords = ", ".join(hl_keywords)
|
|
|
|
| 16 |
split_string_by_multi_markers,
|
| 17 |
truncate_list_by_token_size,
|
| 18 |
process_combine_contexts,
|
| 19 |
+
locate_json_string_body_from_string
|
| 20 |
)
|
| 21 |
from .base import (
|
| 22 |
BaseGraphStorage,
|
|
|
|
| 404 |
kw_prompt_temp = PROMPTS["keywords_extraction"]
|
| 405 |
kw_prompt = kw_prompt_temp.format(query=query)
|
| 406 |
result = await use_model_func(kw_prompt)
|
| 407 |
+
json_text = locate_json_string_body_from_string(result)
|
| 408 |
|
| 409 |
try:
|
| 410 |
+
keywords_data = json.loads(json_text)
|
| 411 |
keywords = keywords_data.get("low_level_keywords", [])
|
| 412 |
keywords = ", ".join(keywords)
|
| 413 |
except json.JSONDecodeError:
|
|
|
|
| 672 |
kw_prompt_temp = PROMPTS["keywords_extraction"]
|
| 673 |
kw_prompt = kw_prompt_temp.format(query=query)
|
| 674 |
result = await use_model_func(kw_prompt)
|
| 675 |
+
json_text = locate_json_string_body_from_string(result)
|
| 676 |
|
| 677 |
try:
|
| 678 |
+
keywords_data = json.loads(json_text)
|
| 679 |
keywords = keywords_data.get("high_level_keywords", [])
|
| 680 |
keywords = ", ".join(keywords)
|
| 681 |
except json.JSONDecodeError:
|
|
|
|
| 914 |
kw_prompt = kw_prompt_temp.format(query=query)
|
| 915 |
|
| 916 |
result = await use_model_func(kw_prompt)
|
| 917 |
+
json_text = locate_json_string_body_from_string(result)
|
| 918 |
try:
|
| 919 |
+
keywords_data = json.loads(json_text)
|
| 920 |
hl_keywords = keywords_data.get("high_level_keywords", [])
|
| 921 |
ll_keywords = keywords_data.get("low_level_keywords", [])
|
| 922 |
hl_keywords = ", ".join(hl_keywords)
|
lightrag/prompt.py
CHANGED
|
@@ -14,7 +14,7 @@ Given a text document that is potentially relevant to this activity and a list o
|
|
| 14 |
|
| 15 |
-Steps-
|
| 16 |
1. Identify all entities. For each identified entity, extract the following information:
|
| 17 |
-
- entity_name: Name of the entity, capitalized
|
| 18 |
- entity_type: One of the following types: [{entity_types}]
|
| 19 |
- entity_description: Comprehensive description of the entity's attributes and activities
|
| 20 |
Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>
|
|
|
|
| 14 |
|
| 15 |
-Steps-
|
| 16 |
1. Identify all entities. For each identified entity, extract the following information:
|
| 17 |
+
- entity_name: Name of the entity, use same language as input text. If English, capitalized the name.
|
| 18 |
- entity_type: One of the following types: [{entity_types}]
|
| 19 |
- entity_description: Comprehensive description of the entity's attributes and activities
|
| 20 |
Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>
|
test.py
CHANGED
|
@@ -18,7 +18,7 @@ rag = LightRAG(
|
|
| 18 |
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
| 19 |
)
|
| 20 |
|
| 21 |
-
with open("./book.txt") as f:
|
| 22 |
rag.insert(f.read())
|
| 23 |
|
| 24 |
# Perform naive search
|
|
|
|
| 18 |
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
| 19 |
)
|
| 20 |
|
| 21 |
+
with open("./dickens/book.txt", "r", encoding="utf-8") as f:
|
| 22 |
rag.insert(f.read())
|
| 23 |
|
| 24 |
# Perform naive search
|