Merge pull request #797 from danielaskdd/add-env-settings
Add the token size truncation for local query and token size setting by env
- .env.example +14 -6
- .gitignore +1 -1
- lightrag/api/README.md +1 -0
- lightrag/api/lightrag_server.py +16 -2
- lightrag/api/ollama_api.py +4 -12
- lightrag/base.py +8 -5
- lightrag/lightrag.py +3 -3
- lightrag/llm/openai.py +10 -6
- lightrag/llm/zhipu.py +2 -1
- lightrag/operate.py +66 -5
- lightrag/utils.py +17 -0
.env.example
CHANGED
@@ -18,6 +18,7 @@
 
 ### Logging level
 LOG_LEVEL=INFO
+VERBOSE=False
 
 ### Optional Timeout
 TIMEOUT=300
@@ -27,14 +28,21 @@ TIMEOUT=300
 
 ### RAG Configuration
 MAX_ASYNC=4
-MAX_TOKENS=32768
 EMBEDDING_DIM=1024
 MAX_EMBED_TOKENS=8192
-
-
-
-
-
+### Settings relative to query
+HISTORY_TURNS=3
+COSINE_THRESHOLD=0.2
+TOP_K=60
+MAX_TOKEN_TEXT_CHUNK=4000
+MAX_TOKEN_RELATION_DESC=4000
+MAX_TOKEN_ENTITY_DESC=4000
+### Settings relative to indexing
+CHUNK_SIZE=1200
+CHUNK_OVERLAP_SIZE=100
+MAX_TOKENS=32768
+MAX_TOKEN_SUMMARY=500
+SUMMARY_LANGUAGE=English
 
 ### LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
 ### Ollama example
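The new variables are consumed as plain environment reads at import time (see the `lightrag/base.py` and `lightrag/lightrag.py` hunks below). A minimal sketch of that pattern, using only names and defaults added in this file; this is not LightRAG code itself:

```python
# Minimal sketch: how the new .env knobs are read, i.e. os.getenv with a
# string default cast to the target type. Names/defaults are the ones above.
import os
from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # pick up .env from the working directory

top_k = int(os.getenv("TOP_K", "60"))
max_token_text_chunk = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
chunk_size = int(os.getenv("CHUNK_SIZE", "1200"))
summary_language = os.getenv("SUMMARY_LANGUAGE", "English")

print(top_k, max_token_text_chunk, chunk_size, summary_language)
```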
.gitignore
CHANGED
@@ -5,7 +5,7 @@ __pycache__/
 .eggs/
 *.tgz
 *.tar.gz
-*.ini
+*.ini
 
 # Virtual Environment
 .venv/
lightrag/api/README.md
CHANGED
@@ -222,6 +222,7 @@ You can select storage implementation by enviroment variables or command line a
 | --max-embed-tokens | 8192 | Maximum embedding token size |
 | --timeout | None | Timeout in seconds (useful when using slow AI). Use None for infinite timeout |
 | --log-level | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
+| --verbose | False | Verbose debug output (True, Flase) |
 | --key | None | API key for authentication. Protects lightrag server against unauthorized access |
 | --ssl | False | Enable HTTPS |
 | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
lightrag/api/lightrag_server.py
CHANGED
@@ -133,8 +133,8 @@ def get_env_value(env_key: str, default: Any, value_type: type = str) -> Any:
     if value is None:
         return default
 
-    if value_type == bool:
-        return value.lower() in ("true", "1", "yes")
+    if value_type is bool:
+        return value.lower() in ("true", "1", "yes", "t", "on")
     try:
         return value_type(value)
     except ValueError:
@@ -236,6 +236,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
     ASCIIColors.white(" ├─ Log Level: ", end="")
     ASCIIColors.yellow(f"{args.log_level}")
+    ASCIIColors.white(" ├─ Verbose Debug: ", end="")
+    ASCIIColors.yellow(f"{args.verbose}")
     ASCIIColors.white(" └─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
 
@@ -565,6 +567,13 @@ def parse_args() -> argparse.Namespace:
         help="Prefix of the namespace",
     )
 
+    parser.add_argument(
+        "--verbose",
+        type=bool,
+        default=get_env_value("VERBOSE", False, bool),
+        help="Verbose debug output(default: from env or false)",
+    )
+
     args = parser.parse_args()
 
     # conver relative path to absolute path
@@ -768,6 +777,11 @@ temp_prefix = "__tmp_" # prefix for temporary files
 
 
 def create_app(args):
+    # Initialize verbose debug setting
+    from lightrag.utils import set_verbose_debug
+
+    set_verbose_debug(args.verbose)
+
     global global_top_k
     global_top_k = args.top_k  # save top_k from args
 
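Two details of the server change are worth spelling out. On the environment side, booleans are parsed against an explicit truthy set, while the new `--verbose` CLI flag uses `type=bool`, which in argparse converts any non-empty string (including "False") to True, so the environment variable is the dependable way to toggle it. Below is a standalone sketch of the env-parsing helper as shown in the hunk; the behaviour of the `except ValueError` branch is an assumption, since its body is not visible above:

```python
# Standalone sketch of get_env_value's boolean branch (assumed except-branch
# behaviour: fall back to the default on a failed cast).
import os
from typing import Any


def get_env_value(env_key: str, default: Any, value_type: type = str) -> Any:
    value = os.environ.get(env_key)
    if value is None:
        return default
    if value_type is bool:
        # "VERBOSE=on", "VERBOSE=1", "VERBOSE=true" all enable the flag
        return value.lower() in ("true", "1", "yes", "t", "on")
    try:
        return value_type(value)
    except ValueError:
        return default  # assumption: not shown in the hunk above


os.environ["VERBOSE"] = "on"
print(get_env_value("VERBOSE", False, bool))  # True
print(get_env_value("TOP_K", 60, int))        # 60 (unset, default returned)
```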
lightrag/api/ollama_api.py
CHANGED
@@ -11,6 +11,7 @@ from fastapi.responses import StreamingResponse
 import asyncio
 from ascii_colors import trace_exception
 from lightrag import LightRAG, QueryParam
+from lightrag.utils import encode_string_by_tiktoken
 from dotenv import load_dotenv
 
 
@@ -111,18 +112,9 @@ class OllamaTagResponse(BaseModel):
 
 
 def estimate_tokens(text: str) -> int:
-    """Estimate the number of tokens in text
-
-
-    """
-    # Use regex to match Chinese and non-Chinese characters separately
-    chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text))
-    non_chinese_chars = len(re.findall(r"[^\u4e00-\u9fff]", text))
-
-    # Calculate estimated token count
-    tokens = chinese_chars * 1.5 + non_chinese_chars * 0.25
-
-    return int(tokens)
+    """Estimate the number of tokens in text using tiktoken"""
+    tokens = encode_string_by_tiktoken(text)
+    return len(tokens)
 
 
 def parse_query_mode(query: str) -> tuple[str, SearchMode]:
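The Ollama-compatible endpoint previously estimated tokens with a per-character heuristic and now defers to the shared tiktoken helper. A sketch of both approaches, calling `tiktoken` directly instead of `encode_string_by_tiktoken` (which, per the import above, wraps tiktoken inside `lightrag.utils`); the `cl100k_base` encoding is assumed here for illustration:

```python
# Old vs. new token estimation for the Ollama API statistics.
import re
import tiktoken  # pip install tiktoken

ENCODER = tiktoken.get_encoding("cl100k_base")  # assumed encoding for the sketch


def estimate_tokens_heuristic(text: str) -> int:
    # Removed approach: ~1.5 tokens per CJK character, ~0.25 per other character.
    chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text))
    non_chinese_chars = len(re.findall(r"[^\u4e00-\u9fff]", text))
    return int(chinese_chars * 1.5 + non_chinese_chars * 0.25)


def estimate_tokens(text: str) -> int:
    # New approach: count real tokens with tiktoken.
    return len(ENCODER.encode(text))


sample = "LightRAG estimates prompt size before calling the LLM."
print(estimate_tokens_heuristic(sample), estimate_tokens(sample))
```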
lightrag/base.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+from dotenv import load_dotenv
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import (
@@ -9,12 +10,12 @@ from typing import (
     TypedDict,
     TypeVar,
 )
-
 import numpy as np
-
 from .utils import EmbeddingFunc
 from .types import KnowledgeGraph
 
+load_dotenv()
+
 
 class TextChunkSchema(TypedDict):
     tokens: int
@@ -54,13 +55,15 @@ class QueryParam:
     top_k: int = int(os.getenv("TOP_K", "60"))
     """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
 
-    max_token_for_text_unit: int = 4000
+    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
     """Maximum number of tokens allowed for each retrieved text chunk."""
 
-    max_token_for_global_context: int = 4000
+    max_token_for_global_context: int = int(
+        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
+    )
     """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
 
-    max_token_for_local_context: int = 4000
+    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
     """Maximum number of tokens allocated for entity descriptions in local retrieval."""
 
     hl_keywords: list[str] = field(default_factory=list)
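Because these `QueryParam` defaults are evaluated when the dataclass is defined, and `load_dotenv()` now runs at module import, the variables must be present in the environment (or in `.env`) before `lightrag.base` is first imported; changing them afterwards does not affect already-created defaults. A small usage sketch, assuming `lightrag` is installed:

```python
# Sketch: env-driven QueryParam defaults are captured at import time.
import os

os.environ["MAX_TOKEN_TEXT_CHUNK"] = "2000"  # must be set before the import

from lightrag.base import QueryParam

param = QueryParam()
print(param.max_token_for_text_unit)       # 2000, from MAX_TOKEN_TEXT_CHUNK
print(param.max_token_for_global_context)  # 4000 unless MAX_TOKEN_RELATION_DESC is set
```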
lightrag/lightrag.py
CHANGED
@@ -268,10 +268,10 @@ class LightRAG:
     """Directory where logs are stored. Defaults to the current working directory."""
 
     # Text chunking
-    chunk_token_size: int = 1200
+    chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200"))
     """Maximum number of tokens per text chunk when splitting documents."""
 
-    chunk_overlap_token_size: int = 100
+    chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100"))
     """Number of overlapping tokens between consecutive text chunks to preserve context."""
 
     tiktoken_model_name: str = "gpt-4o-mini"
@@ -281,7 +281,7 @@ class LightRAG:
     entity_extract_max_gleaning: int = 1
     """Maximum number of entity extraction attempts for ambiguous content."""
 
-    entity_summary_to_max_tokens: int = 500
+    entity_summary_to_max_tokens: int = int(os.getenv("MAX_TOKEN_SUMMARY", "500"))
     """Maximum number of tokens used for summarizing extracted entities."""
 
     # Node embedding
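The same import-time pattern applies to the `LightRAG` dataclass: `CHUNK_SIZE`, `CHUNK_OVERLAP_SIZE` and `MAX_TOKEN_SUMMARY` only change the field defaults, and explicit constructor arguments still win. A sketch that inspects those defaults without building a full instance (which would need an embedding and LLM function):

```python
# Sketch: the env values become plain dataclass defaults on LightRAG.
import os

os.environ["CHUNK_SIZE"] = "800"  # set before importing lightrag

from dataclasses import fields
from lightrag import LightRAG

defaults = {f.name: f.default for f in fields(LightRAG)}
print(defaults["chunk_token_size"])              # 800, from CHUNK_SIZE
print(defaults["chunk_overlap_token_size"])      # 100 unless CHUNK_OVERLAP_SIZE is set
print(defaults["entity_summary_to_max_tokens"])  # 500 unless MAX_TOKEN_SUMMARY is set
```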
lightrag/llm/openai.py
CHANGED
@@ -40,9 +40,10 @@ __version__ = "1.0.0"
 __author__ = "lightrag Team"
 __status__ = "Production"
 
-
+from ..utils import verbose_debug, VERBOSE_DEBUG
 import sys
 import os
+import logging
 
 if sys.version_info < (3, 9):
     from typing import AsyncIterator
@@ -110,6 +111,11 @@ async def openai_complete_if_cache(
         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",
         "Content-Type": "application/json",
     }
+
+    # Set openai logger level to INFO when VERBOSE_DEBUG is off
+    if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
+        logging.getLogger("openai").setLevel(logging.INFO)
+
     openai_async_client = (
         AsyncOpenAI(default_headers=default_headers, api_key=api_key)
         if base_url is None
@@ -125,13 +131,11 @@ async def openai_complete_if_cache(
     messages.extend(history_messages)
     messages.append({"role": "user", "content": prompt})
 
-
-    logger.debug("===== Query Input to LLM =====")
+    logger.debug("===== Sending Query to LLM =====")
     logger.debug(f"Model: {model} Base URL: {base_url}")
     logger.debug(f"Additional kwargs: {kwargs}")
-
-
-    # logger.debug(f"Messages: {messages}")
+    verbose_debug(f"Query: {prompt}")
+    verbose_debug(f"System prompt: {system_prompt}")
 
     try:
         if "response_format" in kwargs:
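The logger tweak keeps the `openai` client library from dumping full request payloads whenever LightRAG itself runs at DEBUG, unless verbose mode is explicitly on. The gating idea in isolation; the "lightrag" logger name is assumed for this sketch, while "openai" is the client library's own logger:

```python
# Sketch: pin a chatty third-party logger to INFO while keeping our own DEBUG.
import logging

VERBOSE_DEBUG = False  # stand-in for the flag imported from lightrag.utils

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("lightrag")  # assumed logger name for the sketch
logger.setLevel(logging.DEBUG)

if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
    logging.getLogger("openai").setLevel(logging.INFO)

logging.getLogger("openai").debug("suppressed request payload")  # filtered out
logger.debug("LightRAG debug output still appears")              # printed
```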
lightrag/llm/zhipu.py
CHANGED
@@ -43,6 +43,7 @@ __status__ = "Production"
 import sys
 import re
 import json
+from ..utils import verbose_debug
 
 if sys.version_info < (3, 9):
     pass
@@ -119,7 +120,7 @@ async def zhipu_complete_if_cache(
     # Add debug logging
     logger.debug("===== Query Input to LLM =====")
     logger.debug(f"Query: {prompt}")
-
+    verbose_debug(f"System prompt: {system_prompt}")
 
     # Remove unsupported kwargs
     kwargs = {
lightrag/operate.py
CHANGED
@@ -687,6 +687,9 @@ async def kg_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -772,6 +775,9 @@ async def extract_keywords_only(
         query=text, examples=examples, language=language, history=history_context
     )
 
+    len_of_prompts = len(encode_string_by_tiktoken(kw_prompt))
+    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     # 5. Call the LLM for keyword extraction
     use_model_func = global_config["llm_model_func"]
     result = await use_model_func(kw_prompt, keyword_extraction=True)
@@ -935,7 +941,9 @@ async def mix_kg_vector_query(
                 chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
                 formatted_chunks.append(chunk_text)
 
-            logger.
+            logger.debug(
+                f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
+            )
             return "\n--New Chunk--\n".join(formatted_chunks)
         except Exception as e:
             logger.error(f"Error in get_vector_context: {e}")
@@ -968,6 +976,9 @@ async def mix_kg_vector_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
+
     # 6. Generate response
     response = await use_model_func(
         query,
@@ -1073,7 +1084,7 @@ async def _build_query_context(
     if not entities_context.strip() and not relations_context.strip():
         return None
 
-    return f"""
+    result = f"""
 -----Entities-----
 ```csv
 {entities_context}
@@ -1087,6 +1098,15 @@ async def _build_query_context(
 {text_units_context}
 ```
 """
+    contex_tokens = len(encode_string_by_tiktoken(result))
+    entities_tokens = len(encode_string_by_tiktoken(entities_context))
+    relations_tokens = len(encode_string_by_tiktoken(relations_context))
+    text_units_tokens = len(encode_string_by_tiktoken(text_units_context))
+    logger.debug(
+        f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}"
+    )
+
+    return result
 
 
 async def _get_node_data(
@@ -1130,8 +1150,19 @@ async def _get_node_data(
             node_datas, query_param, knowledge_graph_inst
         ),
     )
+
+    len_node_datas = len(node_datas)
+    node_datas = truncate_list_by_token_size(
+        node_datas,
+        key=lambda x: x["description"],
+        max_token_size=query_param.max_token_for_local_context,
+    )
+    logger.debug(
+        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
+    )
+
     logger.info(
-        f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)}
+        f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
     )
 
     # build prompt
@@ -1264,6 +1295,10 @@ async def _find_most_related_text_unit_from_entities(
         max_token_size=query_param.max_token_for_text_unit,
    )
 
+    logger.debug(
+        f"Truncate chunks from {len(all_text_units_lookup)} to {len(all_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
+    )
+
    all_text_units = [t["data"] for t in all_text_units]
    return all_text_units
 
@@ -1305,6 +1340,11 @@ async def _find_most_related_edges_from_entities(
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_global_context,
     )
+
+    logger.debug(
+        f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
+    )
+
     return all_edges_data
 
 
@@ -1352,11 +1392,15 @@ async def _get_edge_data(
     edge_datas = sorted(
         edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
+    len_edge_datas = len(edge_datas)
     edge_datas = truncate_list_by_token_size(
         edge_datas,
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_global_context,
     )
+    logger.debug(
+        f"Truncate relations from {len_edge_datas} to {len(edge_datas)} (max tokens:{query_param.max_token_for_global_context})"
+    )
 
     use_entities, use_text_units = await asyncio.gather(
         _find_most_related_entities_from_relationships(
@@ -1367,7 +1411,7 @@ async def _get_edge_data(
         ),
     )
     logger.info(
-        f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)}
+        f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )
 
     relations_section_list = [
@@ -1456,11 +1500,15 @@ async def _find_most_related_entities_from_relationships(
         for k, n, d in zip(entity_names, node_datas, node_degrees)
     ]
 
+    len_node_datas = len(node_datas)
     node_datas = truncate_list_by_token_size(
         node_datas,
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_local_context,
     )
+    logger.debug(
+        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
+    )
 
     return node_datas
 
@@ -1516,6 +1564,10 @@ async def _find_related_text_unit_from_relationships(
         max_token_size=query_param.max_token_for_text_unit,
     )
 
+    logger.debug(
+        f"Truncate chunks from {len(valid_text_units)} to {len(truncated_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
+    )
+
     all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units]
 
     return all_text_units
@@ -1583,7 +1635,10 @@ async def naive_query(
         logger.warning("No chunks left after truncation")
         return PROMPTS["fail_response"]
 
-    logger.
+    logger.debug(
+        f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
+    )
+
     section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
 
     if query_param.only_need_context:
@@ -1606,6 +1661,9 @@ async def naive_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[naive_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -1748,6 +1806,9 @@ async def kg_query_with_keywords(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
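All of the new `Truncate ... (max tokens: ...)` debug lines report on the same pattern: a list of entities, relations or chunks is cut down so the concatenated descriptions stay within a tiktoken budget before the context is sent to the LLM. A simplified stand-in for `truncate_list_by_token_size` from `lightrag.utils` (the real helper's exact cut-off semantics and encoding may differ):

```python
# Simplified sketch of token-budget truncation plus the new debug message.
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")  # assumed encoding for the sketch


def truncate_list_by_token_size(list_data, key, max_token_size):
    kept, used = [], 0
    for item in list_data:
        used += len(enc.encode(key(item)))
        if used > max_token_size:
            break
        kept.append(item)
    return kept


chunks = [{"description": "alpha " * 200}, {"description": "beta " * 200}]
kept = truncate_list_by_token_size(
    chunks, key=lambda x: x["description"], max_token_size=250
)
print(f"Truncate chunks from {len(chunks)} to {len(kept)} (max tokens:250)")
```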
lightrag/utils.py
CHANGED
@@ -20,6 +20,23 @@ import tiktoken
 
 from lightrag.prompt import PROMPTS
 
+VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
+
+
+def verbose_debug(msg: str, *args, **kwargs):
+    """Function for outputting detailed debug information.
+    When VERBOSE_DEBUG=True, outputs the complete message.
+    When VERBOSE_DEBUG=False, outputs only the first 30 characters.
+    """
+    if VERBOSE_DEBUG:
+        logger.debug(msg, *args, **kwargs)
+
+
+def set_verbose_debug(enabled: bool):
+    """Enable or disable verbose debug output"""
+    global VERBOSE_DEBUG
+    VERBOSE_DEBUG = enabled
+
 
 class UnlimitedSemaphore:
     """A context manager that allows unlimited access."""