zrguo committed
Commit ff6f836 · unverified · 2 parents: e2950a4 4383b53

Merge pull request #797 from danielaskdd/add-env-settings


Add token size truncation for local queries, and make token size settings configurable via environment variables

.env.example CHANGED
@@ -18,6 +18,7 @@
 
 ### Logging level
 LOG_LEVEL=INFO
+VERBOSE=False
 
 ### Optional Timeout
 TIMEOUT=300
@@ -27,14 +28,21 @@ TIMEOUT=300
 
 ### RAG Configuration
 MAX_ASYNC=4
-MAX_TOKENS=32768
 EMBEDDING_DIM=1024
 MAX_EMBED_TOKENS=8192
-#HISTORY_TURNS=3
-#CHUNK_SIZE=1200
-#CHUNK_OVERLAP_SIZE=100
-#COSINE_THRESHOLD=0.2
-#TOP_K=60
+### Settings relative to query
+HISTORY_TURNS=3
+COSINE_THRESHOLD=0.2
+TOP_K=60
+MAX_TOKEN_TEXT_CHUNK=4000
+MAX_TOKEN_RELATION_DESC=4000
+MAX_TOKEN_ENTITY_DESC=4000
+### Settings relative to indexing
+CHUNK_SIZE=1200
+CHUNK_OVERLAP_SIZE=100
+MAX_TOKENS=32768
+MAX_TOKEN_SUMMARY=500
+SUMMARY_LANGUAGE=English
 
 ### LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
 ### Ollama example
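The new keys are plain environment variables, so they only take effect for code that reads them through `os.getenv`. A minimal sketch of the intended flow, assuming `python-dotenv` is installed and a `.env` file with the keys above sits in the working directory:

```python
# Sketch of how the query-related keys above are expected to reach the library.
# Assumes python-dotenv is installed and a .env file exists in the working directory.
import os

from dotenv import load_dotenv

load_dotenv()  # populate os.environ from .env (existing variables are not overridden)

# Same env-or-default pattern this PR introduces in lightrag/base.py:
top_k = int(os.getenv("TOP_K", "60"))
max_token_text_chunk = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))

print(f"TOP_K={top_k}, MAX_TOKEN_TEXT_CHUNK={max_token_text_chunk}")
```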
.gitignore CHANGED
@@ -5,7 +5,7 @@ __pycache__/
 .eggs/
 *.tgz
 *.tar.gz
-*.ini # Remove config.ini from repo
+*.ini
 
 # Virtual Environment
 .venv/
lightrag/api/README.md CHANGED
@@ -222,6 +222,7 @@ You can select storage implementation by enviroment variables or command line a
 | --max-embed-tokens | 8192 | Maximum embedding token size |
 | --timeout | None | Timeout in seconds (useful when using slow AI). Use None for infinite timeout |
 | --log-level | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
+| --verbose | False | Verbose debug output (True, False) |
 | --key | None | API key for authentication. Protects lightrag server against unauthorized access |
 | --ssl | False | Enable HTTPS |
 | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
lightrag/api/lightrag_server.py CHANGED
@@ -133,8 +133,8 @@ def get_env_value(env_key: str, default: Any, value_type: type = str) -> Any:
     if value is None:
         return default
 
-    if isinstance(value_type, bool):
-        return value.lower() in ("true", "1", "yes")
+    if value_type is bool:
+        return value.lower() in ("true", "1", "yes", "t", "on")
     try:
         return value_type(value)
     except ValueError:
@@ -236,6 +236,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
     ASCIIColors.white(" ├─ Log Level: ", end="")
     ASCIIColors.yellow(f"{args.log_level}")
+    ASCIIColors.white(" ├─ Verbose Debug: ", end="")
+    ASCIIColors.yellow(f"{args.verbose}")
     ASCIIColors.white(" └─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
 
@@ -565,6 +567,13 @@ def parse_args() -> argparse.Namespace:
         help="Prefix of the namespace",
     )
 
+    parser.add_argument(
+        "--verbose",
+        type=bool,
+        default=get_env_value("VERBOSE", False, bool),
+        help="Verbose debug output (default: from env or false)",
+    )
+
     args = parser.parse_args()
 
     # convert relative path to absolute path
@@ -768,6 +777,11 @@ temp_prefix = "__tmp_" # prefix for temporary files
 
 
 def create_app(args):
+    # Initialize verbose debug setting
+    from lightrag.utils import set_verbose_debug
+
+    set_verbose_debug(args.verbose)
+
     global global_top_k
     global_top_k = args.top_k # save top_k from args
 
lightrag/api/ollama_api.py CHANGED
@@ -11,6 +11,7 @@ from fastapi.responses import StreamingResponse
 import asyncio
 from ascii_colors import trace_exception
 from lightrag import LightRAG, QueryParam
+from lightrag.utils import encode_string_by_tiktoken
 from dotenv import load_dotenv
 
 
@@ -111,18 +112,9 @@ class OllamaTagResponse(BaseModel):
 
 
 def estimate_tokens(text: str) -> int:
-    """Estimate the number of tokens in text
-    Chinese characters: approximately 1.5 tokens per character
-    English characters: approximately 0.25 tokens per character
-    """
-    # Use regex to match Chinese and non-Chinese characters separately
-    chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text))
-    non_chinese_chars = len(re.findall(r"[^\u4e00-\u9fff]", text))
-
-    # Calculate estimated token count
-    tokens = chinese_chars * 1.5 + non_chinese_chars * 0.25
-
-    return int(tokens)
+    """Estimate the number of tokens in text using tiktoken"""
+    tokens = encode_string_by_tiktoken(text)
+    return len(tokens)
 
 
 def parse_query_mode(query: str) -> tuple[str, SearchMode]:
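`estimate_tokens` now defers to tiktoken instead of the per-character heuristic. A standalone sketch of the equivalent count, assuming tiktoken is installed and recent enough to recognise the `gpt-4o-mini` model name used as the repo's default `tiktoken_model_name`:

```python
# Standalone sketch: exact token counting with tiktoken instead of the old
# 1.5/0.25 per-character heuristic. The model name is an assumption taken from
# LightRAG's default tiktoken_model_name.
import tiktoken


def estimate_tokens(text: str, model_name: str = "gpt-4o-mini") -> int:
    encoding = tiktoken.encoding_for_model(model_name)
    return len(encoding.encode(text))


print(estimate_tokens("Hello, world"))
print(estimate_tokens("你好，世界"))  # CJK text no longer needs a rough multiplier
```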
lightrag/base.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+from dotenv import load_dotenv
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import (
@@ -9,12 +10,12 @@ from typing import (
     TypedDict,
     TypeVar,
 )
-
 import numpy as np
-
 from .utils import EmbeddingFunc
 from .types import KnowledgeGraph
 
+load_dotenv()
+
 
 class TextChunkSchema(TypedDict):
     tokens: int
@@ -54,13 +55,15 @@ class QueryParam:
     top_k: int = int(os.getenv("TOP_K", "60"))
     """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
 
-    max_token_for_text_unit: int = 4000
+    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
     """Maximum number of tokens allowed for each retrieved text chunk."""
 
-    max_token_for_global_context: int = 4000
+    max_token_for_global_context: int = int(
+        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
+    )
     """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
 
-    max_token_for_local_context: int = 4000
+    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
     """Maximum number of tokens allocated for entity descriptions in local retrieval."""
 
     hl_keywords: list[str] = field(default_factory=list)
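Because these are dataclass field defaults, the `os.getenv` calls run once, when `lightrag.base` is imported; the `load_dotenv()` at module top makes a `.env` file count as well. A usage sketch, assuming the package is installed:

```python
# Sketch: override a QueryParam token budget from the environment.
# The variable must be set (or .env loaded) before lightrag.base is imported,
# because dataclass defaults are evaluated at class-definition time.
import os

os.environ["MAX_TOKEN_ENTITY_DESC"] = "2000"

from lightrag.base import QueryParam  # import after setting the variable

param = QueryParam(mode="local")
print(param.max_token_for_local_context)  # 2000 instead of the 4000 default
```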
lightrag/lightrag.py CHANGED
@@ -268,10 +268,10 @@ class LightRAG:
     """Directory where logs are stored. Defaults to the current working directory."""
 
     # Text chunking
-    chunk_token_size: int = 1200
+    chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200"))
     """Maximum number of tokens per text chunk when splitting documents."""
 
-    chunk_overlap_token_size: int = 100
+    chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100"))
     """Number of overlapping tokens between consecutive text chunks to preserve context."""
 
     tiktoken_model_name: str = "gpt-4o-mini"
@@ -281,7 +281,7 @@ class LightRAG:
     entity_extract_max_gleaning: int = 1
     """Maximum number of entity extraction attempts for ambiguous content."""
 
-    entity_summary_to_max_tokens: int = 500
+    entity_summary_to_max_tokens: int = int(os.getenv("MAX_TOKEN_SUMMARY", "500"))
     """Maximum number of tokens used for summarizing extracted entities."""
 
     # Node embedding
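The indexing-side fields follow the same pattern. A hypothetical, self-contained illustration (the `ChunkingConfig` class below is not part of LightRAG) of why the environment variable must be set before the module defining the dataclass is imported, and why explicit constructor arguments still win:

```python
# Hypothetical stand-in for the pattern used by chunk_token_size above.
import os
from dataclasses import dataclass

os.environ.setdefault("CHUNK_SIZE", "800")  # must happen before the class body runs


@dataclass
class ChunkingConfig:  # illustrative only, not a LightRAG class
    chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200"))
    chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100"))


print(ChunkingConfig())                       # defaults resolved from the environment
print(ChunkingConfig(chunk_token_size=1500))  # explicit argument takes precedence
```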
lightrag/llm/openai.py CHANGED
@@ -40,9 +40,10 @@ __version__ = "1.0.0"
 __author__ = "lightrag Team"
 __status__ = "Production"
 
-
+from ..utils import verbose_debug, VERBOSE_DEBUG
 import sys
 import os
+import logging
 
 if sys.version_info < (3, 9):
     from typing import AsyncIterator
@@ -110,6 +111,11 @@ async def openai_complete_if_cache(
         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",
         "Content-Type": "application/json",
     }
+
+    # Set openai logger level to INFO when VERBOSE_DEBUG is off
+    if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
+        logging.getLogger("openai").setLevel(logging.INFO)
+
     openai_async_client = (
         AsyncOpenAI(default_headers=default_headers, api_key=api_key)
         if base_url is None
@@ -125,13 +131,11 @@ async def openai_complete_if_cache(
     messages.extend(history_messages)
     messages.append({"role": "user", "content": prompt})
 
-    # Add log output
-    logger.debug("===== Query Input to LLM =====")
+    logger.debug("===== Sending Query to LLM =====")
     logger.debug(f"Model: {model} Base URL: {base_url}")
     logger.debug(f"Additional kwargs: {kwargs}")
-    logger.debug(f"Query: {prompt}")
-    logger.debug(f"System prompt: {system_prompt}")
-    # logger.debug(f"Messages: {messages}")
+    verbose_debug(f"Query: {prompt}")
+    verbose_debug(f"System prompt: {system_prompt}")
 
     try:
         if "response_format" in kwargs:
lightrag/llm/zhipu.py CHANGED
@@ -43,6 +43,7 @@ __status__ = "Production"
 import sys
 import re
 import json
+from ..utils import verbose_debug
 
 if sys.version_info < (3, 9):
     pass
@@ -119,7 +120,7 @@ async def zhipu_complete_if_cache(
     # Add debug logging
     logger.debug("===== Query Input to LLM =====")
     logger.debug(f"Query: {prompt}")
-    logger.debug(f"System prompt: {system_prompt}")
+    verbose_debug(f"System prompt: {system_prompt}")
 
     # Remove unsupported kwargs
     kwargs = {
lightrag/operate.py CHANGED
@@ -687,6 +687,9 @@ async def kg_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -772,6 +775,9 @@ async def extract_keywords_only(
         query=text, examples=examples, language=language, history=history_context
     )
 
+    len_of_prompts = len(encode_string_by_tiktoken(kw_prompt))
+    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     # 5. Call the LLM for keyword extraction
     use_model_func = global_config["llm_model_func"]
     result = await use_model_func(kw_prompt, keyword_extraction=True)
@@ -935,7 +941,9 @@ async def mix_kg_vector_query(
                     chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
                 formatted_chunks.append(chunk_text)
 
-            logger.info(f"Truncate {len(chunks)} to {len(formatted_chunks)} chunks")
+            logger.debug(
+                f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
+            )
             return "\n--New Chunk--\n".join(formatted_chunks)
         except Exception as e:
             logger.error(f"Error in get_vector_context: {e}")
@@ -968,6 +976,9 @@ async def mix_kg_vector_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
+
     # 6. Generate response
     response = await use_model_func(
         query,
@@ -1073,7 +1084,7 @@ async def _build_query_context(
     if not entities_context.strip() and not relations_context.strip():
         return None
 
-    return f"""
+    result = f"""
 -----Entities-----
 ```csv
 {entities_context}
@@ -1087,6 +1098,15 @@ async def _build_query_context(
 {text_units_context}
 ```
 """
+    contex_tokens = len(encode_string_by_tiktoken(result))
+    entities_tokens = len(encode_string_by_tiktoken(entities_context))
+    relations_tokens = len(encode_string_by_tiktoken(relations_context))
+    text_units_tokens = len(encode_string_by_tiktoken(text_units_context))
+    logger.debug(
+        f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}"
+    )
+
+    return result
 
 
 async def _get_node_data(
@@ -1130,8 +1150,19 @@ async def _get_node_data(
             node_datas, query_param, knowledge_graph_inst
         ),
     )
+
+    len_node_datas = len(node_datas)
+    node_datas = truncate_list_by_token_size(
+        node_datas,
+        key=lambda x: x["description"],
+        max_token_size=query_param.max_token_for_local_context,
+    )
+    logger.debug(
+        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
+    )
+
     logger.info(
-        f"Local query uses {len(node_datas)} entities, {len(use_relations)} relations, {len(use_text_units)} text units"
+        f"Local query uses {len(node_datas)} entities, {len(use_relations)} relations, {len(use_text_units)} chunks"
     )
 
     # build prompt
@@ -1264,6 +1295,10 @@ async def _find_most_related_text_unit_from_entities(
         max_token_size=query_param.max_token_for_text_unit,
     )
 
+    logger.debug(
+        f"Truncate chunks from {len(all_text_units_lookup)} to {len(all_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
+    )
+
     all_text_units = [t["data"] for t in all_text_units]
     return all_text_units
 
@@ -1305,6 +1340,11 @@ async def _find_most_related_edges_from_entities(
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_global_context,
     )
+
+    logger.debug(
+        f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
+    )
+
     return all_edges_data
 
 
@@ -1352,11 +1392,15 @@ async def _get_edge_data(
     edge_datas = sorted(
         edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
    )
+    len_edge_datas = len(edge_datas)
     edge_datas = truncate_list_by_token_size(
         edge_datas,
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_global_context,
     )
+    logger.debug(
+        f"Truncate relations from {len_edge_datas} to {len(edge_datas)} (max tokens:{query_param.max_token_for_global_context})"
+    )
 
     use_entities, use_text_units = await asyncio.gather(
         _find_most_related_entities_from_relationships(
@@ -1367,7 +1411,7 @@ async def _get_edge_data(
         ),
     )
     logger.info(
-        f"Global query uses {len(use_entities)} entities, {len(edge_datas)} relations, {len(use_text_units)} text units"
+        f"Global query uses {len(use_entities)} entities, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )
 
     relations_section_list = [
@@ -1456,11 +1500,15 @@ async def _find_most_related_entities_from_relationships(
         for k, n, d in zip(entity_names, node_datas, node_degrees)
     ]
 
+    len_node_datas = len(node_datas)
     node_datas = truncate_list_by_token_size(
         node_datas,
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_local_context,
     )
+    logger.debug(
+        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
+    )
 
     return node_datas
 
@@ -1516,6 +1564,10 @@ async def _find_related_text_unit_from_relationships(
         max_token_size=query_param.max_token_for_text_unit,
     )
 
+    logger.debug(
+        f"Truncate chunks from {len(valid_text_units)} to {len(truncated_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
+    )
+
     all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units]
 
     return all_text_units
@@ -1583,7 +1635,10 @@ async def naive_query(
         logger.warning("No chunks left after truncation")
         return PROMPTS["fail_response"]
 
-    logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
+    logger.debug(
+        f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
+    )
+
     section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
 
     if query_param.only_need_context:
@@ -1606,6 +1661,9 @@ async def naive_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[naive_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -1748,6 +1806,9 @@ async def kg_query_with_keywords(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
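All of the new `logger.debug` lines report the effect of `truncate_list_by_token_size`, which trims a list so that the token count of a selected field stays within the budget taken from `QueryParam`. The helper itself lives in `lightrag.utils`; the sketch below is an illustrative re-implementation that only mirrors the call shape used in this diff (the encoding name is an assumption):

```python
# Illustrative re-implementation of token-budget truncation (not lightrag.utils itself).
from typing import Any, Callable

import tiktoken

ENCODER = tiktoken.get_encoding("cl100k_base")  # assumed encoding for the sketch


def truncate_list_by_token_size(
    list_data: list[Any], key: Callable[[Any], str], max_token_size: int
) -> list[Any]:
    """Keep leading items while the cumulative token count of key(item) fits the budget."""
    total = 0
    for i, item in enumerate(list_data):
        total += len(ENCODER.encode(key(item)))
        if total > max_token_size:
            return list_data[:i]
    return list_data


edge_datas = [{"description": "relation description " * 40} for _ in range(100)]
kept = truncate_list_by_token_size(
    edge_datas, key=lambda x: x["description"], max_token_size=4000
)
print(f"Truncate relations from {len(edge_datas)} to {len(kept)} (max tokens: 4000)")
```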
lightrag/utils.py CHANGED
@@ -20,6 +20,23 @@ import tiktoken
 
 from lightrag.prompt import PROMPTS
 
+VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
+
+
+def verbose_debug(msg: str, *args, **kwargs):
+    """Function for outputting detailed debug information.
+    When VERBOSE_DEBUG=True, outputs the complete message.
+    When VERBOSE_DEBUG=False, outputs only the first 30 characters.
+    """
+    if VERBOSE_DEBUG:
+        logger.debug(msg, *args, **kwargs)
+
+
+def set_verbose_debug(enabled: bool):
+    """Enable or disable verbose debug output"""
+    global VERBOSE_DEBUG
+    VERBOSE_DEBUG = enabled
+
 
 class UnlimitedSemaphore:
     """A context manager that allows unlimited access."""