zrguo committed on
Commit 53952fe · 1 Parent(s): 4644ee6

Update token limit

README-zh.md CHANGED
@@ -304,16 +304,14 @@ class QueryParam:
304
  If None, keeps all chunks returned from initial retrieval.
305
  """
306
 
307
- max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
308
- """Maximum number of tokens allowed for each retrieved text chunk."""
309
 
310
- max_token_for_global_context: int = int(
311
- os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
312
- )
313
- """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
314
 
315
- max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
316
- """Maximum number of tokens allocated for entity descriptions in local retrieval."""
317
 
318
  hl_keywords: list[str] = field(default_factory=list)
319
  """List of high-level keywords to prioritize in retrieval."""
 
304
  If None, keeps all chunks returned from initial retrieval.
305
  """
306
 
307
+ max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
308
+ """Maximum number of tokens allocated for entity context in unified token control system."""
309
 
310
+ max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
311
+ """Maximum number of tokens allocated for relationship context in unified token control system."""
 
 
312
 
313
+ max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
314
+ """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
315
 
316
  hl_keywords: list[str] = field(default_factory=list)
317
  """List of high-level keywords to prioritize in retrieval."""
README.md CHANGED
@@ -311,16 +311,14 @@ class QueryParam:
311
  If None, keeps all chunks returned from initial retrieval.
312
  """
313
 
314
- max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
315
- """Maximum number of tokens allowed for each retrieved text chunk."""
316
 
317
- max_token_for_global_context: int = int(
318
- os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
319
- )
320
- """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
321
 
322
- max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
323
- """Maximum number of tokens allocated for entity descriptions in local retrieval."""
324
 
325
  conversation_history: list[dict[str, str]] = field(default_factory=list)
326
  """Stores past conversation history to maintain context.
 
311
  If None, keeps all chunks returned from initial retrieval.
312
  """
313
 
314
+ max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
315
+ """Maximum number of tokens allocated for entity context in unified token control system."""
316
 
317
+ max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
318
+ """Maximum number of tokens allocated for relationship context in unified token control system."""
 
 
319
 
320
+ max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
321
+ """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
322
 
323
  conversation_history: list[dict[str, str]] = field(default_factory=list)
324
  """Stores past conversation history to maintain context.
env.example CHANGED
@@ -50,9 +50,12 @@ OLLAMA_EMULATING_MODEL_TAG=latest
50
 
51
  ### RAG Query Configuration
52
  # HISTORY_TURNS=3
53
- # MAX_TOKEN_TEXT_CHUNK=6000
54
- # MAX_TOKEN_RELATION_DESC=4000
55
- # MAX_TOKEN_ENTITY_DESC=4000
 
 
 
56
  # COSINE_THRESHOLD=0.2
57
  ### Number of entities or relations to retrieve from KG
58
  # TOP_K=60
 
50
 
51
  ### RAG Query Configuration
52
  # HISTORY_TURNS=3
53
+
54
+ ### These parameters provide more precise control over total token usage
55
+ # MAX_ENTITY_TOKENS=10000
56
+ # MAX_RELATION_TOKENS=10000
57
+ # MAX_TOTAL_TOKENS=32000
58
+
59
  # COSINE_THRESHOLD=0.2
60
  ### Number of entities or relations to retrieve from KG
61
  # TOP_K=60
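As a rough sketch of how these variables are picked up (an inference from the `os.getenv` defaults in `QueryParam`, not something this commit documents): the values are read when the dataclass is defined, so they should be present in the environment before `lightrag` is imported, and explicitly set `QueryParam` fields still take precedence.

```python
import os

# Hypothetical sketch: export the unified budgets before importing lightrag,
# since QueryParam's field defaults call os.getenv at class-definition time.
os.environ.setdefault("MAX_ENTITY_TOKENS", "10000")
os.environ.setdefault("MAX_RELATION_TOKENS", "10000")
os.environ.setdefault("MAX_TOTAL_TOKENS", "32000")

from lightrag.base import QueryParam  # noqa: E402

print(QueryParam().max_total_tokens)  # 32000 unless overridden per query
```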
lightrag/api/routers/query_routes.py CHANGED
@@ -61,22 +61,22 @@ class QueryRequest(BaseModel):
61
  description="Number of text chunks to keep after reranking.",
62
  )
63
 
64
- max_token_for_text_unit: Optional[int] = Field(
65
- gt=1,
66
  default=None,
67
- description="Maximum number of tokens allowed for each retrieved text chunk.",
 
68
  )
69
 
70
- max_token_for_global_context: Optional[int] = Field(
71
- gt=1,
72
  default=None,
73
- description="Maximum number of tokens allocated for relationship descriptions in global retrieval.",
 
74
  )
75
 
76
- max_token_for_local_context: Optional[int] = Field(
77
- gt=1,
78
  default=None,
79
- description="Maximum number of tokens allocated for entity descriptions in local retrieval.",
 
80
  )
81
 
82
  conversation_history: Optional[List[Dict[str, Any]]] = Field(
 
61
  description="Number of text chunks to keep after reranking.",
62
  )
63
 
64
+ max_entity_tokens: Optional[int] = Field(
 
65
  default=None,
66
+ description="Maximum number of tokens allocated for entity context in unified token control system.",
67
+ ge=1,
68
  )
69
 
70
+ max_relation_tokens: Optional[int] = Field(
 
71
  default=None,
72
+ description="Maximum number of tokens allocated for relationship context in unified token control system.",
73
+ ge=1,
74
  )
75
 
76
+ max_total_tokens: Optional[int] = Field(
 
77
  default=None,
78
+ description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).",
79
+ ge=1,
80
  )
81
 
82
  conversation_history: Optional[List[Dict[str, Any]]] = Field(
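A request-side sketch of the new fields (the server address and `/query` route are assumptions; only the field names and the `ge=1` constraint come from this diff):

```python
import requests

payload = {
    "query": "Summarize the main entities and how they relate.",
    "mode": "hybrid",
    "max_entity_tokens": 8000,    # optional, must be >= 1
    "max_relation_tokens": 8000,  # optional, must be >= 1
    "max_total_tokens": 24000,    # optional, must be >= 1
}
resp = requests.post("http://localhost:9621/query", json=payload, timeout=120)
print(resp.json())
```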
lightrag/base.py CHANGED
@@ -70,16 +70,14 @@ class QueryParam:
70
  If None, keeps all chunks returned from initial retrieval.
71
  """
72
 
73
- max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "6000"))
74
- """Maximum number of tokens allowed for each retrieved text chunk."""
75
 
76
- max_token_for_global_context: int = int(
77
- os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
78
- )
79
- """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
80
 
81
- max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
82
- """Maximum number of tokens allocated for entity descriptions in local retrieval."""
83
 
84
  hl_keywords: list[str] = field(default_factory=list)
85
  """List of high-level keywords to prioritize in retrieval."""
 
70
  If None, keeps all chunks returned from initial retrieval.
71
  """
72
 
73
+ max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
74
+ """Maximum number of tokens allocated for entity context in unified token control system."""
75
 
76
+ max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
77
+ """Maximum number of tokens allocated for relationship context in unified token control system."""
 
 
78
 
79
+ max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
80
+ """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
81
 
82
  hl_keywords: list[str] = field(default_factory=list)
83
  """List of high-level keywords to prioritize in retrieval."""
lightrag/operate.py CHANGED
@@ -1569,7 +1569,9 @@ async def kg_query(
1569
 
1570
  tokenizer: Tokenizer = global_config["tokenizer"]
1571
  len_of_prompts = len(tokenizer.encode(query + sys_prompt))
1572
- logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
 
 
1573
 
1574
  response = await use_model_func(
1575
  query,
@@ -1692,7 +1694,9 @@ async def extract_keywords_only(
1692
 
1693
  tokenizer: Tokenizer = global_config["tokenizer"]
1694
  len_of_prompts = len(tokenizer.encode(kw_prompt))
1695
- logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
 
 
1696
 
1697
  # 5. Call the LLM for keyword extraction
1698
  if param.model_func:
@@ -1864,7 +1868,7 @@ async def _build_query_context(
1864
 
1865
  # Combine entities and relations contexts
1866
  entities_context = process_combine_contexts(
1867
- hl_entities_context, ll_entities_context
1868
  )
1869
  relations_context = process_combine_contexts(
1870
  hl_relations_context, ll_relations_context
@@ -1894,6 +1898,163 @@ async def _build_query_context(
1894
  f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks"
1895
  )
1896
 
1897
  # not necessary to use LLM to generate a response
1898
  if not entities_context and not relations_context:
1899
  return None
@@ -1982,18 +2143,6 @@ async def _get_node_data(
1982
  knowledge_graph_inst,
1983
  )
1984
 
1985
- tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
1986
- len_node_datas = len(node_datas)
1987
- node_datas = truncate_list_by_token_size(
1988
- node_datas,
1989
- key=lambda x: x["description"] if x["description"] is not None else "",
1990
- max_token_size=query_param.max_token_for_local_context,
1991
- tokenizer=tokenizer,
1992
- )
1993
- logger.debug(
1994
- f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
1995
- )
1996
-
1997
  logger.info(
1998
  f"Local query: {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
1999
  )
@@ -2199,20 +2348,9 @@ async def _find_most_related_edges_from_entities(
2199
  }
2200
  all_edges_data.append(combined)
2201
 
2202
- tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
2203
  all_edges_data = sorted(
2204
  all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
2205
  )
2206
- all_edges_data = truncate_list_by_token_size(
2207
- all_edges_data,
2208
- key=lambda x: x["description"] if x["description"] is not None else "",
2209
- max_token_size=query_param.max_token_for_global_context,
2210
- tokenizer=tokenizer,
2211
- )
2212
-
2213
- logger.debug(
2214
- f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
2215
- )
2216
 
2217
  return all_edges_data
2218
 
@@ -2269,16 +2407,9 @@ async def _get_edge_data(
2269
  }
2270
  edge_datas.append(combined)
2271
 
2272
- tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
2273
  edge_datas = sorted(
2274
  edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
2275
  )
2276
- edge_datas = truncate_list_by_token_size(
2277
- edge_datas,
2278
- key=lambda x: x["description"] if x["description"] is not None else "",
2279
- max_token_size=query_param.max_token_for_global_context,
2280
- tokenizer=tokenizer,
2281
- )
2282
  use_entities, use_text_units = await asyncio.gather(
2283
  _find_most_related_entities_from_relationships(
2284
  edge_datas,
@@ -2388,18 +2519,6 @@ async def _find_most_related_entities_from_relationships(
2388
  combined = {**node, "entity_name": entity_name, "rank": degree}
2389
  node_datas.append(combined)
2390
 
2391
- tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
2392
- len_node_datas = len(node_datas)
2393
- node_datas = truncate_list_by_token_size(
2394
- node_datas,
2395
- key=lambda x: x["description"] if x["description"] is not None else "",
2396
- max_token_size=query_param.max_token_for_local_context,
2397
- tokenizer=tokenizer,
2398
- )
2399
- logger.debug(
2400
- f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
2401
- )
2402
-
2403
  return node_datas
2404
 
2405
 
@@ -2491,13 +2610,64 @@ async def naive_query(
2491
  if chunks is None or len(chunks) == 0:
2492
  return PROMPTS["fail_response"]
2493
 
2494
- # Process chunks using unified processing
2495
  processed_chunks = await process_chunks_unified(
2496
  query=query,
2497
  chunks=chunks,
2498
  query_param=query_param,
2499
  global_config=global_config,
2500
  source_type="vector",
 
2501
  )
2502
 
2503
  logger.info(f"Final context: {len(processed_chunks)} chunks")
@@ -2548,7 +2718,9 @@ async def naive_query(
2548
  return sys_prompt
2549
 
2550
  len_of_prompts = len(tokenizer.encode(query + sys_prompt))
2551
- logger.debug(f"[naive_query]Prompt Tokens: {len_of_prompts}")
 
 
2552
 
2553
  response = await use_model_func(
2554
  query,
@@ -2672,7 +2844,9 @@ async def kg_query_with_keywords(
2672
 
2673
  tokenizer: Tokenizer = global_config["tokenizer"]
2674
  len_of_prompts = len(tokenizer.encode(query + sys_prompt))
2675
- logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
 
 
2676
 
2677
  # 6. Generate response
2678
  response = await use_model_func(
@@ -2849,6 +3023,7 @@ async def process_chunks_unified(
2849
  query_param: QueryParam,
2850
  global_config: dict,
2851
  source_type: str = "mixed",
 
2852
  ) -> list[dict]:
2853
  """
2854
  Unified processing for text chunks: deduplication, chunk_top_k limiting, reranking, and token truncation.
@@ -2859,6 +3034,7 @@ async def process_chunks_unified(
2859
  query_param: Query parameters containing configuration
2860
  global_config: Global configuration dictionary
2861
  source_type: Source type for logging ("vector", "entity", "relationship", "mixed")
 
2862
 
2863
  Returns:
2864
  Processed and filtered list of text chunks
@@ -2901,16 +3077,25 @@ async def process_chunks_unified(
2901
  # 4. Token-based final truncation
2902
  tokenizer = global_config.get("tokenizer")
2903
  if tokenizer and unique_chunks:
2904
  original_count = len(unique_chunks)
2905
  unique_chunks = truncate_list_by_token_size(
2906
  unique_chunks,
2907
  key=lambda x: x.get("content", ""),
2908
- max_token_size=query_param.max_token_for_text_unit,
2909
  tokenizer=tokenizer,
2910
  )
2911
  logger.debug(
2912
  f"Token truncation: {len(unique_chunks)} chunks from {original_count} "
2913
- f"(max tokens: {query_param.max_token_for_text_unit}, source: {source_type})"
2914
  )
2915
 
2916
  return unique_chunks
 
1569
 
1570
  tokenizer: Tokenizer = global_config["tokenizer"]
1571
  len_of_prompts = len(tokenizer.encode(query + sys_prompt))
1572
+ logger.debug(
1573
+ f"[kg_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
1574
+ )
1575
 
1576
  response = await use_model_func(
1577
  query,
 
1694
 
1695
  tokenizer: Tokenizer = global_config["tokenizer"]
1696
  len_of_prompts = len(tokenizer.encode(kw_prompt))
1697
+ logger.debug(
1698
+ f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens (Prompt: {len_of_prompts})"
1699
+ )
1700
 
1701
  # 5. Call the LLM for keyword extraction
1702
  if param.model_func:
 
1868
 
1869
  # Combine entities and relations contexts
1870
  entities_context = process_combine_contexts(
1871
+ ll_entities_context, hl_entities_context
1872
  )
1873
  relations_context = process_combine_contexts(
1874
  hl_relations_context, ll_relations_context
 
1898
  f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks"
1899
  )
1900
 
1901
+ # Unified token control system - Apply precise token limits to entities and relations
1902
+ tokenizer = text_chunks_db.global_config.get("tokenizer")
1903
+ if tokenizer:
1904
+ # Get new token limits from query_param (with fallback to global_config)
1905
+ max_entity_tokens = getattr(
1906
+ query_param,
1907
+ "max_entity_tokens",
1908
+ text_chunks_db.global_config.get("MAX_ENTITY_TOKENS", 8000),
1909
+ )
1910
+ max_relation_tokens = getattr(
1911
+ query_param,
1912
+ "max_relation_tokens",
1913
+ text_chunks_db.global_config.get("MAX_RELATION_TOKENS", 6000),
1914
+ )
1915
+ max_total_tokens = getattr(
1916
+ query_param,
1917
+ "max_total_tokens",
1918
+ text_chunks_db.global_config.get("MAX_TOTAL_TOKENS", 32000),
1919
+ )
1920
+
1921
+ # Truncate entities based on complete JSON serialization
1922
+ if entities_context:
1923
+ original_entity_count = len(entities_context)
1924
+ entities_context = truncate_list_by_token_size(
1925
+ entities_context,
1926
+ key=lambda x: json.dumps(x, ensure_ascii=False),
1927
+ max_token_size=max_entity_tokens,
1928
+ tokenizer=tokenizer,
1929
+ )
1930
+ if len(entities_context) < original_entity_count:
1931
+ logger.debug(
1932
+ f"Truncated entities: {original_entity_count} -> {len(entities_context)} (entity max tokens: {max_entity_tokens})"
1933
+ )
1934
+
1935
+ # Truncate relations based on complete JSON serialization
1936
+ if relations_context:
1937
+ original_relation_count = len(relations_context)
1938
+ relations_context = truncate_list_by_token_size(
1939
+ relations_context,
1940
+ key=lambda x: json.dumps(x, ensure_ascii=False),
1941
+ max_token_size=max_relation_tokens,
1942
+ tokenizer=tokenizer,
1943
+ )
1944
+ if len(relations_context) < original_relation_count:
1945
+ logger.debug(
1946
+ f"Truncated relations: {original_relation_count} -> {len(relations_context)} (relation max tokens: {max_relation_tokens})"
1947
+ )
1948
+
1949
+ # Calculate dynamic token limit for text chunks
1950
+ entities_str = json.dumps(entities_context, ensure_ascii=False)
1951
+ relations_str = json.dumps(relations_context, ensure_ascii=False)
1952
+
1953
+ # Calculate base context tokens (entities + relations + template)
1954
+ kg_context_template = """-----Entities(KG)-----
1955
+
1956
+ ```json
1957
+ {entities_str}
1958
+ ```
1959
+
1960
+ -----Relationships(KG)-----
1961
+
1962
+ ```json
1963
+ {relations_str}
1964
+ ```
1965
+
1966
+ -----Document Chunks(DC)-----
1967
+
1968
+ ```json
1969
+ []
1970
+ ```
1971
+
1972
+ """
1973
+ kg_context = kg_context_template.format(
1974
+ entities_str=entities_str, relations_str=relations_str
1975
+ )
1976
+ kg_context_tokens = len(tokenizer.encode(kg_context))
1977
+
1978
+ # Calculate actual system prompt overhead dynamically
1979
+ # 1. Calculate conversation history tokens
1980
+ history_context = ""
1981
+ if query_param.conversation_history:
1982
+ history_context = get_conversation_turns(
1983
+ query_param.conversation_history, query_param.history_turns
1984
+ )
1985
+ history_tokens = (
1986
+ len(tokenizer.encode(history_context)) if history_context else 0
1987
+ )
1988
+
1989
+ # 2. Calculate system prompt template tokens (excluding context_data)
1990
+ user_prompt = query_param.user_prompt if query_param.user_prompt else ""
1991
+ response_type = (
1992
+ query_param.response_type
1993
+ if query_param.response_type
1994
+ else "Multiple Paragraphs"
1995
+ )
1996
+
1997
+ # Get the system prompt template from PROMPTS
1998
+ sys_prompt_template = text_chunks_db.global_config.get(
1999
+ "system_prompt_template", PROMPTS["rag_response"]
2000
+ )
2001
+
2002
+ # Create a sample system prompt with placeholders filled (excluding context_data)
2003
+ sample_sys_prompt = sys_prompt_template.format(
2004
+ history=history_context,
2005
+ context_data="", # Empty for overhead calculation
2006
+ response_type=response_type,
2007
+ user_prompt=user_prompt,
2008
+ )
2009
+ sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
2010
+
2011
+ # Total system prompt overhead = template + query tokens
2012
+ query_tokens = len(tokenizer.encode(query))
2013
+ sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
2014
+
2015
+ buffer_tokens = 100 # Safety buffer as requested
2016
+
2017
+ # Calculate available tokens for text chunks
2018
+ used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
2019
+ available_chunk_tokens = max_total_tokens - used_tokens
2020
+
2021
+ logger.debug(
2022
+ f"Token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
2023
+ )
2024
+
2025
+ # Re-process chunks with dynamic token limit
2026
+ if text_units_context:
2027
+ # Create a temporary query_param copy with adjusted chunk token limit
2028
+ temp_chunks = [
2029
+ {"content": chunk["content"], "file_path": chunk["file_path"]}
2030
+ for chunk in text_units_context
2031
+ ]
2032
+
2033
+ # Apply token truncation to chunks using the dynamic limit
2034
+ truncated_chunks = await process_chunks_unified(
2035
+ query=query,
2036
+ chunks=temp_chunks,
2037
+ query_param=query_param,
2038
+ global_config=text_chunks_db.global_config,
2039
+ source_type="mixed",
2040
+ chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
2041
+ )
2042
+
2043
+ # Rebuild text_units_context with truncated chunks
2044
+ text_units_context = []
2045
+ for i, chunk in enumerate(truncated_chunks):
2046
+ text_units_context.append(
2047
+ {
2048
+ "id": i + 1,
2049
+ "content": chunk["content"],
2050
+ "file_path": chunk.get("file_path", "unknown_source"),
2051
+ }
2052
+ )
2053
+
2054
+ logger.debug(
2055
+ f"Re-truncated chunks for dynamic token limit: {len(temp_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
2056
+ )
2057
+
2058
  # not necessary to use LLM to generate a response
2059
  if not entities_context and not relations_context:
2060
  return None
 
2143
  knowledge_graph_inst,
2144
  )
2145
 
2146
  logger.info(
2147
  f"Local query: {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
2148
  )
 
2348
  }
2349
  all_edges_data.append(combined)
2350
 
 
2351
  all_edges_data = sorted(
2352
  all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
2353
  )
 
 
 
2354
 
2355
  return all_edges_data
2356
 
 
2407
  }
2408
  edge_datas.append(combined)
2409
 
 
2410
  edge_datas = sorted(
2411
  edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
2412
  )
 
 
 
2413
  use_entities, use_text_units = await asyncio.gather(
2414
  _find_most_related_entities_from_relationships(
2415
  edge_datas,
 
2519
  combined = {**node, "entity_name": entity_name, "rank": degree}
2520
  node_datas.append(combined)
2521
 
 
 
2522
  return node_datas
2523
 
2524
 
 
2610
  if chunks is None or len(chunks) == 0:
2611
  return PROMPTS["fail_response"]
2612
 
2613
+ # Calculate dynamic token limit for chunks
2614
+ # Get token limits from query_param (with fallback to global_config)
2615
+ max_total_tokens = getattr(
2616
+ query_param, "max_total_tokens", global_config.get("MAX_TOTAL_TOKENS", 32000)
2617
+ )
2618
+
2619
+ # Calculate conversation history tokens
2620
+ history_context = ""
2621
+ if query_param.conversation_history:
2622
+ history_context = get_conversation_turns(
2623
+ query_param.conversation_history, query_param.history_turns
2624
+ )
2625
+ history_tokens = len(tokenizer.encode(history_context)) if history_context else 0
2626
+
2627
+ # Calculate system prompt template tokens (excluding content_data)
2628
+ user_prompt = query_param.user_prompt if query_param.user_prompt else ""
2629
+ response_type = (
2630
+ query_param.response_type
2631
+ if query_param.response_type
2632
+ else "Multiple Paragraphs"
2633
+ )
2634
+
2635
+ # Use the provided system prompt or default
2636
+ sys_prompt_template = (
2637
+ system_prompt if system_prompt else PROMPTS["naive_rag_response"]
2638
+ )
2639
+
2640
+ # Create a sample system prompt with empty content_data to calculate overhead
2641
+ sample_sys_prompt = sys_prompt_template.format(
2642
+ content_data="", # Empty for overhead calculation
2643
+ response_type=response_type,
2644
+ history=history_context,
2645
+ user_prompt=user_prompt,
2646
+ )
2647
+ sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
2648
+
2649
+ # Total system prompt overhead = template + query tokens
2650
+ query_tokens = len(tokenizer.encode(query))
2651
+ sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
2652
+
2653
+ buffer_tokens = 100 # Safety buffer
2654
+
2655
+ # Calculate available tokens for chunks
2656
+ used_tokens = sys_prompt_overhead + buffer_tokens
2657
+ available_chunk_tokens = max_total_tokens - used_tokens
2658
+
2659
+ logger.debug(
2660
+ f"Naive query token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
2661
+ )
2662
+
2663
+ # Process chunks using unified processing with dynamic token limit
2664
  processed_chunks = await process_chunks_unified(
2665
  query=query,
2666
  chunks=chunks,
2667
  query_param=query_param,
2668
  global_config=global_config,
2669
  source_type="vector",
2670
+ chunk_token_limit=available_chunk_tokens, # Pass dynamic limit
2671
  )
2672
 
2673
  logger.info(f"Final context: {len(processed_chunks)} chunks")
 
2718
  return sys_prompt
2719
 
2720
  len_of_prompts = len(tokenizer.encode(query + sys_prompt))
2721
+ logger.debug(
2722
+ f"[naive_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
2723
+ )
2724
 
2725
  response = await use_model_func(
2726
  query,
 
2844
 
2845
  tokenizer: Tokenizer = global_config["tokenizer"]
2846
  len_of_prompts = len(tokenizer.encode(query + sys_prompt))
2847
+ logger.debug(
2848
+ f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
2849
+ )
2850
 
2851
  # 6. Generate response
2852
  response = await use_model_func(
 
3023
  query_param: QueryParam,
3024
  global_config: dict,
3025
  source_type: str = "mixed",
3026
+ chunk_token_limit: int = None, # Add parameter for dynamic token limit
3027
  ) -> list[dict]:
3028
  """
3029
  Unified processing for text chunks: deduplication, chunk_top_k limiting, reranking, and token truncation.
 
3034
  query_param: Query parameters containing configuration
3035
  global_config: Global configuration dictionary
3036
  source_type: Source type for logging ("vector", "entity", "relationship", "mixed")
3037
+ chunk_token_limit: Dynamic token limit for chunks (if None, uses default)
3038
 
3039
  Returns:
3040
  Processed and filtered list of text chunks
 
3077
  # 4. Token-based final truncation
3078
  tokenizer = global_config.get("tokenizer")
3079
  if tokenizer and unique_chunks:
3080
+ # Set default chunk_token_limit if not provided
3081
+ if chunk_token_limit is None:
3082
+ # Get default from query_param or global_config
3083
+ chunk_token_limit = getattr(
3084
+ query_param,
3085
+ "max_total_tokens",
3086
+ global_config.get("MAX_TOTAL_TOKENS", 32000),
3087
+ )
3088
+
3089
  original_count = len(unique_chunks)
3090
  unique_chunks = truncate_list_by_token_size(
3091
  unique_chunks,
3092
  key=lambda x: x.get("content", ""),
3093
+ max_token_size=chunk_token_limit,
3094
  tokenizer=tokenizer,
3095
  )
3096
  logger.debug(
3097
  f"Token truncation: {len(unique_chunks)} chunks from {original_count} "
3098
+ f"(chunk available tokens: {chunk_token_limit}, source: {source_type})"
3099
  )
3100
 
3101
  return unique_chunks
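The chunk budget in `_build_query_context` and `naive_query` is now derived rather than fixed: the entity/relation context, the system-prompt overhead, and a 100-token buffer are measured first, and whatever remains of `max_total_tokens` is passed to `process_chunks_unified` as `chunk_token_limit`. A minimal sketch of that arithmetic (the helper name and sample numbers are illustrative only):

```python
def available_chunk_tokens(
    max_total_tokens: int,
    kg_context_tokens: int,    # tokens of the serialized entity/relation context
    sys_prompt_overhead: int,  # prompt template + history + query tokens
    buffer_tokens: int = 100,  # safety buffer used in this commit
) -> int:
    """Tokens left for document chunks; can go negative if the KG context
    plus prompt overhead already exceed the total budget."""
    return max_total_tokens - (kg_context_tokens + sys_prompt_overhead + buffer_tokens)

# Example with the default 32k budget:
# 32000 - (12000 + 1500 + 100) = 18400 tokens available for chunks.
print(available_chunk_tokens(32000, 12000, 1500))
```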
lightrag_webui/src/api/lightrag.ts CHANGED
@@ -90,12 +90,16 @@ export type QueryRequest = {
90
  stream?: boolean
91
  /** Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode. */
92
  top_k?: number
93
- /** Maximum number of tokens allowed for each retrieved text chunk. */
94
- max_token_for_text_unit?: number
95
- /** Maximum number of tokens allocated for relationship descriptions in global retrieval. */
96
- max_token_for_global_context?: number
97
- /** Maximum number of tokens allocated for entity descriptions in local retrieval. */
98
- max_token_for_local_context?: number
99
  /**
100
  * Stores past conversation history to maintain context.
101
  * Format: [{"role": "user/assistant", "content": "message"}].
 
90
  stream?: boolean
91
  /** Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode. */
92
  top_k?: number
93
+ /** Maximum number of text chunks to retrieve and process. */
94
+ chunk_top_k?: number
95
+ /** Number of text chunks to keep after reranking. */
96
+ chunk_rerank_top_k?: number
97
+ /** Maximum number of tokens allocated for entity context in unified token control system. */
98
+ max_entity_tokens?: number
99
+ /** Maximum number of tokens allocated for relationship context in unified token control system. */
100
+ max_relation_tokens?: number
101
+ /** Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt). */
102
+ max_total_tokens?: number
103
  /**
104
  * Stores past conversation history to maintain context.
105
  * Format: [{"role": "user/assistant", "content": "message"}].
lightrag_webui/src/components/retrieval/QuerySettings.tsx CHANGED
@@ -132,30 +132,81 @@ export default function QuerySettings() {
132
  </div>
133
  </>
134
 
 
 
 
135
  {/* Max Tokens */}
136
  <>
137
  <>
138
  <TooltipProvider>
139
  <Tooltip>
140
  <TooltipTrigger asChild>
141
- <label htmlFor="max_token_for_text_unit" className="ml-1 cursor-help">
142
- {t('retrievePanel.querySettings.maxTokensTextUnit')}
143
  </label>
144
  </TooltipTrigger>
145
  <TooltipContent side="left">
146
- <p>{t('retrievePanel.querySettings.maxTokensTextUnitTooltip')}</p>
147
  </TooltipContent>
148
  </Tooltip>
149
  </TooltipProvider>
150
  <div>
151
- {/* Removed sr-only label */}
152
  <NumberInput
153
- id="max_token_for_text_unit"
154
  stepper={500}
155
- value={querySettings.max_token_for_text_unit}
156
- onValueChange={(v) => handleChange('max_token_for_text_unit', v)}
157
  min={1}
158
- placeholder={t('retrievePanel.querySettings.maxTokensTextUnit')}
159
  />
160
  </div>
161
  </>
@@ -164,24 +215,23 @@ export default function QuerySettings() {
164
  <TooltipProvider>
165
  <Tooltip>
166
  <TooltipTrigger asChild>
167
- <label htmlFor="max_token_for_global_context" className="ml-1 cursor-help">
168
- {t('retrievePanel.querySettings.maxTokensGlobalContext')}
169
  </label>
170
  </TooltipTrigger>
171
  <TooltipContent side="left">
172
- <p>{t('retrievePanel.querySettings.maxTokensGlobalContextTooltip')}</p>
173
  </TooltipContent>
174
  </Tooltip>
175
  </TooltipProvider>
176
  <div>
177
- {/* Removed sr-only label */}
178
  <NumberInput
179
- id="max_token_for_global_context"
180
  stepper={500}
181
- value={querySettings.max_token_for_global_context}
182
- onValueChange={(v) => handleChange('max_token_for_global_context', v)}
183
  min={1}
184
- placeholder={t('retrievePanel.querySettings.maxTokensGlobalContext')}
185
  />
186
  </div>
187
  </>
@@ -190,24 +240,23 @@ export default function QuerySettings() {
190
  <TooltipProvider>
191
  <Tooltip>
192
  <TooltipTrigger asChild>
193
- <label htmlFor="max_token_for_local_context" className="ml-1 cursor-help">
194
- {t('retrievePanel.querySettings.maxTokensLocalContext')}
195
  </label>
196
  </TooltipTrigger>
197
  <TooltipContent side="left">
198
- <p>{t('retrievePanel.querySettings.maxTokensLocalContextTooltip')}</p>
199
  </TooltipContent>
200
  </Tooltip>
201
  </TooltipProvider>
202
  <div>
203
- {/* Removed sr-only label */}
204
  <NumberInput
205
- id="max_token_for_local_context"
206
- stepper={500}
207
- value={querySettings.max_token_for_local_context}
208
- onValueChange={(v) => handleChange('max_token_for_local_context', v)}
209
  min={1}
210
- placeholder={t('retrievePanel.querySettings.maxTokensLocalContext')}
211
  />
212
  </div>
213
  </>
 
132
  </div>
133
  </>
134
 
135
+ {/* Chunk Top K */}
136
+ <>
137
+ <TooltipProvider>
138
+ <Tooltip>
139
+ <TooltipTrigger asChild>
140
+ <label htmlFor="chunk_top_k" className="ml-1 cursor-help">
141
+ {t('retrievePanel.querySettings.chunkTopK')}
142
+ </label>
143
+ </TooltipTrigger>
144
+ <TooltipContent side="left">
145
+ <p>{t('retrievePanel.querySettings.chunkTopKTooltip')}</p>
146
+ </TooltipContent>
147
+ </Tooltip>
148
+ </TooltipProvider>
149
+ <div>
150
+ <NumberInput
151
+ id="chunk_top_k"
152
+ stepper={1}
153
+ value={querySettings.chunk_top_k}
154
+ onValueChange={(v) => handleChange('chunk_top_k', v)}
155
+ min={1}
156
+ placeholder={t('retrievePanel.querySettings.chunkTopKPlaceholder')}
157
+ />
158
+ </div>
159
+ </>
160
+
161
+ {/* Chunk Rerank Top K */}
162
+ <>
163
+ <TooltipProvider>
164
+ <Tooltip>
165
+ <TooltipTrigger asChild>
166
+ <label htmlFor="chunk_rerank_top_k" className="ml-1 cursor-help">
167
+ {t('retrievePanel.querySettings.chunkRerankTopK')}
168
+ </label>
169
+ </TooltipTrigger>
170
+ <TooltipContent side="left">
171
+ <p>{t('retrievePanel.querySettings.chunkRerankTopKTooltip')}</p>
172
+ </TooltipContent>
173
+ </Tooltip>
174
+ </TooltipProvider>
175
+ <div>
176
+ <NumberInput
177
+ id="chunk_rerank_top_k"
178
+ stepper={1}
179
+ value={querySettings.chunk_rerank_top_k}
180
+ onValueChange={(v) => handleChange('chunk_rerank_top_k', v)}
181
+ min={1}
182
+ placeholder={t('retrievePanel.querySettings.chunkRerankTopKPlaceholder')}
183
+ />
184
+ </div>
185
+ </>
186
+
187
  {/* Max Tokens */}
188
  <>
189
  <>
190
  <TooltipProvider>
191
  <Tooltip>
192
  <TooltipTrigger asChild>
193
+ <label htmlFor="max_entity_tokens" className="ml-1 cursor-help">
194
+ {t('retrievePanel.querySettings.maxEntityTokens')}
195
  </label>
196
  </TooltipTrigger>
197
  <TooltipContent side="left">
198
+ <p>{t('retrievePanel.querySettings.maxEntityTokensTooltip')}</p>
199
  </TooltipContent>
200
  </Tooltip>
201
  </TooltipProvider>
202
  <div>
 
203
  <NumberInput
204
+ id="max_entity_tokens"
205
  stepper={500}
206
+ value={querySettings.max_entity_tokens}
207
+ onValueChange={(v) => handleChange('max_entity_tokens', v)}
208
  min={1}
209
+ placeholder={t('retrievePanel.querySettings.maxEntityTokens')}
210
  />
211
  </div>
212
  </>
 
215
  <TooltipProvider>
216
  <Tooltip>
217
  <TooltipTrigger asChild>
218
+ <label htmlFor="max_relation_tokens" className="ml-1 cursor-help">
219
+ {t('retrievePanel.querySettings.maxRelationTokens')}
220
  </label>
221
  </TooltipTrigger>
222
  <TooltipContent side="left">
223
+ <p>{t('retrievePanel.querySettings.maxRelationTokensTooltip')}</p>
224
  </TooltipContent>
225
  </Tooltip>
226
  </TooltipProvider>
227
  <div>
 
228
  <NumberInput
229
+ id="max_relation_tokens"
230
  stepper={500}
231
+ value={querySettings.max_relation_tokens}
232
+ onValueChange={(v) => handleChange('max_relation_tokens', v)}
233
  min={1}
234
+ placeholder={t('retrievePanel.querySettings.maxRelationTokens')}
235
  />
236
  </div>
237
  </>
 
240
  <TooltipProvider>
241
  <Tooltip>
242
  <TooltipTrigger asChild>
243
+ <label htmlFor="max_total_tokens" className="ml-1 cursor-help">
244
+ {t('retrievePanel.querySettings.maxTotalTokens')}
245
  </label>
246
  </TooltipTrigger>
247
  <TooltipContent side="left">
248
+ <p>{t('retrievePanel.querySettings.maxTotalTokensTooltip')}</p>
249
  </TooltipContent>
250
  </Tooltip>
251
  </TooltipProvider>
252
  <div>
 
253
  <NumberInput
254
+ id="max_total_tokens"
255
+ stepper={1000}
256
+ value={querySettings.max_total_tokens}
257
+ onValueChange={(v) => handleChange('max_total_tokens', v)}
258
  min={1}
259
+ placeholder={t('retrievePanel.querySettings.maxTotalTokens')}
260
  />
261
  </div>
262
  </>
lightrag_webui/src/locales/ar.json CHANGED
@@ -359,16 +359,22 @@
359
  "singleParagraph": "فقرة واحدة",
360
  "bulletPoints": "نقاط نقطية"
361
  },
362
- "topK": "أعلى K نتائج",
363
- "topKTooltip": "عدد العناصر العلوية للاسترجاع. يمثل الكيانات في وضع 'محلي' والعلاقات في وضع 'عالمي'",
364
- "topKPlaceholder": "عدد النتائج",
365
- "maxTokensTextUnit": "أقصى عدد من الرموز لوحدة النص",
366
- "maxTokensTextUnitTooltip": "الحد الأقصى لعدد الرموز المسموح به لكل جزء نصي مسترجع",
367
- "maxTokensGlobalContext": "أقصى عدد من الرموز للسياق العالمي",
368
- "maxTokensGlobalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف العلاقات في الاسترجاع العالمي",
369
- "maxTokensLocalContext": "أقصى عدد من الرموز للسياق المحلي",
370
- "maxTokensLocalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف الكيانات في الاسترجاع المحلي",
371
- "historyTurns": "دورات التاريخ",
 
372
  "historyTurnsTooltip": "عدد الدورات الكاملة للمحادثة (أزواج المستخدم-المساعد) التي يجب مراعاتها في سياق الرد",
373
  "historyTurnsPlaceholder": "عدد دورات التاريخ",
374
  "onlyNeedContext": "تحتاج فقط إلى السياق",
 
359
  "singleParagraph": "فقرة واحدة",
360
  "bulletPoints": "نقاط نقطية"
361
  },
362
+ "topK": "أعلى K",
363
+ "topKTooltip": "عدد العناصر العلوية للاسترداد. يمثل الكيانات في الوضع 'المحلي' والعلاقات في الوضع 'العالمي'.",
364
+ "topKPlaceholder": "أدخل قيمة أعلى k",
365
+ "chunkTopK": "أعلى K للقطع",
366
+ "chunkTopKTooltip": "العدد الأقصى لقطع النص المراد استردادها ومعالجتها.",
367
+ "chunkTopKPlaceholder": "أدخل قيمة أعلى k للقطع",
368
+ "chunkRerankTopK": "أعلى K لإعادة الترتيب",
369
+ "chunkRerankTopKTooltip": "عدد قطع النص المراد الاحتفاظ بها بعد إعادة الترتيب.",
370
+ "chunkRerankTopKPlaceholder": "أدخل قيمة أعلى k لإعادة الترتيب",
371
+ "maxEntityTokens": "الحد الأقصى لرموز الكيان",
372
+ "maxEntityTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق الكيان في نظام التحكم الموحد في الرموز",
373
+ "maxRelationTokens": "الحد الأقصى لرموز العلاقة",
374
+ "maxRelationTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق العلاقة في نظام التحكم الموحد في الرموز",
375
+ "maxTotalTokens": "إجمالي الحد الأقصى للرموز",
376
+ "maxTotalTokensTooltip": "الحد الأقصى الإجمالي لميزانية الرموز لسياق الاستعلام بالكامل (الكيانات + العلاقات + الأجزاء + موجه النظام)",
377
+ "historyTurns": "أدوار التاريخ",
378
  "historyTurnsTooltip": "عدد الدورات الكاملة للمحادثة (أزواج المستخدم-المساعد) التي يجب مراعاتها في سياق الرد",
379
  "historyTurnsPlaceholder": "عدد دورات التاريخ",
380
  "onlyNeedContext": "تحتاج فقط إلى السياق",
lightrag_webui/src/locales/en.json CHANGED
@@ -359,15 +359,21 @@
359
  "singleParagraph": "Single Paragraph",
360
  "bulletPoints": "Bullet Points"
361
  },
362
- "topK": "Top K Results",
363
- "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode",
364
- "topKPlaceholder": "Number of results",
365
- "maxTokensTextUnit": "Max Tokens for Text Unit",
366
- "maxTokensTextUnitTooltip": "Maximum number of tokens allowed for each retrieved text chunk",
367
- "maxTokensGlobalContext": "Max Tokens for Global Context",
368
- "maxTokensGlobalContextTooltip": "Maximum number of tokens allocated for relationship descriptions in global retrieval",
369
- "maxTokensLocalContext": "Max Tokens for Local Context",
370
- "maxTokensLocalContextTooltip": "Maximum number of tokens allocated for entity descriptions in local retrieval",
 
371
  "historyTurns": "History Turns",
372
  "historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context",
373
  "historyTurnsPlaceholder": "Number of history turns",
 
359
  "singleParagraph": "Single Paragraph",
360
  "bulletPoints": "Bullet Points"
361
  },
362
+ "topK": "Top K",
363
+ "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.",
364
+ "topKPlaceholder": "Enter top k value",
365
+ "chunkTopK": "Chunk Top K",
366
+ "chunkTopKTooltip": "Maximum number of text chunks to retrieve and process.",
367
+ "chunkTopKPlaceholder": "Enter chunk top k value",
368
+ "chunkRerankTopK": "Chunk Rerank Top K",
369
+ "chunkRerankTopKTooltip": "Number of text chunks to keep after reranking.",
370
+ "chunkRerankTopKPlaceholder": "Enter rerank top k value",
371
+ "maxEntityTokens": "Max Entity Tokens",
372
+ "maxEntityTokensTooltip": "Maximum number of tokens allocated for entity context in unified token control system",
373
+ "maxRelationTokens": "Max Relation Tokens",
374
+ "maxRelationTokensTooltip": "Maximum number of tokens allocated for relationship context in unified token control system",
375
+ "maxTotalTokens": "Max Total Tokens",
376
+ "maxTotalTokensTooltip": "Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)",
377
  "historyTurns": "History Turns",
378
  "historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context",
379
  "historyTurnsPlaceholder": "Number of history turns",
lightrag_webui/src/locales/fr.json CHANGED
@@ -359,15 +359,21 @@
359
  "singleParagraph": "Paragraphe unique",
360
  "bulletPoints": "Points à puces"
361
  },
362
- "topK": "Top K résultats",
363
- "topKTooltip": "Nombre d'éléments supérieurs à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'",
364
- "topKPlaceholder": "Nombre de résultats",
365
- "maxTokensTextUnit": "Nombre maximum de jetons pour l'unité de texte",
366
- "maxTokensTextUnitTooltip": "Nombre maximum de jetons autorisés pour chaque fragment de texte récupéré",
367
- "maxTokensGlobalContext": "Nombre maximum de jetons pour le contexte global",
368
- "maxTokensGlobalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des relations dans la récupération globale",
369
- "maxTokensLocalContext": "Nombre maximum de jetons pour le contexte local",
370
- "maxTokensLocalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des entités dans la récupération locale",
 
371
  "historyTurns": "Tours d'historique",
372
  "historyTurnsTooltip": "Nombre de tours complets de conversation (paires utilisateur-assistant) à prendre en compte dans le contexte de la réponse",
373
  "historyTurnsPlaceholder": "Nombre de tours d'historique",
 
359
  "singleParagraph": "Paragraphe unique",
360
  "bulletPoints": "Points à puces"
361
  },
362
+ "topK": "Top K",
363
+ "topKTooltip": "Nombre d'éléments principaux à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'.",
364
+ "topKPlaceholder": "Entrez la valeur top k",
365
+ "chunkTopK": "Top K des Chunks",
366
+ "chunkTopKTooltip": "Nombre maximum de chunks de texte à récupérer et traiter.",
367
+ "chunkTopKPlaceholder": "Entrez la valeur top k des chunks",
368
+ "chunkRerankTopK": "Top K du Reclassement",
369
+ "chunkRerankTopKTooltip": "Nombre de chunks de texte à conserver après reclassement.",
370
+ "chunkRerankTopKPlaceholder": "Entrez la valeur top k du reclassement",
371
+ "maxEntityTokens": "Limite de jetons d'entité",
372
+ "maxEntityTokensTooltip": "Nombre maximum de jetons alloués au contexte d'entité dans le système de contrôle de jetons unifié",
373
+ "maxRelationTokens": "Limite de jetons de relation",
374
+ "maxRelationTokensTooltip": "Nombre maximum de jetons alloués au contexte de relation dans le système de contrôle de jetons unifié",
375
+ "maxTotalTokens": "Limite totale de jetons",
376
+ "maxTotalTokensTooltip": "Budget total maximum de jetons pour l'ensemble du contexte de requête (entités + relations + blocs + prompt système)",
377
  "historyTurns": "Tours d'historique",
378
  "historyTurnsTooltip": "Nombre de tours complets de conversation (paires utilisateur-assistant) à prendre en compte dans le contexte de la réponse",
379
  "historyTurnsPlaceholder": "Nombre de tours d'historique",
lightrag_webui/src/locales/zh.json CHANGED
@@ -359,15 +359,21 @@
359
  "singleParagraph": "单段落",
360
  "bulletPoints": "要点"
361
  },
362
- "topK": "Top K结果",
363
- "topKTooltip": "检索的顶部项目数。在'local'模式下表示实体,在'global'模式下表示关系",
364
- "topKPlaceholder": "结果数量",
365
- "maxTokensTextUnit": "文本单元最大令牌数",
366
- "maxTokensTextUnitTooltip": "每个检索文本块允许的最大令牌数",
367
- "maxTokensGlobalContext": "全局上下文最大令牌数",
368
- "maxTokensGlobalContextTooltip": "全局检索中关系描述的最大令牌数",
369
- "maxTokensLocalContext": "本地上下文最大令牌数",
370
- "maxTokensLocalContextTooltip": "本地检索中实体描述的最大令牌数",
 
371
  "historyTurns": "历史轮次",
372
  "historyTurnsTooltip": "响应上下文中考虑的完整对话轮次(用户-助手对)数量",
373
  "historyTurnsPlaceholder": "历史轮次数",
 
359
  "singleParagraph": "单段落",
360
  "bulletPoints": "要点"
361
  },
362
+ "topK": "Top K",
363
+ "topKTooltip": "检索的顶部条目数量。在'local'模式下表示实体,在'global'模式下表示关系。",
364
+ "topKPlaceholder": "输入top k值",
365
+ "chunkTopK": "文本块 Top K",
366
+ "chunkTopKTooltip": "检索和处理的最大文本块数量。",
367
+ "chunkTopKPlaceholder": "输入文本块top k值",
368
+ "chunkRerankTopK": "重排序 Top K",
369
+ "chunkRerankTopKTooltip": "重排序后保留的文本块数量。",
370
+ "chunkRerankTopKPlaceholder": "输入重排序top k值",
371
+ "maxEntityTokens": "实体令牌数上限",
372
+ "maxEntityTokensTooltip": "统一令牌控制系统中分配给实体上下文的最大令牌数",
373
+ "maxRelationTokens": "关系令牌数上限",
374
+ "maxRelationTokensTooltip": "统一令牌控制系统中分配给关系上下文的最大令牌数",
375
+ "maxTotalTokens": "总令牌数上限",
376
+ "maxTotalTokensTooltip": "整个查询上下文的最大总令牌预算(实体+关系+文档块+系统提示)",
377
  "historyTurns": "历史轮次",
378
  "historyTurnsTooltip": "响应上下文中考虑的完整对话轮次(用户-助手对)数量",
379
  "historyTurnsPlaceholder": "历史轮次数",
lightrag_webui/src/locales/zh_TW.json CHANGED
@@ -300,7 +300,7 @@
300
  "file_path": "來源",
301
  "keywords": "Keys",
302
  "weight": "權重"
303
- }
304
  },
305
  "edge": {
306
  "title": "關係",
@@ -359,15 +359,15 @@
359
  "singleParagraph": "單段落",
360
  "bulletPoints": "重點"
361
  },
362
- "topK": "Top K結果",
363
- "topKTooltip": "檢索的前幾項結果數。在'local'模式下表示實體,在'global'模式下表示關係",
364
- "topKPlaceholder": "結果數量",
365
- "maxTokensTextUnit": "文字單元最大權杖數",
366
- "maxTokensTextUnitTooltip": "每個檢索文字區塊允許的最大權杖數",
367
- "maxTokensGlobalContext": "全域上下文最大權杖數",
368
- "maxTokensGlobalContextTooltip": "全域檢索中關係描述的最大權杖數",
369
- "maxTokensLocalContext": "本地上下文最大權杖數",
370
- "maxTokensLocalContextTooltip": "本地檢索中實體描述的最大權杖數",
371
  "historyTurns": "歷史輪次",
372
  "historyTurnsTooltip": "回應上下文中考慮的完整對話輪次(使用者-助手對)數量",
373
  "historyTurnsPlaceholder": "歷史輪次數",
@@ -379,7 +379,13 @@
379
  "streamResponseTooltip": "如果為True,啟用即時串流輸出回應",
380
  "userPrompt": "用戶提示詞",
381
  "userPromptTooltip": "向LLM提供額外的響應要求(與查詢內容無關,僅用於處理輸出)。",
382
- "userPromptPlaceholder": "輸入自定義提示詞(可選)"
383
  }
384
  },
385
  "apiSite": {
 
300
  "file_path": "來源",
301
  "keywords": "Keys",
302
  "weight": "權重"
303
+ }
304
  },
305
  "edge": {
306
  "title": "關係",
 
359
  "singleParagraph": "單段落",
360
  "bulletPoints": "重點"
361
  },
362
+ "topK": "Top K",
363
+ "topKTooltip": "檢索的頂部條目數量。在'local'模式下表示實體,在'global'模式下表示關係。",
364
+ "topKPlaceholder": "輸入top k值",
365
+ "chunkTopK": "文字區塊 Top K",
366
+ "chunkTopKTooltip": "檢索和處理的最大文字區塊數量。",
367
+ "chunkTopKPlaceholder": "輸入文字區塊top k值",
368
+ "chunkRerankTopK": "重新排序 Top K",
369
+ "chunkRerankTopKTooltip": "重新排序後保留的文字區塊數量。",
370
+ "chunkRerankTopKPlaceholder": "輸入重新排序top k值",
371
  "historyTurns": "歷史輪次",
372
  "historyTurnsTooltip": "回應上下文中考慮的完整對話輪次(使用者-助手對)數量",
373
  "historyTurnsPlaceholder": "歷史輪次數",
 
379
  "streamResponseTooltip": "如果為True,啟用即時串流輸出回應",
380
  "userPrompt": "用戶提示詞",
381
  "userPromptTooltip": "向LLM提供額外的響應要求(與查詢內容無關,僅用於處理輸出)。",
382
+ "userPromptPlaceholder": "輸入自定義提示詞(可選)",
383
+ "maxEntityTokens": "實體令牌數上限",
384
+ "maxEntityTokensTooltip": "統一令牌控制系統中分配給實體上下文的最大令牌數",
385
+ "maxRelationTokens": "關係令牌數上限",
386
+ "maxRelationTokensTooltip": "統一令牌控制系統中分配給關係上下文的最大令牌數",
387
+ "maxTotalTokens": "總令牌數上限",
388
+ "maxTotalTokensTooltip": "整個查詢上下文的最大總令牌預算(實體+關係+文檔塊+系統提示)"
389
  }
390
  },
391
  "apiSite": {
lightrag_webui/src/stores/settings.ts CHANGED
@@ -111,9 +111,11 @@ const useSettingsStoreBase = create<SettingsState>()(
111
  mode: 'global',
112
  response_type: 'Multiple Paragraphs',
113
  top_k: 10,
114
- max_token_for_text_unit: 6000,
115
- max_token_for_global_context: 4000,
116
- max_token_for_local_context: 4000,
 
 
117
  only_need_context: false,
118
  only_need_prompt: false,
119
  stream: true,
@@ -192,7 +194,7 @@ const useSettingsStoreBase = create<SettingsState>()(
192
  {
193
  name: 'settings-storage',
194
  storage: createJSONStorage(() => localStorage),
195
- version: 14,
196
  migrate: (state: any, version: number) => {
197
  if (version < 2) {
198
  state.showEdgeLabel = false
@@ -215,9 +217,9 @@ const useSettingsStoreBase = create<SettingsState>()(
215
  mode: 'global',
216
  response_type: 'Multiple Paragraphs',
217
  top_k: 10,
218
- max_token_for_text_unit: 4000,
219
- max_token_for_global_context: 4000,
220
- max_token_for_local_context: 4000,
221
  only_need_context: false,
222
  only_need_prompt: false,
223
  stream: true,
@@ -260,6 +262,26 @@ const useSettingsStoreBase = create<SettingsState>()(
260
  // Add backendMaxGraphNodes field for older versions
261
  state.backendMaxGraphNodes = null
262
  }
263
  return state
264
  }
265
  }
 
111
  mode: 'global',
112
  response_type: 'Multiple Paragraphs',
113
  top_k: 10,
114
+ chunk_top_k: 5,
115
+ chunk_rerank_top_k: 5,
116
+ max_entity_tokens: 10000,
117
+ max_relation_tokens: 10000,
118
+ max_total_tokens: 32000,
119
  only_need_context: false,
120
  only_need_prompt: false,
121
  stream: true,
 
194
  {
195
  name: 'settings-storage',
196
  storage: createJSONStorage(() => localStorage),
197
+ version: 15,
198
  migrate: (state: any, version: number) => {
199
  if (version < 2) {
200
  state.showEdgeLabel = false
 
217
  mode: 'global',
218
  response_type: 'Multiple Paragraphs',
219
  top_k: 10,
220
+ max_entity_tokens: 10000,
221
+ max_relation_tokens: 10000,
222
+ max_total_tokens: 32000,
223
  only_need_context: false,
224
  only_need_prompt: false,
225
  stream: true,
 
262
  // Add backendMaxGraphNodes field for older versions
263
  state.backendMaxGraphNodes = null
264
  }
265
+ if (version < 15) {
266
+ // Fully update querySettings to the unified token control system
267
+ state.querySettings = {
268
+ mode: 'global',
269
+ response_type: 'Multiple Paragraphs',
270
+ top_k: 10,
271
+ chunk_top_k: 5,
272
+ chunk_rerank_top_k: 5,
273
+ max_entity_tokens: 10000,
274
+ max_relation_tokens: 10000,
275
+ max_total_tokens: 32000,
276
+ only_need_context: false,
277
+ only_need_prompt: false,
278
+ stream: true,
279
+ history_turns: 3,
280
+ hl_keywords: [],
281
+ ll_keywords: [],
282
+ user_prompt: ''
283
+ }
284
+ }
285
  return state
286
  }
287
  }