zrguo committed
Commit · 53952fe
1 Parent(s): 4644ee6
Update token limit
Browse files
- README-zh.md +6 -8
- README.md +6 -8
- env.example +6 -3
- lightrag/api/routers/query_routes.py +9 -9
- lightrag/base.py +6 -8
- lightrag/operate.py +235 -50
- lightrag_webui/src/api/lightrag.ts +10 -6
- lightrag_webui/src/components/retrieval/QuerySettings.tsx +74 -25
- lightrag_webui/src/locales/ar.json +16 -10
- lightrag_webui/src/locales/en.json +15 -9
- lightrag_webui/src/locales/fr.json +15 -9
- lightrag_webui/src/locales/zh.json +15 -9
- lightrag_webui/src/locales/zh_TW.json +17 -11
- lightrag_webui/src/stores/settings.ts +29 -7
README-zh.md
CHANGED
@@ -304,16 +304,14 @@ class QueryParam:
     If None, keeps all chunks returned from initial retrieval.
     """

-
-    """Maximum number of tokens
-
-
-    )
-    """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
-
-    """Maximum
+    max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
+    """Maximum number of tokens allocated for entity context in unified token control system."""
+
+    max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
+    """Maximum number of tokens allocated for relationship context in unified token control system."""
+
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""

     hl_keywords: list[str] = field(default_factory=list)
     """List of high-level keywords to prioritize in retrieval."""
README.md
CHANGED
@@ -311,16 +311,14 @@ class QueryParam:
     If None, keeps all chunks returned from initial retrieval.
     """

-
-    """Maximum number of tokens
-
-
-    )
-    """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
-
-    """Maximum
+    max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
+    """Maximum number of tokens allocated for entity context in unified token control system."""
+
+    max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
+    """Maximum number of tokens allocated for relationship context in unified token control system."""
+
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""

     conversation_history: list[dict[str, str]] = field(default_factory=list)
     """Stores past conversation history to maintain context.
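The new QueryParam fields above replace the older per-part token caps with one overall budget plus separate entity and relation limits. A minimal usage sketch follows (it assumes a LightRAG instance `rag` has already been initialized as shown elsewhere in the README; only the QueryParam field names come from this commit):

```python
from lightrag import QueryParam  # QueryParam is exported at package level

# Per-query override of the env-var defaults (MAX_ENTITY_TOKENS, etc.)
param = QueryParam(
    mode="hybrid",
    max_entity_tokens=6000,    # budget for the entity context table
    max_relation_tokens=8000,  # budget for the relationship context table
    max_total_tokens=28000,    # overall budget: entities + relations + chunks + system prompt
)
# result = await rag.aquery("What are the key findings?", param=param)
```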
env.example
CHANGED
@@ -50,9 +50,12 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 
 ### RAG Query Configuration
 # HISTORY_TURNS=3
-
-
-#
+
+### These parameters provide more precise control over total token usage
+# MAX_ENTITY_TOKENS=10000
+# MAX_RELATION_TOKENS=10000
+# MAX_TOTAL_TOKENS=32000
+
 # COSINE_THRESHOLD=0.2
 ### Number of entities or relations to retrieve from KG
 # TOP_K=60
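These environment variables only seed defaults; the base.py hunk later in this commit reads each one once with os.getenv when QueryParam is defined, and an explicit QueryParam argument still takes precedence. A small sketch of that precedence (illustrative only, names match this diff):

```python
import os

# Mirrors the os.getenv() fallbacks added to lightrag/base.py in this commit.
max_entity_tokens = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
max_relation_tokens = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
max_total_tokens = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))

print(max_entity_tokens, max_relation_tokens, max_total_tokens)
```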
lightrag/api/routers/query_routes.py
CHANGED
@@ -61,22 +61,22 @@ class QueryRequest(BaseModel):
         description="Number of text chunks to keep after reranking.",
     )

-
-        gt=1,
+    max_entity_tokens: Optional[int] = Field(
         default=None,
-        description="Maximum number of tokens
+        description="Maximum number of tokens allocated for entity context in unified token control system.",
+        ge=1,
     )

-
-        gt=1,
+    max_relation_tokens: Optional[int] = Field(
         default=None,
-        description="Maximum number of tokens allocated for relationship
+        description="Maximum number of tokens allocated for relationship context in unified token control system.",
+        ge=1,
     )

-
-        gt=1,
+    max_total_tokens: Optional[int] = Field(
         default=None,
-        description="Maximum
+        description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).",
+        ge=1,
     )

     conversation_history: Optional[List[Dict[str, Any]]] = Field(
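The REST API accepts the same limits per request. A hedged example request (the /query path, port, and response field are the server's usual defaults and are assumptions here; only the request field names come from this diff):

```python
import requests

payload = {
    "query": "Summarize the main entities and how they relate.",
    "mode": "hybrid",
    "max_entity_tokens": 8000,
    "max_relation_tokens": 8000,
    "max_total_tokens": 30000,  # must also cover chunks and the system prompt
}
# Assumes a LightRAG API server is running locally on its default port.
resp = requests.post("http://localhost:9621/query", json=payload, timeout=120)
print(resp.json().get("response"))
```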
lightrag/base.py
CHANGED
@@ -70,16 +70,14 @@ class QueryParam:
     If None, keeps all chunks returned from initial retrieval.
     """

-
-    """Maximum number of tokens
-
-
-    )
-    """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
-
-    """Maximum
+    max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
+    """Maximum number of tokens allocated for entity context in unified token control system."""
+
+    max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
+    """Maximum number of tokens allocated for relationship context in unified token control system."""
+
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
+    """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""

     hl_keywords: list[str] = field(default_factory=list)
     """List of high-level keywords to prioritize in retrieval."""
lightrag/operate.py
CHANGED
@@ -1569,7 +1569,9 @@ async def kg_query(
 
     tokenizer: Tokenizer = global_config["tokenizer"]
     len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(
+    logger.debug(
+        f"[kg_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
+    )

     response = await use_model_func(
         query,
@@ -1692,7 +1694,9 @@ async def extract_keywords_only(

     tokenizer: Tokenizer = global_config["tokenizer"]
     len_of_prompts = len(tokenizer.encode(kw_prompt))
-    logger.debug(
+    logger.debug(
+        f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens (Prompt: {len_of_prompts})"
+    )

     # 5. Call the LLM for keyword extraction
     if param.model_func:
@@ -1864,7 +1868,7 @@ async def _build_query_context(

     # Combine entities and relations contexts
     entities_context = process_combine_contexts(
-
+        ll_entities_context, hl_entities_context
     )
     relations_context = process_combine_contexts(
         hl_relations_context, ll_relations_context
@@ -1894,6 +1898,163 @@ async def _build_query_context(
         f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks"
     )

+    # Unified token control system - Apply precise token limits to entities and relations
+    tokenizer = text_chunks_db.global_config.get("tokenizer")
+    if tokenizer:
+        # Get new token limits from query_param (with fallback to global_config)
+        max_entity_tokens = getattr(
+            query_param,
+            "max_entity_tokens",
+            text_chunks_db.global_config.get("MAX_ENTITY_TOKENS", 8000),
+        )
+        max_relation_tokens = getattr(
+            query_param,
+            "max_relation_tokens",
+            text_chunks_db.global_config.get("MAX_RELATION_TOKENS", 6000),
+        )
+        max_total_tokens = getattr(
+            query_param,
+            "max_total_tokens",
+            text_chunks_db.global_config.get("MAX_TOTAL_TOKENS", 32000),
+        )
+
+        # Truncate entities based on complete JSON serialization
+        if entities_context:
+            original_entity_count = len(entities_context)
+            entities_context = truncate_list_by_token_size(
+                entities_context,
+                key=lambda x: json.dumps(x, ensure_ascii=False),
+                max_token_size=max_entity_tokens,
+                tokenizer=tokenizer,
+            )
+            if len(entities_context) < original_entity_count:
+                logger.debug(
+                    f"Truncated entities: {original_entity_count} -> {len(entities_context)} (entity max tokens: {max_entity_tokens})"
+                )
+
+        # Truncate relations based on complete JSON serialization
+        if relations_context:
+            original_relation_count = len(relations_context)
+            relations_context = truncate_list_by_token_size(
+                relations_context,
+                key=lambda x: json.dumps(x, ensure_ascii=False),
+                max_token_size=max_relation_tokens,
+                tokenizer=tokenizer,
+            )
+            if len(relations_context) < original_relation_count:
+                logger.debug(
+                    f"Truncated relations: {original_relation_count} -> {len(relations_context)} (relation max tokens: {max_relation_tokens})"
+                )
+
+        # Calculate dynamic token limit for text chunks
+        entities_str = json.dumps(entities_context, ensure_ascii=False)
+        relations_str = json.dumps(relations_context, ensure_ascii=False)
+
+        # Calculate base context tokens (entities + relations + template)
+        kg_context_template = """-----Entities(KG)-----
+
+```json
+{entities_str}
+```
+
+-----Relationships(KG)-----
+
+```json
+{relations_str}
+```
+
+-----Document Chunks(DC)-----
+
+```json
+[]
+```
+
+"""
+        kg_context = kg_context_template.format(
+            entities_str=entities_str, relations_str=relations_str
+        )
+        kg_context_tokens = len(tokenizer.encode(kg_context))
+
+        # Calculate actual system prompt overhead dynamically
+        # 1. Calculate conversation history tokens
+        history_context = ""
+        if query_param.conversation_history:
+            history_context = get_conversation_turns(
+                query_param.conversation_history, query_param.history_turns
+            )
+        history_tokens = (
+            len(tokenizer.encode(history_context)) if history_context else 0
+        )
+
+        # 2. Calculate system prompt template tokens (excluding context_data)
+        user_prompt = query_param.user_prompt if query_param.user_prompt else ""
+        response_type = (
+            query_param.response_type
+            if query_param.response_type
+            else "Multiple Paragraphs"
+        )
+
+        # Get the system prompt template from PROMPTS
+        sys_prompt_template = text_chunks_db.global_config.get(
+            "system_prompt_template", PROMPTS["rag_response"]
+        )
+
+        # Create a sample system prompt with placeholders filled (excluding context_data)
+        sample_sys_prompt = sys_prompt_template.format(
+            history=history_context,
+            context_data="",  # Empty for overhead calculation
+            response_type=response_type,
+            user_prompt=user_prompt,
+        )
+        sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
+
+        # Total system prompt overhead = template + query tokens
+        query_tokens = len(tokenizer.encode(query))
+        sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
+
+        buffer_tokens = 100  # Safety buffer as requested
+
+        # Calculate available tokens for text chunks
+        used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens
+        available_chunk_tokens = max_total_tokens - used_tokens
+
+        logger.debug(
+            f"Token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+        )
+
+        # Re-process chunks with dynamic token limit
+        if text_units_context:
+            # Create a temporary query_param copy with adjusted chunk token limit
+            temp_chunks = [
+                {"content": chunk["content"], "file_path": chunk["file_path"]}
+                for chunk in text_units_context
+            ]
+
+            # Apply token truncation to chunks using the dynamic limit
+            truncated_chunks = await process_chunks_unified(
+                query=query,
+                chunks=temp_chunks,
+                query_param=query_param,
+                global_config=text_chunks_db.global_config,
+                source_type="mixed",
+                chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
+            )
+
+            # Rebuild text_units_context with truncated chunks
+            text_units_context = []
+            for i, chunk in enumerate(truncated_chunks):
+                text_units_context.append(
+                    {
+                        "id": i + 1,
+                        "content": chunk["content"],
+                        "file_path": chunk.get("file_path", "unknown_source"),
+                    }
+                )
+
+            logger.debug(
+                f"Re-truncated chunks for dynamic token limit: {len(temp_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})"
+            )
+
     # not necessary to use LLM to generate a response
     if not entities_context and not relations_context:
         return None
@@ -1982,18 +2143,6 @@ async def _get_node_data(
         knowledge_graph_inst,
     )

-    tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
-    len_node_datas = len(node_datas)
-    node_datas = truncate_list_by_token_size(
-        node_datas,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_local_context,
-        tokenizer=tokenizer,
-    )
-    logger.debug(
-        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
-    )
-
     logger.info(
         f"Local query: {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
     )
@@ -2199,20 +2348,9 @@ async def _find_most_related_edges_from_entities(
         }
         all_edges_data.append(combined)

-    tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
     all_edges_data = sorted(
         all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
-    all_edges_data = truncate_list_by_token_size(
-        all_edges_data,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_global_context,
-        tokenizer=tokenizer,
-    )
-
-    logger.debug(
-        f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
-    )

     return all_edges_data

@@ -2269,16 +2407,9 @@ async def _get_edge_data(
         }
         edge_datas.append(combined)

-    tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
     edge_datas = sorted(
         edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
-    edge_datas = truncate_list_by_token_size(
-        edge_datas,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_global_context,
-        tokenizer=tokenizer,
-    )
     use_entities, use_text_units = await asyncio.gather(
         _find_most_related_entities_from_relationships(
             edge_datas,
@@ -2388,18 +2519,6 @@ async def _find_most_related_entities_from_relationships(
         combined = {**node, "entity_name": entity_name, "rank": degree}
         node_datas.append(combined)

-    tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
-    len_node_datas = len(node_datas)
-    node_datas = truncate_list_by_token_size(
-        node_datas,
-        key=lambda x: x["description"] if x["description"] is not None else "",
-        max_token_size=query_param.max_token_for_local_context,
-        tokenizer=tokenizer,
-    )
-    logger.debug(
-        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
-    )
-
     return node_datas


@@ -2491,13 +2610,64 @@ async def naive_query(
     if chunks is None or len(chunks) == 0:
         return PROMPTS["fail_response"]

-    #
+    # Calculate dynamic token limit for chunks
+    # Get token limits from query_param (with fallback to global_config)
+    max_total_tokens = getattr(
+        query_param, "max_total_tokens", global_config.get("MAX_TOTAL_TOKENS", 32000)
+    )
+
+    # Calculate conversation history tokens
+    history_context = ""
+    if query_param.conversation_history:
+        history_context = get_conversation_turns(
+            query_param.conversation_history, query_param.history_turns
+        )
+    history_tokens = len(tokenizer.encode(history_context)) if history_context else 0
+
+    # Calculate system prompt template tokens (excluding content_data)
+    user_prompt = query_param.user_prompt if query_param.user_prompt else ""
+    response_type = (
+        query_param.response_type
+        if query_param.response_type
+        else "Multiple Paragraphs"
+    )
+
+    # Use the provided system prompt or default
+    sys_prompt_template = (
+        system_prompt if system_prompt else PROMPTS["naive_rag_response"]
+    )
+
+    # Create a sample system prompt with empty content_data to calculate overhead
+    sample_sys_prompt = sys_prompt_template.format(
+        content_data="",  # Empty for overhead calculation
+        response_type=response_type,
+        history=history_context,
+        user_prompt=user_prompt,
+    )
+    sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt))
+
+    # Total system prompt overhead = template + query tokens
+    query_tokens = len(tokenizer.encode(query))
+    sys_prompt_overhead = sys_prompt_template_tokens + query_tokens
+
+    buffer_tokens = 100  # Safety buffer
+
+    # Calculate available tokens for chunks
+    used_tokens = sys_prompt_overhead + buffer_tokens
+    available_chunk_tokens = max_total_tokens - used_tokens
+
+    logger.debug(
+        f"Naive query token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}"
+    )
+
+    # Process chunks using unified processing with dynamic token limit
     processed_chunks = await process_chunks_unified(
         query=query,
         chunks=chunks,
         query_param=query_param,
         global_config=global_config,
         source_type="vector",
+        chunk_token_limit=available_chunk_tokens,  # Pass dynamic limit
     )

     logger.info(f"Final context: {len(processed_chunks)} chunks")
@@ -2548,7 +2718,9 @@ async def naive_query(
         return sys_prompt

     len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(
+    logger.debug(
+        f"[naive_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
+    )

     response = await use_model_func(
         query,
@@ -2672,7 +2844,9 @@ async def kg_query_with_keywords(

     tokenizer: Tokenizer = global_config["tokenizer"]
     len_of_prompts = len(tokenizer.encode(query + sys_prompt))
-    logger.debug(
+    logger.debug(
+        f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})"
+    )

     # 6. Generate response
     response = await use_model_func(
@@ -2849,6 +3023,7 @@ async def process_chunks_unified(
     query_param: QueryParam,
     global_config: dict,
     source_type: str = "mixed",
+    chunk_token_limit: int = None,  # Add parameter for dynamic token limit
 ) -> list[dict]:
     """
     Unified processing for text chunks: deduplication, chunk_top_k limiting, reranking, and token truncation.
@@ -2859,6 +3034,7 @@ async def process_chunks_unified(
         query_param: Query parameters containing configuration
         global_config: Global configuration dictionary
         source_type: Source type for logging ("vector", "entity", "relationship", "mixed")
+        chunk_token_limit: Dynamic token limit for chunks (if None, uses default)

     Returns:
         Processed and filtered list of text chunks
@@ -2901,16 +3077,25 @@ async def process_chunks_unified(
     # 4. Token-based final truncation
     tokenizer = global_config.get("tokenizer")
     if tokenizer and unique_chunks:
+        # Set default chunk_token_limit if not provided
+        if chunk_token_limit is None:
+            # Get default from query_param or global_config
+            chunk_token_limit = getattr(
+                query_param,
+                "max_total_tokens",
+                global_config.get("MAX_TOTAL_TOKENS", 32000),
+            )
+
         original_count = len(unique_chunks)
         unique_chunks = truncate_list_by_token_size(
            unique_chunks,
            key=lambda x: x.get("content", ""),
-           max_token_size=
+           max_token_size=chunk_token_limit,
            tokenizer=tokenizer,
        )
        logger.debug(
            f"Token truncation: {len(unique_chunks)} chunks from {original_count} "
-           f"(
+           f"(chunk available tokens: {chunk_token_limit}, source: {source_type})"
        )

    return unique_chunks
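Net effect of the operate.py changes: entity and relation contexts are each truncated against their own JSON-serialized token caps, and text chunks receive whatever remains of max_total_tokens after the KG context, system prompt template, conversation history, query, and a fixed 100-token buffer are counted. A worked sketch of that arithmetic with illustrative numbers (real values come from the configured tokenizer):

```python
# Illustrative numbers only; mirrors the budget computed in _build_query_context.
max_total_tokens = 32000      # QueryParam.max_total_tokens / MAX_TOTAL_TOKENS
kg_context_tokens = 9500      # entities + relations after their own truncation
sys_prompt_overhead = 1800    # prompt template + history + user query tokens
buffer_tokens = 100           # fixed safety buffer

available_chunk_tokens = max_total_tokens - (
    kg_context_tokens + sys_prompt_overhead + buffer_tokens
)
print(available_chunk_tokens)  # 20600 tokens left for document chunks
```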
lightrag_webui/src/api/lightrag.ts
CHANGED
@@ -90,12 +90,16 @@ export type QueryRequest = {
   stream?: boolean
   /** Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode. */
   top_k?: number
-  /** Maximum number of
-
-  /**
-
-  /** Maximum number of tokens allocated for entity
-
+  /** Maximum number of text chunks to retrieve and process. */
+  chunk_top_k?: number
+  /** Number of text chunks to keep after reranking. */
+  chunk_rerank_top_k?: number
+  /** Maximum number of tokens allocated for entity context in unified token control system. */
+  max_entity_tokens?: number
+  /** Maximum number of tokens allocated for relationship context in unified token control system. */
+  max_relation_tokens?: number
+  /** Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt). */
+  max_total_tokens?: number
   /**
    * Stores past conversation history to maintain context.
    * Format: [{"role": "user/assistant", "content": "message"}].
lightrag_webui/src/components/retrieval/QuerySettings.tsx
CHANGED
@@ -132,30 +132,81 @@ export default function QuerySettings() {
           </div>
         </>

+        {/* Chunk Top K */}
+        <>
+          <TooltipProvider>
+            <Tooltip>
+              <TooltipTrigger asChild>
+                <label htmlFor="chunk_top_k" className="ml-1 cursor-help">
+                  {t('retrievePanel.querySettings.chunkTopK')}
+                </label>
+              </TooltipTrigger>
+              <TooltipContent side="left">
+                <p>{t('retrievePanel.querySettings.chunkTopKTooltip')}</p>
+              </TooltipContent>
+            </Tooltip>
+          </TooltipProvider>
+          <div>
+            <NumberInput
+              id="chunk_top_k"
+              stepper={1}
+              value={querySettings.chunk_top_k}
+              onValueChange={(v) => handleChange('chunk_top_k', v)}
+              min={1}
+              placeholder={t('retrievePanel.querySettings.chunkTopKPlaceholder')}
+            />
+          </div>
+        </>
+
+        {/* Chunk Rerank Top K */}
+        <>
+          <TooltipProvider>
+            <Tooltip>
+              <TooltipTrigger asChild>
+                <label htmlFor="chunk_rerank_top_k" className="ml-1 cursor-help">
+                  {t('retrievePanel.querySettings.chunkRerankTopK')}
+                </label>
+              </TooltipTrigger>
+              <TooltipContent side="left">
+                <p>{t('retrievePanel.querySettings.chunkRerankTopKTooltip')}</p>
+              </TooltipContent>
+            </Tooltip>
+          </TooltipProvider>
+          <div>
+            <NumberInput
+              id="chunk_rerank_top_k"
+              stepper={1}
+              value={querySettings.chunk_rerank_top_k}
+              onValueChange={(v) => handleChange('chunk_rerank_top_k', v)}
+              min={1}
+              placeholder={t('retrievePanel.querySettings.chunkRerankTopKPlaceholder')}
+            />
+          </div>
+        </>
+
         {/* Max Tokens */}
         <>
           <>
             <TooltipProvider>
               <Tooltip>
                 <TooltipTrigger asChild>
-                  <label htmlFor="
-                    {t('retrievePanel.querySettings.
+                  <label htmlFor="max_entity_tokens" className="ml-1 cursor-help">
+                    {t('retrievePanel.querySettings.maxEntityTokens')}
                   </label>
                 </TooltipTrigger>
                 <TooltipContent side="left">
-                  <p>{t('retrievePanel.querySettings.
+                  <p>{t('retrievePanel.querySettings.maxEntityTokensTooltip')}</p>
                 </TooltipContent>
               </Tooltip>
             </TooltipProvider>
             <div>
-              {/* Removed sr-only label */}
               <NumberInput
-                id="
+                id="max_entity_tokens"
                 stepper={500}
-                value={querySettings.
-                onValueChange={(v) => handleChange('
+                value={querySettings.max_entity_tokens}
+                onValueChange={(v) => handleChange('max_entity_tokens', v)}
                 min={1}
-                placeholder={t('retrievePanel.querySettings.
+                placeholder={t('retrievePanel.querySettings.maxEntityTokens')}
               />
             </div>
           </>
@@ -164,24 +215,23 @@ export default function QuerySettings() {
            <TooltipProvider>
              <Tooltip>
                <TooltipTrigger asChild>
-                 <label htmlFor="
-                   {t('retrievePanel.querySettings.
+                 <label htmlFor="max_relation_tokens" className="ml-1 cursor-help">
+                   {t('retrievePanel.querySettings.maxRelationTokens')}
                  </label>
                </TooltipTrigger>
                <TooltipContent side="left">
-                 <p>{t('retrievePanel.querySettings.
+                 <p>{t('retrievePanel.querySettings.maxRelationTokensTooltip')}</p>
                </TooltipContent>
              </Tooltip>
            </TooltipProvider>
            <div>
-             {/* Removed sr-only label */}
              <NumberInput
-               id="
+               id="max_relation_tokens"
                stepper={500}
-               value={querySettings.
-               onValueChange={(v) => handleChange('
+               value={querySettings.max_relation_tokens}
+               onValueChange={(v) => handleChange('max_relation_tokens', v)}
                min={1}
-               placeholder={t('retrievePanel.querySettings.
+               placeholder={t('retrievePanel.querySettings.maxRelationTokens')}
              />
            </div>
          </>
@@ -190,24 +240,23 @@ export default function QuerySettings() {
            <TooltipProvider>
              <Tooltip>
                <TooltipTrigger asChild>
-                 <label htmlFor="
-                   {t('retrievePanel.querySettings.
+                 <label htmlFor="max_total_tokens" className="ml-1 cursor-help">
+                   {t('retrievePanel.querySettings.maxTotalTokens')}
                  </label>
                </TooltipTrigger>
                <TooltipContent side="left">
-                 <p>{t('retrievePanel.querySettings.
+                 <p>{t('retrievePanel.querySettings.maxTotalTokensTooltip')}</p>
                </TooltipContent>
              </Tooltip>
            </TooltipProvider>
            <div>
-             {/* Removed sr-only label */}
              <NumberInput
-               id="
-               stepper={
-               value={querySettings.
-               onValueChange={(v) => handleChange('
+               id="max_total_tokens"
+               stepper={1000}
+               value={querySettings.max_total_tokens}
+               onValueChange={(v) => handleChange('max_total_tokens', v)}
                min={1}
-               placeholder={t('retrievePanel.querySettings.
+               placeholder={t('retrievePanel.querySettings.maxTotalTokens')}
              />
            </div>
          </>
lightrag_webui/src/locales/ar.json
CHANGED
@@ -359,16 +359,22 @@
     "singleParagraph": "فقرة واحدة",
     "bulletPoints": "نقاط نقطية"
   },
-  "topK": "أعلى K
-  "topKTooltip": "عدد العناصر العلوية
-  "topKPlaceholder": "
-  "
-  "
-  "
-  "
-  "
-  "
-  "
+  "topK": "أعلى K",
+  "topKTooltip": "عدد العناصر العلوية للاسترداد. يمثل الكيانات في الوضع 'المحلي' والعلاقات في الوضع 'العالمي'.",
+  "topKPlaceholder": "أدخل قيمة أعلى k",
+  "chunkTopK": "أعلى K للقطع",
+  "chunkTopKTooltip": "العدد الأقصى لقطع النص المراد استردادها ومعالجتها.",
+  "chunkTopKPlaceholder": "أدخل قيمة أعلى k للقطع",
+  "chunkRerankTopK": "أعلى K لإعادة الترتيب",
+  "chunkRerankTopKTooltip": "عدد قطع النص المراد الاحتفاظ بها بعد إعادة الترتيب.",
+  "chunkRerankTopKPlaceholder": "أدخل قيمة أعلى k لإعادة الترتيب",
+  "maxEntityTokens": "الحد الأقصى لرموز الكيان",
+  "maxEntityTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق الكيان في نظام التحكم الموحد في الرموز",
+  "maxRelationTokens": "الحد الأقصى لرموز العلاقة",
+  "maxRelationTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق العلاقة في نظام التحكم الموحد في الرموز",
+  "maxTotalTokens": "إجمالي الحد الأقصى للرموز",
+  "maxTotalTokensTooltip": "الحد الأقصى الإجمالي لميزانية الرموز لسياق الاستعلام بالكامل (الكيانات + العلاقات + الأجزاء + موجه النظام)",
+  "historyTurns": "أدوار التاريخ",
   "historyTurnsTooltip": "عدد الدورات الكاملة للمحادثة (أزواج المستخدم-المساعد) التي يجب مراعاتها في سياق الرد",
   "historyTurnsPlaceholder": "عدد دورات التاريخ",
   "onlyNeedContext": "تحتاج فقط إلى السياق",
lightrag_webui/src/locales/en.json
CHANGED
@@ -359,15 +359,21 @@
     "singleParagraph": "Single Paragraph",
     "bulletPoints": "Bullet Points"
   },
-  "topK": "Top K
-  "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode",
-  "topKPlaceholder": "
-  "
-  "
-  "
-  "
-  "
-  "
+  "topK": "Top K",
+  "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.",
+  "topKPlaceholder": "Enter top k value",
+  "chunkTopK": "Chunk Top K",
+  "chunkTopKTooltip": "Maximum number of text chunks to retrieve and process.",
+  "chunkTopKPlaceholder": "Enter chunk top k value",
+  "chunkRerankTopK": "Chunk Rerank Top K",
+  "chunkRerankTopKTooltip": "Number of text chunks to keep after reranking.",
+  "chunkRerankTopKPlaceholder": "Enter rerank top k value",
+  "maxEntityTokens": "Max Entity Tokens",
+  "maxEntityTokensTooltip": "Maximum number of tokens allocated for entity context in unified token control system",
+  "maxRelationTokens": "Max Relation Tokens",
+  "maxRelationTokensTooltip": "Maximum number of tokens allocated for relationship context in unified token control system",
+  "maxTotalTokens": "Max Total Tokens",
+  "maxTotalTokensTooltip": "Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)",
   "historyTurns": "History Turns",
   "historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context",
   "historyTurnsPlaceholder": "Number of history turns",
lightrag_webui/src/locales/fr.json
CHANGED
@@ -359,15 +359,21 @@
     "singleParagraph": "Paragraphe unique",
     "bulletPoints": "Points à puces"
   },
-  "topK": "Top K
-  "topKTooltip": "Nombre d'éléments
-  "topKPlaceholder": "
-  "
-  "
-  "
-  "
-  "
-  "
+  "topK": "Top K",
+  "topKTooltip": "Nombre d'éléments principaux à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'.",
+  "topKPlaceholder": "Entrez la valeur top k",
+  "chunkTopK": "Top K des Chunks",
+  "chunkTopKTooltip": "Nombre maximum de chunks de texte à récupérer et traiter.",
+  "chunkTopKPlaceholder": "Entrez la valeur top k des chunks",
+  "chunkRerankTopK": "Top K du Reclassement",
+  "chunkRerankTopKTooltip": "Nombre de chunks de texte à conserver après reclassement.",
+  "chunkRerankTopKPlaceholder": "Entrez la valeur top k du reclassement",
+  "maxEntityTokens": "Limite de jetons d'entité",
+  "maxEntityTokensTooltip": "Nombre maximum de jetons alloués au contexte d'entité dans le système de contrôle de jetons unifié",
+  "maxRelationTokens": "Limite de jetons de relation",
+  "maxRelationTokensTooltip": "Nombre maximum de jetons alloués au contexte de relation dans le système de contrôle de jetons unifié",
+  "maxTotalTokens": "Limite totale de jetons",
+  "maxTotalTokensTooltip": "Budget total maximum de jetons pour l'ensemble du contexte de requête (entités + relations + blocs + prompt système)",
   "historyTurns": "Tours d'historique",
   "historyTurnsTooltip": "Nombre de tours complets de conversation (paires utilisateur-assistant) à prendre en compte dans le contexte de la réponse",
   "historyTurnsPlaceholder": "Nombre de tours d'historique",
lightrag_webui/src/locales/zh.json
CHANGED
@@ -359,15 +359,21 @@
     "singleParagraph": "单段落",
     "bulletPoints": "要点"
   },
-  "topK": "Top K
-  "topKTooltip": "
-  "topKPlaceholder": "
-  "
-  "
-  "
-  "
-  "
-  "
+  "topK": "Top K",
+  "topKTooltip": "检索的顶部条目数量。在'local'模式下表示实体,在'global'模式下表示关系。",
+  "topKPlaceholder": "输入top k值",
+  "chunkTopK": "文本块 Top K",
+  "chunkTopKTooltip": "检索和处理的最大文本块数量。",
+  "chunkTopKPlaceholder": "输入文本块top k值",
+  "chunkRerankTopK": "重排序 Top K",
+  "chunkRerankTopKTooltip": "重排序后保留的文本块数量。",
+  "chunkRerankTopKPlaceholder": "输入重排序top k值",
+  "maxEntityTokens": "实体令牌数上限",
+  "maxEntityTokensTooltip": "统一令牌控制系统中分配给实体上下文的最大令牌数",
+  "maxRelationTokens": "关系令牌数上限",
+  "maxRelationTokensTooltip": "统一令牌控制系统中分配给关系上下文的最大令牌数",
+  "maxTotalTokens": "总令牌数上限",
+  "maxTotalTokensTooltip": "整个查询上下文的最大总令牌预算(实体+关系+文档块+系统提示)",
   "historyTurns": "历史轮次",
   "historyTurnsTooltip": "响应上下文中考虑的完整对话轮次(用户-助手对)数量",
   "historyTurnsPlaceholder": "历史轮次数",
lightrag_webui/src/locales/zh_TW.json
CHANGED
@@ -300,7 +300,7 @@
         "file_path": "來源",
         "keywords": "Keys",
         "weight": "權重"
-
+      }
     },
     "edge": {
       "title": "關係",
@@ -359,15 +359,15 @@
     "singleParagraph": "單段落",
     "bulletPoints": "重點"
   },
-  "topK": "Top K
-  "topKTooltip": "
-  "topKPlaceholder": "
-  "
-  "
-  "
-  "
-  "
-  "
+  "topK": "Top K",
+  "topKTooltip": "檢索的頂部條目數量。在'local'模式下表示實體,在'global'模式下表示關係。",
+  "topKPlaceholder": "輸入top k值",
+  "chunkTopK": "文字區塊 Top K",
+  "chunkTopKTooltip": "檢索和處理的最大文字區塊數量。",
+  "chunkTopKPlaceholder": "輸入文字區塊top k值",
+  "chunkRerankTopK": "重新排序 Top K",
+  "chunkRerankTopKTooltip": "重新排序後保留的文字區塊數量。",
+  "chunkRerankTopKPlaceholder": "輸入重新排序top k值",
   "historyTurns": "歷史輪次",
   "historyTurnsTooltip": "回應上下文中考慮的完整對話輪次(使用者-助手對)數量",
   "historyTurnsPlaceholder": "歷史輪次數",
@@ -379,7 +379,13 @@
     "streamResponseTooltip": "如果為True,啟用即時串流輸出回應",
     "userPrompt": "用戶提示詞",
     "userPromptTooltip": "向LLM提供額外的響應要求(與查詢內容無關,僅用於處理輸出)。",
-    "userPromptPlaceholder": "輸入自定義提示詞(可選)"
+    "userPromptPlaceholder": "輸入自定義提示詞(可選)",
+    "maxEntityTokens": "實體令牌數上限",
+    "maxEntityTokensTooltip": "統一令牌控制系統中分配給實體上下文的最大令牌數",
+    "maxRelationTokens": "關係令牌數上限",
+    "maxRelationTokensTooltip": "統一令牌控制系統中分配給關係上下文的最大令牌數",
+    "maxTotalTokens": "總令牌數上限",
+    "maxTotalTokensTooltip": "整個查詢上下文的最大總令牌預算(實體+關係+文檔塊+系統提示)"
   }
 },
 "apiSite": {
lightrag_webui/src/stores/settings.ts
CHANGED
@@ -111,9 +111,11 @@ const useSettingsStoreBase = create<SettingsState>()(
       mode: 'global',
       response_type: 'Multiple Paragraphs',
       top_k: 10,
-
-
-
+      chunk_top_k: 5,
+      chunk_rerank_top_k: 5,
+      max_entity_tokens: 10000,
+      max_relation_tokens: 10000,
+      max_total_tokens: 32000,
       only_need_context: false,
       only_need_prompt: false,
       stream: true,
@@ -192,7 +194,7 @@ const useSettingsStoreBase = create<SettingsState>()(
     {
       name: 'settings-storage',
       storage: createJSONStorage(() => localStorage),
-      version:
+      version: 15,
       migrate: (state: any, version: number) => {
         if (version < 2) {
           state.showEdgeLabel = false
@@ -215,9 +217,9 @@ const useSettingsStoreBase = create<SettingsState>()(
           mode: 'global',
           response_type: 'Multiple Paragraphs',
           top_k: 10,
-
-
-
+          max_entity_tokens: 10000,
+          max_relation_tokens: 10000,
+          max_total_tokens: 32000,
           only_need_context: false,
           only_need_prompt: false,
           stream: true,
@@ -260,6 +262,26 @@ const useSettingsStoreBase = create<SettingsState>()(
           // Add backendMaxGraphNodes field for older versions
           state.backendMaxGraphNodes = null
         }
+        if (version < 15) {
+          // 完整更新querySettings到统一token控制系统
+          state.querySettings = {
+            mode: 'global',
+            response_type: 'Multiple Paragraphs',
+            top_k: 10,
+            chunk_top_k: 5,
+            chunk_rerank_top_k: 5,
+            max_entity_tokens: 10000,
+            max_relation_tokens: 10000,
+            max_total_tokens: 32000,
+            only_need_context: false,
+            only_need_prompt: false,
+            stream: true,
+            history_turns: 3,
+            hl_keywords: [],
+            ll_keywords: [],
+            user_prompt: ''
+          }
+        }
         return state
       }
     }