diff --git a/README-zh.md b/README-zh.md index 9f7f314e728a67a70a1b387adfdc8f305513c1b5..5f9614e345d3bca1c369338e3b8e781b6a048b7c 100644 --- a/README-zh.md +++ b/README-zh.md @@ -293,26 +293,19 @@ class QueryParam: top_k: int = int(os.getenv("TOP_K", "60")) """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.""" - chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "5")) - """Number of text chunks to retrieve initially from vector search. + chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "10")) + """Number of text chunks to retrieve initially from vector search and keep after reranking. If None, defaults to top_k value. """ - chunk_rerank_top_k: int = int(os.getenv("CHUNK_RERANK_TOP_K", "5")) - """Number of text chunks to keep after reranking. - If None, keeps all chunks returned from initial retrieval. - """ - - max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000")) - """Maximum number of tokens allowed for each retrieved text chunk.""" + max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000")) + """Maximum number of tokens allocated for entity context in unified token control system.""" - max_token_for_global_context: int = int( - os.getenv("MAX_TOKEN_RELATION_DESC", "4000") - ) - """Maximum number of tokens allocated for relationship descriptions in global retrieval.""" + max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000")) + """Maximum number of tokens allocated for relationship context in unified token control system.""" - max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) - """Maximum number of tokens allocated for entity descriptions in local retrieval.""" + max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000")) + """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).""" hl_keywords: list[str] = field(default_factory=list) """List of high-level keywords 
to prioritize in retrieval.""" @@ -341,6 +334,11 @@ class QueryParam: """User-provided prompt for the query. If proivded, this will be use instead of the default vaulue from prompt template. """ + + enable_rerank: bool = True + """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. + Default is True to enable reranking when rerank model is available. + """ ``` > top_k的默认值可以通过环境变量TOP_K更改。 diff --git a/README.md b/README.md index fa2b592458afb1dcf1a59bdf1afda783a78d34ea..0fa6c3d193d30b987377599f9719edddd8ae39fc 100644 --- a/README.md +++ b/README.md @@ -300,26 +300,19 @@ class QueryParam: top_k: int = int(os.getenv("TOP_K", "60")) """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.""" - chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "5")) - """Number of text chunks to retrieve initially from vector search. + chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "10")) + """Number of text chunks to retrieve initially from vector search and keep after reranking. If None, defaults to top_k value. """ - chunk_rerank_top_k: int = int(os.getenv("CHUNK_RERANK_TOP_K", "5")) - """Number of text chunks to keep after reranking. - If None, keeps all chunks returned from initial retrieval. 
- """ - - max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000")) - """Maximum number of tokens allowed for each retrieved text chunk.""" + max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000")) + """Maximum number of tokens allocated for entity context in unified token control system.""" - max_token_for_global_context: int = int( - os.getenv("MAX_TOKEN_RELATION_DESC", "4000") - ) - """Maximum number of tokens allocated for relationship descriptions in global retrieval.""" + max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000")) + """Maximum number of tokens allocated for relationship context in unified token control system.""" - max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) - """Maximum number of tokens allocated for entity descriptions in local retrieval.""" + max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000")) + """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).""" conversation_history: list[dict[str, str]] = field(default_factory=list) """Stores past conversation history to maintain context. @@ -342,6 +335,11 @@ class QueryParam: """User-provided prompt for the query. If proivded, this will be use instead of the default vaulue from prompt template. """ + + enable_rerank: bool = True + """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. + Default is True to enable reranking when rerank model is available. + """ ``` > default value of Top_k can be change by environment variables TOP_K. 
diff --git a/docs/rerank_integration.md b/docs/rerank_integration.md index fdaebfa5148e7a8040c270c8ba8dc4e04b67e38c..4e4d433f657bf3240582644297df03e11dfed26c 100644 --- a/docs/rerank_integration.md +++ b/docs/rerank_integration.md @@ -1,36 +1,24 @@ -# Rerank Integration in LightRAG +# Rerank Integration Guide -This document explains how to configure and use the rerank functionality in LightRAG to improve retrieval quality. +LightRAG supports reranking functionality to improve retrieval quality by re-ordering documents based on their relevance to the query. Reranking is now controlled per query via the `enable_rerank` parameter (default: True). -## Overview - -Reranking is an optional feature that improves the quality of retrieved documents by re-ordering them based on their relevance to the query. This is particularly useful when you want higher precision in document retrieval across all query modes (naive, local, global, hybrid, mix). - -## Architecture - -The rerank integration follows a simplified design pattern: - -- **Single Function Configuration**: All rerank settings (model, API keys, top_k, etc.) 
are contained within the rerank function -- **Async Processing**: Non-blocking rerank operations -- **Error Handling**: Graceful fallback to original results -- **Optional Feature**: Can be enabled/disabled via configuration -- **Code Reuse**: Single generic implementation for Jina/Cohere compatible APIs - -## Configuration +## Quick Start ### Environment Variables -Set this variable in your `.env` file or environment: +Set these variables in your `.env` file or environment for rerank model configuration: ```bash -# Enable/disable reranking -ENABLE_RERANK=True +# Rerank model configuration (required when enable_rerank=True in queries) +RERANK_MODEL=BAAI/bge-reranker-v2-m3 +RERANK_BINDING_HOST=https://api.your-provider.com/v1/rerank +RERANK_BINDING_API_KEY=your_api_key_here ``` ### Programmatic Configuration ```python -from lightrag import LightRAG +from lightrag import LightRAG, QueryParam from lightrag.rerank import custom_rerank, RerankModel # Method 1: Using a custom rerank function with all settings included @@ -49,8 +37,19 @@ rag = LightRAG( working_dir="./rag_storage", llm_model_func=your_llm_func, embedding_func=your_embedding_func, - enable_rerank=True, - rerank_model_func=my_rerank_func, + rerank_model_func=my_rerank_func, # Configure rerank function +) + +# Query with rerank enabled (default) +result = await rag.aquery( + "your query", + param=QueryParam(enable_rerank=True) # Control rerank per query +) + +# Query with rerank disabled +result = await rag.aquery( + "your query", + param=QueryParam(enable_rerank=False) ) # Method 2: Using RerankModel wrapper @@ -67,9 +66,17 @@ rag = LightRAG( working_dir="./rag_storage", llm_model_func=your_llm_func, embedding_func=your_embedding_func, - enable_rerank=True, rerank_model_func=rerank_model.rerank, ) + +# Control rerank per query +result = await rag.aquery( + "your query", + param=QueryParam( + enable_rerank=True, # Enable rerank for this query + chunk_top_k=5 # Number of chunks to keep after reranking + ) +) 
``` ## Supported Providers @@ -164,7 +171,6 @@ async def main(): working_dir="./rag_storage", llm_model_func=gpt_4o_mini_complete, embedding_func=openai_embedding, - enable_rerank=True, rerank_model_func=my_rerank_func, ) @@ -180,7 +186,7 @@ async def main(): # Query with rerank (automatically applied) result = await rag.aquery( "Your question here", - param=QueryParam(mode="hybrid", top_k=5) # This top_k is passed to rerank function + param=QueryParam(enable_rerank=True) # This top_k is passed to rerank function ) print(result) diff --git a/env.example b/env.example index 828c6d247b37d9b683d69a6bb09e2409b96d7258..71bc578845dc88d7ef127f8affefd85717cee484 100644 --- a/env.example +++ b/env.example @@ -1,6 +1,8 @@ ### This is sample file of .env +########################### ### Server Configuration +########################### HOST=0.0.0.0 PORT=9621 WEBUI_TITLE='My Graph KB' @@ -9,29 +11,17 @@ OLLAMA_EMULATING_MODEL_TAG=latest # WORKERS=2 # CORS_ORIGINS=http://localhost:3000,http://localhost:8080 -### Login Configuration -# AUTH_ACCOUNTS='admin:admin123,user1:pass456' -# TOKEN_SECRET=Your-Key-For-LightRAG-API-Server -# TOKEN_EXPIRE_HOURS=48 -# GUEST_TOKEN_EXPIRE_HOURS=24 -# JWT_ALGORITHM=HS256 - -### API-Key to access LightRAG Server API -# LIGHTRAG_API_KEY=your-secure-api-key-here -# WHITELIST_PATHS=/health,/api/* - ### Optional SSL Configuration # SSL=true # SSL_CERTFILE=/path/to/cert.pem # SSL_KEYFILE=/path/to/key.pem ### Directory Configuration (defaults to current working directory) -### Should not be set if deploy by docker (Set by Dockerfile instead of .env) ### Default value is ./inputs and ./rag_storage # INPUT_DIR= # WORKING_DIR= -### Max nodes return from grap retrieval +### Max nodes return from grap retrieval in webui # MAX_GRAPH_NODES=1000 ### Logging level @@ -42,65 +32,97 @@ OLLAMA_EMULATING_MODEL_TAG=latest ### Logfile location (defaults to current working directory) # LOG_DIR=/path/to/log/directory -### RAG Configuration -### Chunk size for document 
splitting, 500~1500 is recommended -# CHUNK_SIZE=1200 -# CHUNK_OVERLAP_SIZE=100 +##################################### +### Login and API-Key Configuration +##################################### +# AUTH_ACCOUNTS='admin:admin123,user1:pass456' +# TOKEN_SECRET=Your-Key-For-LightRAG-API-Server +# TOKEN_EXPIRE_HOURS=48 +# GUEST_TOKEN_EXPIRE_HOURS=24 +# JWT_ALGORITHM=HS256 -### RAG Query Configuration +### API-Key to access LightRAG Server API +# LIGHTRAG_API_KEY=your-secure-api-key-here +# WHITELIST_PATHS=/health,/api/* + +######################## +### Query Configuration +######################## +# LLM response cache for query (Not valid for streaming response) +ENABLE_LLM_CACHE=true # HISTORY_TURNS=3 -# MAX_TOKEN_TEXT_CHUNK=6000 -# MAX_TOKEN_RELATION_DESC=4000 -# MAX_TOKEN_ENTITY_DESC=4000 # COSINE_THRESHOLD=0.2 -### Number of entities or relations to retrieve from KG -# TOP_K=60 -### Number of text chunks to retrieve initially from vector search -# CHUNK_TOP_K=5 - -### Rerank Configuration -# ENABLE_RERANK=False -### Number of text chunks to keep after reranking (should be <= CHUNK_TOP_K) -# CHUNK_RERANK_TOP_K=5 -### Rerank model configuration (required when ENABLE_RERANK=True) +### Number of entities or relations retrieved from KG +# TOP_K=40 +### Maximum number of chunks planned to send to LLM +# CHUNK_TOP_K=10 +### Control the actual entities sent to LLM +# MAX_ENTITY_TOKENS=10000 +### Control the actual relations sent to LLM +# MAX_RELATION_TOKENS=10000 +### Control the maximum tokens sent to LLM (including entities, relations and chunks) +# MAX_TOTAL_TOKENS=32000 +### Maximum number of related chunks grabbed from a single entity or relation +# RELATED_CHUNK_NUMBER=10 + +### Reranker configuration (Set ENABLE_RERANK to true if a reranking model is configured) +ENABLE_RERANK=False # RERANK_MODEL=BAAI/bge-reranker-v2-m3 # RERANK_BINDING_HOST=https://api.your-rerank-provider.com/v1/rerank # RERANK_BINDING_API_KEY=your_rerank_api_key_here -### Entity and relation summarization configuration 
+######################################## +### Document processing configuration +######################################## ### Language: English, Chinese, French, German ... SUMMARY_LANGUAGE=English +ENABLE_LLM_CACHE_FOR_EXTRACT=true +### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model) +MAX_TOKENS=32000 +### Chunk size for document splitting, 500~1500 is recommended +# CHUNK_SIZE=1200 +# CHUNK_OVERLAP_SIZE=100 +### Entity and relation summarization configuration ### Number of duplicated entities/edges to trigger LLM re-summary on merge ( at least 3 is recommented) -# FORCE_LLM_SUMMARY_ON_MERGE=6 +# FORCE_LLM_SUMMARY_ON_MERGE=4 ### Maximum number of entity extraction attempts for ambiguous content # MAX_GLEANING=1 -### Number of parallel processing documents(Less than MAX_ASYNC/2 is recommended) -# MAX_PARALLEL_INSERT=2 +############################### +### Concurrency Configuration +############################### +### Max concurrency requests of LLM (for both query and document processing) +MAX_ASYNC=4 +### Number of parallel processing documents(between 2~10, MAX_ASYNC/4 is recommended) +MAX_PARALLEL_INSERT=2 +### Max concurrency requests for Embedding +# EMBEDDING_FUNC_MAX_ASYNC=8 +### Num of chunks send to Embedding in single request +# EMBEDDING_BATCH_NUM=10 +####################### ### LLM Configuration -ENABLE_LLM_CACHE=true -ENABLE_LLM_CACHE_FOR_EXTRACT=true +####################### ### Time out in seconds for LLM, None for infinite timeout TIMEOUT=240 ### Some models like o1-mini require temperature to be set to 1 TEMPERATURE=0 -### Max concurrency requests of LLM -MAX_ASYNC=4 -### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model) -MAX_TOKENS=32000 ### LLM Binding type: openai, ollama, lollms, azure_openai LLM_BINDING=openai LLM_MODEL=gpt-4o LLM_BINDING_HOST=https://api.openai.com/v1 LLM_BINDING_API_KEY=your_api_key + +### Set as num_ctx option for 
Ollama LLM +# OLLAMA_NUM_CTX=32768 + ### Optional for Azure # AZURE_OPENAI_API_VERSION=2024-08-01-preview # AZURE_OPENAI_DEPLOYMENT=gpt-4o -### set as num_ctx option for Ollama LLM -# OLLAMA_NUM_CTX=32768 -### Embedding Configuration +#################################################################################### +### Embedding Configuration (Should not be changed after the first file processed) +#################################################################################### ### Embedding Binding type: openai, ollama, lollms, azure_openai EMBEDDING_BINDING=ollama EMBEDDING_MODEL=bge-m3:latest @@ -108,51 +130,53 @@ EMBEDDING_DIM=1024 EMBEDDING_BINDING_API_KEY=your_api_key # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost EMBEDDING_BINDING_HOST=http://localhost:11434 -### Num of chunks send to Embedding in single request -# EMBEDDING_BATCH_NUM=10 -### Max concurrency requests for Embedding -# EMBEDDING_FUNC_MAX_ASYNC=8 ### Maximum tokens sent to Embedding for each chunk (no longer in use?) 
# MAX_EMBED_TOKENS=8192 + ### Optional for Azure # AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large # AZURE_EMBEDDING_API_VERSION=2023-05-15 # AZURE_EMBEDDING_ENDPOINT=your_endpoint # AZURE_EMBEDDING_API_KEY=your_api_key -########################### +############################ ### Data storage selection -########################### -### In-memory database with local file persistence(Recommended for small scale deployment) +############################ +### Default storage (Recommended for small scale deployment) # LIGHTRAG_KV_STORAGE=JsonKVStorage # LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage # LIGHTRAG_GRAPH_STORAGE=NetworkXStorage # LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage + +### Redis Storage (Recommended for production deployment) +# LIGHTRAG_KV_STORAGE=RedisKVStorage +# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage + +### Vector Storage (Recommended for production deployment) +# LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage +# LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage # LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage + +### Graph Storage (Recommended for production deployment) +# LIGHTRAG_GRAPH_STORAGE=Neo4JStorage +# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage + ### PostgreSQL # LIGHTRAG_KV_STORAGE=PGKVStorage # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage + ### MongoDB (Vector storage only available on Atlas Cloud) # LIGHTRAG_KV_STORAGE=MongoKVStorage # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage # LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage -### Redis Storage (Recommended for production deployment) -# LIGHTRAG_KV_STORAGE=RedisKVStorage -# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage -### Vector Storage (Recommended for production deployment) -# LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage -# LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage -### Graph Storage (Recommended for production deployment) -# 
LIGHTRAG_GRAPH_STORAGE=Neo4JStorage -# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage #################################################################### -### Default workspace for all storage types -### For the purpose of isolation of data for each LightRAG instance -### Valid characters: a-z, A-Z, 0-9, and _ +### WORKSPACE setting workspace name for all storage types +### in the purpose of isolating data from LightRAG instances. +### Valid workspace name constraints: a-z, A-Z, 0-9, and _ #################################################################### # WORKSPACE=space1 diff --git a/examples/rerank_example.py b/examples/rerank_example.py index e0e361a5ea0a0308a9ebabaac0bbe27329d9c23d..42b4dd3809d2ac92896522a42e596891c072cd3f 100644 --- a/examples/rerank_example.py +++ b/examples/rerank_example.py @@ -9,7 +9,11 @@ Configuration Required: 2. Set your embedding API key and base URL in embedding_func() 3. Set your rerank API key and base URL in the rerank configuration 4. Or use environment variables (.env file): - - ENABLE_RERANK=True + - RERANK_MODEL=your_rerank_model + - RERANK_BINDING_HOST=your_rerank_endpoint + - RERANK_BINDING_API_KEY=your_rerank_api_key + +Note: Rerank is now controlled per query via the 'enable_rerank' parameter (default: True) """ import asyncio @@ -83,8 +87,7 @@ async def create_rag_with_rerank(): max_token_size=8192, func=embedding_func, ), - # Simplified Rerank Configuration - enable_rerank=True, + # Rerank Configuration - provide the rerank function rerank_model_func=my_rerank_func, ) @@ -120,7 +123,6 @@ async def create_rag_with_rerank_model(): max_token_size=8192, func=embedding_func, ), - enable_rerank=True, rerank_model_func=rerank_model.rerank, ) @@ -130,9 +132,9 @@ async def create_rag_with_rerank_model(): return rag -async def test_rerank_with_different_topk(): +async def test_rerank_with_different_settings(): """ - Test rerank functionality with different top_k settings + Test rerank functionality with different enable_rerank 
settings """ print("🚀 Setting up LightRAG with Rerank functionality...") @@ -154,16 +156,41 @@ async def test_rerank_with_different_topk(): print(f"\n🔍 Testing query: '{query}'") print("=" * 80) - # Test different top_k values to show parameter priority - top_k_values = [2, 5, 10] - - for top_k in top_k_values: - print(f"\n📊 Testing with QueryParam(top_k={top_k}):") + # Test with rerank enabled (default) + print("\n📊 Testing with enable_rerank=True (default):") + result_with_rerank = await rag.aquery( + query, + param=QueryParam( + mode="naive", + top_k=10, + chunk_top_k=5, + enable_rerank=True, # Explicitly enable rerank + ), + ) + print(f" Result length: {len(result_with_rerank)} characters") + print(f" Preview: {result_with_rerank[:100]}...") + + # Test with rerank disabled + print("\n📊 Testing with enable_rerank=False:") + result_without_rerank = await rag.aquery( + query, + param=QueryParam( + mode="naive", + top_k=10, + chunk_top_k=5, + enable_rerank=False, # Disable rerank + ), + ) + print(f" Result length: {len(result_without_rerank)} characters") + print(f" Preview: {result_without_rerank[:100]}...") - # Test naive mode with specific top_k - result = await rag.aquery(query, param=QueryParam(mode="naive", top_k=top_k)) - print(f" Result length: {len(result)} characters") - print(f" Preview: {result[:100]}...") + # Test with default settings (enable_rerank defaults to True) + print("\n📊 Testing with default settings (enable_rerank defaults to True):") + result_default = await rag.aquery( + query, param=QueryParam(mode="naive", top_k=10, chunk_top_k=5) + ) + print(f" Result length: {len(result_default)} characters") + print(f" Preview: {result_default[:100]}...") async def test_direct_rerank(): @@ -209,17 +236,21 @@ async def main(): print("=" * 60) try: - # Test rerank with different top_k values - await test_rerank_with_different_topk() + # Test rerank with different enable_rerank settings + await test_rerank_with_different_settings() # Test direct rerank 
await test_direct_rerank() print("\n✅ Example completed successfully!") print("\n💡 Key Points:") - print(" ✓ All rerank configurations are contained within rerank_model_func") - print(" ✓ Rerank improves document relevance ordering") - print(" ✓ Configure API keys within your rerank function") + print(" ✓ Rerank is now controlled per query via 'enable_rerank' parameter") + print(" ✓ Default value for enable_rerank is True") + print(" ✓ Rerank function is configured at LightRAG initialization") + print(" ✓ Per-query enable_rerank setting overrides default behavior") + print( + " ✓ If enable_rerank=True but no rerank model is configured, a warning is issued" + ) print(" ✓ Monitor API usage and costs when using rerank services") except Exception as e: diff --git a/lightrag/api/config.py b/lightrag/api/config.py index e8a9cea38945a34c8894b999e1dc7aa4f6a2e3ce..98b817c102ee9a6b8589a0fa895ab0fd4b8d427c 100644 --- a/lightrag/api/config.py +++ b/lightrag/api/config.py @@ -11,6 +11,14 @@ from lightrag.utils import get_env_value from lightrag.constants import ( DEFAULT_WOKERS, DEFAULT_TIMEOUT, + DEFAULT_TOP_K, + DEFAULT_CHUNK_TOP_K, + DEFAULT_HISTORY_TURNS, + DEFAULT_MAX_ENTITY_TOKENS, + DEFAULT_MAX_RELATION_TOKENS, + DEFAULT_MAX_TOTAL_TOKENS, + DEFAULT_COSINE_THRESHOLD, + DEFAULT_RELATED_CHUNK_NUMBER, ) # use the .env that is inside the current folder @@ -151,45 +159,6 @@ def parse_args() -> argparse.Namespace: help="Path to SSL private key file (required if --ssl is enabled)", ) - parser.add_argument( - "--history-turns", - type=int, - default=get_env_value("HISTORY_TURNS", 3, int), - help="Number of conversation history turns to include (default: from env or 3)", - ) - - # Search parameters - parser.add_argument( - "--top-k", - type=int, - default=get_env_value("TOP_K", 60, int), - help="Number of most similar results to return (default: from env or 60)", - ) - parser.add_argument( - "--chunk-top-k", - type=int, - default=get_env_value("CHUNK_TOP_K", 15, int), - 
help="Number of text chunks to retrieve initially from vector search (default: from env or 15)", - ) - parser.add_argument( - "--chunk-rerank-top-k", - type=int, - default=get_env_value("CHUNK_RERANK_TOP_K", 5, int), - help="Number of text chunks to keep after reranking (default: from env or 5)", - ) - parser.add_argument( - "--enable-rerank", - action="store_true", - default=get_env_value("ENABLE_RERANK", False, bool), - help="Enable rerank functionality (default: from env or False)", - ) - parser.add_argument( - "--cosine-threshold", - type=float, - default=get_env_value("COSINE_THRESHOLD", 0.2, float), - help="Cosine similarity threshold (default: from env or 0.4)", - ) - # Ollama model name parser.add_argument( "--simulated-model-name", @@ -321,6 +290,26 @@ def parse_args() -> argparse.Namespace: args.rerank_binding_host = get_env_value("RERANK_BINDING_HOST", None) args.rerank_binding_api_key = get_env_value("RERANK_BINDING_API_KEY", None) + # Query configuration + args.history_turns = get_env_value("HISTORY_TURNS", DEFAULT_HISTORY_TURNS, int) + args.top_k = get_env_value("TOP_K", DEFAULT_TOP_K, int) + args.chunk_top_k = get_env_value("CHUNK_TOP_K", DEFAULT_CHUNK_TOP_K, int) + args.max_entity_tokens = get_env_value( + "MAX_ENTITY_TOKENS", DEFAULT_MAX_ENTITY_TOKENS, int + ) + args.max_relation_tokens = get_env_value( + "MAX_RELATION_TOKENS", DEFAULT_MAX_RELATION_TOKENS, int + ) + args.max_total_tokens = get_env_value( + "MAX_TOTAL_TOKENS", DEFAULT_MAX_TOTAL_TOKENS, int + ) + args.cosine_threshold = get_env_value( + "COSINE_THRESHOLD", DEFAULT_COSINE_THRESHOLD, float + ) + args.related_chunk_number = get_env_value( + "RELATED_CHUNK_NUMBER", DEFAULT_RELATED_CHUNK_NUMBER, int + ) + ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name return args diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index bd0154c90fdd8957f99671b214ebd313fa6ac0cc..573455e5560f26fdd38d08c37d2eff530680b6ca 100644 --- a/lightrag/api/lightrag_server.py 
+++ b/lightrag/api/lightrag_server.py @@ -292,9 +292,9 @@ def create_app(args): ), ) - # Configure rerank function if enabled + # Configure rerank function if model and API are configured rerank_model_func = None - if args.enable_rerank and args.rerank_binding_api_key and args.rerank_binding_host: + if args.rerank_binding_api_key and args.rerank_binding_host: from lightrag.rerank import custom_rerank async def server_rerank_func( @@ -312,10 +312,12 @@ def create_app(args): ) rerank_model_func = server_rerank_func - logger.info(f"Rerank enabled with model: {args.rerank_model}") - elif args.enable_rerank: - logger.warning( - "Rerank enabled but RERANK_BINDING_API_KEY or RERANK_BINDING_HOST not configured. Rerank will be disabled." + logger.info( + f"Rerank model configured: {args.rerank_model} (can be enabled per query)" + ) + else: + logger.info( + "Rerank model not configured. Set RERANK_BINDING_API_KEY and RERANK_BINDING_HOST to enable reranking." ) # Initialize RAG @@ -351,7 +353,6 @@ def create_app(args): }, enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache=args.enable_llm_cache, - enable_rerank=args.enable_rerank, rerank_model_func=rerank_model_func, auto_manage_storages_states=False, max_parallel_insert=args.max_parallel_insert, @@ -381,7 +382,6 @@ def create_app(args): }, enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache=args.enable_llm_cache, - enable_rerank=args.enable_rerank, rerank_model_func=rerank_model_func, auto_manage_storages_states=False, max_parallel_insert=args.max_parallel_insert, @@ -512,11 +512,13 @@ def create_app(args): "enable_llm_cache": args.enable_llm_cache, "workspace": args.workspace, "max_graph_nodes": args.max_graph_nodes, - # Rerank configuration - "enable_rerank": args.enable_rerank, - "rerank_model": args.rerank_model if args.enable_rerank else None, + # Rerank configuration (based on whether rerank model is configured) + "enable_rerank": rerank_model_func 
is not None, + "rerank_model": args.rerank_model + if rerank_model_func is not None + else None, "rerank_binding_host": args.rerank_binding_host - if args.enable_rerank + if rerank_model_func is not None else None, }, "auth_mode": auth_mode, diff --git a/lightrag/api/routers/query_routes.py b/lightrag/api/routers/query_routes.py index 0a0c622749573cb39149dd42ea2064bbe10614e2..4d97f151f028a9b4d8240d3977a2c2a377bacb86 100644 --- a/lightrag/api/routers/query_routes.py +++ b/lightrag/api/routers/query_routes.py @@ -52,31 +52,25 @@ class QueryRequest(BaseModel): chunk_top_k: Optional[int] = Field( ge=1, default=None, - description="Number of text chunks to retrieve initially from vector search.", + description="Number of text chunks to retrieve initially from vector search and keep after reranking.", ) - chunk_rerank_top_k: Optional[int] = Field( - ge=1, + max_entity_tokens: Optional[int] = Field( default=None, - description="Number of text chunks to keep after reranking.", - ) - - max_token_for_text_unit: Optional[int] = Field( - gt=1, - default=None, - description="Maximum number of tokens allowed for each retrieved text chunk.", + description="Maximum number of tokens allocated for entity context in unified token control system.", + ge=1, ) - max_token_for_global_context: Optional[int] = Field( - gt=1, + max_relation_tokens: Optional[int] = Field( default=None, - description="Maximum number of tokens allocated for relationship descriptions in global retrieval.", + description="Maximum number of tokens allocated for relationship context in unified token control system.", + ge=1, ) - max_token_for_local_context: Optional[int] = Field( - gt=1, + max_total_tokens: Optional[int] = Field( default=None, - description="Maximum number of tokens allocated for entity descriptions in local retrieval.", + description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).", + ge=1, ) conversation_history: Optional[List[Dict[str, 
Any]]] = Field( @@ -99,6 +93,11 @@ class QueryRequest(BaseModel): description="User-provided prompt for the query. If provided, this will be used instead of the default value from prompt template.", ) + enable_rerank: Optional[bool] = Field( + default=None, + description="Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True.", + ) + @field_validator("query", mode="after") @classmethod def query_strip_after(cls, query: str) -> str: diff --git a/lightrag/api/webui/assets/_basePickBy-D3PHsJjq.js b/lightrag/api/webui/assets/_basePickBy-DV1dBXEu.js similarity index 95% rename from lightrag/api/webui/assets/_basePickBy-D3PHsJjq.js rename to lightrag/api/webui/assets/_basePickBy-DV1dBXEu.js index f23cedcbbf939517a0379ea770f4241e1bdb551a..a964a6a16882b88623676edaef141df22fb168be 100644 Binary files a/lightrag/api/webui/assets/_basePickBy-D3PHsJjq.js and b/lightrag/api/webui/assets/_basePickBy-DV1dBXEu.js differ diff --git a/lightrag/api/webui/assets/_baseUniq-CtAZZJ8e.js b/lightrag/api/webui/assets/_baseUniq-BZ3hvks1.js similarity index 98% rename from lightrag/api/webui/assets/_baseUniq-CtAZZJ8e.js rename to lightrag/api/webui/assets/_baseUniq-BZ3hvks1.js index f50279694ba8a3e988cd645e0167150d39091498..1cc386ac6d69e7b24f0c7142741ccc60ce66b072 100644 Binary files a/lightrag/api/webui/assets/_baseUniq-CtAZZJ8e.js and b/lightrag/api/webui/assets/_baseUniq-BZ3hvks1.js differ diff --git a/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-Bou3pEJo.js b/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-0ddCq26Q.js similarity index 99% rename from lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-Bou3pEJo.js rename to lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-0ddCq26Q.js index 34a3316b359d76f5bfc5e8a867b1fc7c33e1c97f..23cf4a6295c9aa8bf7f8882991bc68907057adaf 100644 Binary files a/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-Bou3pEJo.js and 
b/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-0ddCq26Q.js differ diff --git a/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-BxXXNv1O.js b/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-DezBiNXY.js similarity index 99% rename from lightrag/api/webui/assets/blockDiagram-JOT3LUYC-BxXXNv1O.js rename to lightrag/api/webui/assets/blockDiagram-JOT3LUYC-DezBiNXY.js index 8348d25ab12771f500a213a489a76a01a9c83779..a011900605999d49b56aac9395993e41e4492e44 100644 Binary files a/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-BxXXNv1O.js and b/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-DezBiNXY.js differ diff --git a/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BpY1T-jk.js b/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BMYcCHQE.js similarity index 99% rename from lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BpY1T-jk.js rename to lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BMYcCHQE.js index 01f3a0c0b39f17bfe87e8991b6036f9ec5afbf85..cbeca1b3259c120c669837a3d19cb49b2a62c2d9 100644 Binary files a/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BpY1T-jk.js and b/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BMYcCHQE.js differ diff --git a/lightrag/api/webui/assets/chunk-4BMEZGHF-CAhtCpmT.js b/lightrag/api/webui/assets/chunk-4BMEZGHF-DM9xX3Iw.js similarity index 78% rename from lightrag/api/webui/assets/chunk-4BMEZGHF-CAhtCpmT.js rename to lightrag/api/webui/assets/chunk-4BMEZGHF-DM9xX3Iw.js index 50ce15aac64e590c93b3a1af81f72cfca8130285..1c6c7b047d3d930e0aecdb700e00d88f942c106d 100644 Binary files a/lightrag/api/webui/assets/chunk-4BMEZGHF-CAhtCpmT.js and b/lightrag/api/webui/assets/chunk-4BMEZGHF-DM9xX3Iw.js differ diff --git a/lightrag/api/webui/assets/chunk-A2AXSNBT-B91iiasA.js b/lightrag/api/webui/assets/chunk-A2AXSNBT-CRex3-yW.js similarity index 99% rename from lightrag/api/webui/assets/chunk-A2AXSNBT-B91iiasA.js rename to lightrag/api/webui/assets/chunk-A2AXSNBT-CRex3-yW.js index 
c59c9d4211aad3f0c213fe90cbcbcdc3433fc0a0..1ec7ee57b7df9859326080b30fdd972ecc65bda8 100644 Binary files a/lightrag/api/webui/assets/chunk-A2AXSNBT-B91iiasA.js and b/lightrag/api/webui/assets/chunk-A2AXSNBT-CRex3-yW.js differ diff --git a/lightrag/api/webui/assets/chunk-AEK57VVT-gQ4j2jcG.js b/lightrag/api/webui/assets/chunk-AEK57VVT-DlsJi6tH.js similarity index 99% rename from lightrag/api/webui/assets/chunk-AEK57VVT-gQ4j2jcG.js rename to lightrag/api/webui/assets/chunk-AEK57VVT-DlsJi6tH.js index 2539c37b81625963a25a6bb20872221e56f72cb6..3b671fb8baecfc51806a7d6a44cc6ced0eeb27bd 100644 Binary files a/lightrag/api/webui/assets/chunk-AEK57VVT-gQ4j2jcG.js and b/lightrag/api/webui/assets/chunk-AEK57VVT-DlsJi6tH.js differ diff --git a/lightrag/api/webui/assets/chunk-D6G4REZN-CGaqGId9.js b/lightrag/api/webui/assets/chunk-D6G4REZN-5j_Vlndu.js similarity index 95% rename from lightrag/api/webui/assets/chunk-D6G4REZN-CGaqGId9.js rename to lightrag/api/webui/assets/chunk-D6G4REZN-5j_Vlndu.js index 03f6dcdc7d8221d9f285a058498c03ca08152cca..961133cb2e5061f0bf7fb535d5ae04b63c875401 100644 Binary files a/lightrag/api/webui/assets/chunk-D6G4REZN-CGaqGId9.js and b/lightrag/api/webui/assets/chunk-D6G4REZN-5j_Vlndu.js differ diff --git a/lightrag/api/webui/assets/chunk-RZ5BOZE2-B615FLH4.js b/lightrag/api/webui/assets/chunk-RZ5BOZE2-CdnIs5Fb.js similarity index 81% rename from lightrag/api/webui/assets/chunk-RZ5BOZE2-B615FLH4.js rename to lightrag/api/webui/assets/chunk-RZ5BOZE2-CdnIs5Fb.js index ef6235aed902033e1b3fd47040343fab1f277c4f..c49e35eecd960afac3e0a6bec57b4f2185943fa4 100644 Binary files a/lightrag/api/webui/assets/chunk-RZ5BOZE2-B615FLH4.js and b/lightrag/api/webui/assets/chunk-RZ5BOZE2-CdnIs5Fb.js differ diff --git a/lightrag/api/webui/assets/chunk-XZIHB7SX-c4P7PYPk.js b/lightrag/api/webui/assets/chunk-XZIHB7SX-gcyrJN2U.js similarity index 67% rename from lightrag/api/webui/assets/chunk-XZIHB7SX-c4P7PYPk.js rename to lightrag/api/webui/assets/chunk-XZIHB7SX-gcyrJN2U.js index 
fe917915107cf1e2083684cf0b0d715c027ef37e..114a73c34a54d2e67679898589d26e1be716afbf 100644 Binary files a/lightrag/api/webui/assets/chunk-XZIHB7SX-c4P7PYPk.js and b/lightrag/api/webui/assets/chunk-XZIHB7SX-gcyrJN2U.js differ diff --git a/lightrag/api/webui/assets/classDiagram-GIVACNV2-DBTA8XwB.js b/lightrag/api/webui/assets/classDiagram-GIVACNV2-DZXU66uW.js similarity index 61% rename from lightrag/api/webui/assets/classDiagram-GIVACNV2-DBTA8XwB.js rename to lightrag/api/webui/assets/classDiagram-GIVACNV2-DZXU66uW.js index 27c65696825b5f3b41405636ce0a7a83975c2283..bc8695ab1e0fe84303048be5a5dbc37433de4e22 100644 Binary files a/lightrag/api/webui/assets/classDiagram-GIVACNV2-DBTA8XwB.js and b/lightrag/api/webui/assets/classDiagram-GIVACNV2-DZXU66uW.js differ diff --git a/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DBTA8XwB.js b/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DZXU66uW.js similarity index 61% rename from lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DBTA8XwB.js rename to lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DZXU66uW.js index 27c65696825b5f3b41405636ce0a7a83975c2283..bc8695ab1e0fe84303048be5a5dbc37433de4e22 100644 Binary files a/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DBTA8XwB.js and b/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DZXU66uW.js differ diff --git a/lightrag/api/webui/assets/clone-Dm5jEAXQ.js b/lightrag/api/webui/assets/clone-Dm5jEAXQ.js deleted file mode 100644 index 8b42b7f66c3a0455a4d70ca1621026213b91da0b..0000000000000000000000000000000000000000 Binary files a/lightrag/api/webui/assets/clone-Dm5jEAXQ.js and /dev/null differ diff --git a/lightrag/api/webui/assets/clone-eVzB-9-f.js b/lightrag/api/webui/assets/clone-eVzB-9-f.js new file mode 100644 index 0000000000000000000000000000000000000000..7d96fdff2636dcc8c20cd66f78a9c9d83ae64060 Binary files /dev/null and b/lightrag/api/webui/assets/clone-eVzB-9-f.js differ diff --git a/lightrag/api/webui/assets/dagre-OKDRZEBW-CqR4Poz4.js 
b/lightrag/api/webui/assets/dagre-OKDRZEBW-Cas2IJD5.js similarity index 97% rename from lightrag/api/webui/assets/dagre-OKDRZEBW-CqR4Poz4.js rename to lightrag/api/webui/assets/dagre-OKDRZEBW-Cas2IJD5.js index 046ac1a0061226fc8514cf5c16816a0d7b5a014a..821f3f18ceac2e70584b07d27ebdd5f98e6e9d26 100644 Binary files a/lightrag/api/webui/assets/dagre-OKDRZEBW-CqR4Poz4.js and b/lightrag/api/webui/assets/dagre-OKDRZEBW-Cas2IJD5.js differ diff --git a/lightrag/api/webui/assets/diagram-SSKATNLV-pBYsrik-.js b/lightrag/api/webui/assets/diagram-SSKATNLV-CA9pCZ-g.js similarity index 93% rename from lightrag/api/webui/assets/diagram-SSKATNLV-pBYsrik-.js rename to lightrag/api/webui/assets/diagram-SSKATNLV-CA9pCZ-g.js index 7476bf1fcd204be3665418107d66840329486aa3..c497ac8f182d382f9b7e96bf8a9ffda0adfc3897 100644 Binary files a/lightrag/api/webui/assets/diagram-SSKATNLV-pBYsrik-.js and b/lightrag/api/webui/assets/diagram-SSKATNLV-CA9pCZ-g.js differ diff --git a/lightrag/api/webui/assets/diagram-VNBRO52H-Bu64Jus9.js b/lightrag/api/webui/assets/diagram-VNBRO52H-B9-Mlqta.js similarity index 90% rename from lightrag/api/webui/assets/diagram-VNBRO52H-Bu64Jus9.js rename to lightrag/api/webui/assets/diagram-VNBRO52H-B9-Mlqta.js index f975fcccba98e425e5aeec4103850be78fcb6482..53640b5b52391971ccdc003a089ca57015b2199b 100644 Binary files a/lightrag/api/webui/assets/diagram-VNBRO52H-Bu64Jus9.js and b/lightrag/api/webui/assets/diagram-VNBRO52H-B9-Mlqta.js differ diff --git a/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-BTmP3B4h.js b/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-CX4Di1zm.js similarity index 99% rename from lightrag/api/webui/assets/erDiagram-Q7BY3M3F-BTmP3B4h.js rename to lightrag/api/webui/assets/erDiagram-Q7BY3M3F-CX4Di1zm.js index 425a389e5940ed754782567d4ac0656ed7fa0e20..395cdc5c43f99efd2cc654c15ad8c1010b39d629 100644 Binary files a/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-BTmP3B4h.js and b/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-CX4Di1zm.js differ diff --git 
a/lightrag/api/webui/assets/feature-documents-oks3sUnM.js b/lightrag/api/webui/assets/feature-documents-DZY3tMAq.js similarity index 99% rename from lightrag/api/webui/assets/feature-documents-oks3sUnM.js rename to lightrag/api/webui/assets/feature-documents-DZY3tMAq.js index 7eb92ba7e886c5780ed10989a32fb8e47d3e03e3..ee42449ccced238b8af06d19f0486d1c1ff16dcb 100644 Binary files a/lightrag/api/webui/assets/feature-documents-oks3sUnM.js and b/lightrag/api/webui/assets/feature-documents-DZY3tMAq.js differ diff --git a/lightrag/api/webui/assets/feature-graph-NODQb6qW.js b/lightrag/api/webui/assets/feature-graph-wF7LCIjH.js similarity index 51% rename from lightrag/api/webui/assets/feature-graph-NODQb6qW.js rename to lightrag/api/webui/assets/feature-graph-wF7LCIjH.js index be23288dc39852c3fe9e7b8d300e695f572fdeb2..0a1fa8ca6351360d502784a96caf75c945a2aaf6 100644 Binary files a/lightrag/api/webui/assets/feature-graph-NODQb6qW.js and b/lightrag/api/webui/assets/feature-graph-wF7LCIjH.js differ diff --git a/lightrag/api/webui/assets/feature-retrieval-DalFy9WB.js b/lightrag/api/webui/assets/feature-retrieval-DalFy9WB.js deleted file mode 100644 index 1b8ba068ac51d00314ccc211457976c224910c72..0000000000000000000000000000000000000000 Binary files a/lightrag/api/webui/assets/feature-retrieval-DalFy9WB.js and /dev/null differ diff --git a/lightrag/api/webui/assets/feature-retrieval-DdCvVec9.js b/lightrag/api/webui/assets/feature-retrieval-DdCvVec9.js new file mode 100644 index 0000000000000000000000000000000000000000..f82c9b6c3d604c1f886a0a966e1c48e1d2b0e273 Binary files /dev/null and b/lightrag/api/webui/assets/feature-retrieval-DdCvVec9.js differ diff --git a/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-DZNySYxV.js b/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-BDwWKjb6.js similarity index 99% rename from lightrag/api/webui/assets/flowDiagram-4HSFHLVR-DZNySYxV.js rename to lightrag/api/webui/assets/flowDiagram-4HSFHLVR-BDwWKjb6.js index 
95c972cd1ce263b862f9cf255de9caad8c9ccf37..5cfd266e4fcbeb66a80f68ba2694bff7b8ab6c67 100644 Binary files a/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-DZNySYxV.js and b/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-BDwWKjb6.js differ diff --git a/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-GWTNv7FR.js b/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-Du3IUDRk.js similarity index 99% rename from lightrag/api/webui/assets/ganttDiagram-APWFNJXF-GWTNv7FR.js rename to lightrag/api/webui/assets/ganttDiagram-APWFNJXF-Du3IUDRk.js index 3296abdda179a1212a5fb08f581765ac992737f4..d7731304bed4d646a910722d04f658af510802f8 100644 Binary files a/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-GWTNv7FR.js and b/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-Du3IUDRk.js differ diff --git a/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-BXUpvPAf.js b/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-CD8MAiok.js similarity index 98% rename from lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-BXUpvPAf.js rename to lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-CD8MAiok.js index 03955373498e06722e9769ef9f0225d65aff5290..cadc6efb4edf15dbf7105a765205095131f0b93a 100644 Binary files a/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-BXUpvPAf.js and b/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-CD8MAiok.js differ diff --git a/lightrag/api/webui/assets/graph-BLnbmvfZ.js b/lightrag/api/webui/assets/graph-DJgPOSDl.js similarity index 97% rename from lightrag/api/webui/assets/graph-BLnbmvfZ.js rename to lightrag/api/webui/assets/graph-DJgPOSDl.js index c1f9f3d52f895e54c5a935b1b9335b5984faf01c..ee724a0d60de96c8c8eba4930605e43a22cf40ac 100644 Binary files a/lightrag/api/webui/assets/graph-BLnbmvfZ.js and b/lightrag/api/webui/assets/graph-DJgPOSDl.js differ diff --git a/lightrag/api/webui/assets/index-yRRg2BZk.js b/lightrag/api/webui/assets/index-D3V9EKqf.js similarity index 69% rename from lightrag/api/webui/assets/index-yRRg2BZk.js rename to 
lightrag/api/webui/assets/index-D3V9EKqf.js index bebf87b25fbae873fdca209d71df9bbb5c368008..ab8f72e822a01cd4d64e88addb48890931e56075 100644 Binary files a/lightrag/api/webui/assets/index-yRRg2BZk.js and b/lightrag/api/webui/assets/index-D3V9EKqf.js differ diff --git a/lightrag/api/webui/assets/index-1Hy45NwC.js b/lightrag/api/webui/assets/index-DB3D3pNI.js similarity index 91% rename from lightrag/api/webui/assets/index-1Hy45NwC.js rename to lightrag/api/webui/assets/index-DB3D3pNI.js index a63a535a78f44e3ed61ac9aca0c19e6cf54f14d5..110faed713ec78fa1f00552045c7e81786953b4b 100644 Binary files a/lightrag/api/webui/assets/index-1Hy45NwC.js and b/lightrag/api/webui/assets/index-DB3D3pNI.js differ diff --git a/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DAtlRRqj.js b/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DYkrQwoL.js similarity index 61% rename from lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DAtlRRqj.js rename to lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DYkrQwoL.js index 04c4a4621f3020003d02fbda1b8d1c83c7b18630..38a4336d471c39995e4d3937b58dd5963070d0d3 100644 Binary files a/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DAtlRRqj.js and b/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DYkrQwoL.js differ diff --git a/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-BscxFTBa.js b/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-CZecBGFk.js similarity index 99% rename from lightrag/api/webui/assets/journeyDiagram-U35MCT3I-BscxFTBa.js rename to lightrag/api/webui/assets/journeyDiagram-U35MCT3I-CZecBGFk.js index e84a0773baddaaeab67b2485812e83fe220208ef..1d59d8a3e157ac1fea24aaf4e7295cd16c804ca8 100644 Binary files a/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-BscxFTBa.js and b/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-CZecBGFk.js differ diff --git a/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-QESEl0tA.js b/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-CD8vwi41.js similarity index 99% rename from 
lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-QESEl0tA.js rename to lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-CD8vwi41.js index c45220eebb8bd1e3a99a24d2ff72ea4e2b355fc3..fee84e0b215cb4eba8ce44b226c5570495ce6f5e 100644 Binary files a/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-QESEl0tA.js and b/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-CD8vwi41.js differ diff --git a/lightrag/api/webui/assets/layout-DsT4215v.js b/lightrag/api/webui/assets/layout-D_MnvYWV.js similarity index 99% rename from lightrag/api/webui/assets/layout-DsT4215v.js rename to lightrag/api/webui/assets/layout-D_MnvYWV.js index b15fa63d9560fcbde47a670a55959e022968cd93..3b731afe37f25d8c343ba186ae2938aefe71fa80 100644 Binary files a/lightrag/api/webui/assets/layout-DsT4215v.js and b/lightrag/api/webui/assets/layout-D_MnvYWV.js differ diff --git a/lightrag/api/webui/assets/markdown-vendor-DmIvJdn7.js b/lightrag/api/webui/assets/markdown-vendor-ZbbHR4ge.js similarity index 85% rename from lightrag/api/webui/assets/markdown-vendor-DmIvJdn7.js rename to lightrag/api/webui/assets/markdown-vendor-ZbbHR4ge.js index abcd404940c9469f77c89b39aef26c9432322a32..c8c81f04afe4e34e62204cd0824ae6c8187e7010 100644 Binary files a/lightrag/api/webui/assets/markdown-vendor-DmIvJdn7.js and b/lightrag/api/webui/assets/markdown-vendor-ZbbHR4ge.js differ diff --git a/lightrag/api/webui/assets/mermaid-vendor-D0f_SE0h.js b/lightrag/api/webui/assets/mermaid-vendor-CR44n-lC.js similarity index 99% rename from lightrag/api/webui/assets/mermaid-vendor-D0f_SE0h.js rename to lightrag/api/webui/assets/mermaid-vendor-CR44n-lC.js index 4b3ddee7eee7dca790e9015f8e2ebfc5db821039..65b01a060d7e219a9c4637371a8472613b5c09a7 100644 Binary files a/lightrag/api/webui/assets/mermaid-vendor-D0f_SE0h.js and b/lightrag/api/webui/assets/mermaid-vendor-CR44n-lC.js differ diff --git a/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-aQwMTShx.js b/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-CEOit9vG.js 
similarity index 99% rename from lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-aQwMTShx.js rename to lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-CEOit9vG.js index 3b5f0e99882eac1772b1c68140e6ca47b67ecd9d..32143c9d0cb009a75ac95ed5e49d69b801f8e611 100644 Binary files a/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-aQwMTShx.js and b/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-CEOit9vG.js differ diff --git a/lightrag/api/webui/assets/pieDiagram-IB7DONF6-D6N6SEu_.js b/lightrag/api/webui/assets/pieDiagram-IB7DONF6-Ca5AV9bY.js similarity index 91% rename from lightrag/api/webui/assets/pieDiagram-IB7DONF6-D6N6SEu_.js rename to lightrag/api/webui/assets/pieDiagram-IB7DONF6-Ca5AV9bY.js index 880ab323db8dcf8ab5e015e7e71ec5a59918147a..9f4b64e9f3bc1eec98a000a58d8109f4d33b089c 100644 Binary files a/lightrag/api/webui/assets/pieDiagram-IB7DONF6-D6N6SEu_.js and b/lightrag/api/webui/assets/pieDiagram-IB7DONF6-Ca5AV9bY.js differ diff --git a/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-COkzo7lS.js b/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-D5ZAOmhC.js similarity index 99% rename from lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-COkzo7lS.js rename to lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-D5ZAOmhC.js index bf4daf5fa311cd95b6fdfe818fac530a2d7acc4f..fc71293e729d697866c4ee71e67f4041433ec4d0 100644 Binary files a/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-COkzo7lS.js and b/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-D5ZAOmhC.js differ diff --git a/lightrag/api/webui/assets/radar-MK3ICKWK-DOAXm8cx.js b/lightrag/api/webui/assets/radar-MK3ICKWK-B97XRKGx.js similarity index 99% rename from lightrag/api/webui/assets/radar-MK3ICKWK-DOAXm8cx.js rename to lightrag/api/webui/assets/radar-MK3ICKWK-B97XRKGx.js index 1c632567bd6ad84b1fc0a419f1d7aec1eef1c548..7c8edc94cdda54aaa849ee340f86a45fc94dfb5f 100644 Binary files a/lightrag/api/webui/assets/radar-MK3ICKWK-DOAXm8cx.js and 
b/lightrag/api/webui/assets/radar-MK3ICKWK-B97XRKGx.js differ diff --git a/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-lKW1n5a1.js b/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-BzPWhOZW.js similarity index 99% rename from lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-lKW1n5a1.js rename to lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-BzPWhOZW.js index e0aa3bb9d8d4c552f9dbd94bb0490ff260f2a326..0827b2fccfc37c3182353d8f2ad4661f7c495849 100644 Binary files a/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-lKW1n5a1.js and b/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-BzPWhOZW.js differ diff --git a/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-BqECU7xS.js b/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-DYZFDO6U.js similarity index 99% rename from lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-BqECU7xS.js rename to lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-DYZFDO6U.js index cf34615916669049bf0d7255988eb07f3e0857b3..b226074a12e13b58d8acc8e58aa5726f67f406f4 100644 Binary files a/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-BqECU7xS.js and b/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-DYZFDO6U.js differ diff --git a/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-ByOWqALm.js b/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-GAQ6Ejep.js similarity index 99% rename from lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-ByOWqALm.js rename to lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-GAQ6Ejep.js index 7f5ca0e5e48899541fac336e59ad6e4976ed95cd..9c3326bced7a5551c6540f43b3f5da7502a5a3cf 100644 Binary files a/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-ByOWqALm.js and b/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-GAQ6Ejep.js differ diff --git a/lightrag/api/webui/assets/stateDiagram-DGXRK772-DjKMsne-.js b/lightrag/api/webui/assets/stateDiagram-DGXRK772-pI_aBJdi.js similarity index 96% rename from lightrag/api/webui/assets/stateDiagram-DGXRK772-DjKMsne-.js rename to 
lightrag/api/webui/assets/stateDiagram-DGXRK772-pI_aBJdi.js index 8ec4a30bd73407e62e9e65b97bf41561fa0e65ad..86a727642b2addb1d12ab1a9f26366c707d674e6 100644 Binary files a/lightrag/api/webui/assets/stateDiagram-DGXRK772-DjKMsne-.js and b/lightrag/api/webui/assets/stateDiagram-DGXRK772-pI_aBJdi.js differ diff --git a/lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-sVx8nHiu.js b/lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-lbiDwad_.js similarity index 61% rename from lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-sVx8nHiu.js rename to lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-lbiDwad_.js index 046adada205c5f0a4e66c14173a77aab1ccbfc6c..ba4f4e05ea3b1c724fd28c4472abce3314e6704c 100644 Binary files a/lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-sVx8nHiu.js and b/lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-lbiDwad_.js differ diff --git a/lightrag/api/webui/assets/timeline-definition-BDJGKUSR-FwPl5FEj.js b/lightrag/api/webui/assets/timeline-definition-BDJGKUSR-C0uTfaoS.js similarity index 99% rename from lightrag/api/webui/assets/timeline-definition-BDJGKUSR-FwPl5FEj.js rename to lightrag/api/webui/assets/timeline-definition-BDJGKUSR-C0uTfaoS.js index 2023240e3cdce027f3b6764e9cee03750c1aea2d..6e50b5ea9b5b48db21a0e44063af8679dcd87966 100644 Binary files a/lightrag/api/webui/assets/timeline-definition-BDJGKUSR-FwPl5FEj.js and b/lightrag/api/webui/assets/timeline-definition-BDJGKUSR-C0uTfaoS.js differ diff --git a/lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-BHnqzGXj.js b/lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-Be7THF3w.js similarity index 99% rename from lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-BHnqzGXj.js rename to lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-Be7THF3w.js index a361dc9b741bc2244b6dc7a4fbe3468de04c8424..95511aad4b872751a99f4fb293cc0446508c2410 100644 Binary files a/lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-BHnqzGXj.js and b/lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-Be7THF3w.js 
differ diff --git a/lightrag/api/webui/index.html b/lightrag/api/webui/index.html index 20c25c2046b843201d194a5096d76fa680bc4319..461f6ee086d1653d532bd6753e61d446d4884f41 100644 Binary files a/lightrag/api/webui/index.html and b/lightrag/api/webui/index.html differ diff --git a/lightrag/base.py b/lightrag/base.py index 97564ac2e84fa704933fbd80f1b34e5e548d1878..ac0545ce7f19b4d62e0427b19758e226d7996f7a 100644 --- a/lightrag/base.py +++ b/lightrag/base.py @@ -14,7 +14,16 @@ from typing import ( ) from .utils import EmbeddingFunc from .types import KnowledgeGraph -from .constants import GRAPH_FIELD_SEP +from .constants import ( + GRAPH_FIELD_SEP, + DEFAULT_TOP_K, + DEFAULT_CHUNK_TOP_K, + DEFAULT_MAX_ENTITY_TOKENS, + DEFAULT_MAX_RELATION_TOKENS, + DEFAULT_MAX_TOTAL_TOKENS, + DEFAULT_HISTORY_TURNS, + DEFAULT_ENABLE_RERANK, +) # use the .env that is inside the current folder # allows to use different .env file for each lightrag instance @@ -36,7 +45,7 @@ T = TypeVar("T") class QueryParam: """Configuration parameters for query execution in LightRAG.""" - mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = "global" + mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = "mix" """Specifies the retrieval mode: - "local": Focuses on context-dependent information. - "global": Utilizes global knowledge. @@ -57,29 +66,28 @@ class QueryParam: stream: bool = False """If True, enables streaming output for real-time responses.""" - top_k: int = int(os.getenv("TOP_K", "60")) + top_k: int = int(os.getenv("TOP_K", str(DEFAULT_TOP_K))) """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.""" - chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "5")) - """Number of text chunks to retrieve initially from vector search. + chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", str(DEFAULT_CHUNK_TOP_K))) + """Number of text chunks to retrieve initially from vector search and keep after reranking. 
If None, defaults to top_k value. """ - chunk_rerank_top_k: int = int(os.getenv("CHUNK_RERANK_TOP_K", "5")) - """Number of text chunks to keep after reranking. - If None, keeps all chunks returned from initial retrieval. - """ - - max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "6000")) - """Maximum number of tokens allowed for each retrieved text chunk.""" + max_entity_tokens: int = int( + os.getenv("MAX_ENTITY_TOKENS", str(DEFAULT_MAX_ENTITY_TOKENS)) + ) + """Maximum number of tokens allocated for entity context in unified token control system.""" - max_token_for_global_context: int = int( - os.getenv("MAX_TOKEN_RELATION_DESC", "4000") + max_relation_tokens: int = int( + os.getenv("MAX_RELATION_TOKENS", str(DEFAULT_MAX_RELATION_TOKENS)) ) - """Maximum number of tokens allocated for relationship descriptions in global retrieval.""" + """Maximum number of tokens allocated for relationship context in unified token control system.""" - max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) - """Maximum number of tokens allocated for entity descriptions in local retrieval.""" + max_total_tokens: int = int( + os.getenv("MAX_TOTAL_TOKENS", str(DEFAULT_MAX_TOTAL_TOKENS)) + ) + """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).""" hl_keywords: list[str] = field(default_factory=list) """List of high-level keywords to prioritize in retrieval.""" @@ -92,7 +100,7 @@ class QueryParam: Format: [{"role": "user/assistant", "content": "message"}]. """ - history_turns: int = 3 + history_turns: int = int(os.getenv("HISTORY_TURNS", str(DEFAULT_HISTORY_TURNS))) """Number of complete conversation turns (user-assistant pairs) to consider in the response context.""" ids: list[str] | None = None @@ -109,6 +117,13 @@ class QueryParam: If proivded, this will be use instead of the default vaulue from prompt template. 
""" + enable_rerank: bool = ( + os.getenv("ENABLE_RERANK", str(DEFAULT_ENABLE_RERANK).lower()).lower() == "true" + ) + """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. + Default is True to enable reranking when rerank model is available. + """ + @dataclass class StorageNameSpace(ABC): diff --git a/lightrag/constants.py b/lightrag/constants.py index c3fd653188ccc201d475f2b24fe3a6d7a10e05d4..628583649af447c7114e669404af0c4ddc55d302 100644 --- a/lightrag/constants.py +++ b/lightrag/constants.py @@ -12,6 +12,17 @@ DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4 DEFAULT_WOKERS = 2 DEFAULT_TIMEOUT = 150 +# Query and retrieval configuration defaults +DEFAULT_TOP_K = 40 +DEFAULT_CHUNK_TOP_K = 10 +DEFAULT_MAX_ENTITY_TOKENS = 10000 +DEFAULT_MAX_RELATION_TOKENS = 10000 +DEFAULT_MAX_TOTAL_TOKENS = 32000 +DEFAULT_HISTORY_TURNS = 3 +DEFAULT_ENABLE_RERANK = True +DEFAULT_COSINE_THRESHOLD = 0.2 +DEFAULT_RELATED_CHUNK_NUMBER = 10 + # Separator for graph fields GRAPH_FIELD_SEP = "" diff --git a/lightrag/kg/faiss_impl.py b/lightrag/kg/faiss_impl.py index c6ee099d3a49b6310530a91aca4be253f9608c55..448ca5ef087ea20b169cd65313a9aad987c120a5 100644 --- a/lightrag/kg/faiss_impl.py +++ b/lightrag/kg/faiss_impl.py @@ -185,10 +185,6 @@ class FaissVectorDBStorage(BaseVectorStorage): embedding = np.array(embedding, dtype=np.float32) faiss.normalize_L2(embedding) # we do in-place normalization - logger.info( - f"Query: {query}, top_k: {top_k}, threshold: {self.cosine_better_than_threshold}" - ) - # Perform the similarity search index = await self._get_index() distances, indices = index.search(embedding, top_k) diff --git a/lightrag/kg/shared_storage.py b/lightrag/kg/shared_storage.py index 56fce4e5be8e844e340cb1bf9327ba7a7bc408a3..228bf272a951d34995ad23c0bdbf4a17e6056bbb 100644 --- a/lightrag/kg/shared_storage.py +++ b/lightrag/kg/shared_storage.py @@ -20,6 +20,9 @@ def direct_log(message, enable_output: bool = False, level: str = 
"DEBUG"): level: Log level (default: "DEBUG") enable_output: Whether to actually output the log (default: True) """ + if not enable_output: + return + # Get the current logger level from the lightrag logger try: from lightrag.utils import logger @@ -40,7 +43,7 @@ def direct_log(message, enable_output: bool = False, level: str = "DEBUG"): message_level = level_mapping.get(level.upper(), logging.DEBUG) # print(f"Diret_log: {level.upper()} {message_level} ? {current_level}", file=sys.stderr, flush=True) - if enable_output or (message_level >= current_level): + if message_level >= current_level: print(f"{level}: {message}", file=sys.stderr, flush=True) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 6ee61e2dbd2c3ee3f8df567be2c4425b9a284838..092e06ebceab5b11faaae4ccf4ebd543c99064b9 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -24,6 +24,13 @@ from typing import ( from lightrag.constants import ( DEFAULT_MAX_GLEANING, DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, + DEFAULT_TOP_K, + DEFAULT_CHUNK_TOP_K, + DEFAULT_MAX_ENTITY_TOKENS, + DEFAULT_MAX_RELATION_TOKENS, + DEFAULT_MAX_TOTAL_TOKENS, + DEFAULT_COSINE_THRESHOLD, + DEFAULT_RELATED_CHUNK_NUMBER, ) from lightrag.utils import get_env_value @@ -125,6 +132,42 @@ class LightRAG: log_level: int | None = field(default=None) log_file_path: str | None = field(default=None) + # Query parameters + # --- + + top_k: int = field(default=get_env_value("TOP_K", DEFAULT_TOP_K, int)) + """Number of entities/relations to retrieve for each query.""" + + chunk_top_k: int = field( + default=get_env_value("CHUNK_TOP_K", DEFAULT_CHUNK_TOP_K, int) + ) + """Maximum number of chunks in context.""" + + max_entity_tokens: int = field( + default=get_env_value("MAX_ENTITY_TOKENS", DEFAULT_MAX_ENTITY_TOKENS, int) + ) + """Maximum number of tokens for entity in context.""" + + max_relation_tokens: int = field( + default=get_env_value("MAX_RELATION_TOKENS", DEFAULT_MAX_RELATION_TOKENS, int) + ) + """Maximum number of tokens for 
relation in context.""" + + max_total_tokens: int = field( + default=get_env_value("MAX_TOTAL_TOKENS", DEFAULT_MAX_TOTAL_TOKENS, int) + ) + """Maximum total tokens in context (including system prompt, entities, relations and chunks).""" + + cosine_threshold: int = field( + default=get_env_value("COSINE_THRESHOLD", DEFAULT_COSINE_THRESHOLD, int) + ) + """Cosine threshold of vector DB retrieval for entities, relations and chunks.""" + + related_chunk_number: int = field( + default=get_env_value("RELATED_CHUNK_NUMBER", DEFAULT_RELATED_CHUNK_NUMBER, int) + ) + """Number of related chunks to grab from single entity or relation.""" + # Entity extraction # --- @@ -238,11 +281,6 @@ class LightRAG: # Rerank Configuration # --- - enable_rerank: bool = field( - default=bool(os.getenv("ENABLE_RERANK", "False").lower() == "true") - ) - """Enable reranking for improved retrieval quality. Defaults to False.""" - rerank_model_func: Callable[..., object] | None = field(default=None) """Function for reranking retrieved documents. All rerank configurations (model name, API keys, top_k, etc.) should be included in this function. Optional.""" @@ -454,9 +492,9 @@ class LightRAG: ) # Init Rerank - if self.enable_rerank and self.rerank_model_func: + if self.rerank_model_func: logger.info("Rerank model initialized for improved retrieval quality") - elif self.enable_rerank and not self.rerank_model_func: + else: logger.warning( "Rerank is enabled but no rerank_model_func provided. Reranking will be skipped." 
) diff --git a/lightrag/operate.py b/lightrag/operate.py index 4bf579d1e25104a77f09c38a66dcc6084d1e871d..e3456d9247f1b67da238ee928ab45763e0479d48 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -36,7 +36,13 @@ from .base import ( QueryParam, ) from .prompt import PROMPTS -from .constants import GRAPH_FIELD_SEP +from .constants import ( + GRAPH_FIELD_SEP, + DEFAULT_MAX_ENTITY_TOKENS, + DEFAULT_MAX_RELATION_TOKENS, + DEFAULT_MAX_TOTAL_TOKENS, + DEFAULT_RELATED_CHUNK_NUMBER, +) from .kg.shared_storage import get_storage_keyed_lock import time from dotenv import load_dotenv @@ -1643,7 +1649,9 @@ async def kg_query( tokenizer: Tokenizer = global_config["tokenizer"] len_of_prompts = len(tokenizer.encode(query + sys_prompt)) - logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[kg_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})" + ) response = await use_model_func( query, @@ -1766,7 +1774,9 @@ async def extract_keywords_only( tokenizer: Tokenizer = global_config["tokenizer"] len_of_prompts = len(tokenizer.encode(kw_prompt)) - logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens (Prompt: {len_of_prompts})" + ) # 5. 
Call the LLM for keyword extraction if param.model_func: @@ -1883,51 +1893,63 @@ async def _build_query_context( entities_context = [] relations_context = [] + # Store original data for later text chunk retrieval + original_node_datas = [] + original_edge_datas = [] + # Handle local and global modes if query_param.mode == "local": - entities_context, relations_context, entity_chunks = await _get_node_data( + ( + entities_context, + relations_context, + node_datas, + use_relations, + ) = await _get_node_data( ll_keywords, knowledge_graph_inst, entities_vdb, - text_chunks_db, query_param, ) - all_chunks.extend(entity_chunks) + original_node_datas = node_datas + original_edge_datas = use_relations elif query_param.mode == "global": - entities_context, relations_context, relationship_chunks = await _get_edge_data( + ( + entities_context, + relations_context, + edge_datas, + use_entities, + ) = await _get_edge_data( hl_keywords, knowledge_graph_inst, relationships_vdb, - text_chunks_db, query_param, ) - all_chunks.extend(relationship_chunks) + original_edge_datas = edge_datas + original_node_datas = use_entities else: # hybrid or mix mode ll_data = await _get_node_data( ll_keywords, knowledge_graph_inst, entities_vdb, - text_chunks_db, query_param, ) hl_data = await _get_edge_data( hl_keywords, knowledge_graph_inst, relationships_vdb, - text_chunks_db, query_param, ) - (ll_entities_context, ll_relations_context, ll_chunks) = ll_data - (hl_entities_context, hl_relations_context, hl_chunks) = hl_data - - # Collect chunks from entity and relationship sources - all_chunks.extend(ll_chunks) - all_chunks.extend(hl_chunks) + (ll_entities_context, ll_relations_context, ll_node_datas, ll_edge_datas) = ( + ll_data + ) + (hl_entities_context, hl_relations_context, hl_edge_datas, hl_node_datas) = ( + hl_data + ) - # Get vector chunks if in mix mode + # Get vector chunks first if in mix mode if query_param.mode == "mix" and chunks_vdb: vector_chunks = await _get_vector_context( 
query, @@ -1936,34 +1958,260 @@ async def _build_query_context( ) all_chunks.extend(vector_chunks) + # Store original data from both sources + original_node_datas = ll_node_datas + hl_node_datas + original_edge_datas = ll_edge_datas + hl_edge_datas + # Combine entities and relations contexts entities_context = process_combine_contexts( - hl_entities_context, ll_entities_context + ll_entities_context, hl_entities_context ) relations_context = process_combine_contexts( hl_relations_context, ll_relations_context ) - # Process all chunks uniformly: deduplication, reranking, and token truncation - processed_chunks = await process_chunks_unified( - query=query, - chunks=all_chunks, - query_param=query_param, - global_config=text_chunks_db.global_config, - source_type="mixed", + logger.info( + f"Initial context: {len(entities_context)} entities, {len(relations_context)} relations, {len(all_chunks)} chunks" ) - # Build final text_units_context from processed chunks + # Unified token control system - Apply precise token limits to entities and relations + tokenizer = text_chunks_db.global_config.get("tokenizer") + if tokenizer: + # Get new token limits from query_param (with fallback to global_config) + max_entity_tokens = getattr( + query_param, + "max_entity_tokens", + text_chunks_db.global_config.get( + "max_entity_tokens", DEFAULT_MAX_ENTITY_TOKENS + ), + ) + max_relation_tokens = getattr( + query_param, + "max_relation_tokens", + text_chunks_db.global_config.get( + "max_relation_tokens", DEFAULT_MAX_RELATION_TOKENS + ), + ) + max_total_tokens = getattr( + query_param, + "max_total_tokens", + text_chunks_db.global_config.get( + "max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS + ), + ) + + # Truncate entities based on complete JSON serialization + if entities_context: + original_entity_count = len(entities_context) + + # Process entities context to replace GRAPH_FIELD_SEP with : in file_path fields + for entity in entities_context: + if "file_path" in entity and 
entity["file_path"]: + entity["file_path"] = entity["file_path"].replace( + GRAPH_FIELD_SEP, ";" + ) + + entities_context = truncate_list_by_token_size( + entities_context, + key=lambda x: json.dumps(x, ensure_ascii=False), + max_token_size=max_entity_tokens, + tokenizer=tokenizer, + ) + if len(entities_context) < original_entity_count: + logger.debug( + f"Truncated entities: {original_entity_count} -> {len(entities_context)} (entity max tokens: {max_entity_tokens})" + ) + + # Truncate relations based on complete JSON serialization + if relations_context: + original_relation_count = len(relations_context) + + # Process relations context to replace GRAPH_FIELD_SEP with : in file_path fields + for relation in relations_context: + if "file_path" in relation and relation["file_path"]: + relation["file_path"] = relation["file_path"].replace( + GRAPH_FIELD_SEP, ";" + ) + + relations_context = truncate_list_by_token_size( + relations_context, + key=lambda x: json.dumps(x, ensure_ascii=False), + max_token_size=max_relation_tokens, + tokenizer=tokenizer, + ) + if len(relations_context) < original_relation_count: + logger.debug( + f"Truncated relations: {original_relation_count} -> {len(relations_context)} (relation max tokens: {max_relation_tokens})" + ) + + # After truncation, get text chunks based on final entities and relations + logger.info("Getting text chunks based on truncated entities and relations...") + + # Create filtered data based on truncated context + final_node_datas = [] + if entities_context and original_node_datas: + final_entity_names = {e["entity"] for e in entities_context} + seen_nodes = set() + for node in original_node_datas: + name = node.get("entity_name") + if name in final_entity_names and name not in seen_nodes: + final_node_datas.append(node) + seen_nodes.add(name) + + final_edge_datas = [] + if relations_context and original_edge_datas: + final_relation_pairs = {(r["entity1"], r["entity2"]) for r in relations_context} + seen_edges = set() + 
for edge in original_edge_datas: + src, tgt = edge.get("src_id"), edge.get("tgt_id") + if src is None or tgt is None: + src, tgt = edge.get("src_tgt", (None, None)) + + pair = (src, tgt) + if pair in final_relation_pairs and pair not in seen_edges: + final_edge_datas.append(edge) + seen_edges.add(pair) + + # Get text chunks based on final filtered data + text_chunk_tasks = [] + + if final_node_datas: + text_chunk_tasks.append( + _find_most_related_text_unit_from_entities( + final_node_datas, + query_param, + text_chunks_db, + knowledge_graph_inst, + ) + ) + + if final_edge_datas: + text_chunk_tasks.append( + _find_related_text_unit_from_relationships( + final_edge_datas, + query_param, + text_chunks_db, + ) + ) + + # Execute text chunk retrieval in parallel + if text_chunk_tasks: + text_chunk_results = await asyncio.gather(*text_chunk_tasks) + for chunks in text_chunk_results: + if chunks: + all_chunks.extend(chunks) + + # Apply token processing to chunks if tokenizer is available text_units_context = [] - for i, chunk in enumerate(processed_chunks): - text_units_context.append( - { - "id": i + 1, - "content": chunk["content"], - "file_path": chunk.get("file_path", "unknown_source"), - } + if tokenizer and all_chunks: + # Calculate dynamic token limit for text chunks + entities_str = json.dumps(entities_context, ensure_ascii=False) + relations_str = json.dumps(relations_context, ensure_ascii=False) + + # Calculate base context tokens (entities + relations + template) + kg_context_template = """-----Entities(KG)----- + +```json +{entities_str} +``` + +-----Relationships(KG)----- + +```json +{relations_str} +``` + +-----Document Chunks(DC)----- + +```json +[] +``` + +""" + kg_context = kg_context_template.format( + entities_str=entities_str, relations_str=relations_str + ) + kg_context_tokens = len(tokenizer.encode(kg_context)) + + # Calculate actual system prompt overhead dynamically + # 1. 
Calculate conversation history tokens + history_context = "" + if query_param.conversation_history: + history_context = get_conversation_turns( + query_param.conversation_history, query_param.history_turns + ) + history_tokens = ( + len(tokenizer.encode(history_context)) if history_context else 0 + ) + + # 2. Calculate system prompt template tokens (excluding context_data) + user_prompt = query_param.user_prompt if query_param.user_prompt else "" + response_type = ( + query_param.response_type + if query_param.response_type + else "Multiple Paragraphs" ) + # Get the system prompt template from PROMPTS + sys_prompt_template = text_chunks_db.global_config.get( + "system_prompt_template", PROMPTS["rag_response"] + ) + + # Create a sample system prompt with placeholders filled (excluding context_data) + sample_sys_prompt = sys_prompt_template.format( + history=history_context, + context_data="", # Empty for overhead calculation + response_type=response_type, + user_prompt=user_prompt, + ) + sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt)) + + # Total system prompt overhead = template + query tokens + query_tokens = len(tokenizer.encode(query)) + sys_prompt_overhead = sys_prompt_template_tokens + query_tokens + + buffer_tokens = 100 # Safety buffer as requested + + # Calculate available tokens for text chunks + used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens + available_chunk_tokens = max_total_tokens - used_tokens + + logger.debug( + f"Token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}" + ) + + # Re-process chunks with dynamic token limit + if all_chunks: + # Create a temporary query_param copy with adjusted chunk token limit + temp_chunks = [ + {"content": chunk["content"], "file_path": chunk["file_path"]} + for chunk in all_chunks + ] + + # Apply token truncation to chunks 
using the dynamic limit + truncated_chunks = await process_chunks_unified( + query=query, + chunks=temp_chunks, + query_param=query_param, + global_config=text_chunks_db.global_config, + source_type="mixed", + chunk_token_limit=available_chunk_tokens, # Pass dynamic limit + ) + + # Rebuild text_units_context with truncated chunks + for i, chunk in enumerate(truncated_chunks): + text_units_context.append( + { + "id": i + 1, + "content": chunk["content"], + "file_path": chunk.get("file_path", "unknown_source"), + } + ) + + logger.debug( + f"Re-truncated chunks for dynamic token limit: {len(temp_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})" + ) + logger.info( f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks" ) @@ -2002,7 +2250,6 @@ async def _get_node_data( query: str, knowledge_graph_inst: BaseGraphStorage, entities_vdb: BaseVectorStorage, - text_chunks_db: BaseKVStorage, query_param: QueryParam, ): # get similar entities @@ -2015,7 +2262,7 @@ async def _get_node_data( ) if not len(results): - return "", "", "" + return "", "", [], [] # Extract all entity IDs from your results list node_ids = [r["entity_name"] for r in results] @@ -2042,34 +2289,16 @@ async def _get_node_data( } for k, n, d in zip(results, node_datas, node_degrees) if n is not None - ] # what is this text_chunks_db doing. dont remember it in airvx. check the diagram. 
- # get entitytext chunk - use_text_units = await _find_most_related_text_unit_from_entities( - node_datas, - query_param, - text_chunks_db, - knowledge_graph_inst, - ) + ] + use_relations = await _find_most_related_edges_from_entities( node_datas, query_param, knowledge_graph_inst, ) - tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer") - len_node_datas = len(node_datas) - node_datas = truncate_list_by_token_size( - node_datas, - key=lambda x: x["description"] if x["description"] is not None else "", - max_token_size=query_param.max_token_for_local_context, - tokenizer=tokenizer, - ) - logger.debug( - f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})" - ) - logger.info( - f"Local query: {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks" + f"Local query: {len(node_datas)} entites, {len(use_relations)} relations" ) # build prompt @@ -2088,7 +2317,6 @@ async def _get_node_data( "entity": n["entity_name"], "type": n.get("entity_type", "UNKNOWN"), "description": n.get("description", "UNKNOWN"), - "rank": n["rank"], "created_at": created_at, "file_path": file_path, } @@ -2110,15 +2338,12 @@ async def _get_node_data( "entity1": e["src_tgt"][0], "entity2": e["src_tgt"][1], "description": e["description"], - "keywords": e["keywords"], - "weight": e["weight"], - "rank": e["rank"], "created_at": created_at, "file_path": file_path, } ) - return entities_context, relations_context, use_text_units + return entities_context, relations_context, node_datas, use_relations async def _find_most_related_text_unit_from_entities( @@ -2127,8 +2352,14 @@ async def _find_most_related_text_unit_from_entities( text_chunks_db: BaseKVStorage, knowledge_graph_inst: BaseGraphStorage, ): + logger.debug(f"Searching text chunks for {len(node_datas)} entities") + text_units = [ - split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP]) + 
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[ + : text_chunks_db.global_config.get( + "related_chunk_number", DEFAULT_RELATED_CHUNK_NUMBER + ) + ] for dp in node_datas if dp["source_id"] is not None ] @@ -2273,20 +2504,9 @@ async def _find_most_related_edges_from_entities( } all_edges_data.append(combined) - tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer") all_edges_data = sorted( all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True ) - all_edges_data = truncate_list_by_token_size( - all_edges_data, - key=lambda x: x["description"] if x["description"] is not None else "", - max_token_size=query_param.max_token_for_global_context, - tokenizer=tokenizer, - ) - - logger.debug( - f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})" - ) return all_edges_data @@ -2295,7 +2515,6 @@ async def _get_edge_data( keywords, knowledge_graph_inst: BaseGraphStorage, relationships_vdb: BaseVectorStorage, - text_chunks_db: BaseKVStorage, query_param: QueryParam, ): logger.info( @@ -2307,7 +2526,7 @@ async def _get_edge_data( ) if not len(results): - return "", "", "" + return "", "", [], [] # Prepare edge pairs in two forms: # For the batch edge properties function, use dicts. 
@@ -2343,31 +2562,18 @@ async def _get_edge_data( } edge_datas.append(combined) - tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer") edge_datas = sorted( edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True ) - edge_datas = truncate_list_by_token_size( + + use_entities = await _find_most_related_entities_from_relationships( edge_datas, - key=lambda x: x["description"] if x["description"] is not None else "", - max_token_size=query_param.max_token_for_global_context, - tokenizer=tokenizer, - ) - use_entities, use_text_units = await asyncio.gather( - _find_most_related_entities_from_relationships( - edge_datas, - query_param, - knowledge_graph_inst, - ), - _find_related_text_unit_from_relationships( - edge_datas, - query_param, - text_chunks_db, - knowledge_graph_inst, - ), + query_param, + knowledge_graph_inst, ) + logger.info( - f"Global query: {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks" + f"Global query: {len(use_entities)} entites, {len(edge_datas)} relations" ) relations_context = [] @@ -2386,9 +2592,6 @@ async def _get_edge_data( "entity1": e["src_id"], "entity2": e["tgt_id"], "description": e["description"], - "keywords": e["keywords"], - "weight": e["weight"], - "rank": e["rank"], "created_at": created_at, "file_path": file_path, } @@ -2410,22 +2613,13 @@ async def _get_edge_data( "entity": n["entity_name"], "type": n.get("entity_type", "UNKNOWN"), "description": n.get("description", "UNKNOWN"), - "rank": n["rank"], "created_at": created_at, "file_path": file_path, } ) - text_units_context = [] - for i, t in enumerate(use_text_units): - text_units_context.append( - { - "id": i + 1, - "content": t["content"], - "file_path": t.get("file_path", "unknown"), - } - ) - return entities_context, relations_context, text_units_context + # Return original data for later text chunk retrieval + return entities_context, relations_context, edge_datas, use_entities async def 
_find_most_related_entities_from_relationships( @@ -2462,18 +2656,6 @@ async def _find_most_related_entities_from_relationships( combined = {**node, "entity_name": entity_name, "rank": degree} node_datas.append(combined) - tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer") - len_node_datas = len(node_datas) - node_datas = truncate_list_by_token_size( - node_datas, - key=lambda x: x["description"] if x["description"] is not None else "", - max_token_size=query_param.max_token_for_local_context, - tokenizer=tokenizer, - ) - logger.debug( - f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})" - ) - return node_datas @@ -2481,10 +2663,15 @@ async def _find_related_text_unit_from_relationships( edge_datas: list[dict], query_param: QueryParam, text_chunks_db: BaseKVStorage, - knowledge_graph_inst: BaseGraphStorage, ): + logger.debug(f"Searching text chunks for {len(edge_datas)} relationships") + text_units = [ - split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP]) + split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[ + : text_chunks_db.global_config.get( + "related_chunk_number", DEFAULT_RELATED_CHUNK_NUMBER + ) + ] for dp in edge_datas if dp["source_id"] is not None ] @@ -2565,13 +2752,66 @@ async def naive_query( if chunks is None or len(chunks) == 0: return PROMPTS["fail_response"] - # Process chunks using unified processing + # Calculate dynamic token limit for chunks + # Get token limits from query_param (with fallback to global_config) + max_total_tokens = getattr( + query_param, + "max_total_tokens", + global_config.get("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS), + ) + + # Calculate conversation history tokens + history_context = "" + if query_param.conversation_history: + history_context = get_conversation_turns( + query_param.conversation_history, query_param.history_turns + ) + history_tokens = len(tokenizer.encode(history_context)) if 
history_context else 0 + + # Calculate system prompt template tokens (excluding content_data) + user_prompt = query_param.user_prompt if query_param.user_prompt else "" + response_type = ( + query_param.response_type + if query_param.response_type + else "Multiple Paragraphs" + ) + + # Use the provided system prompt or default + sys_prompt_template = ( + system_prompt if system_prompt else PROMPTS["naive_rag_response"] + ) + + # Create a sample system prompt with empty content_data to calculate overhead + sample_sys_prompt = sys_prompt_template.format( + content_data="", # Empty for overhead calculation + response_type=response_type, + history=history_context, + user_prompt=user_prompt, + ) + sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt)) + + # Total system prompt overhead = template + query tokens + query_tokens = len(tokenizer.encode(query)) + sys_prompt_overhead = sys_prompt_template_tokens + query_tokens + + buffer_tokens = 100 # Safety buffer + + # Calculate available tokens for chunks + used_tokens = sys_prompt_overhead + buffer_tokens + available_chunk_tokens = max_total_tokens - used_tokens + + logger.debug( + f"Naive query token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}" + ) + + # Process chunks using unified processing with dynamic token limit processed_chunks = await process_chunks_unified( query=query, chunks=chunks, query_param=query_param, global_config=global_config, source_type="vector", + chunk_token_limit=available_chunk_tokens, # Pass dynamic limit ) logger.info(f"Final context: {len(processed_chunks)} chunks") @@ -2622,7 +2862,9 @@ async def naive_query( return sys_prompt len_of_prompts = len(tokenizer.encode(query + sys_prompt)) - logger.debug(f"[naive_query]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[naive_query] Sending to LLM: {len_of_prompts:,} tokens (Query: 
{len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})" + ) response = await use_model_func( query, @@ -2746,7 +2988,9 @@ async def kg_query_with_keywords( tokenizer: Tokenizer = global_config["tokenizer"] len_of_prompts = len(tokenizer.encode(query + sys_prompt)) - logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})" + ) # 6. Generate response response = await use_model_func( @@ -2866,6 +3110,7 @@ async def apply_rerank_if_enabled( query: str, retrieved_docs: list[dict], global_config: dict, + enable_rerank: bool = True, top_k: int = None, ) -> list[dict]: """ @@ -2875,18 +3120,19 @@ async def apply_rerank_if_enabled( query: The search query retrieved_docs: List of retrieved documents global_config: Global configuration containing rerank settings + enable_rerank: Whether to enable reranking from query parameter top_k: Number of top documents to return after reranking Returns: Reranked documents if rerank is enabled, otherwise original documents """ - if not global_config.get("enable_rerank", False) or not retrieved_docs: + if not enable_rerank or not retrieved_docs: return retrieved_docs rerank_func = global_config.get("rerank_model_func") if not rerank_func: - logger.debug( - "Rerank is enabled but no rerank function provided, skipping rerank" + logger.warning( + "Rerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters." ) return retrieved_docs @@ -2923,6 +3169,7 @@ async def process_chunks_unified( query_param: QueryParam, global_config: dict, source_type: str = "mixed", + chunk_token_limit: int = None, # Add parameter for dynamic token limit ) -> list[dict]: """ Unified processing for text chunks: deduplication, chunk_top_k limiting, reranking, and token truncation. 
@@ -2933,6 +3180,7 @@ async def process_chunks_unified( query_param: Query parameters containing configuration global_config: Global configuration dictionary source_type: Source type for logging ("vector", "entity", "relationship", "mixed") + chunk_token_limit: Dynamic token limit for chunks (if None, uses default) Returns: Processed and filtered list of text chunks @@ -2954,12 +3202,13 @@ async def process_chunks_unified( ) # 2. Apply reranking if enabled and query is provided - if global_config.get("enable_rerank", False) and query and unique_chunks: - rerank_top_k = query_param.chunk_rerank_top_k or len(unique_chunks) + if query_param.enable_rerank and query and unique_chunks: + rerank_top_k = query_param.chunk_top_k or len(unique_chunks) unique_chunks = await apply_rerank_if_enabled( query=query, retrieved_docs=unique_chunks, global_config=global_config, + enable_rerank=query_param.enable_rerank, top_k=rerank_top_k, ) logger.debug(f"Rerank: {len(unique_chunks)} chunks (source: {source_type})") @@ -2975,16 +3224,25 @@ async def process_chunks_unified( # 4. 
Token-based final truncation tokenizer = global_config.get("tokenizer") if tokenizer and unique_chunks: + # Set default chunk_token_limit if not provided + if chunk_token_limit is None: + # Get default from query_param or global_config + chunk_token_limit = getattr( + query_param, + "max_total_tokens", + global_config.get("MAX_TOTAL_TOKENS", 32000), + ) + original_count = len(unique_chunks) unique_chunks = truncate_list_by_token_size( unique_chunks, key=lambda x: x.get("content", ""), - max_token_size=query_param.max_token_for_text_unit, + max_token_size=chunk_token_limit, tokenizer=tokenizer, ) logger.debug( f"Token truncation: {len(unique_chunks)} chunks from {original_count} " - f"(max tokens: {query_param.max_token_for_text_unit}, source: {source_type})" + f"(chunk available tokens: {chunk_token_limit}, source: {source_type})" ) return unique_chunks diff --git a/lightrag/rerank.py b/lightrag/rerank.py index 59719bc9e1a1199c5cf9deb5464905e6ee2c6d84..297fa0539ab476da805446f801c051f8703b39c9 100644 --- a/lightrag/rerank.py +++ b/lightrag/rerank.py @@ -10,55 +10,58 @@ from .utils import logger class RerankModel(BaseModel): """ - Pydantic model class for defining a custom rerank model. + Wrapper for rerank functions that can be used with LightRAG. - This class provides a convenient wrapper for rerank functions, allowing you to - encapsulate all rerank configurations (API keys, model settings, etc.) in one place. + Example usage: + ```python + from lightrag.rerank import RerankModel, jina_rerank + + # Create rerank model + rerank_model = RerankModel( + rerank_func=jina_rerank, + kwargs={ + "model": "BAAI/bge-reranker-v2-m3", + "api_key": "your_api_key_here", + "base_url": "https://api.jina.ai/v1/rerank" + } + ) - Attributes: - rerank_func (Callable[[Any], List[Dict]]): A callable function that reranks documents. - The function should take query and documents as input and return reranked results. 
- kwargs (Dict[str, Any]): A dictionary that contains the arguments to pass to the callable function. - This should include all necessary configurations such as model name, API key, base_url, etc. + # Use in LightRAG + rag = LightRAG( + rerank_model_func=rerank_model.rerank, + # ... other configurations + ) - Example usage: - Rerank model example with Jina: - ```python - rerank_model = RerankModel( - rerank_func=jina_rerank, - kwargs={ - "model": "BAAI/bge-reranker-v2-m3", - "api_key": "your_api_key_here", - "base_url": "https://api.jina.ai/v1/rerank" - } + # Query with rerank enabled (default) + result = await rag.aquery( + "your query", + param=QueryParam(enable_rerank=True) + ) + ``` + + Or define a custom function directly: + ```python + async def my_rerank_func(query: str, documents: list, top_k: int = None, **kwargs): + return await jina_rerank( + query=query, + documents=documents, + model="BAAI/bge-reranker-v2-m3", + api_key="your_api_key_here", + top_k=top_k or 10, + **kwargs ) - # Use in LightRAG - rag = LightRAG( - enable_rerank=True, - rerank_model_func=rerank_model.rerank, - # ... other configurations - ) - ``` - - Or define a custom function directly: - ```python - async def my_rerank_func(query: str, documents: list, top_k: int = None, **kwargs): - return await jina_rerank( - query=query, - documents=documents, - model="BAAI/bge-reranker-v2-m3", - api_key="your_api_key_here", - top_k=top_k or 10, - **kwargs - ) - - rag = LightRAG( - enable_rerank=True, - rerank_model_func=my_rerank_func, - # ... other configurations - ) - ``` + rag = LightRAG( + rerank_model_func=my_rerank_func, + # ... 
other configurations + ) + + # Control rerank per query + result = await rag.aquery( + "your query", + param=QueryParam(enable_rerank=True) # Enable rerank for this query + ) + ``` """ rerank_func: Callable[[Any], List[Dict]] diff --git a/lightrag/utils.py b/lightrag/utils.py index 386de3ab952f992173fd473794b70d2d875b56e2..171cf9f6a281cdcc998437cd45365649de33e11b 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -795,7 +795,9 @@ def process_combine_contexts(*context_lists): if not context_list: # Skip empty lists continue for item in context_list: - content_dict = {k: v for k, v in item.items() if k != "id"} + content_dict = { + k: v for k, v in item.items() if k != "id" and k != "created_at" + } content_key = tuple(sorted(content_dict.items())) if content_key not in seen_content: seen_content[content_key] = item diff --git a/lightrag_webui/src/api/lightrag.ts b/lightrag_webui/src/api/lightrag.ts index 48298cd1afefe534b2ac84b66c3d42bd0025575e..a050e1c773b0bb6cae87e5b762c0944576cda22e 100644 --- a/lightrag_webui/src/api/lightrag.ts +++ b/lightrag_webui/src/api/lightrag.ts @@ -106,12 +106,14 @@ export type QueryRequest = { stream?: boolean /** Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode. */ top_k?: number - /** Maximum number of tokens allowed for each retrieved text chunk. */ - max_token_for_text_unit?: number - /** Maximum number of tokens allocated for relationship descriptions in global retrieval. */ - max_token_for_global_context?: number - /** Maximum number of tokens allocated for entity descriptions in local retrieval. */ - max_token_for_local_context?: number + /** Maximum number of text chunks to retrieve and keep after reranking. */ + chunk_top_k?: number + /** Maximum number of tokens allocated for entity context in unified token control system. */ + max_entity_tokens?: number + /** Maximum number of tokens allocated for relationship context in unified token control system. 
*/ + max_relation_tokens?: number + /** Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt). */ + max_total_tokens?: number /** * Stores past conversation history to maintain context. * Format: [{"role": "user/assistant", "content": "message"}]. @@ -121,6 +123,8 @@ export type QueryRequest = { history_turns?: number /** User-provided prompt for the query. If provided, this will be used instead of the default value from prompt template. */ user_prompt?: string + /** Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True. */ + enable_rerank?: boolean } export type QueryResponse = { diff --git a/lightrag_webui/src/components/retrieval/QuerySettings.tsx b/lightrag_webui/src/components/retrieval/QuerySettings.tsx index 807884e164fac25d5354f638abd00f054238279d..28e8dc3d7f18a95a95a758279bfecc001068ab95 100644 --- a/lightrag_webui/src/components/retrieval/QuerySettings.tsx +++ b/lightrag_webui/src/components/retrieval/QuerySettings.tsx @@ -119,7 +119,6 @@ export default function QuerySettings() {
- {/* Removed sr-only label */}
- {/* Max Tokens */} + {/* Chunk Top K */} <> - <> - - - - - - -

{t('retrievePanel.querySettings.maxTokensTextUnitTooltip')}

-
-
-
-
- {/* Removed sr-only label */} - { - const value = e.target.value - handleChange('max_token_for_text_unit', value === '' ? '' : parseInt(value) || 0) - }} - onBlur={(e) => { - const value = e.target.value - if (value === '' || isNaN(parseInt(value))) { - handleChange('max_token_for_text_unit', 10000) - } - }} - min={1} - placeholder={t('retrievePanel.querySettings.maxTokensTextUnit')} - className="h-9" - /> -
- + + + + + + +

{t('retrievePanel.querySettings.chunkTopKTooltip')}

+
+
+
+
+ { + const value = e.target.value + handleChange('chunk_top_k', value === '' ? '' : parseInt(value) || 0) + }} + onBlur={(e) => { + const value = e.target.value + if (value === '' || isNaN(parseInt(value))) { + handleChange('chunk_top_k', 1) + } + }} + min={1} + placeholder={t('retrievePanel.querySettings.chunkTopKPlaceholder')} + /> +
+ - <> - - - - - - -

{t('retrievePanel.querySettings.maxTokensGlobalContextTooltip')}

-
-
-
-
- {/* Removed sr-only label */} - { - const value = e.target.value - handleChange('max_token_for_global_context', value === '' ? '' : parseInt(value) || 0) - }} - onBlur={(e) => { - const value = e.target.value - if (value === '' || isNaN(parseInt(value))) { - handleChange('max_token_for_global_context', 4000) - } - }} - min={1} - placeholder={t('retrievePanel.querySettings.maxTokensGlobalContext')} - className="h-9" - /> -
- + {/* Max Entity Tokens */} + <> + + + + + + +

{t('retrievePanel.querySettings.maxEntityTokensTooltip')}

+
+
+
+
+ { + const value = e.target.value + handleChange('max_entity_tokens', value === '' ? '' : parseInt(value) || 0) + }} + onBlur={(e) => { + const value = e.target.value + if (value === '' || isNaN(parseInt(value))) { + handleChange('max_entity_tokens', 1000) + } + }} + min={1} + placeholder={t('retrievePanel.querySettings.maxEntityTokensPlaceholder')} + /> +
+ - <> - - - - - - -

{t('retrievePanel.querySettings.maxTokensLocalContextTooltip')}

-
-
-
-
- {/* Removed sr-only label */} - { - const value = e.target.value - handleChange('max_token_for_local_context', value === '' ? '' : parseInt(value) || 0) - }} - onBlur={(e) => { - const value = e.target.value - if (value === '' || isNaN(parseInt(value))) { - handleChange('max_token_for_local_context', 4000) - } - }} - min={1} - placeholder={t('retrievePanel.querySettings.maxTokensLocalContext')} - className="h-9" - /> -
- + {/* Max Relation Tokens */} + <> + + + + + + +

{t('retrievePanel.querySettings.maxRelationTokensTooltip')}

+
+
+
+
+ { + const value = e.target.value + handleChange('max_relation_tokens', value === '' ? '' : parseInt(value) || 0) + }} + onBlur={(e) => { + const value = e.target.value + if (value === '' || isNaN(parseInt(value))) { + handleChange('max_relation_tokens', 1000) + } + }} + min={1} + placeholder={t('retrievePanel.querySettings.maxRelationTokensPlaceholder')} + /> +
+ + + {/* Max Total Tokens */} + <> + + + + + + +

{t('retrievePanel.querySettings.maxTotalTokensTooltip')}

+
+
+
+
+ { + const value = e.target.value + handleChange('max_total_tokens', value === '' ? '' : parseInt(value) || 0) + }} + onBlur={(e) => { + const value = e.target.value + if (value === '' || isNaN(parseInt(value))) { + handleChange('max_total_tokens', 1000) + } + }} + min={1} + placeholder={t('retrievePanel.querySettings.maxTotalTokensPlaceholder')} + /> +
{/* History Turns */} @@ -267,7 +294,6 @@ export default function QuerySettings() {
- {/* Removed sr-only label */} +
+ + + + + + +

{t('retrievePanel.querySettings.enableRerankTooltip')}

+
+
+
+ handleChange('enable_rerank', checked)} + /> +
+
@@ -379,6 +426,7 @@ export default function QuerySettings() { />
+
diff --git a/lightrag_webui/src/locales/ar.json b/lightrag_webui/src/locales/ar.json index 0f3b030bfad683832c3b18f5bffea9f867c33e90..939f6869371849fb3052de14bdf7c5c31eeb2aab 100644 --- a/lightrag_webui/src/locales/ar.json +++ b/lightrag_webui/src/locales/ar.json @@ -363,16 +363,22 @@ "singleParagraph": "فقرة واحدة", "bulletPoints": "نقاط نقطية" }, - "topK": "أعلى K نتائج", - "topKTooltip": "عدد العناصر العلوية للاسترجاع. يمثل الكيانات في وضع 'محلي' والعلاقات في وضع 'عالمي'", - "topKPlaceholder": "عدد النتائج", - "maxTokensTextUnit": "أقصى عدد من الرموز لوحدة النص", - "maxTokensTextUnitTooltip": "الحد الأقصى لعدد الرموز المسموح به لكل جزء نصي مسترجع", - "maxTokensGlobalContext": "أقصى عدد من الرموز للسياق العالمي", - "maxTokensGlobalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف العلاقات في الاسترجاع العالمي", - "maxTokensLocalContext": "أقصى عدد من الرموز للسياق المحلي", - "maxTokensLocalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف الكيانات في الاسترجاع المحلي", - "historyTurns": "دورات التاريخ", + "topK": "أعلى K", + "topKTooltip": "عدد العناصر العلوية للاسترداد. 
يمثل الكيانات في الوضع 'المحلي' والعلاقات في الوضع 'العالمي'.", + "topKPlaceholder": "أدخل قيمة أعلى k", + "chunkTopK": "أعلى K للقطع", + "chunkTopKTooltip": "العدد الأقصى لقطع النص المراد استردادها ومعالجتها.", + "chunkTopKPlaceholder": "أدخل قيمة أعلى k للقطع", + "chunkRerankTopK": "أعلى K لإعادة الترتيب", + "chunkRerankTopKTooltip": "عدد قطع النص المراد الاحتفاظ بها بعد إعادة الترتيب.", + "chunkRerankTopKPlaceholder": "أدخل قيمة أعلى k لإعادة الترتيب", + "maxEntityTokens": "الحد الأقصى لرموز الكيان", + "maxEntityTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق الكيان في نظام التحكم الموحد في الرموز", + "maxRelationTokens": "الحد الأقصى لرموز العلاقة", + "maxRelationTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق العلاقة في نظام التحكم الموحد في الرموز", + "maxTotalTokens": "إجمالي الحد الأقصى للرموز", + "maxTotalTokensTooltip": "الحد الأقصى الإجمالي لميزانية الرموز لسياق الاستعلام بالكامل (الكيانات + العلاقات + الأجزاء + موجه النظام)", + "historyTurns": "أدوار التاريخ", "historyTurnsTooltip": "عدد الدورات الكاملة للمحادثة (أزواج المستخدم-المساعد) التي يجب مراعاتها في سياق الرد", "historyTurnsPlaceholder": "عدد دورات التاريخ", "onlyNeedContext": "تحتاج فقط إلى السياق", @@ -383,7 +389,9 @@ "streamResponseTooltip": "إذا كان صحيحًا، يتيح إخراج التدفق للردود في الوقت الفعلي", "userPrompt": "مطالبة مخصصة", "userPromptTooltip": "تقديم متطلبات استجابة إضافية إلى نموذج اللغة الكبير (غير متعلقة بمحتوى الاستعلام، فقط لمعالجة المخرجات).", - "userPromptPlaceholder": "أدخل مطالبة مخصصة (اختياري)" + "userPromptPlaceholder": "أدخل مطالبة مخصصة (اختياري)", + "enableRerank": "تمكين إعادة الترتيب", + "enableRerankTooltip": "تمكين إعادة ترتيب أجزاء النص المسترجعة. إذا كان True ولكن لم يتم تكوين نموذج إعادة الترتيب، فسيتم إصدار تحذير. افتراضي True." 
} }, "apiSite": { diff --git a/lightrag_webui/src/locales/en.json b/lightrag_webui/src/locales/en.json index e9d5c1ca5dbbec9023e71b66cc4b70c9def8394c..bbc41334c135551a9b454b63a65a2ee1d52d581f 100644 --- a/lightrag_webui/src/locales/en.json +++ b/lightrag_webui/src/locales/en.json @@ -363,15 +363,21 @@ "singleParagraph": "Single Paragraph", "bulletPoints": "Bullet Points" }, - "topK": "Top K Results", - "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode", - "topKPlaceholder": "Number of results", - "maxTokensTextUnit": "Max Tokens for Text Unit", - "maxTokensTextUnitTooltip": "Maximum number of tokens allowed for each retrieved text chunk", - "maxTokensGlobalContext": "Max Tokens for Global Context", - "maxTokensGlobalContextTooltip": "Maximum number of tokens allocated for relationship descriptions in global retrieval", - "maxTokensLocalContext": "Max Tokens for Local Context", - "maxTokensLocalContextTooltip": "Maximum number of tokens allocated for entity descriptions in local retrieval", + "topK": "Top K", + "topKTooltip": "Number of top items to retrieve. 
Represents entities in 'local' mode and relationships in 'global' mode.", + "topKPlaceholder": "Enter top k value", + "chunkTopK": "Chunk Top K", + "chunkTopKTooltip": "Maximum number of text chunks to retrieve and process.", + "chunkTopKPlaceholder": "Enter chunk top k value", + "chunkRerankTopK": "Chunk Rerank Top K", + "chunkRerankTopKTooltip": "Number of text chunks to keep after reranking.", + "chunkRerankTopKPlaceholder": "Enter rerank top k value", + "maxEntityTokens": "Max Entity Tokens", + "maxEntityTokensTooltip": "Maximum number of tokens allocated for entity context in unified token control system", + "maxRelationTokens": "Max Relation Tokens", + "maxRelationTokensTooltip": "Maximum number of tokens allocated for relationship context in unified token control system", + "maxTotalTokens": "Max Total Tokens", + "maxTotalTokensTooltip": "Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)", "historyTurns": "History Turns", "historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context", "historyTurnsPlaceholder": "Number of history turns", @@ -383,7 +389,9 @@ "streamResponseTooltip": "If True, enables streaming output for real-time responses", "userPrompt": "User Prompt", "userPromptTooltip": "Provide additional response requirements to the LLM (unrelated to query content, only for output processing).", - "userPromptPlaceholder": "Enter custom prompt (optional)" + "userPromptPlaceholder": "Enter custom prompt (optional)", + "enableRerank": "Enable Rerank", + "enableRerankTooltip": "Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True." 
} }, "apiSite": { diff --git a/lightrag_webui/src/locales/fr.json b/lightrag_webui/src/locales/fr.json index 46f1216f20c177a57dd973015f20bef49342e43f..8f551e49b36136bd1cfc449cad974f35aa2c02d8 100644 --- a/lightrag_webui/src/locales/fr.json +++ b/lightrag_webui/src/locales/fr.json @@ -363,15 +363,21 @@ "singleParagraph": "Paragraphe unique", "bulletPoints": "Points à puces" }, - "topK": "Top K résultats", - "topKTooltip": "Nombre d'éléments supérieurs à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'", - "topKPlaceholder": "Nombre de résultats", - "maxTokensTextUnit": "Nombre maximum de jetons pour l'unité de texte", - "maxTokensTextUnitTooltip": "Nombre maximum de jetons autorisés pour chaque fragment de texte récupéré", - "maxTokensGlobalContext": "Nombre maximum de jetons pour le contexte global", - "maxTokensGlobalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des relations dans la récupération globale", - "maxTokensLocalContext": "Nombre maximum de jetons pour le contexte local", - "maxTokensLocalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des entités dans la récupération locale", + "topK": "Top K", + "topKTooltip": "Nombre d'éléments principaux à récupérer. 
Représente les entités en mode 'local' et les relations en mode 'global'.", + "topKPlaceholder": "Entrez la valeur top k", + "chunkTopK": "Top K des Chunks", + "chunkTopKTooltip": "Nombre maximum de chunks de texte à récupérer et traiter.", + "chunkTopKPlaceholder": "Entrez la valeur top k des chunks", + "chunkRerankTopK": "Top K du Reclassement", + "chunkRerankTopKTooltip": "Nombre de chunks de texte à conserver après reclassement.", + "chunkRerankTopKPlaceholder": "Entrez la valeur top k du reclassement", + "maxEntityTokens": "Limite de jetons d'entité", + "maxEntityTokensTooltip": "Nombre maximum de jetons alloués au contexte d'entité dans le système de contrôle de jetons unifié", + "maxRelationTokens": "Limite de jetons de relation", + "maxRelationTokensTooltip": "Nombre maximum de jetons alloués au contexte de relation dans le système de contrôle de jetons unifié", + "maxTotalTokens": "Limite totale de jetons", + "maxTotalTokensTooltip": "Budget total maximum de jetons pour l'ensemble du contexte de requête (entités + relations + blocs + prompt système)", "historyTurns": "Tours d'historique", "historyTurnsTooltip": "Nombre de tours complets de conversation (paires utilisateur-assistant) à prendre en compte dans le contexte de la réponse", "historyTurnsPlaceholder": "Nombre de tours d'historique", @@ -383,7 +389,9 @@ "streamResponseTooltip": "Si vrai, active la sortie en flux pour des réponses en temps réel", "userPrompt": "Invite personnalisée", "userPromptTooltip": "Fournir des exigences de réponse supplémentaires au LLM (sans rapport avec le contenu de la requête, uniquement pour le traitement de sortie).", - "userPromptPlaceholder": "Entrez une invite personnalisée (facultatif)" + "userPromptPlaceholder": "Entrez une invite personnalisée (facultatif)", + "enableRerank": "Activer le Reclassement", + "enableRerankTooltip": "Active le reclassement pour les fragments de texte récupérés. 
Si True mais qu'aucun modèle de reclassement n'est configuré, un avertissement sera émis. True par défaut." } }, "apiSite": { diff --git a/lightrag_webui/src/locales/zh.json b/lightrag_webui/src/locales/zh.json index dcf9c5ebd7f8bd8a3213bff65df291dd6ca86b50..c9bad472cfd34b3072bc13b5672da39c2cbe4f8d 100644 --- a/lightrag_webui/src/locales/zh.json +++ b/lightrag_webui/src/locales/zh.json @@ -363,15 +363,21 @@ "singleParagraph": "单段落", "bulletPoints": "要点" }, - "topK": "Top K结果", - "topKTooltip": "检索的顶部项目数。在'local'模式下表示实体,在'global'模式下表示关系", - "topKPlaceholder": "结果数量", - "maxTokensTextUnit": "文本单元最大令牌数", - "maxTokensTextUnitTooltip": "每个检索文本块允许的最大令牌数", - "maxTokensGlobalContext": "全局上下文最大令牌数", - "maxTokensGlobalContextTooltip": "全局检索中关系描述的最大令牌数", - "maxTokensLocalContext": "本地上下文最大令牌数", - "maxTokensLocalContextTooltip": "本地检索中实体描述的最大令牌数", + "topK": "Top K", + "topKTooltip": "检索的顶部条目数量。在'local'模式下表示实体,在'global'模式下表示关系。", + "topKPlaceholder": "输入top k值", + "chunkTopK": "文本块 Top K", + "chunkTopKTooltip": "检索和处理的最大文本块数量。", + "chunkTopKPlaceholder": "输入文本块top k值", + "chunkRerankTopK": "重排序 Top K", + "chunkRerankTopKTooltip": "重排序后保留的文本块数量。", + "chunkRerankTopKPlaceholder": "输入重排序top k值", + "maxEntityTokens": "实体令牌数上限", + "maxEntityTokensTooltip": "统一令牌控制系统中分配给实体上下文的最大令牌数", + "maxRelationTokens": "关系令牌数上限", + "maxRelationTokensTooltip": "统一令牌控制系统中分配给关系上下文的最大令牌数", + "maxTotalTokens": "总令牌数上限", + "maxTotalTokensTooltip": "整个查询上下文的最大总令牌预算(实体+关系+文档块+系统提示)", "historyTurns": "历史轮次", "historyTurnsTooltip": "响应上下文中考虑的完整对话轮次(用户-助手对)数量", "historyTurnsPlaceholder": "历史轮次数", @@ -383,7 +389,9 @@ "streamResponseTooltip": "如果为True,启用实时流式输出响应", "userPrompt": "用户提示词", "userPromptTooltip": "向LLM提供额外的响应要求(与查询内容无关,仅用于处理输出)。", - "userPromptPlaceholder": "输入自定义提示词(可选)" + "userPromptPlaceholder": "输入自定义提示词(可选)", + "enableRerank": "启用重排", + "enableRerankTooltip": "为检索到的文本块启用重排。如果为True但未配置重排模型,将发出警告。默认为True。" } }, "apiSite": { diff --git a/lightrag_webui/src/locales/zh_TW.json 
b/lightrag_webui/src/locales/zh_TW.json index e8f04c1435967600358c736d0a299369665e32ed..0d179408c157c82ada33bac3c7d02e2912e23bff 100644 --- a/lightrag_webui/src/locales/zh_TW.json +++ b/lightrag_webui/src/locales/zh_TW.json @@ -304,7 +304,7 @@ "file_path": "來源", "keywords": "Keys", "weight": "權重" - } + } }, "edge": { "title": "關係", @@ -363,15 +363,15 @@ "singleParagraph": "單段落", "bulletPoints": "重點" }, - "topK": "Top K結果", - "topKTooltip": "檢索的前幾項結果數。在'local'模式下表示實體,在'global'模式下表示關係", - "topKPlaceholder": "結果數量", - "maxTokensTextUnit": "文字單元最大權杖數", - "maxTokensTextUnitTooltip": "每個檢索文字區塊允許的最大權杖數", - "maxTokensGlobalContext": "全域上下文最大權杖數", - "maxTokensGlobalContextTooltip": "全域檢索中關係描述的最大權杖數", - "maxTokensLocalContext": "本地上下文最大權杖數", - "maxTokensLocalContextTooltip": "本地檢索中實體描述的最大權杖數", + "topK": "Top K", + "topKTooltip": "檢索的頂部條目數量。在'local'模式下表示實體,在'global'模式下表示關係。", + "topKPlaceholder": "輸入top k值", + "chunkTopK": "文字區塊 Top K", + "chunkTopKTooltip": "檢索和處理的最大文字區塊數量。", + "chunkTopKPlaceholder": "輸入文字區塊top k值", + "chunkRerankTopK": "重新排序 Top K", + "chunkRerankTopKTooltip": "重新排序後保留的文字區塊數量。", + "chunkRerankTopKPlaceholder": "輸入重新排序top k值", "historyTurns": "歷史輪次", "historyTurnsTooltip": "回應上下文中考慮的完整對話輪次(使用者-助手對)數量", "historyTurnsPlaceholder": "歷史輪次數", @@ -383,7 +383,15 @@ "streamResponseTooltip": "如果為True,啟用即時串流輸出回應", "userPrompt": "用戶提示詞", "userPromptTooltip": "向LLM提供額外的響應要求(與查詢內容無關,僅用於處理輸出)。", - "userPromptPlaceholder": "輸入自定義提示詞(可選)" + "userPromptPlaceholder": "輸入自定義提示詞(可選)", + "enableRerank": "啟用重新排序", + "enableRerankTooltip": "為檢索到的文字區塊啟用重新排序。如果為True但未設定重新排序模型,將發出警告。預設為True。", + "maxEntityTokens": "實體權杖數上限", + "maxEntityTokensTooltip": "統一權杖控制系統中分配給實體上下文的最大權杖數", + "maxRelationTokens": "關係權杖數上限", + "maxRelationTokensTooltip": "統一權杖控制系統中分配給關係上下文的最大權杖數", + "maxTotalTokens": "總權杖數上限", + "maxTotalTokensTooltip": "整個查詢上下文的最大總權杖預算(實體+關係+文字區塊+系統提示)" } }, "apiSite": { diff --git a/lightrag_webui/src/stores/settings.ts index
5942ddca36b8a7f95039c296b066b9eda74e725c..fb0adde0f99aed45e56bbd33b89bcc4493ea838c 100644 --- a/lightrag_webui/src/stores/settings.ts +++ b/lightrag_webui/src/stores/settings.ts @@ -110,17 +110,17 @@ const useSettingsStoreBase = create()( querySettings: { mode: 'global', response_type: 'Multiple Paragraphs', - top_k: 10, - max_token_for_text_unit: 6000, - max_token_for_global_context: 4000, - max_token_for_local_context: 4000, + top_k: 40, + chunk_top_k: 10, + max_entity_tokens: 10000, + max_relation_tokens: 10000, + max_total_tokens: 32000, only_need_context: false, only_need_prompt: false, stream: true, history_turns: 3, - hl_keywords: [], - ll_keywords: [], - user_prompt: '' + user_prompt: '', + enable_rerank: true }, setTheme: (theme: Theme) => set({ theme }), @@ -192,7 +192,7 @@ const useSettingsStoreBase = create()( { name: 'settings-storage', storage: createJSONStorage(() => localStorage), - version: 14, + version: 15, migrate: (state: any, version: number) => { if (version < 2) { state.showEdgeLabel = false @@ -260,6 +260,20 @@ const useSettingsStoreBase = create()( // Add backendMaxGraphNodes field for older versions state.backendMaxGraphNodes = null } + if (version < 15) { + // Add new querySettings + state.querySettings = { + ...state.querySettings, + mode: 'mix', + response_type: 'Multiple Paragraphs', + top_k: 40, + chunk_top_k: 10, + max_entity_tokens: 10000, + max_relation_tokens: 10000, + max_total_tokens: 32000, + enable_rerank: true + } + } return state } }