diff --git a/README-zh.md b/README-zh.md index 9f7f314e728a67a70a1b387adfdc8f305513c1b5..5f9614e345d3bca1c369338e3b8e781b6a048b7c 100644 --- a/README-zh.md +++ b/README-zh.md @@ -293,26 +293,19 @@ class QueryParam: top_k: int = int(os.getenv("TOP_K", "60")) """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.""" - chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "5")) - """Number of text chunks to retrieve initially from vector search. + chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "10")) + """Number of text chunks to retrieve initially from vector search and keep after reranking. If None, defaults to top_k value. """ - chunk_rerank_top_k: int = int(os.getenv("CHUNK_RERANK_TOP_K", "5")) - """Number of text chunks to keep after reranking. - If None, keeps all chunks returned from initial retrieval. - """ - - max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000")) - """Maximum number of tokens allowed for each retrieved text chunk.""" + max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000")) + """Maximum number of tokens allocated for entity context in unified token control system.""" - max_token_for_global_context: int = int( - os.getenv("MAX_TOKEN_RELATION_DESC", "4000") - ) - """Maximum number of tokens allocated for relationship descriptions in global retrieval.""" + max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000")) + """Maximum number of tokens allocated for relationship context in unified token control system.""" - max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) - """Maximum number of tokens allocated for entity descriptions in local retrieval.""" + max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000")) + """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).""" hl_keywords: list[str] = field(default_factory=list) """List of high-level keywords 
to prioritize in retrieval.""" @@ -341,6 +334,11 @@ class QueryParam: """User-provided prompt for the query. If proivded, this will be use instead of the default vaulue from prompt template. """ + + enable_rerank: bool = True + """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. + Default is True to enable reranking when rerank model is available. + """ ``` > top_k的默认值可以通过环境变量TOP_K更改。 diff --git a/README.md b/README.md index fa2b592458afb1dcf1a59bdf1afda783a78d34ea..0fa6c3d193d30b987377599f9719edddd8ae39fc 100644 --- a/README.md +++ b/README.md @@ -300,26 +300,19 @@ class QueryParam: top_k: int = int(os.getenv("TOP_K", "60")) """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.""" - chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "5")) - """Number of text chunks to retrieve initially from vector search. + chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "10")) + """Number of text chunks to retrieve initially from vector search and keep after reranking. If None, defaults to top_k value. """ - chunk_rerank_top_k: int = int(os.getenv("CHUNK_RERANK_TOP_K", "5")) - """Number of text chunks to keep after reranking. - If None, keeps all chunks returned from initial retrieval. 
- """ - - max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000")) - """Maximum number of tokens allowed for each retrieved text chunk.""" + max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000")) + """Maximum number of tokens allocated for entity context in unified token control system.""" - max_token_for_global_context: int = int( - os.getenv("MAX_TOKEN_RELATION_DESC", "4000") - ) - """Maximum number of tokens allocated for relationship descriptions in global retrieval.""" + max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000")) + """Maximum number of tokens allocated for relationship context in unified token control system.""" - max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) - """Maximum number of tokens allocated for entity descriptions in local retrieval.""" + max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000")) + """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).""" conversation_history: list[dict[str, str]] = field(default_factory=list) """Stores past conversation history to maintain context. @@ -342,6 +335,11 @@ class QueryParam: """User-provided prompt for the query. If proivded, this will be use instead of the default vaulue from prompt template. """ + + enable_rerank: bool = True + """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. + Default is True to enable reranking when rerank model is available. + """ ``` > default value of Top_k can be change by environment variables TOP_K. 
diff --git a/docs/rerank_integration.md b/docs/rerank_integration.md index fdaebfa5148e7a8040c270c8ba8dc4e04b67e38c..4e4d433f657bf3240582644297df03e11dfed26c 100644 --- a/docs/rerank_integration.md +++ b/docs/rerank_integration.md @@ -1,36 +1,24 @@ -# Rerank Integration in LightRAG +# Rerank Integration Guide -This document explains how to configure and use the rerank functionality in LightRAG to improve retrieval quality. +LightRAG supports reranking functionality to improve retrieval quality by re-ordering documents based on their relevance to the query. Reranking is now controlled per query via the `enable_rerank` parameter (default: True). -## Overview - -Reranking is an optional feature that improves the quality of retrieved documents by re-ordering them based on their relevance to the query. This is particularly useful when you want higher precision in document retrieval across all query modes (naive, local, global, hybrid, mix). - -## Architecture - -The rerank integration follows a simplified design pattern: - -- **Single Function Configuration**: All rerank settings (model, API keys, top_k, etc.) 
are contained within the rerank function -- **Async Processing**: Non-blocking rerank operations -- **Error Handling**: Graceful fallback to original results -- **Optional Feature**: Can be enabled/disabled via configuration -- **Code Reuse**: Single generic implementation for Jina/Cohere compatible APIs - -## Configuration +## Quick Start ### Environment Variables -Set this variable in your `.env` file or environment: +Set these variables in your `.env` file or environment for rerank model configuration: ```bash -# Enable/disable reranking -ENABLE_RERANK=True +# Rerank model configuration (required when enable_rerank=True in queries) +RERANK_MODEL=BAAI/bge-reranker-v2-m3 +RERANK_BINDING_HOST=https://api.your-provider.com/v1/rerank +RERANK_BINDING_API_KEY=your_api_key_here ``` ### Programmatic Configuration ```python -from lightrag import LightRAG +from lightrag import LightRAG, QueryParam from lightrag.rerank import custom_rerank, RerankModel # Method 1: Using a custom rerank function with all settings included @@ -49,8 +37,19 @@ rag = LightRAG( working_dir="./rag_storage", llm_model_func=your_llm_func, embedding_func=your_embedding_func, - enable_rerank=True, - rerank_model_func=my_rerank_func, + rerank_model_func=my_rerank_func, # Configure rerank function +) + +# Query with rerank enabled (default) +result = await rag.aquery( + "your query", + param=QueryParam(enable_rerank=True) # Control rerank per query +) + +# Query with rerank disabled +result = await rag.aquery( + "your query", + param=QueryParam(enable_rerank=False) ) # Method 2: Using RerankModel wrapper @@ -67,9 +66,17 @@ rag = LightRAG( working_dir="./rag_storage", llm_model_func=your_llm_func, embedding_func=your_embedding_func, - enable_rerank=True, rerank_model_func=rerank_model.rerank, ) + +# Control rerank per query +result = await rag.aquery( + "your query", + param=QueryParam( + enable_rerank=True, # Enable rerank for this query + chunk_top_k=5 # Number of chunks to keep after reranking + ) +) 
``` ## Supported Providers @@ -164,7 +171,6 @@ async def main(): working_dir="./rag_storage", llm_model_func=gpt_4o_mini_complete, embedding_func=openai_embedding, - enable_rerank=True, rerank_model_func=my_rerank_func, ) @@ -180,7 +186,7 @@ async def main(): # Query with rerank (automatically applied) result = await rag.aquery( "Your question here", - param=QueryParam(mode="hybrid", top_k=5) # This top_k is passed to rerank function + param=QueryParam(enable_rerank=True) # This top_k is passed to rerank function ) print(result) diff --git a/env.example b/env.example index 828c6d247b37d9b683d69a6bb09e2409b96d7258..71bc578845dc88d7ef127f8affefd85717cee484 100644 --- a/env.example +++ b/env.example @@ -1,6 +1,8 @@ ### This is sample file of .env +########################### ### Server Configuration +########################### HOST=0.0.0.0 PORT=9621 WEBUI_TITLE='My Graph KB' @@ -9,29 +11,17 @@ OLLAMA_EMULATING_MODEL_TAG=latest # WORKERS=2 # CORS_ORIGINS=http://localhost:3000,http://localhost:8080 -### Login Configuration -# AUTH_ACCOUNTS='admin:admin123,user1:pass456' -# TOKEN_SECRET=Your-Key-For-LightRAG-API-Server -# TOKEN_EXPIRE_HOURS=48 -# GUEST_TOKEN_EXPIRE_HOURS=24 -# JWT_ALGORITHM=HS256 - -### API-Key to access LightRAG Server API -# LIGHTRAG_API_KEY=your-secure-api-key-here -# WHITELIST_PATHS=/health,/api/* - ### Optional SSL Configuration # SSL=true # SSL_CERTFILE=/path/to/cert.pem # SSL_KEYFILE=/path/to/key.pem ### Directory Configuration (defaults to current working directory) -### Should not be set if deploy by docker (Set by Dockerfile instead of .env) ### Default value is ./inputs and ./rag_storage # INPUT_DIR= # WORKING_DIR= -### Max nodes return from grap retrieval +### Max nodes return from grap retrieval in webui # MAX_GRAPH_NODES=1000 ### Logging level @@ -42,65 +32,97 @@ OLLAMA_EMULATING_MODEL_TAG=latest ### Logfile location (defaults to current working directory) # LOG_DIR=/path/to/log/directory -### RAG Configuration -### Chunk size for document 
splitting, 500~1500 is recommended -# CHUNK_SIZE=1200 -# CHUNK_OVERLAP_SIZE=100 +##################################### +### Login and API-Key Configuration +##################################### +# AUTH_ACCOUNTS='admin:admin123,user1:pass456' +# TOKEN_SECRET=Your-Key-For-LightRAG-API-Server +# TOKEN_EXPIRE_HOURS=48 +# GUEST_TOKEN_EXPIRE_HOURS=24 +# JWT_ALGORITHM=HS256 -### RAG Query Configuration +### API-Key to access LightRAG Server API +# LIGHTRAG_API_KEY=your-secure-api-key-here +# WHITELIST_PATHS=/health,/api/* + +######################## +### Query Configuration +######################## +# LLM response cache for query (Not valid for streaming response) +ENABLE_LLM_CACHE=true # HISTORY_TURNS=3 -# MAX_TOKEN_TEXT_CHUNK=6000 -# MAX_TOKEN_RELATION_DESC=4000 -# MAX_TOKEN_ENTITY_DESC=4000 # COSINE_THRESHOLD=0.2 -### Number of entities or relations to retrieve from KG -# TOP_K=60 -### Number of text chunks to retrieve initially from vector search -# CHUNK_TOP_K=5 - -### Rerank Configuration -# ENABLE_RERANK=False -### Number of text chunks to keep after reranking (should be <= CHUNK_TOP_K) -# CHUNK_RERANK_TOP_K=5 -### Rerank model configuration (required when ENABLE_RERANK=True) +### Number of entities or relations retrieved from KG +# TOP_K=40 +### Maximum number of chunks planned to send to LLM +# CHUNK_TOP_K=10 +### Control the actual entities sent to LLM +# MAX_ENTITY_TOKENS=10000 +### Control the actual relations sent to LLM +# MAX_RELATION_TOKENS=10000 +### Control the maximum tokens sent to LLM (including entities, relations and chunks) +# MAX_TOTAL_TOKENS=32000 +### Maximum number of related chunks grabbed from a single entity or relation +# RELATED_CHUNK_NUMBER=10 + +### Reranker configuration (Set ENABLE_RERANK to true if a reranking model is configured) +ENABLE_RERANK=False # RERANK_MODEL=BAAI/bge-reranker-v2-m3 # RERANK_BINDING_HOST=https://api.your-rerank-provider.com/v1/rerank # RERANK_BINDING_API_KEY=your_rerank_api_key_here -### Entity and relation summarization configuration 
+######################################## +### Document processing configuration +######################################## ### Language: English, Chinese, French, German ... SUMMARY_LANGUAGE=English +ENABLE_LLM_CACHE_FOR_EXTRACT=true +### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model) +MAX_TOKENS=32000 +### Chunk size for document splitting, 500~1500 is recommended +# CHUNK_SIZE=1200 +# CHUNK_OVERLAP_SIZE=100 +### Entity and relation summarization configuration ### Number of duplicated entities/edges to trigger LLM re-summary on merge ( at least 3 is recommented) -# FORCE_LLM_SUMMARY_ON_MERGE=6 +# FORCE_LLM_SUMMARY_ON_MERGE=4 ### Maximum number of entity extraction attempts for ambiguous content # MAX_GLEANING=1 -### Number of parallel processing documents(Less than MAX_ASYNC/2 is recommended) -# MAX_PARALLEL_INSERT=2 +############################### +### Concurrency Configuration +############################### +### Max concurrency requests of LLM (for both query and document processing) +MAX_ASYNC=4 +### Number of parallel processing documents(between 2~10, MAX_ASYNC/4 is recommended) +MAX_PARALLEL_INSERT=2 +### Max concurrency requests for Embedding +# EMBEDDING_FUNC_MAX_ASYNC=8 +### Num of chunks send to Embedding in single request +# EMBEDDING_BATCH_NUM=10 +####################### ### LLM Configuration -ENABLE_LLM_CACHE=true -ENABLE_LLM_CACHE_FOR_EXTRACT=true +####################### ### Time out in seconds for LLM, None for infinite timeout TIMEOUT=240 ### Some models like o1-mini require temperature to be set to 1 TEMPERATURE=0 -### Max concurrency requests of LLM -MAX_ASYNC=4 -### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model) -MAX_TOKENS=32000 ### LLM Binding type: openai, ollama, lollms, azure_openai LLM_BINDING=openai LLM_MODEL=gpt-4o LLM_BINDING_HOST=https://api.openai.com/v1 LLM_BINDING_API_KEY=your_api_key + +### Set as num_ctx option for 
Ollama LLM +# OLLAMA_NUM_CTX=32768 + ### Optional for Azure # AZURE_OPENAI_API_VERSION=2024-08-01-preview # AZURE_OPENAI_DEPLOYMENT=gpt-4o -### set as num_ctx option for Ollama LLM -# OLLAMA_NUM_CTX=32768 -### Embedding Configuration +#################################################################################### +### Embedding Configuration (Should not be changed after the first file processed) +#################################################################################### ### Embedding Binding type: openai, ollama, lollms, azure_openai EMBEDDING_BINDING=ollama EMBEDDING_MODEL=bge-m3:latest @@ -108,51 +130,53 @@ EMBEDDING_DIM=1024 EMBEDDING_BINDING_API_KEY=your_api_key # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost EMBEDDING_BINDING_HOST=http://localhost:11434 -### Num of chunks send to Embedding in single request -# EMBEDDING_BATCH_NUM=10 -### Max concurrency requests for Embedding -# EMBEDDING_FUNC_MAX_ASYNC=8 ### Maximum tokens sent to Embedding for each chunk (no longer in use?) 
# MAX_EMBED_TOKENS=8192 + ### Optional for Azure # AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large # AZURE_EMBEDDING_API_VERSION=2023-05-15 # AZURE_EMBEDDING_ENDPOINT=your_endpoint # AZURE_EMBEDDING_API_KEY=your_api_key -########################### +############################ ### Data storage selection -########################### -### In-memory database with local file persistence(Recommended for small scale deployment) +############################ +### Default storage (Recommended for small scale deployment) # LIGHTRAG_KV_STORAGE=JsonKVStorage # LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage # LIGHTRAG_GRAPH_STORAGE=NetworkXStorage # LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage + +### Redis Storage (Recommended for production deployment) +# LIGHTRAG_KV_STORAGE=RedisKVStorage +# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage + +### Vector Storage (Recommended for production deployment) +# LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage +# LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage # LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage + +### Graph Storage (Recommended for production deployment) +# LIGHTRAG_GRAPH_STORAGE=Neo4JStorage +# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage + ### PostgreSQL # LIGHTRAG_KV_STORAGE=PGKVStorage # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage + ### MongoDB (Vector storage only available on Atlas Cloud) # LIGHTRAG_KV_STORAGE=MongoKVStorage # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage # LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage -### Redis Storage (Recommended for production deployment) -# LIGHTRAG_KV_STORAGE=RedisKVStorage -# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage -### Vector Storage (Recommended for production deployment) -# LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage -# LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage -### Graph Storage (Recommended for production deployment) -# 
LIGHTRAG_GRAPH_STORAGE=Neo4JStorage -# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage #################################################################### -### Default workspace for all storage types -### For the purpose of isolation of data for each LightRAG instance -### Valid characters: a-z, A-Z, 0-9, and _ +### WORKSPACE setting workspace name for all storage types +### in the purpose of isolating data from LightRAG instances. +### Valid workspace name constraints: a-z, A-Z, 0-9, and _ #################################################################### # WORKSPACE=space1 diff --git a/examples/rerank_example.py b/examples/rerank_example.py index e0e361a5ea0a0308a9ebabaac0bbe27329d9c23d..42b4dd3809d2ac92896522a42e596891c072cd3f 100644 --- a/examples/rerank_example.py +++ b/examples/rerank_example.py @@ -9,7 +9,11 @@ Configuration Required: 2. Set your embedding API key and base URL in embedding_func() 3. Set your rerank API key and base URL in the rerank configuration 4. Or use environment variables (.env file): - - ENABLE_RERANK=True + - RERANK_MODEL=your_rerank_model + - RERANK_BINDING_HOST=your_rerank_endpoint + - RERANK_BINDING_API_KEY=your_rerank_api_key + +Note: Rerank is now controlled per query via the 'enable_rerank' parameter (default: True) """ import asyncio @@ -83,8 +87,7 @@ async def create_rag_with_rerank(): max_token_size=8192, func=embedding_func, ), - # Simplified Rerank Configuration - enable_rerank=True, + # Rerank Configuration - provide the rerank function rerank_model_func=my_rerank_func, ) @@ -120,7 +123,6 @@ async def create_rag_with_rerank_model(): max_token_size=8192, func=embedding_func, ), - enable_rerank=True, rerank_model_func=rerank_model.rerank, ) @@ -130,9 +132,9 @@ async def create_rag_with_rerank_model(): return rag -async def test_rerank_with_different_topk(): +async def test_rerank_with_different_settings(): """ - Test rerank functionality with different top_k settings + Test rerank functionality with different enable_rerank 
settings """ print("🚀 Setting up LightRAG with Rerank functionality...") @@ -154,16 +156,41 @@ async def test_rerank_with_different_topk(): print(f"\n🔍 Testing query: '{query}'") print("=" * 80) - # Test different top_k values to show parameter priority - top_k_values = [2, 5, 10] - - for top_k in top_k_values: - print(f"\n📊 Testing with QueryParam(top_k={top_k}):") + # Test with rerank enabled (default) + print("\n📊 Testing with enable_rerank=True (default):") + result_with_rerank = await rag.aquery( + query, + param=QueryParam( + mode="naive", + top_k=10, + chunk_top_k=5, + enable_rerank=True, # Explicitly enable rerank + ), + ) + print(f" Result length: {len(result_with_rerank)} characters") + print(f" Preview: {result_with_rerank[:100]}...") + + # Test with rerank disabled + print("\n📊 Testing with enable_rerank=False:") + result_without_rerank = await rag.aquery( + query, + param=QueryParam( + mode="naive", + top_k=10, + chunk_top_k=5, + enable_rerank=False, # Disable rerank + ), + ) + print(f" Result length: {len(result_without_rerank)} characters") + print(f" Preview: {result_without_rerank[:100]}...") - # Test naive mode with specific top_k - result = await rag.aquery(query, param=QueryParam(mode="naive", top_k=top_k)) - print(f" Result length: {len(result)} characters") - print(f" Preview: {result[:100]}...") + # Test with default settings (enable_rerank defaults to True) + print("\n📊 Testing with default settings (enable_rerank defaults to True):") + result_default = await rag.aquery( + query, param=QueryParam(mode="naive", top_k=10, chunk_top_k=5) + ) + print(f" Result length: {len(result_default)} characters") + print(f" Preview: {result_default[:100]}...") async def test_direct_rerank(): @@ -209,17 +236,21 @@ async def main(): print("=" * 60) try: - # Test rerank with different top_k values - await test_rerank_with_different_topk() + # Test rerank with different enable_rerank settings + await test_rerank_with_different_settings() # Test direct rerank 
await test_direct_rerank() print("\n✅ Example completed successfully!") print("\n💡 Key Points:") - print(" ✓ All rerank configurations are contained within rerank_model_func") - print(" ✓ Rerank improves document relevance ordering") - print(" ✓ Configure API keys within your rerank function") + print(" ✓ Rerank is now controlled per query via 'enable_rerank' parameter") + print(" ✓ Default value for enable_rerank is True") + print(" ✓ Rerank function is configured at LightRAG initialization") + print(" ✓ Per-query enable_rerank setting overrides default behavior") + print( + " ✓ If enable_rerank=True but no rerank model is configured, a warning is issued" + ) print(" ✓ Monitor API usage and costs when using rerank services") except Exception as e: diff --git a/lightrag/api/config.py b/lightrag/api/config.py index e8a9cea38945a34c8894b999e1dc7aa4f6a2e3ce..98b817c102ee9a6b8589a0fa895ab0fd4b8d427c 100644 --- a/lightrag/api/config.py +++ b/lightrag/api/config.py @@ -11,6 +11,14 @@ from lightrag.utils import get_env_value from lightrag.constants import ( DEFAULT_WOKERS, DEFAULT_TIMEOUT, + DEFAULT_TOP_K, + DEFAULT_CHUNK_TOP_K, + DEFAULT_HISTORY_TURNS, + DEFAULT_MAX_ENTITY_TOKENS, + DEFAULT_MAX_RELATION_TOKENS, + DEFAULT_MAX_TOTAL_TOKENS, + DEFAULT_COSINE_THRESHOLD, + DEFAULT_RELATED_CHUNK_NUMBER, ) # use the .env that is inside the current folder @@ -151,45 +159,6 @@ def parse_args() -> argparse.Namespace: help="Path to SSL private key file (required if --ssl is enabled)", ) - parser.add_argument( - "--history-turns", - type=int, - default=get_env_value("HISTORY_TURNS", 3, int), - help="Number of conversation history turns to include (default: from env or 3)", - ) - - # Search parameters - parser.add_argument( - "--top-k", - type=int, - default=get_env_value("TOP_K", 60, int), - help="Number of most similar results to return (default: from env or 60)", - ) - parser.add_argument( - "--chunk-top-k", - type=int, - default=get_env_value("CHUNK_TOP_K", 15, int), - 
help="Number of text chunks to retrieve initially from vector search (default: from env or 15)", - ) - parser.add_argument( - "--chunk-rerank-top-k", - type=int, - default=get_env_value("CHUNK_RERANK_TOP_K", 5, int), - help="Number of text chunks to keep after reranking (default: from env or 5)", - ) - parser.add_argument( - "--enable-rerank", - action="store_true", - default=get_env_value("ENABLE_RERANK", False, bool), - help="Enable rerank functionality (default: from env or False)", - ) - parser.add_argument( - "--cosine-threshold", - type=float, - default=get_env_value("COSINE_THRESHOLD", 0.2, float), - help="Cosine similarity threshold (default: from env or 0.4)", - ) - # Ollama model name parser.add_argument( "--simulated-model-name", @@ -321,6 +290,26 @@ def parse_args() -> argparse.Namespace: args.rerank_binding_host = get_env_value("RERANK_BINDING_HOST", None) args.rerank_binding_api_key = get_env_value("RERANK_BINDING_API_KEY", None) + # Query configuration + args.history_turns = get_env_value("HISTORY_TURNS", DEFAULT_HISTORY_TURNS, int) + args.top_k = get_env_value("TOP_K", DEFAULT_TOP_K, int) + args.chunk_top_k = get_env_value("CHUNK_TOP_K", DEFAULT_CHUNK_TOP_K, int) + args.max_entity_tokens = get_env_value( + "MAX_ENTITY_TOKENS", DEFAULT_MAX_ENTITY_TOKENS, int + ) + args.max_relation_tokens = get_env_value( + "MAX_RELATION_TOKENS", DEFAULT_MAX_RELATION_TOKENS, int + ) + args.max_total_tokens = get_env_value( + "MAX_TOTAL_TOKENS", DEFAULT_MAX_TOTAL_TOKENS, int + ) + args.cosine_threshold = get_env_value( + "COSINE_THRESHOLD", DEFAULT_COSINE_THRESHOLD, float + ) + args.related_chunk_number = get_env_value( + "RELATED_CHUNK_NUMBER", DEFAULT_RELATED_CHUNK_NUMBER, int + ) + ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name return args diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index bd0154c90fdd8957f99671b214ebd313fa6ac0cc..573455e5560f26fdd38d08c37d2eff530680b6ca 100644 --- a/lightrag/api/lightrag_server.py 
+++ b/lightrag/api/lightrag_server.py @@ -292,9 +292,9 @@ def create_app(args): ), ) - # Configure rerank function if enabled + # Configure rerank function if model and API are configured rerank_model_func = None - if args.enable_rerank and args.rerank_binding_api_key and args.rerank_binding_host: + if args.rerank_binding_api_key and args.rerank_binding_host: from lightrag.rerank import custom_rerank async def server_rerank_func( @@ -312,10 +312,12 @@ def create_app(args): ) rerank_model_func = server_rerank_func - logger.info(f"Rerank enabled with model: {args.rerank_model}") - elif args.enable_rerank: - logger.warning( - "Rerank enabled but RERANK_BINDING_API_KEY or RERANK_BINDING_HOST not configured. Rerank will be disabled." + logger.info( + f"Rerank model configured: {args.rerank_model} (can be enabled per query)" + ) + else: + logger.info( + "Rerank model not configured. Set RERANK_BINDING_API_KEY and RERANK_BINDING_HOST to enable reranking." ) # Initialize RAG @@ -351,7 +353,6 @@ def create_app(args): }, enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache=args.enable_llm_cache, - enable_rerank=args.enable_rerank, rerank_model_func=rerank_model_func, auto_manage_storages_states=False, max_parallel_insert=args.max_parallel_insert, @@ -381,7 +382,6 @@ def create_app(args): }, enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache=args.enable_llm_cache, - enable_rerank=args.enable_rerank, rerank_model_func=rerank_model_func, auto_manage_storages_states=False, max_parallel_insert=args.max_parallel_insert, @@ -512,11 +512,13 @@ def create_app(args): "enable_llm_cache": args.enable_llm_cache, "workspace": args.workspace, "max_graph_nodes": args.max_graph_nodes, - # Rerank configuration - "enable_rerank": args.enable_rerank, - "rerank_model": args.rerank_model if args.enable_rerank else None, + # Rerank configuration (based on whether rerank model is configured) + "enable_rerank": rerank_model_func 
is not None, + "rerank_model": args.rerank_model + if rerank_model_func is not None + else None, "rerank_binding_host": args.rerank_binding_host - if args.enable_rerank + if rerank_model_func is not None else None, }, "auth_mode": auth_mode, diff --git a/lightrag/api/routers/query_routes.py b/lightrag/api/routers/query_routes.py index 0a0c622749573cb39149dd42ea2064bbe10614e2..4d97f151f028a9b4d8240d3977a2c2a377bacb86 100644 --- a/lightrag/api/routers/query_routes.py +++ b/lightrag/api/routers/query_routes.py @@ -52,31 +52,25 @@ class QueryRequest(BaseModel): chunk_top_k: Optional[int] = Field( ge=1, default=None, - description="Number of text chunks to retrieve initially from vector search.", + description="Number of text chunks to retrieve initially from vector search and keep after reranking.", ) - chunk_rerank_top_k: Optional[int] = Field( - ge=1, + max_entity_tokens: Optional[int] = Field( default=None, - description="Number of text chunks to keep after reranking.", - ) - - max_token_for_text_unit: Optional[int] = Field( - gt=1, - default=None, - description="Maximum number of tokens allowed for each retrieved text chunk.", + description="Maximum number of tokens allocated for entity context in unified token control system.", + ge=1, ) - max_token_for_global_context: Optional[int] = Field( - gt=1, + max_relation_tokens: Optional[int] = Field( default=None, - description="Maximum number of tokens allocated for relationship descriptions in global retrieval.", + description="Maximum number of tokens allocated for relationship context in unified token control system.", + ge=1, ) - max_token_for_local_context: Optional[int] = Field( - gt=1, + max_total_tokens: Optional[int] = Field( default=None, - description="Maximum number of tokens allocated for entity descriptions in local retrieval.", + description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).", + ge=1, ) conversation_history: Optional[List[Dict[str, 
Any]]] = Field( @@ -99,6 +93,11 @@ class QueryRequest(BaseModel): description="User-provided prompt for the query. If provided, this will be used instead of the default value from prompt template.", ) + enable_rerank: Optional[bool] = Field( + default=None, + description="Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True.", + ) + @field_validator("query", mode="after") @classmethod def query_strip_after(cls, query: str) -> str: diff --git a/lightrag/api/webui/assets/_basePickBy-D3PHsJjq.js b/lightrag/api/webui/assets/_basePickBy-DV1dBXEu.js similarity index 95% rename from lightrag/api/webui/assets/_basePickBy-D3PHsJjq.js rename to lightrag/api/webui/assets/_basePickBy-DV1dBXEu.js index f23cedcbbf939517a0379ea770f4241e1bdb551a..a964a6a16882b88623676edaef141df22fb168be 100644 Binary files a/lightrag/api/webui/assets/_basePickBy-D3PHsJjq.js and b/lightrag/api/webui/assets/_basePickBy-DV1dBXEu.js differ diff --git a/lightrag/api/webui/assets/_baseUniq-CtAZZJ8e.js b/lightrag/api/webui/assets/_baseUniq-BZ3hvks1.js similarity index 98% rename from lightrag/api/webui/assets/_baseUniq-CtAZZJ8e.js rename to lightrag/api/webui/assets/_baseUniq-BZ3hvks1.js index f50279694ba8a3e988cd645e0167150d39091498..1cc386ac6d69e7b24f0c7142741ccc60ce66b072 100644 Binary files a/lightrag/api/webui/assets/_baseUniq-CtAZZJ8e.js and b/lightrag/api/webui/assets/_baseUniq-BZ3hvks1.js differ diff --git a/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-Bou3pEJo.js b/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-0ddCq26Q.js similarity index 99% rename from lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-Bou3pEJo.js rename to lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-0ddCq26Q.js index 34a3316b359d76f5bfc5e8a867b1fc7c33e1c97f..23cf4a6295c9aa8bf7f8882991bc68907057adaf 100644 Binary files a/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-Bou3pEJo.js and 
b/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-0ddCq26Q.js differ diff --git a/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-BxXXNv1O.js b/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-DezBiNXY.js similarity index 99% rename from lightrag/api/webui/assets/blockDiagram-JOT3LUYC-BxXXNv1O.js rename to lightrag/api/webui/assets/blockDiagram-JOT3LUYC-DezBiNXY.js index 8348d25ab12771f500a213a489a76a01a9c83779..a011900605999d49b56aac9395993e41e4492e44 100644 Binary files a/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-BxXXNv1O.js and b/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-DezBiNXY.js differ diff --git a/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BpY1T-jk.js b/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BMYcCHQE.js similarity index 99% rename from lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BpY1T-jk.js rename to lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BMYcCHQE.js index 01f3a0c0b39f17bfe87e8991b6036f9ec5afbf85..cbeca1b3259c120c669837a3d19cb49b2a62c2d9 100644 Binary files a/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BpY1T-jk.js and b/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BMYcCHQE.js differ diff --git a/lightrag/api/webui/assets/chunk-4BMEZGHF-CAhtCpmT.js b/lightrag/api/webui/assets/chunk-4BMEZGHF-DM9xX3Iw.js similarity index 78% rename from lightrag/api/webui/assets/chunk-4BMEZGHF-CAhtCpmT.js rename to lightrag/api/webui/assets/chunk-4BMEZGHF-DM9xX3Iw.js index 50ce15aac64e590c93b3a1af81f72cfca8130285..1c6c7b047d3d930e0aecdb700e00d88f942c106d 100644 Binary files a/lightrag/api/webui/assets/chunk-4BMEZGHF-CAhtCpmT.js and b/lightrag/api/webui/assets/chunk-4BMEZGHF-DM9xX3Iw.js differ diff --git a/lightrag/api/webui/assets/chunk-A2AXSNBT-B91iiasA.js b/lightrag/api/webui/assets/chunk-A2AXSNBT-CRex3-yW.js similarity index 99% rename from lightrag/api/webui/assets/chunk-A2AXSNBT-B91iiasA.js rename to lightrag/api/webui/assets/chunk-A2AXSNBT-CRex3-yW.js index 
c59c9d4211aad3f0c213fe90cbcbcdc3433fc0a0..1ec7ee57b7df9859326080b30fdd972ecc65bda8 100644 Binary files a/lightrag/api/webui/assets/chunk-A2AXSNBT-B91iiasA.js and b/lightrag/api/webui/assets/chunk-A2AXSNBT-CRex3-yW.js differ diff --git a/lightrag/api/webui/assets/chunk-AEK57VVT-gQ4j2jcG.js b/lightrag/api/webui/assets/chunk-AEK57VVT-DlsJi6tH.js similarity index 99% rename from lightrag/api/webui/assets/chunk-AEK57VVT-gQ4j2jcG.js rename to lightrag/api/webui/assets/chunk-AEK57VVT-DlsJi6tH.js index 2539c37b81625963a25a6bb20872221e56f72cb6..3b671fb8baecfc51806a7d6a44cc6ced0eeb27bd 100644 Binary files a/lightrag/api/webui/assets/chunk-AEK57VVT-gQ4j2jcG.js and b/lightrag/api/webui/assets/chunk-AEK57VVT-DlsJi6tH.js differ diff --git a/lightrag/api/webui/assets/chunk-D6G4REZN-CGaqGId9.js b/lightrag/api/webui/assets/chunk-D6G4REZN-5j_Vlndu.js similarity index 95% rename from lightrag/api/webui/assets/chunk-D6G4REZN-CGaqGId9.js rename to lightrag/api/webui/assets/chunk-D6G4REZN-5j_Vlndu.js index 03f6dcdc7d8221d9f285a058498c03ca08152cca..961133cb2e5061f0bf7fb535d5ae04b63c875401 100644 Binary files a/lightrag/api/webui/assets/chunk-D6G4REZN-CGaqGId9.js and b/lightrag/api/webui/assets/chunk-D6G4REZN-5j_Vlndu.js differ diff --git a/lightrag/api/webui/assets/chunk-RZ5BOZE2-B615FLH4.js b/lightrag/api/webui/assets/chunk-RZ5BOZE2-CdnIs5Fb.js similarity index 81% rename from lightrag/api/webui/assets/chunk-RZ5BOZE2-B615FLH4.js rename to lightrag/api/webui/assets/chunk-RZ5BOZE2-CdnIs5Fb.js index ef6235aed902033e1b3fd47040343fab1f277c4f..c49e35eecd960afac3e0a6bec57b4f2185943fa4 100644 Binary files a/lightrag/api/webui/assets/chunk-RZ5BOZE2-B615FLH4.js and b/lightrag/api/webui/assets/chunk-RZ5BOZE2-CdnIs5Fb.js differ diff --git a/lightrag/api/webui/assets/chunk-XZIHB7SX-c4P7PYPk.js b/lightrag/api/webui/assets/chunk-XZIHB7SX-gcyrJN2U.js similarity index 67% rename from lightrag/api/webui/assets/chunk-XZIHB7SX-c4P7PYPk.js rename to lightrag/api/webui/assets/chunk-XZIHB7SX-gcyrJN2U.js index 
fe917915107cf1e2083684cf0b0d715c027ef37e..114a73c34a54d2e67679898589d26e1be716afbf 100644 Binary files a/lightrag/api/webui/assets/chunk-XZIHB7SX-c4P7PYPk.js and b/lightrag/api/webui/assets/chunk-XZIHB7SX-gcyrJN2U.js differ diff --git a/lightrag/api/webui/assets/classDiagram-GIVACNV2-DBTA8XwB.js b/lightrag/api/webui/assets/classDiagram-GIVACNV2-DZXU66uW.js similarity index 61% rename from lightrag/api/webui/assets/classDiagram-GIVACNV2-DBTA8XwB.js rename to lightrag/api/webui/assets/classDiagram-GIVACNV2-DZXU66uW.js index 27c65696825b5f3b41405636ce0a7a83975c2283..bc8695ab1e0fe84303048be5a5dbc37433de4e22 100644 Binary files a/lightrag/api/webui/assets/classDiagram-GIVACNV2-DBTA8XwB.js and b/lightrag/api/webui/assets/classDiagram-GIVACNV2-DZXU66uW.js differ diff --git a/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DBTA8XwB.js b/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DZXU66uW.js similarity index 61% rename from lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DBTA8XwB.js rename to lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DZXU66uW.js index 27c65696825b5f3b41405636ce0a7a83975c2283..bc8695ab1e0fe84303048be5a5dbc37433de4e22 100644 Binary files a/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DBTA8XwB.js and b/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DZXU66uW.js differ diff --git a/lightrag/api/webui/assets/clone-Dm5jEAXQ.js b/lightrag/api/webui/assets/clone-Dm5jEAXQ.js deleted file mode 100644 index 8b42b7f66c3a0455a4d70ca1621026213b91da0b..0000000000000000000000000000000000000000 Binary files a/lightrag/api/webui/assets/clone-Dm5jEAXQ.js and /dev/null differ diff --git a/lightrag/api/webui/assets/clone-eVzB-9-f.js b/lightrag/api/webui/assets/clone-eVzB-9-f.js new file mode 100644 index 0000000000000000000000000000000000000000..7d96fdff2636dcc8c20cd66f78a9c9d83ae64060 Binary files /dev/null and b/lightrag/api/webui/assets/clone-eVzB-9-f.js differ diff --git a/lightrag/api/webui/assets/dagre-OKDRZEBW-CqR4Poz4.js 
b/lightrag/api/webui/assets/dagre-OKDRZEBW-Cas2IJD5.js similarity index 97% rename from lightrag/api/webui/assets/dagre-OKDRZEBW-CqR4Poz4.js rename to lightrag/api/webui/assets/dagre-OKDRZEBW-Cas2IJD5.js index 046ac1a0061226fc8514cf5c16816a0d7b5a014a..821f3f18ceac2e70584b07d27ebdd5f98e6e9d26 100644 Binary files a/lightrag/api/webui/assets/dagre-OKDRZEBW-CqR4Poz4.js and b/lightrag/api/webui/assets/dagre-OKDRZEBW-Cas2IJD5.js differ diff --git a/lightrag/api/webui/assets/diagram-SSKATNLV-pBYsrik-.js b/lightrag/api/webui/assets/diagram-SSKATNLV-CA9pCZ-g.js similarity index 93% rename from lightrag/api/webui/assets/diagram-SSKATNLV-pBYsrik-.js rename to lightrag/api/webui/assets/diagram-SSKATNLV-CA9pCZ-g.js index 7476bf1fcd204be3665418107d66840329486aa3..c497ac8f182d382f9b7e96bf8a9ffda0adfc3897 100644 Binary files a/lightrag/api/webui/assets/diagram-SSKATNLV-pBYsrik-.js and b/lightrag/api/webui/assets/diagram-SSKATNLV-CA9pCZ-g.js differ diff --git a/lightrag/api/webui/assets/diagram-VNBRO52H-Bu64Jus9.js b/lightrag/api/webui/assets/diagram-VNBRO52H-B9-Mlqta.js similarity index 90% rename from lightrag/api/webui/assets/diagram-VNBRO52H-Bu64Jus9.js rename to lightrag/api/webui/assets/diagram-VNBRO52H-B9-Mlqta.js index f975fcccba98e425e5aeec4103850be78fcb6482..53640b5b52391971ccdc003a089ca57015b2199b 100644 Binary files a/lightrag/api/webui/assets/diagram-VNBRO52H-Bu64Jus9.js and b/lightrag/api/webui/assets/diagram-VNBRO52H-B9-Mlqta.js differ diff --git a/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-BTmP3B4h.js b/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-CX4Di1zm.js similarity index 99% rename from lightrag/api/webui/assets/erDiagram-Q7BY3M3F-BTmP3B4h.js rename to lightrag/api/webui/assets/erDiagram-Q7BY3M3F-CX4Di1zm.js index 425a389e5940ed754782567d4ac0656ed7fa0e20..395cdc5c43f99efd2cc654c15ad8c1010b39d629 100644 Binary files a/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-BTmP3B4h.js and b/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-CX4Di1zm.js differ diff --git 
a/lightrag/api/webui/assets/feature-documents-oks3sUnM.js b/lightrag/api/webui/assets/feature-documents-DZY3tMAq.js similarity index 99% rename from lightrag/api/webui/assets/feature-documents-oks3sUnM.js rename to lightrag/api/webui/assets/feature-documents-DZY3tMAq.js index 7eb92ba7e886c5780ed10989a32fb8e47d3e03e3..ee42449ccced238b8af06d19f0486d1c1ff16dcb 100644 Binary files a/lightrag/api/webui/assets/feature-documents-oks3sUnM.js and b/lightrag/api/webui/assets/feature-documents-DZY3tMAq.js differ diff --git a/lightrag/api/webui/assets/feature-graph-NODQb6qW.js b/lightrag/api/webui/assets/feature-graph-wF7LCIjH.js similarity index 51% rename from lightrag/api/webui/assets/feature-graph-NODQb6qW.js rename to lightrag/api/webui/assets/feature-graph-wF7LCIjH.js index be23288dc39852c3fe9e7b8d300e695f572fdeb2..0a1fa8ca6351360d502784a96caf75c945a2aaf6 100644 Binary files a/lightrag/api/webui/assets/feature-graph-NODQb6qW.js and b/lightrag/api/webui/assets/feature-graph-wF7LCIjH.js differ diff --git a/lightrag/api/webui/assets/feature-retrieval-DalFy9WB.js b/lightrag/api/webui/assets/feature-retrieval-DalFy9WB.js deleted file mode 100644 index 1b8ba068ac51d00314ccc211457976c224910c72..0000000000000000000000000000000000000000 Binary files a/lightrag/api/webui/assets/feature-retrieval-DalFy9WB.js and /dev/null differ diff --git a/lightrag/api/webui/assets/feature-retrieval-DdCvVec9.js b/lightrag/api/webui/assets/feature-retrieval-DdCvVec9.js new file mode 100644 index 0000000000000000000000000000000000000000..f82c9b6c3d604c1f886a0a966e1c48e1d2b0e273 Binary files /dev/null and b/lightrag/api/webui/assets/feature-retrieval-DdCvVec9.js differ diff --git a/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-DZNySYxV.js b/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-BDwWKjb6.js similarity index 99% rename from lightrag/api/webui/assets/flowDiagram-4HSFHLVR-DZNySYxV.js rename to lightrag/api/webui/assets/flowDiagram-4HSFHLVR-BDwWKjb6.js index 
95c972cd1ce263b862f9cf255de9caad8c9ccf37..5cfd266e4fcbeb66a80f68ba2694bff7b8ab6c67 100644 Binary files a/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-DZNySYxV.js and b/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-BDwWKjb6.js differ diff --git a/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-GWTNv7FR.js b/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-Du3IUDRk.js similarity index 99% rename from lightrag/api/webui/assets/ganttDiagram-APWFNJXF-GWTNv7FR.js rename to lightrag/api/webui/assets/ganttDiagram-APWFNJXF-Du3IUDRk.js index 3296abdda179a1212a5fb08f581765ac992737f4..d7731304bed4d646a910722d04f658af510802f8 100644 Binary files a/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-GWTNv7FR.js and b/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-Du3IUDRk.js differ diff --git a/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-BXUpvPAf.js b/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-CD8MAiok.js similarity index 98% rename from lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-BXUpvPAf.js rename to lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-CD8MAiok.js index 03955373498e06722e9769ef9f0225d65aff5290..cadc6efb4edf15dbf7105a765205095131f0b93a 100644 Binary files a/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-BXUpvPAf.js and b/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-CD8MAiok.js differ diff --git a/lightrag/api/webui/assets/graph-BLnbmvfZ.js b/lightrag/api/webui/assets/graph-DJgPOSDl.js similarity index 97% rename from lightrag/api/webui/assets/graph-BLnbmvfZ.js rename to lightrag/api/webui/assets/graph-DJgPOSDl.js index c1f9f3d52f895e54c5a935b1b9335b5984faf01c..ee724a0d60de96c8c8eba4930605e43a22cf40ac 100644 Binary files a/lightrag/api/webui/assets/graph-BLnbmvfZ.js and b/lightrag/api/webui/assets/graph-DJgPOSDl.js differ diff --git a/lightrag/api/webui/assets/index-yRRg2BZk.js b/lightrag/api/webui/assets/index-D3V9EKqf.js similarity index 69% rename from lightrag/api/webui/assets/index-yRRg2BZk.js rename to 
lightrag/api/webui/assets/index-D3V9EKqf.js index bebf87b25fbae873fdca209d71df9bbb5c368008..ab8f72e822a01cd4d64e88addb48890931e56075 100644 Binary files a/lightrag/api/webui/assets/index-yRRg2BZk.js and b/lightrag/api/webui/assets/index-D3V9EKqf.js differ diff --git a/lightrag/api/webui/assets/index-1Hy45NwC.js b/lightrag/api/webui/assets/index-DB3D3pNI.js similarity index 91% rename from lightrag/api/webui/assets/index-1Hy45NwC.js rename to lightrag/api/webui/assets/index-DB3D3pNI.js index a63a535a78f44e3ed61ac9aca0c19e6cf54f14d5..110faed713ec78fa1f00552045c7e81786953b4b 100644 Binary files a/lightrag/api/webui/assets/index-1Hy45NwC.js and b/lightrag/api/webui/assets/index-DB3D3pNI.js differ diff --git a/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DAtlRRqj.js b/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DYkrQwoL.js similarity index 61% rename from lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DAtlRRqj.js rename to lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DYkrQwoL.js index 04c4a4621f3020003d02fbda1b8d1c83c7b18630..38a4336d471c39995e4d3937b58dd5963070d0d3 100644 Binary files a/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DAtlRRqj.js and b/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DYkrQwoL.js differ diff --git a/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-BscxFTBa.js b/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-CZecBGFk.js similarity index 99% rename from lightrag/api/webui/assets/journeyDiagram-U35MCT3I-BscxFTBa.js rename to lightrag/api/webui/assets/journeyDiagram-U35MCT3I-CZecBGFk.js index e84a0773baddaaeab67b2485812e83fe220208ef..1d59d8a3e157ac1fea24aaf4e7295cd16c804ca8 100644 Binary files a/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-BscxFTBa.js and b/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-CZecBGFk.js differ diff --git a/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-QESEl0tA.js b/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-CD8vwi41.js similarity index 99% rename from 
lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-QESEl0tA.js rename to lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-CD8vwi41.js index c45220eebb8bd1e3a99a24d2ff72ea4e2b355fc3..fee84e0b215cb4eba8ce44b226c5570495ce6f5e 100644 Binary files a/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-QESEl0tA.js and b/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-CD8vwi41.js differ diff --git a/lightrag/api/webui/assets/layout-DsT4215v.js b/lightrag/api/webui/assets/layout-D_MnvYWV.js similarity index 99% rename from lightrag/api/webui/assets/layout-DsT4215v.js rename to lightrag/api/webui/assets/layout-D_MnvYWV.js index b15fa63d9560fcbde47a670a55959e022968cd93..3b731afe37f25d8c343ba186ae2938aefe71fa80 100644 Binary files a/lightrag/api/webui/assets/layout-DsT4215v.js and b/lightrag/api/webui/assets/layout-D_MnvYWV.js differ diff --git a/lightrag/api/webui/assets/markdown-vendor-DmIvJdn7.js b/lightrag/api/webui/assets/markdown-vendor-ZbbHR4ge.js similarity index 85% rename from lightrag/api/webui/assets/markdown-vendor-DmIvJdn7.js rename to lightrag/api/webui/assets/markdown-vendor-ZbbHR4ge.js index abcd404940c9469f77c89b39aef26c9432322a32..c8c81f04afe4e34e62204cd0824ae6c8187e7010 100644 Binary files a/lightrag/api/webui/assets/markdown-vendor-DmIvJdn7.js and b/lightrag/api/webui/assets/markdown-vendor-ZbbHR4ge.js differ diff --git a/lightrag/api/webui/assets/mermaid-vendor-D0f_SE0h.js b/lightrag/api/webui/assets/mermaid-vendor-CR44n-lC.js similarity index 99% rename from lightrag/api/webui/assets/mermaid-vendor-D0f_SE0h.js rename to lightrag/api/webui/assets/mermaid-vendor-CR44n-lC.js index 4b3ddee7eee7dca790e9015f8e2ebfc5db821039..65b01a060d7e219a9c4637371a8472613b5c09a7 100644 Binary files a/lightrag/api/webui/assets/mermaid-vendor-D0f_SE0h.js and b/lightrag/api/webui/assets/mermaid-vendor-CR44n-lC.js differ diff --git a/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-aQwMTShx.js b/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-CEOit9vG.js 
similarity index 99% rename from lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-aQwMTShx.js rename to lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-CEOit9vG.js index 3b5f0e99882eac1772b1c68140e6ca47b67ecd9d..32143c9d0cb009a75ac95ed5e49d69b801f8e611 100644 Binary files a/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-aQwMTShx.js and b/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-CEOit9vG.js differ diff --git a/lightrag/api/webui/assets/pieDiagram-IB7DONF6-D6N6SEu_.js b/lightrag/api/webui/assets/pieDiagram-IB7DONF6-Ca5AV9bY.js similarity index 91% rename from lightrag/api/webui/assets/pieDiagram-IB7DONF6-D6N6SEu_.js rename to lightrag/api/webui/assets/pieDiagram-IB7DONF6-Ca5AV9bY.js index 880ab323db8dcf8ab5e015e7e71ec5a59918147a..9f4b64e9f3bc1eec98a000a58d8109f4d33b089c 100644 Binary files a/lightrag/api/webui/assets/pieDiagram-IB7DONF6-D6N6SEu_.js and b/lightrag/api/webui/assets/pieDiagram-IB7DONF6-Ca5AV9bY.js differ diff --git a/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-COkzo7lS.js b/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-D5ZAOmhC.js similarity index 99% rename from lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-COkzo7lS.js rename to lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-D5ZAOmhC.js index bf4daf5fa311cd95b6fdfe818fac530a2d7acc4f..fc71293e729d697866c4ee71e67f4041433ec4d0 100644 Binary files a/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-COkzo7lS.js and b/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-D5ZAOmhC.js differ diff --git a/lightrag/api/webui/assets/radar-MK3ICKWK-DOAXm8cx.js b/lightrag/api/webui/assets/radar-MK3ICKWK-B97XRKGx.js similarity index 99% rename from lightrag/api/webui/assets/radar-MK3ICKWK-DOAXm8cx.js rename to lightrag/api/webui/assets/radar-MK3ICKWK-B97XRKGx.js index 1c632567bd6ad84b1fc0a419f1d7aec1eef1c548..7c8edc94cdda54aaa849ee340f86a45fc94dfb5f 100644 Binary files a/lightrag/api/webui/assets/radar-MK3ICKWK-DOAXm8cx.js and 
b/lightrag/api/webui/assets/radar-MK3ICKWK-B97XRKGx.js differ diff --git a/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-lKW1n5a1.js b/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-BzPWhOZW.js similarity index 99% rename from lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-lKW1n5a1.js rename to lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-BzPWhOZW.js index e0aa3bb9d8d4c552f9dbd94bb0490ff260f2a326..0827b2fccfc37c3182353d8f2ad4661f7c495849 100644 Binary files a/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-lKW1n5a1.js and b/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-BzPWhOZW.js differ diff --git a/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-BqECU7xS.js b/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-DYZFDO6U.js similarity index 99% rename from lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-BqECU7xS.js rename to lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-DYZFDO6U.js index cf34615916669049bf0d7255988eb07f3e0857b3..b226074a12e13b58d8acc8e58aa5726f67f406f4 100644 Binary files a/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-BqECU7xS.js and b/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-DYZFDO6U.js differ diff --git a/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-ByOWqALm.js b/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-GAQ6Ejep.js similarity index 99% rename from lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-ByOWqALm.js rename to lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-GAQ6Ejep.js index 7f5ca0e5e48899541fac336e59ad6e4976ed95cd..9c3326bced7a5551c6540f43b3f5da7502a5a3cf 100644 Binary files a/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-ByOWqALm.js and b/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-GAQ6Ejep.js differ diff --git a/lightrag/api/webui/assets/stateDiagram-DGXRK772-DjKMsne-.js b/lightrag/api/webui/assets/stateDiagram-DGXRK772-pI_aBJdi.js similarity index 96% rename from lightrag/api/webui/assets/stateDiagram-DGXRK772-DjKMsne-.js rename to 
lightrag/api/webui/assets/stateDiagram-DGXRK772-pI_aBJdi.js index 8ec4a30bd73407e62e9e65b97bf41561fa0e65ad..86a727642b2addb1d12ab1a9f26366c707d674e6 100644 Binary files a/lightrag/api/webui/assets/stateDiagram-DGXRK772-DjKMsne-.js and b/lightrag/api/webui/assets/stateDiagram-DGXRK772-pI_aBJdi.js differ diff --git a/lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-sVx8nHiu.js b/lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-lbiDwad_.js similarity index 61% rename from lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-sVx8nHiu.js rename to lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-lbiDwad_.js index 046adada205c5f0a4e66c14173a77aab1ccbfc6c..ba4f4e05ea3b1c724fd28c4472abce3314e6704c 100644 Binary files a/lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-sVx8nHiu.js and b/lightrag/api/webui/assets/stateDiagram-v2-YXO3MK2T-lbiDwad_.js differ diff --git a/lightrag/api/webui/assets/timeline-definition-BDJGKUSR-FwPl5FEj.js b/lightrag/api/webui/assets/timeline-definition-BDJGKUSR-C0uTfaoS.js similarity index 99% rename from lightrag/api/webui/assets/timeline-definition-BDJGKUSR-FwPl5FEj.js rename to lightrag/api/webui/assets/timeline-definition-BDJGKUSR-C0uTfaoS.js index 2023240e3cdce027f3b6764e9cee03750c1aea2d..6e50b5ea9b5b48db21a0e44063af8679dcd87966 100644 Binary files a/lightrag/api/webui/assets/timeline-definition-BDJGKUSR-FwPl5FEj.js and b/lightrag/api/webui/assets/timeline-definition-BDJGKUSR-C0uTfaoS.js differ diff --git a/lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-BHnqzGXj.js b/lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-Be7THF3w.js similarity index 99% rename from lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-BHnqzGXj.js rename to lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-Be7THF3w.js index a361dc9b741bc2244b6dc7a4fbe3468de04c8424..95511aad4b872751a99f4fb293cc0446508c2410 100644 Binary files a/lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-BHnqzGXj.js and b/lightrag/api/webui/assets/xychartDiagram-VJFVF3MP-Be7THF3w.js 
differ diff --git a/lightrag/api/webui/index.html b/lightrag/api/webui/index.html index 20c25c2046b843201d194a5096d76fa680bc4319..461f6ee086d1653d532bd6753e61d446d4884f41 100644 Binary files a/lightrag/api/webui/index.html and b/lightrag/api/webui/index.html differ diff --git a/lightrag/base.py b/lightrag/base.py index 97564ac2e84fa704933fbd80f1b34e5e548d1878..ac0545ce7f19b4d62e0427b19758e226d7996f7a 100644 --- a/lightrag/base.py +++ b/lightrag/base.py @@ -14,7 +14,16 @@ from typing import ( ) from .utils import EmbeddingFunc from .types import KnowledgeGraph -from .constants import GRAPH_FIELD_SEP +from .constants import ( + GRAPH_FIELD_SEP, + DEFAULT_TOP_K, + DEFAULT_CHUNK_TOP_K, + DEFAULT_MAX_ENTITY_TOKENS, + DEFAULT_MAX_RELATION_TOKENS, + DEFAULT_MAX_TOTAL_TOKENS, + DEFAULT_HISTORY_TURNS, + DEFAULT_ENABLE_RERANK, +) # use the .env that is inside the current folder # allows to use different .env file for each lightrag instance @@ -36,7 +45,7 @@ T = TypeVar("T") class QueryParam: """Configuration parameters for query execution in LightRAG.""" - mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = "global" + mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = "mix" """Specifies the retrieval mode: - "local": Focuses on context-dependent information. - "global": Utilizes global knowledge. @@ -57,29 +66,28 @@ class QueryParam: stream: bool = False """If True, enables streaming output for real-time responses.""" - top_k: int = int(os.getenv("TOP_K", "60")) + top_k: int = int(os.getenv("TOP_K", str(DEFAULT_TOP_K))) """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode.""" - chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "5")) - """Number of text chunks to retrieve initially from vector search. + chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", str(DEFAULT_CHUNK_TOP_K))) + """Number of text chunks to retrieve initially from vector search and keep after reranking. 
If None, defaults to top_k value. """ - chunk_rerank_top_k: int = int(os.getenv("CHUNK_RERANK_TOP_K", "5")) - """Number of text chunks to keep after reranking. - If None, keeps all chunks returned from initial retrieval. - """ - - max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "6000")) - """Maximum number of tokens allowed for each retrieved text chunk.""" + max_entity_tokens: int = int( + os.getenv("MAX_ENTITY_TOKENS", str(DEFAULT_MAX_ENTITY_TOKENS)) + ) + """Maximum number of tokens allocated for entity context in unified token control system.""" - max_token_for_global_context: int = int( - os.getenv("MAX_TOKEN_RELATION_DESC", "4000") + max_relation_tokens: int = int( + os.getenv("MAX_RELATION_TOKENS", str(DEFAULT_MAX_RELATION_TOKENS)) ) - """Maximum number of tokens allocated for relationship descriptions in global retrieval.""" + """Maximum number of tokens allocated for relationship context in unified token control system.""" - max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) - """Maximum number of tokens allocated for entity descriptions in local retrieval.""" + max_total_tokens: int = int( + os.getenv("MAX_TOTAL_TOKENS", str(DEFAULT_MAX_TOTAL_TOKENS)) + ) + """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).""" hl_keywords: list[str] = field(default_factory=list) """List of high-level keywords to prioritize in retrieval.""" @@ -92,7 +100,7 @@ class QueryParam: Format: [{"role": "user/assistant", "content": "message"}]. """ - history_turns: int = 3 + history_turns: int = int(os.getenv("HISTORY_TURNS", str(DEFAULT_HISTORY_TURNS))) """Number of complete conversation turns (user-assistant pairs) to consider in the response context.""" ids: list[str] | None = None @@ -109,6 +117,13 @@ class QueryParam: If proivded, this will be use instead of the default vaulue from prompt template. 
""" + enable_rerank: bool = ( + os.getenv("ENABLE_RERANK", str(DEFAULT_ENABLE_RERANK).lower()).lower() == "true" + ) + """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. + Default is True to enable reranking when rerank model is available. + """ + @dataclass class StorageNameSpace(ABC): diff --git a/lightrag/constants.py b/lightrag/constants.py index c3fd653188ccc201d475f2b24fe3a6d7a10e05d4..628583649af447c7114e669404af0c4ddc55d302 100644 --- a/lightrag/constants.py +++ b/lightrag/constants.py @@ -12,6 +12,17 @@ DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 4 DEFAULT_WOKERS = 2 DEFAULT_TIMEOUT = 150 +# Query and retrieval configuration defaults +DEFAULT_TOP_K = 40 +DEFAULT_CHUNK_TOP_K = 10 +DEFAULT_MAX_ENTITY_TOKENS = 10000 +DEFAULT_MAX_RELATION_TOKENS = 10000 +DEFAULT_MAX_TOTAL_TOKENS = 32000 +DEFAULT_HISTORY_TURNS = 3 +DEFAULT_ENABLE_RERANK = True +DEFAULT_COSINE_THRESHOLD = 0.2 +DEFAULT_RELATED_CHUNK_NUMBER = 10 + # Separator for graph fields GRAPH_FIELD_SEP = "" diff --git a/lightrag/kg/faiss_impl.py b/lightrag/kg/faiss_impl.py index c6ee099d3a49b6310530a91aca4be253f9608c55..448ca5ef087ea20b169cd65313a9aad987c120a5 100644 --- a/lightrag/kg/faiss_impl.py +++ b/lightrag/kg/faiss_impl.py @@ -185,10 +185,6 @@ class FaissVectorDBStorage(BaseVectorStorage): embedding = np.array(embedding, dtype=np.float32) faiss.normalize_L2(embedding) # we do in-place normalization - logger.info( - f"Query: {query}, top_k: {top_k}, threshold: {self.cosine_better_than_threshold}" - ) - # Perform the similarity search index = await self._get_index() distances, indices = index.search(embedding, top_k) diff --git a/lightrag/kg/shared_storage.py b/lightrag/kg/shared_storage.py index 56fce4e5be8e844e340cb1bf9327ba7a7bc408a3..228bf272a951d34995ad23c0bdbf4a17e6056bbb 100644 --- a/lightrag/kg/shared_storage.py +++ b/lightrag/kg/shared_storage.py @@ -20,6 +20,9 @@ def direct_log(message, enable_output: bool = False, level: str = 
"DEBUG"): level: Log level (default: "DEBUG") enable_output: Whether to actually output the log (default: True) """ + if not enable_output: + return + # Get the current logger level from the lightrag logger try: from lightrag.utils import logger @@ -40,7 +43,7 @@ def direct_log(message, enable_output: bool = False, level: str = "DEBUG"): message_level = level_mapping.get(level.upper(), logging.DEBUG) # print(f"Diret_log: {level.upper()} {message_level} ? {current_level}", file=sys.stderr, flush=True) - if enable_output or (message_level >= current_level): + if message_level >= current_level: print(f"{level}: {message}", file=sys.stderr, flush=True) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 6ee61e2dbd2c3ee3f8df567be2c4425b9a284838..092e06ebceab5b11faaae4ccf4ebd543c99064b9 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -24,6 +24,13 @@ from typing import ( from lightrag.constants import ( DEFAULT_MAX_GLEANING, DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, + DEFAULT_TOP_K, + DEFAULT_CHUNK_TOP_K, + DEFAULT_MAX_ENTITY_TOKENS, + DEFAULT_MAX_RELATION_TOKENS, + DEFAULT_MAX_TOTAL_TOKENS, + DEFAULT_COSINE_THRESHOLD, + DEFAULT_RELATED_CHUNK_NUMBER, ) from lightrag.utils import get_env_value @@ -125,6 +132,42 @@ class LightRAG: log_level: int | None = field(default=None) log_file_path: str | None = field(default=None) + # Query parameters + # --- + + top_k: int = field(default=get_env_value("TOP_K", DEFAULT_TOP_K, int)) + """Number of entities/relations to retrieve for each query.""" + + chunk_top_k: int = field( + default=get_env_value("CHUNK_TOP_K", DEFAULT_CHUNK_TOP_K, int) + ) + """Maximum number of chunks in context.""" + + max_entity_tokens: int = field( + default=get_env_value("MAX_ENTITY_TOKENS", DEFAULT_MAX_ENTITY_TOKENS, int) + ) + """Maximum number of tokens for entity in context.""" + + max_relation_tokens: int = field( + default=get_env_value("MAX_RELATION_TOKENS", DEFAULT_MAX_RELATION_TOKENS, int) + ) + """Maximum number of tokens for 
relation in context.""" + + max_total_tokens: int = field( + default=get_env_value("MAX_TOTAL_TOKENS", DEFAULT_MAX_TOTAL_TOKENS, int) + ) + """Maximum total tokens in context (including system prompt, entities, relations and chunks).""" + + cosine_threshold: int = field( + default=get_env_value("COSINE_THRESHOLD", DEFAULT_COSINE_THRESHOLD, int) + ) + """Cosine threshold of vector DB retrieval for entities, relations and chunks.""" + + related_chunk_number: int = field( + default=get_env_value("RELATED_CHUNK_NUMBER", DEFAULT_RELATED_CHUNK_NUMBER, int) + ) + """Number of related chunks to grab from single entity or relation.""" + # Entity extraction # --- @@ -238,11 +281,6 @@ class LightRAG: # Rerank Configuration # --- - enable_rerank: bool = field( - default=bool(os.getenv("ENABLE_RERANK", "False").lower() == "true") - ) - """Enable reranking for improved retrieval quality. Defaults to False.""" - rerank_model_func: Callable[..., object] | None = field(default=None) """Function for reranking retrieved documents. All rerank configurations (model name, API keys, top_k, etc.) should be included in this function. Optional.""" @@ -454,9 +492,9 @@ class LightRAG: ) # Init Rerank - if self.enable_rerank and self.rerank_model_func: + if self.rerank_model_func: logger.info("Rerank model initialized for improved retrieval quality") - elif self.enable_rerank and not self.rerank_model_func: + else: logger.warning( "Rerank is enabled but no rerank_model_func provided. Reranking will be skipped." 
) diff --git a/lightrag/operate.py b/lightrag/operate.py index 4bf579d1e25104a77f09c38a66dcc6084d1e871d..e3456d9247f1b67da238ee928ab45763e0479d48 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -36,7 +36,13 @@ from .base import ( QueryParam, ) from .prompt import PROMPTS -from .constants import GRAPH_FIELD_SEP +from .constants import ( + GRAPH_FIELD_SEP, + DEFAULT_MAX_ENTITY_TOKENS, + DEFAULT_MAX_RELATION_TOKENS, + DEFAULT_MAX_TOTAL_TOKENS, + DEFAULT_RELATED_CHUNK_NUMBER, +) from .kg.shared_storage import get_storage_keyed_lock import time from dotenv import load_dotenv @@ -1643,7 +1649,9 @@ async def kg_query( tokenizer: Tokenizer = global_config["tokenizer"] len_of_prompts = len(tokenizer.encode(query + sys_prompt)) - logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[kg_query] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})" + ) response = await use_model_func( query, @@ -1766,7 +1774,9 @@ async def extract_keywords_only( tokenizer: Tokenizer = global_config["tokenizer"] len_of_prompts = len(tokenizer.encode(kw_prompt)) - logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[extract_keywords] Sending to LLM: {len_of_prompts:,} tokens (Prompt: {len_of_prompts})" + ) # 5. 
Call the LLM for keyword extraction if param.model_func: @@ -1883,51 +1893,63 @@ async def _build_query_context( entities_context = [] relations_context = [] + # Store original data for later text chunk retrieval + original_node_datas = [] + original_edge_datas = [] + # Handle local and global modes if query_param.mode == "local": - entities_context, relations_context, entity_chunks = await _get_node_data( + ( + entities_context, + relations_context, + node_datas, + use_relations, + ) = await _get_node_data( ll_keywords, knowledge_graph_inst, entities_vdb, - text_chunks_db, query_param, ) - all_chunks.extend(entity_chunks) + original_node_datas = node_datas + original_edge_datas = use_relations elif query_param.mode == "global": - entities_context, relations_context, relationship_chunks = await _get_edge_data( + ( + entities_context, + relations_context, + edge_datas, + use_entities, + ) = await _get_edge_data( hl_keywords, knowledge_graph_inst, relationships_vdb, - text_chunks_db, query_param, ) - all_chunks.extend(relationship_chunks) + original_edge_datas = edge_datas + original_node_datas = use_entities else: # hybrid or mix mode ll_data = await _get_node_data( ll_keywords, knowledge_graph_inst, entities_vdb, - text_chunks_db, query_param, ) hl_data = await _get_edge_data( hl_keywords, knowledge_graph_inst, relationships_vdb, - text_chunks_db, query_param, ) - (ll_entities_context, ll_relations_context, ll_chunks) = ll_data - (hl_entities_context, hl_relations_context, hl_chunks) = hl_data - - # Collect chunks from entity and relationship sources - all_chunks.extend(ll_chunks) - all_chunks.extend(hl_chunks) + (ll_entities_context, ll_relations_context, ll_node_datas, ll_edge_datas) = ( + ll_data + ) + (hl_entities_context, hl_relations_context, hl_edge_datas, hl_node_datas) = ( + hl_data + ) - # Get vector chunks if in mix mode + # Get vector chunks first if in mix mode if query_param.mode == "mix" and chunks_vdb: vector_chunks = await _get_vector_context( 
query, @@ -1936,34 +1958,260 @@ async def _build_query_context( ) all_chunks.extend(vector_chunks) + # Store original data from both sources + original_node_datas = ll_node_datas + hl_node_datas + original_edge_datas = ll_edge_datas + hl_edge_datas + # Combine entities and relations contexts entities_context = process_combine_contexts( - hl_entities_context, ll_entities_context + ll_entities_context, hl_entities_context ) relations_context = process_combine_contexts( hl_relations_context, ll_relations_context ) - # Process all chunks uniformly: deduplication, reranking, and token truncation - processed_chunks = await process_chunks_unified( - query=query, - chunks=all_chunks, - query_param=query_param, - global_config=text_chunks_db.global_config, - source_type="mixed", + logger.info( + f"Initial context: {len(entities_context)} entities, {len(relations_context)} relations, {len(all_chunks)} chunks" ) - # Build final text_units_context from processed chunks + # Unified token control system - Apply precise token limits to entities and relations + tokenizer = text_chunks_db.global_config.get("tokenizer") + if tokenizer: + # Get new token limits from query_param (with fallback to global_config) + max_entity_tokens = getattr( + query_param, + "max_entity_tokens", + text_chunks_db.global_config.get( + "max_entity_tokens", DEFAULT_MAX_ENTITY_TOKENS + ), + ) + max_relation_tokens = getattr( + query_param, + "max_relation_tokens", + text_chunks_db.global_config.get( + "max_relation_tokens", DEFAULT_MAX_RELATION_TOKENS + ), + ) + max_total_tokens = getattr( + query_param, + "max_total_tokens", + text_chunks_db.global_config.get( + "max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS + ), + ) + + # Truncate entities based on complete JSON serialization + if entities_context: + original_entity_count = len(entities_context) + + # Process entities context to replace GRAPH_FIELD_SEP with : in file_path fields + for entity in entities_context: + if "file_path" in entity and 
entity["file_path"]: + entity["file_path"] = entity["file_path"].replace( + GRAPH_FIELD_SEP, ";" + ) + + entities_context = truncate_list_by_token_size( + entities_context, + key=lambda x: json.dumps(x, ensure_ascii=False), + max_token_size=max_entity_tokens, + tokenizer=tokenizer, + ) + if len(entities_context) < original_entity_count: + logger.debug( + f"Truncated entities: {original_entity_count} -> {len(entities_context)} (entity max tokens: {max_entity_tokens})" + ) + + # Truncate relations based on complete JSON serialization + if relations_context: + original_relation_count = len(relations_context) + + # Process relations context to replace GRAPH_FIELD_SEP with : in file_path fields + for relation in relations_context: + if "file_path" in relation and relation["file_path"]: + relation["file_path"] = relation["file_path"].replace( + GRAPH_FIELD_SEP, ";" + ) + + relations_context = truncate_list_by_token_size( + relations_context, + key=lambda x: json.dumps(x, ensure_ascii=False), + max_token_size=max_relation_tokens, + tokenizer=tokenizer, + ) + if len(relations_context) < original_relation_count: + logger.debug( + f"Truncated relations: {original_relation_count} -> {len(relations_context)} (relation max tokens: {max_relation_tokens})" + ) + + # After truncation, get text chunks based on final entities and relations + logger.info("Getting text chunks based on truncated entities and relations...") + + # Create filtered data based on truncated context + final_node_datas = [] + if entities_context and original_node_datas: + final_entity_names = {e["entity"] for e in entities_context} + seen_nodes = set() + for node in original_node_datas: + name = node.get("entity_name") + if name in final_entity_names and name not in seen_nodes: + final_node_datas.append(node) + seen_nodes.add(name) + + final_edge_datas = [] + if relations_context and original_edge_datas: + final_relation_pairs = {(r["entity1"], r["entity2"]) for r in relations_context} + seen_edges = set() + 
for edge in original_edge_datas: + src, tgt = edge.get("src_id"), edge.get("tgt_id") + if src is None or tgt is None: + src, tgt = edge.get("src_tgt", (None, None)) + + pair = (src, tgt) + if pair in final_relation_pairs and pair not in seen_edges: + final_edge_datas.append(edge) + seen_edges.add(pair) + + # Get text chunks based on final filtered data + text_chunk_tasks = [] + + if final_node_datas: + text_chunk_tasks.append( + _find_most_related_text_unit_from_entities( + final_node_datas, + query_param, + text_chunks_db, + knowledge_graph_inst, + ) + ) + + if final_edge_datas: + text_chunk_tasks.append( + _find_related_text_unit_from_relationships( + final_edge_datas, + query_param, + text_chunks_db, + ) + ) + + # Execute text chunk retrieval in parallel + if text_chunk_tasks: + text_chunk_results = await asyncio.gather(*text_chunk_tasks) + for chunks in text_chunk_results: + if chunks: + all_chunks.extend(chunks) + + # Apply token processing to chunks if tokenizer is available text_units_context = [] - for i, chunk in enumerate(processed_chunks): - text_units_context.append( - { - "id": i + 1, - "content": chunk["content"], - "file_path": chunk.get("file_path", "unknown_source"), - } + if tokenizer and all_chunks: + # Calculate dynamic token limit for text chunks + entities_str = json.dumps(entities_context, ensure_ascii=False) + relations_str = json.dumps(relations_context, ensure_ascii=False) + + # Calculate base context tokens (entities + relations + template) + kg_context_template = """-----Entities(KG)----- + +```json +{entities_str} +``` + +-----Relationships(KG)----- + +```json +{relations_str} +``` + +-----Document Chunks(DC)----- + +```json +[] +``` + +""" + kg_context = kg_context_template.format( + entities_str=entities_str, relations_str=relations_str + ) + kg_context_tokens = len(tokenizer.encode(kg_context)) + + # Calculate actual system prompt overhead dynamically + # 1. 
Calculate conversation history tokens + history_context = "" + if query_param.conversation_history: + history_context = get_conversation_turns( + query_param.conversation_history, query_param.history_turns + ) + history_tokens = ( + len(tokenizer.encode(history_context)) if history_context else 0 + ) + + # 2. Calculate system prompt template tokens (excluding context_data) + user_prompt = query_param.user_prompt if query_param.user_prompt else "" + response_type = ( + query_param.response_type + if query_param.response_type + else "Multiple Paragraphs" ) + # Get the system prompt template from PROMPTS + sys_prompt_template = text_chunks_db.global_config.get( + "system_prompt_template", PROMPTS["rag_response"] + ) + + # Create a sample system prompt with placeholders filled (excluding context_data) + sample_sys_prompt = sys_prompt_template.format( + history=history_context, + context_data="", # Empty for overhead calculation + response_type=response_type, + user_prompt=user_prompt, + ) + sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt)) + + # Total system prompt overhead = template + query tokens + query_tokens = len(tokenizer.encode(query)) + sys_prompt_overhead = sys_prompt_template_tokens + query_tokens + + buffer_tokens = 100 # Safety buffer as requested + + # Calculate available tokens for text chunks + used_tokens = kg_context_tokens + sys_prompt_overhead + buffer_tokens + available_chunk_tokens = max_total_tokens - used_tokens + + logger.debug( + f"Token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, KG: {kg_context_tokens}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}" + ) + + # Re-process chunks with dynamic token limit + if all_chunks: + # Create a temporary query_param copy with adjusted chunk token limit + temp_chunks = [ + {"content": chunk["content"], "file_path": chunk["file_path"]} + for chunk in all_chunks + ] + + # Apply token truncation to chunks 
using the dynamic limit + truncated_chunks = await process_chunks_unified( + query=query, + chunks=temp_chunks, + query_param=query_param, + global_config=text_chunks_db.global_config, + source_type="mixed", + chunk_token_limit=available_chunk_tokens, # Pass dynamic limit + ) + + # Rebuild text_units_context with truncated chunks + for i, chunk in enumerate(truncated_chunks): + text_units_context.append( + { + "id": i + 1, + "content": chunk["content"], + "file_path": chunk.get("file_path", "unknown_source"), + } + ) + + logger.debug( + f"Re-truncated chunks for dynamic token limit: {len(temp_chunks)} -> {len(text_units_context)} (chunk available tokens: {available_chunk_tokens})" + ) + logger.info( f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(text_units_context)} chunks" ) @@ -2002,7 +2250,6 @@ async def _get_node_data( query: str, knowledge_graph_inst: BaseGraphStorage, entities_vdb: BaseVectorStorage, - text_chunks_db: BaseKVStorage, query_param: QueryParam, ): # get similar entities @@ -2015,7 +2262,7 @@ async def _get_node_data( ) if not len(results): - return "", "", "" + return "", "", [], [] # Extract all entity IDs from your results list node_ids = [r["entity_name"] for r in results] @@ -2042,34 +2289,16 @@ async def _get_node_data( } for k, n, d in zip(results, node_datas, node_degrees) if n is not None - ] # what is this text_chunks_db doing. dont remember it in airvx. check the diagram. 
- # get entitytext chunk - use_text_units = await _find_most_related_text_unit_from_entities( - node_datas, - query_param, - text_chunks_db, - knowledge_graph_inst, - ) + ] + use_relations = await _find_most_related_edges_from_entities( node_datas, query_param, knowledge_graph_inst, ) - tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer") - len_node_datas = len(node_datas) - node_datas = truncate_list_by_token_size( - node_datas, - key=lambda x: x["description"] if x["description"] is not None else "", - max_token_size=query_param.max_token_for_local_context, - tokenizer=tokenizer, - ) - logger.debug( - f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})" - ) - logger.info( - f"Local query: {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks" + f"Local query: {len(node_datas)} entites, {len(use_relations)} relations" ) # build prompt @@ -2088,7 +2317,6 @@ async def _get_node_data( "entity": n["entity_name"], "type": n.get("entity_type", "UNKNOWN"), "description": n.get("description", "UNKNOWN"), - "rank": n["rank"], "created_at": created_at, "file_path": file_path, } @@ -2110,15 +2338,12 @@ async def _get_node_data( "entity1": e["src_tgt"][0], "entity2": e["src_tgt"][1], "description": e["description"], - "keywords": e["keywords"], - "weight": e["weight"], - "rank": e["rank"], "created_at": created_at, "file_path": file_path, } ) - return entities_context, relations_context, use_text_units + return entities_context, relations_context, node_datas, use_relations async def _find_most_related_text_unit_from_entities( @@ -2127,8 +2352,14 @@ async def _find_most_related_text_unit_from_entities( text_chunks_db: BaseKVStorage, knowledge_graph_inst: BaseGraphStorage, ): + logger.debug(f"Searching text chunks for {len(node_datas)} entities") + text_units = [ - split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP]) + 
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[ + : text_chunks_db.global_config.get( + "related_chunk_number", DEFAULT_RELATED_CHUNK_NUMBER + ) + ] for dp in node_datas if dp["source_id"] is not None ] @@ -2273,20 +2504,9 @@ async def _find_most_related_edges_from_entities( } all_edges_data.append(combined) - tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer") all_edges_data = sorted( all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True ) - all_edges_data = truncate_list_by_token_size( - all_edges_data, - key=lambda x: x["description"] if x["description"] is not None else "", - max_token_size=query_param.max_token_for_global_context, - tokenizer=tokenizer, - ) - - logger.debug( - f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})" - ) return all_edges_data @@ -2295,7 +2515,6 @@ async def _get_edge_data( keywords, knowledge_graph_inst: BaseGraphStorage, relationships_vdb: BaseVectorStorage, - text_chunks_db: BaseKVStorage, query_param: QueryParam, ): logger.info( @@ -2307,7 +2526,7 @@ async def _get_edge_data( ) if not len(results): - return "", "", "" + return "", "", [], [] # Prepare edge pairs in two forms: # For the batch edge properties function, use dicts. 
@@ -2343,31 +2562,18 @@ async def _get_edge_data( } edge_datas.append(combined) - tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer") edge_datas = sorted( edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True ) - edge_datas = truncate_list_by_token_size( + + use_entities = await _find_most_related_entities_from_relationships( edge_datas, - key=lambda x: x["description"] if x["description"] is not None else "", - max_token_size=query_param.max_token_for_global_context, - tokenizer=tokenizer, - ) - use_entities, use_text_units = await asyncio.gather( - _find_most_related_entities_from_relationships( - edge_datas, - query_param, - knowledge_graph_inst, - ), - _find_related_text_unit_from_relationships( - edge_datas, - query_param, - text_chunks_db, - knowledge_graph_inst, - ), + query_param, + knowledge_graph_inst, ) + logger.info( - f"Global query: {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks" + f"Global query: {len(use_entities)} entites, {len(edge_datas)} relations" ) relations_context = [] @@ -2386,9 +2592,6 @@ async def _get_edge_data( "entity1": e["src_id"], "entity2": e["tgt_id"], "description": e["description"], - "keywords": e["keywords"], - "weight": e["weight"], - "rank": e["rank"], "created_at": created_at, "file_path": file_path, } @@ -2410,22 +2613,13 @@ async def _get_edge_data( "entity": n["entity_name"], "type": n.get("entity_type", "UNKNOWN"), "description": n.get("description", "UNKNOWN"), - "rank": n["rank"], "created_at": created_at, "file_path": file_path, } ) - text_units_context = [] - for i, t in enumerate(use_text_units): - text_units_context.append( - { - "id": i + 1, - "content": t["content"], - "file_path": t.get("file_path", "unknown"), - } - ) - return entities_context, relations_context, text_units_context + # Return original data for later text chunk retrieval + return entities_context, relations_context, edge_datas, use_entities async def 
_find_most_related_entities_from_relationships( @@ -2462,18 +2656,6 @@ async def _find_most_related_entities_from_relationships( combined = {**node, "entity_name": entity_name, "rank": degree} node_datas.append(combined) - tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer") - len_node_datas = len(node_datas) - node_datas = truncate_list_by_token_size( - node_datas, - key=lambda x: x["description"] if x["description"] is not None else "", - max_token_size=query_param.max_token_for_local_context, - tokenizer=tokenizer, - ) - logger.debug( - f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})" - ) - return node_datas @@ -2481,10 +2663,15 @@ async def _find_related_text_unit_from_relationships( edge_datas: list[dict], query_param: QueryParam, text_chunks_db: BaseKVStorage, - knowledge_graph_inst: BaseGraphStorage, ): + logger.debug(f"Searching text chunks for {len(edge_datas)} relationships") + text_units = [ - split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP]) + split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[ + : text_chunks_db.global_config.get( + "related_chunk_number", DEFAULT_RELATED_CHUNK_NUMBER + ) + ] for dp in edge_datas if dp["source_id"] is not None ] @@ -2565,13 +2752,66 @@ async def naive_query( if chunks is None or len(chunks) == 0: return PROMPTS["fail_response"] - # Process chunks using unified processing + # Calculate dynamic token limit for chunks + # Get token limits from query_param (with fallback to global_config) + max_total_tokens = getattr( + query_param, + "max_total_tokens", + global_config.get("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS), + ) + + # Calculate conversation history tokens + history_context = "" + if query_param.conversation_history: + history_context = get_conversation_turns( + query_param.conversation_history, query_param.history_turns + ) + history_tokens = len(tokenizer.encode(history_context)) if 
history_context else 0 + + # Calculate system prompt template tokens (excluding content_data) + user_prompt = query_param.user_prompt if query_param.user_prompt else "" + response_type = ( + query_param.response_type + if query_param.response_type + else "Multiple Paragraphs" + ) + + # Use the provided system prompt or default + sys_prompt_template = ( + system_prompt if system_prompt else PROMPTS["naive_rag_response"] + ) + + # Create a sample system prompt with empty content_data to calculate overhead + sample_sys_prompt = sys_prompt_template.format( + content_data="", # Empty for overhead calculation + response_type=response_type, + history=history_context, + user_prompt=user_prompt, + ) + sys_prompt_template_tokens = len(tokenizer.encode(sample_sys_prompt)) + + # Total system prompt overhead = template + query tokens + query_tokens = len(tokenizer.encode(query)) + sys_prompt_overhead = sys_prompt_template_tokens + query_tokens + + buffer_tokens = 100 # Safety buffer + + # Calculate available tokens for chunks + used_tokens = sys_prompt_overhead + buffer_tokens + available_chunk_tokens = max_total_tokens - used_tokens + + logger.debug( + f"Naive query token allocation - Total: {max_total_tokens}, History: {history_tokens}, SysPrompt: {sys_prompt_overhead}, Buffer: {buffer_tokens}, Available for chunks: {available_chunk_tokens}" + ) + + # Process chunks using unified processing with dynamic token limit processed_chunks = await process_chunks_unified( query=query, chunks=chunks, query_param=query_param, global_config=global_config, source_type="vector", + chunk_token_limit=available_chunk_tokens, # Pass dynamic limit ) logger.info(f"Final context: {len(processed_chunks)} chunks") @@ -2622,7 +2862,9 @@ async def naive_query( return sys_prompt len_of_prompts = len(tokenizer.encode(query + sys_prompt)) - logger.debug(f"[naive_query]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[naive_query] Sending to LLM: {len_of_prompts:,} tokens (Query: 
{len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})" + ) response = await use_model_func( query, @@ -2746,7 +2988,9 @@ async def kg_query_with_keywords( tokenizer: Tokenizer = global_config["tokenizer"] len_of_prompts = len(tokenizer.encode(query + sys_prompt)) - logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}") + logger.debug( + f"[kg_query_with_keywords] Sending to LLM: {len_of_prompts:,} tokens (Query: {len(tokenizer.encode(query))}, System: {len(tokenizer.encode(sys_prompt))})" + ) # 6. Generate response response = await use_model_func( @@ -2866,6 +3110,7 @@ async def apply_rerank_if_enabled( query: str, retrieved_docs: list[dict], global_config: dict, + enable_rerank: bool = True, top_k: int = None, ) -> list[dict]: """ @@ -2875,18 +3120,19 @@ async def apply_rerank_if_enabled( query: The search query retrieved_docs: List of retrieved documents global_config: Global configuration containing rerank settings + enable_rerank: Whether to enable reranking from query parameter top_k: Number of top documents to return after reranking Returns: Reranked documents if rerank is enabled, otherwise original documents """ - if not global_config.get("enable_rerank", False) or not retrieved_docs: + if not enable_rerank or not retrieved_docs: return retrieved_docs rerank_func = global_config.get("rerank_model_func") if not rerank_func: - logger.debug( - "Rerank is enabled but no rerank function provided, skipping rerank" + logger.warning( + "Rerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters." ) return retrieved_docs @@ -2923,6 +3169,7 @@ async def process_chunks_unified( query_param: QueryParam, global_config: dict, source_type: str = "mixed", + chunk_token_limit: int = None, # Add parameter for dynamic token limit ) -> list[dict]: """ Unified processing for text chunks: deduplication, chunk_top_k limiting, reranking, and token truncation. 
@@ -2933,6 +3180,7 @@ async def process_chunks_unified( query_param: Query parameters containing configuration global_config: Global configuration dictionary source_type: Source type for logging ("vector", "entity", "relationship", "mixed") + chunk_token_limit: Dynamic token limit for chunks (if None, uses default) Returns: Processed and filtered list of text chunks @@ -2954,12 +3202,13 @@ async def process_chunks_unified( ) # 2. Apply reranking if enabled and query is provided - if global_config.get("enable_rerank", False) and query and unique_chunks: - rerank_top_k = query_param.chunk_rerank_top_k or len(unique_chunks) + if query_param.enable_rerank and query and unique_chunks: + rerank_top_k = query_param.chunk_top_k or len(unique_chunks) unique_chunks = await apply_rerank_if_enabled( query=query, retrieved_docs=unique_chunks, global_config=global_config, + enable_rerank=query_param.enable_rerank, top_k=rerank_top_k, ) logger.debug(f"Rerank: {len(unique_chunks)} chunks (source: {source_type})") @@ -2975,16 +3224,25 @@ async def process_chunks_unified( # 4. 
Token-based final truncation tokenizer = global_config.get("tokenizer") if tokenizer and unique_chunks: + # Set default chunk_token_limit if not provided + if chunk_token_limit is None: + # Get default from query_param or global_config + chunk_token_limit = getattr( + query_param, + "max_total_tokens", + global_config.get("MAX_TOTAL_TOKENS", 32000), + ) + original_count = len(unique_chunks) unique_chunks = truncate_list_by_token_size( unique_chunks, key=lambda x: x.get("content", ""), - max_token_size=query_param.max_token_for_text_unit, + max_token_size=chunk_token_limit, tokenizer=tokenizer, ) logger.debug( f"Token truncation: {len(unique_chunks)} chunks from {original_count} " - f"(max tokens: {query_param.max_token_for_text_unit}, source: {source_type})" + f"(chunk available tokens: {chunk_token_limit}, source: {source_type})" ) return unique_chunks diff --git a/lightrag/rerank.py b/lightrag/rerank.py index 59719bc9e1a1199c5cf9deb5464905e6ee2c6d84..297fa0539ab476da805446f801c051f8703b39c9 100644 --- a/lightrag/rerank.py +++ b/lightrag/rerank.py @@ -10,55 +10,58 @@ from .utils import logger class RerankModel(BaseModel): """ - Pydantic model class for defining a custom rerank model. + Wrapper for rerank functions that can be used with LightRAG. - This class provides a convenient wrapper for rerank functions, allowing you to - encapsulate all rerank configurations (API keys, model settings, etc.) in one place. + Example usage: + ```python + from lightrag.rerank import RerankModel, jina_rerank + + # Create rerank model + rerank_model = RerankModel( + rerank_func=jina_rerank, + kwargs={ + "model": "BAAI/bge-reranker-v2-m3", + "api_key": "your_api_key_here", + "base_url": "https://api.jina.ai/v1/rerank" + } + ) - Attributes: - rerank_func (Callable[[Any], List[Dict]]): A callable function that reranks documents. - The function should take query and documents as input and return reranked results. 
- kwargs (Dict[str, Any]): A dictionary that contains the arguments to pass to the callable function. - This should include all necessary configurations such as model name, API key, base_url, etc. + # Use in LightRAG + rag = LightRAG( + rerank_model_func=rerank_model.rerank, + # ... other configurations + ) - Example usage: - Rerank model example with Jina: - ```python - rerank_model = RerankModel( - rerank_func=jina_rerank, - kwargs={ - "model": "BAAI/bge-reranker-v2-m3", - "api_key": "your_api_key_here", - "base_url": "https://api.jina.ai/v1/rerank" - } + # Query with rerank enabled (default) + result = await rag.aquery( + "your query", + param=QueryParam(enable_rerank=True) + ) + ``` + + Or define a custom function directly: + ```python + async def my_rerank_func(query: str, documents: list, top_k: int = None, **kwargs): + return await jina_rerank( + query=query, + documents=documents, + model="BAAI/bge-reranker-v2-m3", + api_key="your_api_key_here", + top_k=top_k or 10, + **kwargs ) - # Use in LightRAG - rag = LightRAG( - enable_rerank=True, - rerank_model_func=rerank_model.rerank, - # ... other configurations - ) - ``` - - Or define a custom function directly: - ```python - async def my_rerank_func(query: str, documents: list, top_k: int = None, **kwargs): - return await jina_rerank( - query=query, - documents=documents, - model="BAAI/bge-reranker-v2-m3", - api_key="your_api_key_here", - top_k=top_k or 10, - **kwargs - ) - - rag = LightRAG( - enable_rerank=True, - rerank_model_func=my_rerank_func, - # ... other configurations - ) - ``` + rag = LightRAG( + rerank_model_func=my_rerank_func, + # ... 
other configurations + ) + + # Control rerank per query + result = await rag.aquery( + "your query", + param=QueryParam(enable_rerank=True) # Enable rerank for this query + ) + ``` """ rerank_func: Callable[[Any], List[Dict]] diff --git a/lightrag/utils.py b/lightrag/utils.py index 386de3ab952f992173fd473794b70d2d875b56e2..171cf9f6a281cdcc998437cd45365649de33e11b 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -795,7 +795,9 @@ def process_combine_contexts(*context_lists): if not context_list: # Skip empty lists continue for item in context_list: - content_dict = {k: v for k, v in item.items() if k != "id"} + content_dict = { + k: v for k, v in item.items() if k != "id" and k != "created_at" + } content_key = tuple(sorted(content_dict.items())) if content_key not in seen_content: seen_content[content_key] = item diff --git a/lightrag_webui/src/api/lightrag.ts b/lightrag_webui/src/api/lightrag.ts index 48298cd1afefe534b2ac84b66c3d42bd0025575e..a050e1c773b0bb6cae87e5b762c0944576cda22e 100644 --- a/lightrag_webui/src/api/lightrag.ts +++ b/lightrag_webui/src/api/lightrag.ts @@ -106,12 +106,14 @@ export type QueryRequest = { stream?: boolean /** Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode. */ top_k?: number - /** Maximum number of tokens allowed for each retrieved text chunk. */ - max_token_for_text_unit?: number - /** Maximum number of tokens allocated for relationship descriptions in global retrieval. */ - max_token_for_global_context?: number - /** Maximum number of tokens allocated for entity descriptions in local retrieval. */ - max_token_for_local_context?: number + /** Maximum number of text chunks to retrieve and keep after reranking. */ + chunk_top_k?: number + /** Maximum number of tokens allocated for entity context in unified token control system. */ + max_entity_tokens?: number + /** Maximum number of tokens allocated for relationship context in unified token control system. 
*/ + max_relation_tokens?: number + /** Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt). */ + max_total_tokens?: number /** * Stores past conversation history to maintain context. * Format: [{"role": "user/assistant", "content": "message"}]. @@ -121,6 +123,8 @@ export type QueryRequest = { history_turns?: number /** User-provided prompt for the query. If provided, this will be used instead of the default value from prompt template. */ user_prompt?: string + /** Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True. */ + enable_rerank?: boolean } export type QueryResponse = { diff --git a/lightrag_webui/src/components/retrieval/QuerySettings.tsx b/lightrag_webui/src/components/retrieval/QuerySettings.tsx index 807884e164fac25d5354f638abd00f054238279d..28e8dc3d7f18a95a95a758279bfecc001068ab95 100644 --- a/lightrag_webui/src/components/retrieval/QuerySettings.tsx +++ b/lightrag_webui/src/components/retrieval/QuerySettings.tsx @@ -119,7 +119,6 @@ export default function QuerySettings() {
- {/* Removed sr-only label */}
- {/* Max Tokens */} + {/* Chunk Top K */} <> - <> - - - - - - -

{t('retrievePanel.querySettings.maxTokensTextUnitTooltip')}

-
-
-
-
- {/* Removed sr-only label */} - { - const value = e.target.value - handleChange('max_token_for_text_unit', value === '' ? '' : parseInt(value) || 0) - }} - onBlur={(e) => { - const value = e.target.value - if (value === '' || isNaN(parseInt(value))) { - handleChange('max_token_for_text_unit', 10000) - } - }} - min={1} - placeholder={t('retrievePanel.querySettings.maxTokensTextUnit')} - className="h-9" - /> -
- + + + + + + +

{t('retrievePanel.querySettings.chunkTopKTooltip')}

+
+
+
+
+ { + const value = e.target.value + handleChange('chunk_top_k', value === '' ? '' : parseInt(value) || 0) + }} + onBlur={(e) => { + const value = e.target.value + if (value === '' || isNaN(parseInt(value))) { + handleChange('chunk_top_k', 1) + } + }} + min={1} + placeholder={t('retrievePanel.querySettings.chunkTopKPlaceholder')} + /> +
+ - <> - - - - - - -

{t('retrievePanel.querySettings.maxTokensGlobalContextTooltip')}

-
-
-
-
- {/* Removed sr-only label */} - { - const value = e.target.value - handleChange('max_token_for_global_context', value === '' ? '' : parseInt(value) || 0) - }} - onBlur={(e) => { - const value = e.target.value - if (value === '' || isNaN(parseInt(value))) { - handleChange('max_token_for_global_context', 4000) - } - }} - min={1} - placeholder={t('retrievePanel.querySettings.maxTokensGlobalContext')} - className="h-9" - /> -
- + {/* Max Entity Tokens */} + <> + + + + + + +

{t('retrievePanel.querySettings.maxEntityTokensTooltip')}

+
+
+
+
+ { + const value = e.target.value + handleChange('max_entity_tokens', value === '' ? '' : parseInt(value) || 0) + }} + onBlur={(e) => { + const value = e.target.value + if (value === '' || isNaN(parseInt(value))) { + handleChange('max_entity_tokens', 1000) + } + }} + min={1} + placeholder={t('retrievePanel.querySettings.maxEntityTokensPlaceholder')} + /> +
+ - <> - - - - - - -

{t('retrievePanel.querySettings.maxTokensLocalContextTooltip')}

-
-
-
-
- {/* Removed sr-only label */} - { - const value = e.target.value - handleChange('max_token_for_local_context', value === '' ? '' : parseInt(value) || 0) - }} - onBlur={(e) => { - const value = e.target.value - if (value === '' || isNaN(parseInt(value))) { - handleChange('max_token_for_local_context', 4000) - } - }} - min={1} - placeholder={t('retrievePanel.querySettings.maxTokensLocalContext')} - className="h-9" - /> -
- + {/* Max Relation Tokens */} + <> + + + + + + +

{t('retrievePanel.querySettings.maxRelationTokensTooltip')}

+
+
+
+
+ { + const value = e.target.value + handleChange('max_relation_tokens', value === '' ? '' : parseInt(value) || 0) + }} + onBlur={(e) => { + const value = e.target.value + if (value === '' || isNaN(parseInt(value))) { + handleChange('max_relation_tokens', 1000) + } + }} + min={1} + placeholder={t('retrievePanel.querySettings.maxRelationTokensPlaceholder')} + /> +
+ + + {/* Max Total Tokens */} + <> + + + + + + +

{t('retrievePanel.querySettings.maxTotalTokensTooltip')}

+
+
+
+
+ { + const value = e.target.value + handleChange('max_total_tokens', value === '' ? '' : parseInt(value) || 0) + }} + onBlur={(e) => { + const value = e.target.value + if (value === '' || isNaN(parseInt(value))) { + handleChange('max_total_tokens', 1000) + } + }} + min={1} + placeholder={t('retrievePanel.querySettings.maxTotalTokensPlaceholder')} + /> +
{/* History Turns */} @@ -267,7 +294,6 @@ export default function QuerySettings() {
- {/* Removed sr-only label */} +
+ + + + + + +

{t('retrievePanel.querySettings.enableRerankTooltip')}

+
+
+
+ handleChange('enable_rerank', checked)} + /> +
+
@@ -379,6 +426,7 @@ export default function QuerySettings() { />
+
diff --git a/lightrag_webui/src/locales/ar.json b/lightrag_webui/src/locales/ar.json index 0f3b030bfad683832c3b18f5bffea9f867c33e90..939f6869371849fb3052de14bdf7c5c31eeb2aab 100644 --- a/lightrag_webui/src/locales/ar.json +++ b/lightrag_webui/src/locales/ar.json @@ -363,16 +363,22 @@ "singleParagraph": "فقرة واحدة", "bulletPoints": "نقاط نقطية" }, - "topK": "أعلى K نتائج", - "topKTooltip": "عدد العناصر العلوية للاسترجاع. يمثل الكيانات في وضع 'محلي' والعلاقات في وضع 'عالمي'", - "topKPlaceholder": "عدد النتائج", - "maxTokensTextUnit": "أقصى عدد من الرموز لوحدة النص", - "maxTokensTextUnitTooltip": "الحد الأقصى لعدد الرموز المسموح به لكل جزء نصي مسترجع", - "maxTokensGlobalContext": "أقصى عدد من الرموز للسياق العالمي", - "maxTokensGlobalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف العلاقات في الاسترجاع العالمي", - "maxTokensLocalContext": "أقصى عدد من الرموز للسياق المحلي", - "maxTokensLocalContextTooltip": "الحد الأقصى لعدد الرموز المخصص لأوصاف الكيانات في الاسترجاع المحلي", - "historyTurns": "دورات التاريخ", + "topK": "أعلى K", + "topKTooltip": "عدد العناصر العلوية للاسترداد. 
يمثل الكيانات في الوضع 'المحلي' والعلاقات في الوضع 'العالمي'.", + "topKPlaceholder": "أدخل قيمة أعلى k", + "chunkTopK": "أعلى K للقطع", + "chunkTopKTooltip": "العدد الأقصى لقطع النص المراد استردادها ومعالجتها.", + "chunkTopKPlaceholder": "أدخل قيمة أعلى k للقطع", + "chunkRerankTopK": "أعلى K لإعادة الترتيب", + "chunkRerankTopKTooltip": "عدد قطع النص المراد الاحتفاظ بها بعد إعادة الترتيب.", + "chunkRerankTopKPlaceholder": "أدخل قيمة أعلى k لإعادة الترتيب", + "maxEntityTokens": "الحد الأقصى لرموز الكيان", + "maxEntityTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق الكيان في نظام التحكم الموحد في الرموز", + "maxRelationTokens": "الحد الأقصى لرموز العلاقة", + "maxRelationTokensTooltip": "الحد الأقصى لعدد الرموز المخصصة لسياق العلاقة في نظام التحكم الموحد في الرموز", + "maxTotalTokens": "إجمالي الحد الأقصى للرموز", + "maxTotalTokensTooltip": "الحد الأقصى الإجمالي لميزانية الرموز لسياق الاستعلام بالكامل (الكيانات + العلاقات + الأجزاء + موجه النظام)", + "historyTurns": "أدوار التاريخ", "historyTurnsTooltip": "عدد الدورات الكاملة للمحادثة (أزواج المستخدم-المساعد) التي يجب مراعاتها في سياق الرد", "historyTurnsPlaceholder": "عدد دورات التاريخ", "onlyNeedContext": "تحتاج فقط إلى السياق", @@ -383,7 +389,9 @@ "streamResponseTooltip": "إذا كان صحيحًا، يتيح إخراج التدفق للردود في الوقت الفعلي", "userPrompt": "مطالبة مخصصة", "userPromptTooltip": "تقديم متطلبات استجابة إضافية إلى نموذج اللغة الكبير (غير متعلقة بمحتوى الاستعلام، فقط لمعالجة المخرجات).", - "userPromptPlaceholder": "أدخل مطالبة مخصصة (اختياري)" + "userPromptPlaceholder": "أدخل مطالبة مخصصة (اختياري)", + "enableRerank": "تمكين إعادة الترتيب", + "enableRerankTooltip": "تمكين إعادة ترتيب أجزاء النص المسترجعة. إذا كان True ولكن لم يتم تكوين نموذج إعادة الترتيب، فسيتم إصدار تحذير. افتراضي True." 
} }, "apiSite": { diff --git a/lightrag_webui/src/locales/en.json b/lightrag_webui/src/locales/en.json index e9d5c1ca5dbbec9023e71b66cc4b70c9def8394c..bbc41334c135551a9b454b63a65a2ee1d52d581f 100644 --- a/lightrag_webui/src/locales/en.json +++ b/lightrag_webui/src/locales/en.json @@ -363,15 +363,21 @@ "singleParagraph": "Single Paragraph", "bulletPoints": "Bullet Points" }, - "topK": "Top K Results", - "topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode", - "topKPlaceholder": "Number of results", - "maxTokensTextUnit": "Max Tokens for Text Unit", - "maxTokensTextUnitTooltip": "Maximum number of tokens allowed for each retrieved text chunk", - "maxTokensGlobalContext": "Max Tokens for Global Context", - "maxTokensGlobalContextTooltip": "Maximum number of tokens allocated for relationship descriptions in global retrieval", - "maxTokensLocalContext": "Max Tokens for Local Context", - "maxTokensLocalContextTooltip": "Maximum number of tokens allocated for entity descriptions in local retrieval", + "topK": "Top K", + "topKTooltip": "Number of top items to retrieve. 
Represents entities in 'local' mode and relationships in 'global' mode.", + "topKPlaceholder": "Enter top k value", + "chunkTopK": "Chunk Top K", + "chunkTopKTooltip": "Maximum number of text chunks to retrieve and process.", + "chunkTopKPlaceholder": "Enter chunk top k value", + "chunkRerankTopK": "Chunk Rerank Top K", + "chunkRerankTopKTooltip": "Number of text chunks to keep after reranking.", + "chunkRerankTopKPlaceholder": "Enter rerank top k value", + "maxEntityTokens": "Max Entity Tokens", + "maxEntityTokensTooltip": "Maximum number of tokens allocated for entity context in unified token control system", + "maxRelationTokens": "Max Relation Tokens", + "maxRelationTokensTooltip": "Maximum number of tokens allocated for relationship context in unified token control system", + "maxTotalTokens": "Max Total Tokens", + "maxTotalTokensTooltip": "Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)", "historyTurns": "History Turns", "historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context", "historyTurnsPlaceholder": "Number of history turns", @@ -383,7 +389,9 @@ "streamResponseTooltip": "If True, enables streaming output for real-time responses", "userPrompt": "User Prompt", "userPromptTooltip": "Provide additional response requirements to the LLM (unrelated to query content, only for output processing).", - "userPromptPlaceholder": "Enter custom prompt (optional)" + "userPromptPlaceholder": "Enter custom prompt (optional)", + "enableRerank": "Enable Rerank", + "enableRerankTooltip": "Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True." 
} }, "apiSite": { diff --git a/lightrag_webui/src/locales/fr.json b/lightrag_webui/src/locales/fr.json index 46f1216f20c177a57dd973015f20bef49342e43f..8f551e49b36136bd1cfc449cad974f35aa2c02d8 100644 --- a/lightrag_webui/src/locales/fr.json +++ b/lightrag_webui/src/locales/fr.json @@ -363,15 +363,21 @@ "singleParagraph": "Paragraphe unique", "bulletPoints": "Points à puces" }, - "topK": "Top K résultats", - "topKTooltip": "Nombre d'éléments supérieurs à récupérer. Représente les entités en mode 'local' et les relations en mode 'global'", - "topKPlaceholder": "Nombre de résultats", - "maxTokensTextUnit": "Nombre maximum de jetons pour l'unité de texte", - "maxTokensTextUnitTooltip": "Nombre maximum de jetons autorisés pour chaque fragment de texte récupéré", - "maxTokensGlobalContext": "Nombre maximum de jetons pour le contexte global", - "maxTokensGlobalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des relations dans la récupération globale", - "maxTokensLocalContext": "Nombre maximum de jetons pour le contexte local", - "maxTokensLocalContextTooltip": "Nombre maximum de jetons alloués pour les descriptions des entités dans la récupération locale", + "topK": "Top K", + "topKTooltip": "Nombre d'éléments principaux à récupérer. 
Représente les entités en mode 'local' et les relations en mode 'global'.", + "topKPlaceholder": "Entrez la valeur top k", + "chunkTopK": "Top K des Chunks", + "chunkTopKTooltip": "Nombre maximum de chunks de texte à récupérer et traiter.", + "chunkTopKPlaceholder": "Entrez la valeur top k des chunks", + "chunkRerankTopK": "Top K du Reclassement", + "chunkRerankTopKTooltip": "Nombre de chunks de texte à conserver après reclassement.", + "chunkRerankTopKPlaceholder": "Entrez la valeur top k du reclassement", + "maxEntityTokens": "Limite de jetons d'entité", + "maxEntityTokensTooltip": "Nombre maximum de jetons alloués au contexte d'entité dans le système de contrôle de jetons unifié", + "maxRelationTokens": "Limite de jetons de relation", + "maxRelationTokensTooltip": "Nombre maximum de jetons alloués au contexte de relation dans le système de contrôle de jetons unifié", + "maxTotalTokens": "Limite totale de jetons", + "maxTotalTokensTooltip": "Budget total maximum de jetons pour l'ensemble du contexte de requête (entités + relations + blocs + prompt système)", "historyTurns": "Tours d'historique", "historyTurnsTooltip": "Nombre de tours complets de conversation (paires utilisateur-assistant) à prendre en compte dans le contexte de la réponse", "historyTurnsPlaceholder": "Nombre de tours d'historique", @@ -383,7 +389,9 @@ "streamResponseTooltip": "Si vrai, active la sortie en flux pour des réponses en temps réel", "userPrompt": "Invite personnalisée", "userPromptTooltip": "Fournir des exigences de réponse supplémentaires au LLM (sans rapport avec le contenu de la requête, uniquement pour le traitement de sortie).", - "userPromptPlaceholder": "Entrez une invite personnalisée (facultatif)" + "userPromptPlaceholder": "Entrez une invite personnalisée (facultatif)", + "enableRerank": "Activer le Reclassement", + "enableRerankTooltip": "Active le reclassement pour les fragments de texte récupérés. 
Si True mais qu'aucun modèle de reclassement n'est configuré, un avertissement sera émis. True par défaut." } }, "apiSite": { diff --git a/lightrag_webui/src/locales/zh.json b/lightrag_webui/src/locales/zh.json index dcf9c5ebd7f8bd8a3213bff65df291dd6ca86b50..c9bad472cfd34b3072bc13b5672da39c2cbe4f8d 100644 --- a/lightrag_webui/src/locales/zh.json +++ b/lightrag_webui/src/locales/zh.json @@ -363,15 +363,21 @@ "singleParagraph": "单段落", "bulletPoints": "要点" }, - "topK": "Top K结果", - "topKTooltip": "检索的顶部项目数。在'local'模式下表示实体,在'global'模式下表示关系", - "topKPlaceholder": "结果数量", - "maxTokensTextUnit": "文本单元最大令牌数", - "maxTokensTextUnitTooltip": "每个检索文本块允许的最大令牌数", - "maxTokensGlobalContext": "全局上下文最大令牌数", - "maxTokensGlobalContextTooltip": "全局检索中关系描述的最大令牌数", - "maxTokensLocalContext": "本地上下文最大令牌数", - "maxTokensLocalContextTooltip": "本地检索中实体描述的最大令牌数", + "topK": "Top K", + "topKTooltip": "检索的顶部条目数量。在'local'模式下表示实体,在'global'模式下表示关系。", + "topKPlaceholder": "输入top k值", + "chunkTopK": "文本块 Top K", + "chunkTopKTooltip": "检索和处理的最大文本块数量。", + "chunkTopKPlaceholder": "输入文本块top k值", + "chunkRerankTopK": "重排序 Top K", + "chunkRerankTopKTooltip": "重排序后保留的文本块数量。", + "chunkRerankTopKPlaceholder": "输入重排序top k值", + "maxEntityTokens": "实体令牌数上限", + "maxEntityTokensTooltip": "统一令牌控制系统中分配给实体上下文的最大令牌数", + "maxRelationTokens": "关系令牌数上限", + "maxRelationTokensTooltip": "统一令牌控制系统中分配给关系上下文的最大令牌数", + "maxTotalTokens": "总令牌数上限", + "maxTotalTokensTooltip": "整个查询上下文的最大总令牌预算(实体+关系+文档块+系统提示)", "historyTurns": "历史轮次", "historyTurnsTooltip": "响应上下文中考虑的完整对话轮次(用户-助手对)数量", "historyTurnsPlaceholder": "历史轮次数", @@ -383,7 +389,9 @@ "streamResponseTooltip": "如果为True,启用实时流式输出响应", "userPrompt": "用户提示词", "userPromptTooltip": "向LLM提供额外的响应要求(与查询内容无关,仅用于处理输出)。", - "userPromptPlaceholder": "输入自定义提示词(可选)" + "userPromptPlaceholder": "输入自定义提示词(可选)", + "enableRerank": "启用重排", + "enableRerankTooltip": "为检索到的文本块启用重排。如果为True但未配置重排模型,将发出警告。默认为True。" } }, "apiSite": { diff --git a/lightrag_webui/src/locales/zh_TW.json 
b/lightrag_webui/src/locales/zh_TW.json index e8f04c1435967600358c736d0a299369665e32ed..0d179408c157c82ada33bac3c7d02e2912e23bff 100644 --- a/lightrag_webui/src/locales/zh_TW.json +++ b/lightrag_webui/src/locales/zh_TW.json @@ -304,7 +304,7 @@ "file_path": "來源", "keywords": "Keys", "weight": "權重" - } + } }, "edge": { "title": "關係", @@ -363,15 +363,15 @@ "singleParagraph": "單段落", "bulletPoints": "重點" }, - "topK": "Top K結果", - "topKTooltip": "檢索的前幾項結果數。在'local'模式下表示實體,在'global'模式下表示關係", - "topKPlaceholder": "結果數量", - "maxTokensTextUnit": "文字單元最大權杖數", - "maxTokensTextUnitTooltip": "每個檢索文字區塊允許的最大權杖數", - "maxTokensGlobalContext": "全域上下文最大權杖數", - "maxTokensGlobalContextTooltip": "全域檢索中關係描述的最大權杖數", - "maxTokensLocalContext": "本地上下文最大權杖數", - "maxTokensLocalContextTooltip": "本地檢索中實體描述的最大權杖數", + "topK": "Top K", + "topKTooltip": "檢索的頂部條目數量。在'local'模式下表示實體,在'global'模式下表示關係。", + "topKPlaceholder": "輸入top k值", + "chunkTopK": "文字區塊 Top K", + "chunkTopKTooltip": "檢索和處理的最大文字區塊數量。", + "chunkTopKPlaceholder": "輸入文字區塊top k值", + "chunkRerankTopK": "重新排序 Top K", + "chunkRerankTopKTooltip": "重新排序後保留的文字區塊數量。", + "chunkRerankTopKPlaceholder": "輸入重新排序top k值", "historyTurns": "歷史輪次", "historyTurnsTooltip": "回應上下文中考慮的完整對話輪次(使用者-助手對)數量", "historyTurnsPlaceholder": "歷史輪次數", @@ -383,7 +383,15 @@ "streamResponseTooltip": "如果為True,啟用即時串流輸出回應", "userPrompt": "用戶提示詞", "userPromptTooltip": "向LLM提供額外的響應要求(與查詢內容無關,僅用於處理輸出)。", - "userPromptPlaceholder": "輸入自定義提示詞(可選)" + "userPromptPlaceholder": "輸入自定義提示詞(可選)", + "enableRerank": "啟用重新排序", + "enableRerankTooltip": "為檢索到的文字區塊啟用重新排序。如果為True但未設定重新排序模型,將發出警告。預設為True。", + "maxEntityTokens": "實體權杖數上限", + "maxEntityTokensTooltip": "統一權杖控制系統中分配給實體上下文的最大權杖數", + "maxRelationTokens": "關係權杖數上限", + "maxRelationTokensTooltip": "統一權杖控制系統中分配給關係上下文的最大權杖數", + "maxTotalTokens": "總權杖數上限", + "maxTotalTokensTooltip": "整個查詢上下文的最大總權杖預算(實體+關係+文字區塊+系統提示)" } }, "apiSite": { diff --git a/lightrag_webui/src/stores/settings.ts index
5942ddca36b8a7f95039c296b066b9eda74e725c..fb0adde0f99aed45e56bbd33b89bcc4493ea838c 100644 --- a/lightrag_webui/src/stores/settings.ts +++ b/lightrag_webui/src/stores/settings.ts @@ -110,17 +110,17 @@ const useSettingsStoreBase = create()( querySettings: { mode: 'global', response_type: 'Multiple Paragraphs', - top_k: 10, - max_token_for_text_unit: 6000, - max_token_for_global_context: 4000, - max_token_for_local_context: 4000, + top_k: 40, + chunk_top_k: 10, + max_entity_tokens: 10000, + max_relation_tokens: 10000, + max_total_tokens: 32000, only_need_context: false, only_need_prompt: false, stream: true, history_turns: 3, - hl_keywords: [], - ll_keywords: [], - user_prompt: '' + user_prompt: '', + enable_rerank: true }, setTheme: (theme: Theme) => set({ theme }), @@ -192,7 +192,7 @@ const useSettingsStoreBase = create()( { name: 'settings-storage', storage: createJSONStorage(() => localStorage), - version: 14, + version: 15, migrate: (state: any, version: number) => { if (version < 2) { state.showEdgeLabel = false @@ -260,6 +260,20 @@ const useSettingsStoreBase = create()( // Add backendMaxGraphNodes field for older versions state.backendMaxGraphNodes = null } + if (version < 15) { + // Add new querySettings + state.querySettings = { + ...state.querySettings, + mode: 'mix', + response_type: 'Multiple Paragraphs', + top_k: 40, + chunk_top_k: 10, + max_entity_tokens: 10000, + max_relation_tokens: 10000, + max_total_tokens: 32000, + enable_rerank: true + } + } return state } }