Daniel.y commited on
Commit
48855dd
·
unverified ·
2 Parent(s): 375b4f4 234e9fa

Merge pull request #1782 from HKUDS/rerank

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README-zh.md +13 -15
  2. README.md +13 -15
  3. docs/rerank_integration.md +32 -26
  4. env.example +87 -63
  5. examples/rerank_example.py +51 -20
  6. lightrag/api/config.py +28 -39
  7. lightrag/api/lightrag_server.py +14 -12
  8. lightrag/api/routers/query_routes.py +15 -16
  9. lightrag/api/webui/assets/{_basePickBy-D3PHsJjq.js → _basePickBy-DV1dBXEu.js} +0 -0
  10. lightrag/api/webui/assets/{_baseUniq-CtAZZJ8e.js → _baseUniq-BZ3hvks1.js} +0 -0
  11. lightrag/api/webui/assets/{architectureDiagram-IEHRJDOE-Bou3pEJo.js → architectureDiagram-IEHRJDOE-0ddCq26Q.js} +0 -0
  12. lightrag/api/webui/assets/{blockDiagram-JOT3LUYC-BxXXNv1O.js → blockDiagram-JOT3LUYC-DezBiNXY.js} +0 -0
  13. lightrag/api/webui/assets/{c4Diagram-VJAJSXHY-BpY1T-jk.js → c4Diagram-VJAJSXHY-BMYcCHQE.js} +0 -0
  14. lightrag/api/webui/assets/{chunk-4BMEZGHF-CAhtCpmT.js → chunk-4BMEZGHF-DM9xX3Iw.js} +0 -0
  15. lightrag/api/webui/assets/{chunk-A2AXSNBT-B91iiasA.js → chunk-A2AXSNBT-CRex3-yW.js} +0 -0
  16. lightrag/api/webui/assets/{chunk-AEK57VVT-gQ4j2jcG.js → chunk-AEK57VVT-DlsJi6tH.js} +0 -0
  17. lightrag/api/webui/assets/{chunk-D6G4REZN-CGaqGId9.js → chunk-D6G4REZN-5j_Vlndu.js} +0 -0
  18. lightrag/api/webui/assets/{chunk-RZ5BOZE2-B615FLH4.js → chunk-RZ5BOZE2-CdnIs5Fb.js} +0 -0
  19. lightrag/api/webui/assets/{chunk-XZIHB7SX-c4P7PYPk.js → chunk-XZIHB7SX-gcyrJN2U.js} +0 -0
  20. lightrag/api/webui/assets/{classDiagram-GIVACNV2-DBTA8XwB.js → classDiagram-GIVACNV2-DZXU66uW.js} +0 -0
  21. lightrag/api/webui/assets/{classDiagram-v2-COTLJTTW-DBTA8XwB.js → classDiagram-v2-COTLJTTW-DZXU66uW.js} +0 -0
  22. lightrag/api/webui/assets/{clone-Dm5jEAXQ.js → clone-eVzB-9-f.js} +0 -0
  23. lightrag/api/webui/assets/{dagre-OKDRZEBW-CqR4Poz4.js → dagre-OKDRZEBW-Cas2IJD5.js} +0 -0
  24. lightrag/api/webui/assets/{diagram-SSKATNLV-pBYsrik-.js → diagram-SSKATNLV-CA9pCZ-g.js} +0 -0
  25. lightrag/api/webui/assets/{diagram-VNBRO52H-Bu64Jus9.js → diagram-VNBRO52H-B9-Mlqta.js} +0 -0
  26. lightrag/api/webui/assets/{erDiagram-Q7BY3M3F-BTmP3B4h.js → erDiagram-Q7BY3M3F-CX4Di1zm.js} +0 -0
  27. lightrag/api/webui/assets/{feature-documents-oks3sUnM.js → feature-documents-DZY3tMAq.js} +0 -0
  28. lightrag/api/webui/assets/{feature-graph-NODQb6qW.js → feature-graph-wF7LCIjH.js} +0 -0
  29. lightrag/api/webui/assets/feature-retrieval-DalFy9WB.js +0 -0
  30. lightrag/api/webui/assets/feature-retrieval-DdCvVec9.js +0 -0
  31. lightrag/api/webui/assets/{flowDiagram-4HSFHLVR-DZNySYxV.js → flowDiagram-4HSFHLVR-BDwWKjb6.js} +0 -0
  32. lightrag/api/webui/assets/{ganttDiagram-APWFNJXF-GWTNv7FR.js → ganttDiagram-APWFNJXF-Du3IUDRk.js} +0 -0
  33. lightrag/api/webui/assets/{gitGraphDiagram-7IBYFJ6S-BXUpvPAf.js → gitGraphDiagram-7IBYFJ6S-CD8MAiok.js} +0 -0
  34. lightrag/api/webui/assets/{graph-BLnbmvfZ.js → graph-DJgPOSDl.js} +0 -0
  35. lightrag/api/webui/assets/{index-yRRg2BZk.js → index-D3V9EKqf.js} +0 -0
  36. lightrag/api/webui/assets/{index-1Hy45NwC.js → index-DB3D3pNI.js} +0 -0
  37. lightrag/api/webui/assets/{infoDiagram-PH2N3AL5-DAtlRRqj.js → infoDiagram-PH2N3AL5-DYkrQwoL.js} +0 -0
  38. lightrag/api/webui/assets/{journeyDiagram-U35MCT3I-BscxFTBa.js → journeyDiagram-U35MCT3I-CZecBGFk.js} +0 -0
  39. lightrag/api/webui/assets/{kanban-definition-NDS4AKOZ-QESEl0tA.js → kanban-definition-NDS4AKOZ-CD8vwi41.js} +0 -0
  40. lightrag/api/webui/assets/{layout-DsT4215v.js → layout-D_MnvYWV.js} +0 -0
  41. lightrag/api/webui/assets/{markdown-vendor-DmIvJdn7.js → markdown-vendor-ZbbHR4ge.js} +0 -0
  42. lightrag/api/webui/assets/{mermaid-vendor-D0f_SE0h.js → mermaid-vendor-CR44n-lC.js} +0 -0
  43. lightrag/api/webui/assets/{mindmap-definition-ALO5MXBD-aQwMTShx.js → mindmap-definition-ALO5MXBD-CEOit9vG.js} +0 -0
  44. lightrag/api/webui/assets/{pieDiagram-IB7DONF6-D6N6SEu_.js → pieDiagram-IB7DONF6-Ca5AV9bY.js} +0 -0
  45. lightrag/api/webui/assets/{quadrantDiagram-7GDLP6J5-COkzo7lS.js → quadrantDiagram-7GDLP6J5-D5ZAOmhC.js} +0 -0
  46. lightrag/api/webui/assets/{radar-MK3ICKWK-DOAXm8cx.js → radar-MK3ICKWK-B97XRKGx.js} +0 -0
  47. lightrag/api/webui/assets/{requirementDiagram-KVF5MWMF-lKW1n5a1.js → requirementDiagram-KVF5MWMF-BzPWhOZW.js} +0 -0
  48. lightrag/api/webui/assets/{sankeyDiagram-QLVOVGJD-BqECU7xS.js → sankeyDiagram-QLVOVGJD-DYZFDO6U.js} +0 -0
  49. lightrag/api/webui/assets/{sequenceDiagram-X6HHIX6F-ByOWqALm.js → sequenceDiagram-X6HHIX6F-GAQ6Ejep.js} +0 -0
  50. lightrag/api/webui/assets/{stateDiagram-DGXRK772-DjKMsne-.js → stateDiagram-DGXRK772-pI_aBJdi.js} +0 -0
README-zh.md CHANGED
@@ -293,26 +293,19 @@ class QueryParam:
293
  top_k: int = int(os.getenv("TOP_K", "60"))
294
  """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
295
 
296
- chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "5"))
297
- """Number of text chunks to retrieve initially from vector search.
298
  If None, defaults to top_k value.
299
  """
300
 
301
- chunk_rerank_top_k: int = int(os.getenv("CHUNK_RERANK_TOP_K", "5"))
302
- """Number of text chunks to keep after reranking.
303
- If None, keeps all chunks returned from initial retrieval.
304
- """
305
-
306
- max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
307
- """Maximum number of tokens allowed for each retrieved text chunk."""
308
 
309
- max_token_for_global_context: int = int(
310
- os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
311
- )
312
- """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
313
 
314
- max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
315
- """Maximum number of tokens allocated for entity descriptions in local retrieval."""
316
 
317
  hl_keywords: list[str] = field(default_factory=list)
318
  """List of high-level keywords to prioritize in retrieval."""
@@ -341,6 +334,11 @@ class QueryParam:
341
  """User-provided prompt for the query.
342
  If provided, this will be used instead of the default value from the prompt template.
343
  """
 
 
 
 
 
344
  ```
345
 
346
  > top_k的默认值可以通过环境变量TOP_K更改。
 
293
  top_k: int = int(os.getenv("TOP_K", "60"))
294
  """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
295
 
296
+ chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "10"))
297
+ """Number of text chunks to retrieve initially from vector search and keep after reranking.
298
  If None, defaults to top_k value.
299
  """
300
 
301
+ max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
302
+ """Maximum number of tokens allocated for entity context in unified token control system."""
 
 
 
 
 
303
 
304
+ max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
305
+ """Maximum number of tokens allocated for relationship context in unified token control system."""
 
 
306
 
307
+ max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
308
+ """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
309
 
310
  hl_keywords: list[str] = field(default_factory=list)
311
  """List of high-level keywords to prioritize in retrieval."""
 
334
  """User-provided prompt for the query.
335
  If provided, this will be used instead of the default value from the prompt template.
336
  """
337
+
338
+ enable_rerank: bool = True
339
+ """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued.
340
+ Default is True to enable reranking when rerank model is available.
341
+ """
342
  ```
343
 
344
  > top_k的默认值可以通过环境变量TOP_K更改。
README.md CHANGED
@@ -300,26 +300,19 @@ class QueryParam:
300
  top_k: int = int(os.getenv("TOP_K", "60"))
301
  """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
302
 
303
- chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "5"))
304
- """Number of text chunks to retrieve initially from vector search.
305
  If None, defaults to top_k value.
306
  """
307
 
308
- chunk_rerank_top_k: int = int(os.getenv("CHUNK_RERANK_TOP_K", "5"))
309
- """Number of text chunks to keep after reranking.
310
- If None, keeps all chunks returned from initial retrieval.
311
- """
312
-
313
- max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
314
- """Maximum number of tokens allowed for each retrieved text chunk."""
315
 
316
- max_token_for_global_context: int = int(
317
- os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
318
- )
319
- """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
320
 
321
- max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
322
- """Maximum number of tokens allocated for entity descriptions in local retrieval."""
323
 
324
  conversation_history: list[dict[str, str]] = field(default_factory=list)
325
  """Stores past conversation history to maintain context.
@@ -342,6 +335,11 @@ class QueryParam:
342
  """User-provided prompt for the query.
343
  If provided, this will be used instead of the default value from the prompt template.
344
  """
 
 
 
 
 
345
  ```
346
 
347
  > default value of Top_k can be change by environment variables TOP_K.
 
300
  top_k: int = int(os.getenv("TOP_K", "60"))
301
  """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
302
 
303
+ chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "10"))
304
+ """Number of text chunks to retrieve initially from vector search and keep after reranking.
305
  If None, defaults to top_k value.
306
  """
307
 
308
+ max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "10000"))
309
+ """Maximum number of tokens allocated for entity context in unified token control system."""
 
 
 
 
 
310
 
311
+ max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "10000"))
312
+ """Maximum number of tokens allocated for relationship context in unified token control system."""
 
 
313
 
314
+ max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "32000"))
315
+ """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
316
 
317
  conversation_history: list[dict[str, str]] = field(default_factory=list)
318
  """Stores past conversation history to maintain context.
 
335
  """User-provided prompt for the query.
336
  If provided, this will be used instead of the default value from the prompt template.
337
  """
338
+
339
+ enable_rerank: bool = True
340
+ """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued.
341
+ Default is True to enable reranking when rerank model is available.
342
+ """
343
  ```
344
 
345
  > default value of Top_k can be change by environment variables TOP_K.
docs/rerank_integration.md CHANGED
@@ -1,36 +1,24 @@
1
- # Rerank Integration in LightRAG
2
 
3
- This document explains how to configure and use the rerank functionality in LightRAG to improve retrieval quality.
4
 
5
- ## Overview
6
-
7
- Reranking is an optional feature that improves the quality of retrieved documents by re-ordering them based on their relevance to the query. This is particularly useful when you want higher precision in document retrieval across all query modes (naive, local, global, hybrid, mix).
8
-
9
- ## Architecture
10
-
11
- The rerank integration follows a simplified design pattern:
12
-
13
- - **Single Function Configuration**: All rerank settings (model, API keys, top_k, etc.) are contained within the rerank function
14
- - **Async Processing**: Non-blocking rerank operations
15
- - **Error Handling**: Graceful fallback to original results
16
- - **Optional Feature**: Can be enabled/disabled via configuration
17
- - **Code Reuse**: Single generic implementation for Jina/Cohere compatible APIs
18
-
19
- ## Configuration
20
 
21
  ### Environment Variables
22
 
23
- Set this variable in your `.env` file or environment:
24
 
25
  ```bash
26
- # Enable/disable reranking
27
- ENABLE_RERANK=True
 
 
28
  ```
29
 
30
  ### Programmatic Configuration
31
 
32
  ```python
33
- from lightrag import LightRAG
34
  from lightrag.rerank import custom_rerank, RerankModel
35
 
36
  # Method 1: Using a custom rerank function with all settings included
@@ -49,8 +37,19 @@ rag = LightRAG(
49
  working_dir="./rag_storage",
50
  llm_model_func=your_llm_func,
51
  embedding_func=your_embedding_func,
52
- enable_rerank=True,
53
- rerank_model_func=my_rerank_func,
 
 
 
 
 
 
 
 
 
 
 
54
  )
55
 
56
  # Method 2: Using RerankModel wrapper
@@ -67,9 +66,17 @@ rag = LightRAG(
67
  working_dir="./rag_storage",
68
  llm_model_func=your_llm_func,
69
  embedding_func=your_embedding_func,
70
- enable_rerank=True,
71
  rerank_model_func=rerank_model.rerank,
72
  )
 
 
 
 
 
 
 
 
 
73
  ```
74
 
75
  ## Supported Providers
@@ -164,7 +171,6 @@ async def main():
164
  working_dir="./rag_storage",
165
  llm_model_func=gpt_4o_mini_complete,
166
  embedding_func=openai_embedding,
167
- enable_rerank=True,
168
  rerank_model_func=my_rerank_func,
169
  )
170
 
@@ -180,7 +186,7 @@ async def main():
180
  # Query with rerank (automatically applied)
181
  result = await rag.aquery(
182
  "Your question here",
183
- param=QueryParam(mode="hybrid", top_k=5) # This top_k is passed to rerank function
184
  )
185
 
186
  print(result)
 
1
+ # Rerank Integration Guide
2
 
3
+ LightRAG supports reranking functionality to improve retrieval quality by re-ordering documents based on their relevance to the query. Reranking is now controlled per query via the `enable_rerank` parameter (default: True).
4
 
5
+ ## Quick Start
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  ### Environment Variables
8
 
9
+ Set these variables in your `.env` file or environment for rerank model configuration:
10
 
11
  ```bash
12
+ # Rerank model configuration (required when enable_rerank=True in queries)
13
+ RERANK_MODEL=BAAI/bge-reranker-v2-m3
14
+ RERANK_BINDING_HOST=https://api.your-provider.com/v1/rerank
15
+ RERANK_BINDING_API_KEY=your_api_key_here
16
  ```
17
 
18
  ### Programmatic Configuration
19
 
20
  ```python
21
+ from lightrag import LightRAG, QueryParam
22
  from lightrag.rerank import custom_rerank, RerankModel
23
 
24
  # Method 1: Using a custom rerank function with all settings included
 
37
  working_dir="./rag_storage",
38
  llm_model_func=your_llm_func,
39
  embedding_func=your_embedding_func,
40
+ rerank_model_func=my_rerank_func, # Configure rerank function
41
+ )
42
+
43
+ # Query with rerank enabled (default)
44
+ result = await rag.aquery(
45
+ "your query",
46
+ param=QueryParam(enable_rerank=True) # Control rerank per query
47
+ )
48
+
49
+ # Query with rerank disabled
50
+ result = await rag.aquery(
51
+ "your query",
52
+ param=QueryParam(enable_rerank=False)
53
  )
54
 
55
  # Method 2: Using RerankModel wrapper
 
66
  working_dir="./rag_storage",
67
  llm_model_func=your_llm_func,
68
  embedding_func=your_embedding_func,
 
69
  rerank_model_func=rerank_model.rerank,
70
  )
71
+
72
+ # Control rerank per query
73
+ result = await rag.aquery(
74
+ "your query",
75
+ param=QueryParam(
76
+ enable_rerank=True, # Enable rerank for this query
77
+ chunk_top_k=5 # Number of chunks to keep after reranking
78
+ )
79
+ )
80
  ```
81
 
82
  ## Supported Providers
 
171
  working_dir="./rag_storage",
172
  llm_model_func=gpt_4o_mini_complete,
173
  embedding_func=openai_embedding,
 
174
  rerank_model_func=my_rerank_func,
175
  )
176
 
 
186
  # Query with rerank (automatically applied)
187
  result = await rag.aquery(
188
  "Your question here",
189
+ param=QueryParam(enable_rerank=True) # Rerank is enabled for this query
190
  )
191
 
192
  print(result)
env.example CHANGED
@@ -1,6 +1,8 @@
1
  ### This is sample file of .env
2
 
 
3
  ### Server Configuration
 
4
  HOST=0.0.0.0
5
  PORT=9621
6
  WEBUI_TITLE='My Graph KB'
@@ -9,29 +11,17 @@ OLLAMA_EMULATING_MODEL_TAG=latest
9
  # WORKERS=2
10
  # CORS_ORIGINS=http://localhost:3000,http://localhost:8080
11
 
12
- ### Login Configuration
13
- # AUTH_ACCOUNTS='admin:admin123,user1:pass456'
14
- # TOKEN_SECRET=Your-Key-For-LightRAG-API-Server
15
- # TOKEN_EXPIRE_HOURS=48
16
- # GUEST_TOKEN_EXPIRE_HOURS=24
17
- # JWT_ALGORITHM=HS256
18
-
19
- ### API-Key to access LightRAG Server API
20
- # LIGHTRAG_API_KEY=your-secure-api-key-here
21
- # WHITELIST_PATHS=/health,/api/*
22
-
23
  ### Optional SSL Configuration
24
  # SSL=true
25
  # SSL_CERTFILE=/path/to/cert.pem
26
  # SSL_KEYFILE=/path/to/key.pem
27
 
28
  ### Directory Configuration (defaults to current working directory)
29
- ### Should not be set if deploy by docker (Set by Dockerfile instead of .env)
30
  ### Default value is ./inputs and ./rag_storage
31
  # INPUT_DIR=<absolute_path_for_doc_input_dir>
32
  # WORKING_DIR=<absolute_path_for_working_dir>
33
 
34
- ### Max nodes return from grap retrieval
35
  # MAX_GRAPH_NODES=1000
36
 
37
  ### Logging level
@@ -42,65 +32,97 @@ OLLAMA_EMULATING_MODEL_TAG=latest
42
  ### Logfile location (defaults to current working directory)
43
  # LOG_DIR=/path/to/log/directory
44
 
45
- ### RAG Configuration
46
- ### Chunk size for document splitting, 500~1500 is recommended
47
- # CHUNK_SIZE=1200
48
- # CHUNK_OVERLAP_SIZE=100
 
 
 
 
49
 
50
- ### RAG Query Configuration
 
 
 
 
 
 
 
 
51
  # HISTORY_TURNS=3
52
- # MAX_TOKEN_TEXT_CHUNK=6000
53
- # MAX_TOKEN_RELATION_DESC=4000
54
- # MAX_TOKEN_ENTITY_DESC=4000
55
  # COSINE_THRESHOLD=0.2
56
- ### Number of entities or relations to retrieve from KG
57
- # TOP_K=60
58
- ### Number of text chunks to retrieve initially from vector search
59
- # CHUNK_TOP_K=5
60
-
61
- ### Rerank Configuration
62
- # ENABLE_RERANK=False
63
- ### Number of text chunks to keep after reranking (should be <= CHUNK_TOP_K)
64
- # CHUNK_RERANK_TOP_K=5
65
- ### Rerank model configuration (required when ENABLE_RERANK=True)
 
 
 
 
 
66
  # RERANK_MODEL=BAAI/bge-reranker-v2-m3
67
  # RERANK_BINDING_HOST=https://api.your-rerank-provider.com/v1/rerank
68
  # RERANK_BINDING_API_KEY=your_rerank_api_key_here
69
 
70
- ### Entity and relation summarization configuration
 
 
71
  ### Language: English, Chinese, French, German ...
72
  SUMMARY_LANGUAGE=English
 
 
 
 
 
 
 
73
  ### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended)
74
- # FORCE_LLM_SUMMARY_ON_MERGE=6
75
  ### Maximum number of entity extraction attempts for ambiguous content
76
  # MAX_GLEANING=1
77
 
78
- ### Number of parallel processing documents(Less than MAX_ASYNC/2 is recommended)
79
- # MAX_PARALLEL_INSERT=2
 
 
 
 
 
 
 
 
 
80
 
 
81
  ### LLM Configuration
82
- ENABLE_LLM_CACHE=true
83
- ENABLE_LLM_CACHE_FOR_EXTRACT=true
84
  ### Time out in seconds for LLM, None for infinite timeout
85
  TIMEOUT=240
86
  ### Some models like o1-mini require temperature to be set to 1
87
  TEMPERATURE=0
88
- ### Max concurrency requests of LLM
89
- MAX_ASYNC=4
90
- ### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model)
91
- MAX_TOKENS=32000
92
  ### LLM Binding type: openai, ollama, lollms, azure_openai
93
  LLM_BINDING=openai
94
  LLM_MODEL=gpt-4o
95
  LLM_BINDING_HOST=https://api.openai.com/v1
96
  LLM_BINDING_API_KEY=your_api_key
 
 
 
 
97
  ### Optional for Azure
98
  # AZURE_OPENAI_API_VERSION=2024-08-01-preview
99
  # AZURE_OPENAI_DEPLOYMENT=gpt-4o
100
- ### set as num_ctx option for Ollama LLM
101
- # OLLAMA_NUM_CTX=32768
102
 
103
- ### Embedding Configuration
 
 
104
  ### Embedding Binding type: openai, ollama, lollms, azure_openai
105
  EMBEDDING_BINDING=ollama
106
  EMBEDDING_MODEL=bge-m3:latest
@@ -108,51 +130,53 @@ EMBEDDING_DIM=1024
108
  EMBEDDING_BINDING_API_KEY=your_api_key
109
  # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
110
  EMBEDDING_BINDING_HOST=http://localhost:11434
111
- ### Num of chunks send to Embedding in single request
112
- # EMBEDDING_BATCH_NUM=10
113
- ### Max concurrency requests for Embedding
114
- # EMBEDDING_FUNC_MAX_ASYNC=8
115
  ### Maximum tokens sent to Embedding for each chunk (no longer in use?)
116
  # MAX_EMBED_TOKENS=8192
 
117
  ### Optional for Azure
118
  # AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
119
  # AZURE_EMBEDDING_API_VERSION=2023-05-15
120
  # AZURE_EMBEDDING_ENDPOINT=your_endpoint
121
  # AZURE_EMBEDDING_API_KEY=your_api_key
122
 
123
- ###########################
124
  ### Data storage selection
125
- ###########################
126
- ### In-memory database with local file persistence(Recommended for small scale deployment)
127
  # LIGHTRAG_KV_STORAGE=JsonKVStorage
128
  # LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
129
  # LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
130
  # LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
 
 
 
 
 
 
 
 
131
  # LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
 
 
 
 
 
132
  ### PostgreSQL
133
  # LIGHTRAG_KV_STORAGE=PGKVStorage
134
  # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
135
  # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
136
  # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
 
137
  ### MongoDB (Vector storage only available on Atlas Cloud)
138
  # LIGHTRAG_KV_STORAGE=MongoKVStorage
139
  # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
140
  # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
141
  # LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
142
- ### Redis Storage (Recommended for production deployment)
143
- # LIGHTRAG_KV_STORAGE=RedisKVStorage
144
- # LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
145
- ### Vector Storage (Recommended for production deployment)
146
- # LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
147
- # LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage
148
- ### Graph Storage (Recommended for production deployment)
149
- # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
150
- # LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
151
 
152
  ####################################################################
153
- ### Default workspace for all storage types
154
- ### For the purpose of isolation of data for each LightRAG instance
155
- ### Valid characters: a-z, A-Z, 0-9, and _
156
  ####################################################################
157
  # WORKSPACE=space1
158
 
 
1
  ### This is sample file of .env
2
 
3
+ ###########################
4
  ### Server Configuration
5
+ ###########################
6
  HOST=0.0.0.0
7
  PORT=9621
8
  WEBUI_TITLE='My Graph KB'
 
11
  # WORKERS=2
12
  # CORS_ORIGINS=http://localhost:3000,http://localhost:8080
13
 
 
 
 
 
 
 
 
 
 
 
 
14
  ### Optional SSL Configuration
15
  # SSL=true
16
  # SSL_CERTFILE=/path/to/cert.pem
17
  # SSL_KEYFILE=/path/to/key.pem
18
 
19
  ### Directory Configuration (defaults to current working directory)
 
20
  ### Default value is ./inputs and ./rag_storage
21
  # INPUT_DIR=<absolute_path_for_doc_input_dir>
22
  # WORKING_DIR=<absolute_path_for_working_dir>
23
 
24
+ ### Max nodes returned from graph retrieval in webui
25
  # MAX_GRAPH_NODES=1000
26
 
27
  ### Logging level
 
32
  ### Logfile location (defaults to current working directory)
33
  # LOG_DIR=/path/to/log/directory
34
 
35
+ #####################################
36
+ ### Login and API-Key Configuration
37
+ #####################################
38
+ # AUTH_ACCOUNTS='admin:admin123,user1:pass456'
39
+ # TOKEN_SECRET=Your-Key-For-LightRAG-API-Server
40
+ # TOKEN_EXPIRE_HOURS=48
41
+ # GUEST_TOKEN_EXPIRE_HOURS=24
42
+ # JWT_ALGORITHM=HS256
43
 
44
+ ### API-Key to access LightRAG Server API
45
+ # LIGHTRAG_API_KEY=your-secure-api-key-here
46
+ # WHITELIST_PATHS=/health,/api/*
47
+
48
+ ########################
49
+ ### Query Configuration
50
+ ########################
51
+ # LLM response cache for query (Not valid for streaming response)
52
+ ENABLE_LLM_CACHE=true
53
  # HISTORY_TURNS=3
 
 
 
54
  # COSINE_THRESHOLD=0.2
55
+ ### Number of entities or relations retrieved from KG
56
+ # TOP_K=40
57
+ ### Maximum number of chunks planned to send to LLM
58
+ # CHUNK_TOP_K=10
59
+ ### Control the actual entities sent to LLM
60
+ # MAX_ENTITY_TOKENS=10000
61
+ ### Control the actual relations sent to LLM
62
+ # MAX_RELATION_TOKENS=10000
63
+ ### Control the maximum tokens sent to LLM (includes entities, relations and chunks)
64
+ # MAX_TOTAL_TOKENS=32000
65
+ ### Maximum related chunks grabbed from a single entity or relation
66
+ # RELATED_CHUNK_NUMBER=10
67
+
68
+ ### Reranker configuration (Set ENABLE_RERANK to true if a reranking model is configured)
69
+ ENABLE_RERANK=False
70
  # RERANK_MODEL=BAAI/bge-reranker-v2-m3
71
  # RERANK_BINDING_HOST=https://api.your-rerank-provider.com/v1/rerank
72
  # RERANK_BINDING_API_KEY=your_rerank_api_key_here
73
 
74
+ ########################################
75
+ ### Document processing configuration
76
+ ########################################
77
  ### Language: English, Chinese, French, German ...
78
  SUMMARY_LANGUAGE=English
79
+ ENABLE_LLM_CACHE_FOR_EXTRACT=true
80
+ ### MAX_TOKENS: max tokens send to LLM for entity relation summaries (less than context size of the model)
81
+ MAX_TOKENS=32000
82
+ ### Chunk size for document splitting, 500~1500 is recommended
83
+ # CHUNK_SIZE=1200
84
+ # CHUNK_OVERLAP_SIZE=100
85
+ ### Entity and relation summarization configuration
86
  ### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended)
87
+ # FORCE_LLM_SUMMARY_ON_MERGE=4
88
  ### Maximum number of entity extraction attempts for ambiguous content
89
  # MAX_GLEANING=1
90
 
91
+ ###############################
92
+ ### Concurrency Configuration
93
+ ###############################
94
+ ### Max concurrency requests of LLM (for both query and document processing)
95
+ MAX_ASYNC=4
96
+ ### Number of parallel processing documents (between 2~10, MAX_ASYNC/4 is recommended)
97
+ MAX_PARALLEL_INSERT=2
98
+ ### Max concurrency requests for Embedding
99
+ # EMBEDDING_FUNC_MAX_ASYNC=8
100
+ ### Num of chunks send to Embedding in single request
101
+ # EMBEDDING_BATCH_NUM=10
102
 
103
+ #######################
104
  ### LLM Configuration
105
+ #######################
 
106
  ### Time out in seconds for LLM, None for infinite timeout
107
  TIMEOUT=240
108
  ### Some models like o1-mini require temperature to be set to 1
109
  TEMPERATURE=0
 
 
 
 
110
  ### LLM Binding type: openai, ollama, lollms, azure_openai
111
  LLM_BINDING=openai
112
  LLM_MODEL=gpt-4o
113
  LLM_BINDING_HOST=https://api.openai.com/v1
114
  LLM_BINDING_API_KEY=your_api_key
115
+
116
+ ### Set as num_ctx option for Ollama LLM
117
+ # OLLAMA_NUM_CTX=32768
118
+
119
  ### Optional for Azure
120
  # AZURE_OPENAI_API_VERSION=2024-08-01-preview
121
  # AZURE_OPENAI_DEPLOYMENT=gpt-4o
 
 
122
 
123
+ ####################################################################################
124
+ ### Embedding Configuration (Should not be changed after the first file processed)
125
+ ####################################################################################
126
  ### Embedding Binding type: openai, ollama, lollms, azure_openai
127
  EMBEDDING_BINDING=ollama
128
  EMBEDDING_MODEL=bge-m3:latest
 
130
  EMBEDDING_BINDING_API_KEY=your_api_key
131
  # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
132
  EMBEDDING_BINDING_HOST=http://localhost:11434
 
 
 
 
133
  ### Maximum tokens sent to Embedding for each chunk (no longer in use?)
134
  # MAX_EMBED_TOKENS=8192
135
+
136
  ### Optional for Azure
137
  # AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
138
  # AZURE_EMBEDDING_API_VERSION=2023-05-15
139
  # AZURE_EMBEDDING_ENDPOINT=your_endpoint
140
  # AZURE_EMBEDDING_API_KEY=your_api_key
141
 
142
+ ############################
143
  ### Data storage selection
144
+ ############################
145
+ ### Default storage (Recommended for small scale deployment)
146
  # LIGHTRAG_KV_STORAGE=JsonKVStorage
147
  # LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
148
  # LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
149
  # LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
150
+
151
+ ### Redis Storage (Recommended for production deployment)
152
+ # LIGHTRAG_KV_STORAGE=RedisKVStorage
153
+ # LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
154
+
155
+ ### Vector Storage (Recommended for production deployment)
156
+ # LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
157
+ # LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage
158
  # LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
159
+
160
+ ### Graph Storage (Recommended for production deployment)
161
+ # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
162
+ # LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
163
+
164
  ### PostgreSQL
165
  # LIGHTRAG_KV_STORAGE=PGKVStorage
166
  # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
167
  # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
168
  # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
169
+
170
  ### MongoDB (Vector storage only available on Atlas Cloud)
171
  # LIGHTRAG_KV_STORAGE=MongoKVStorage
172
  # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
173
  # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
174
  # LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
 
 
 
 
 
 
 
 
 
175
 
176
  ####################################################################
177
+ ### WORKSPACE setting workspace name for all storage types
178
+ ### in the purpose of isolating data from LightRAG instances.
179
+ ### Valid workspace name constraints: a-z, A-Z, 0-9, and _
180
  ####################################################################
181
  # WORKSPACE=space1
182
 
examples/rerank_example.py CHANGED
@@ -9,7 +9,11 @@ Configuration Required:
9
  2. Set your embedding API key and base URL in embedding_func()
10
  3. Set your rerank API key and base URL in the rerank configuration
11
  4. Or use environment variables (.env file):
12
- - ENABLE_RERANK=True
 
 
 
 
13
  """
14
 
15
  import asyncio
@@ -83,8 +87,7 @@ async def create_rag_with_rerank():
83
  max_token_size=8192,
84
  func=embedding_func,
85
  ),
86
- # Simplified Rerank Configuration
87
- enable_rerank=True,
88
  rerank_model_func=my_rerank_func,
89
  )
90
 
@@ -120,7 +123,6 @@ async def create_rag_with_rerank_model():
120
  max_token_size=8192,
121
  func=embedding_func,
122
  ),
123
- enable_rerank=True,
124
  rerank_model_func=rerank_model.rerank,
125
  )
126
 
@@ -130,9 +132,9 @@ async def create_rag_with_rerank_model():
130
  return rag
131
 
132
 
133
- async def test_rerank_with_different_topk():
134
  """
135
- Test rerank functionality with different top_k settings
136
  """
137
  print("🚀 Setting up LightRAG with Rerank functionality...")
138
 
@@ -154,16 +156,41 @@ async def test_rerank_with_different_topk():
154
  print(f"\n🔍 Testing query: '{query}'")
155
  print("=" * 80)
156
 
157
- # Test different top_k values to show parameter priority
158
- top_k_values = [2, 5, 10]
159
-
160
- for top_k in top_k_values:
161
- print(f"\n📊 Testing with QueryParam(top_k={top_k}):")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- # Test naive mode with specific top_k
164
- result = await rag.aquery(query, param=QueryParam(mode="naive", top_k=top_k))
165
- print(f" Result length: {len(result)} characters")
166
- print(f" Preview: {result[:100]}...")
 
 
 
167
 
168
 
169
  async def test_direct_rerank():
@@ -209,17 +236,21 @@ async def main():
209
  print("=" * 60)
210
 
211
  try:
212
- # Test rerank with different top_k values
213
- await test_rerank_with_different_topk()
214
 
215
  # Test direct rerank
216
  await test_direct_rerank()
217
 
218
  print("\n✅ Example completed successfully!")
219
  print("\n💡 Key Points:")
220
- print(" ✓ All rerank configurations are contained within rerank_model_func")
221
- print(" ✓ Rerank improves document relevance ordering")
222
- print(" ✓ Configure API keys within your rerank function")
 
 
 
 
223
  print(" ✓ Monitor API usage and costs when using rerank services")
224
 
225
  except Exception as e:
 
9
  2. Set your embedding API key and base URL in embedding_func()
10
  3. Set your rerank API key and base URL in the rerank configuration
11
  4. Or use environment variables (.env file):
12
+ - RERANK_MODEL=your_rerank_model
13
+ - RERANK_BINDING_HOST=your_rerank_endpoint
14
+ - RERANK_BINDING_API_KEY=your_rerank_api_key
15
+
16
+ Note: Rerank is now controlled per query via the 'enable_rerank' parameter (default: True)
17
  """
18
 
19
  import asyncio
 
87
  max_token_size=8192,
88
  func=embedding_func,
89
  ),
90
+ # Rerank Configuration - provide the rerank function
 
91
  rerank_model_func=my_rerank_func,
92
  )
93
 
 
123
  max_token_size=8192,
124
  func=embedding_func,
125
  ),
 
126
  rerank_model_func=rerank_model.rerank,
127
  )
128
 
 
132
  return rag
133
 
134
 
135
+ async def test_rerank_with_different_settings():
136
  """
137
+ Test rerank functionality with different enable_rerank settings
138
  """
139
  print("🚀 Setting up LightRAG with Rerank functionality...")
140
 
 
156
  print(f"\n🔍 Testing query: '{query}'")
157
  print("=" * 80)
158
 
159
+ # Test with rerank enabled (default)
160
+ print("\n📊 Testing with enable_rerank=True (default):")
161
+ result_with_rerank = await rag.aquery(
162
+ query,
163
+ param=QueryParam(
164
+ mode="naive",
165
+ top_k=10,
166
+ chunk_top_k=5,
167
+ enable_rerank=True, # Explicitly enable rerank
168
+ ),
169
+ )
170
+ print(f" Result length: {len(result_with_rerank)} characters")
171
+ print(f" Preview: {result_with_rerank[:100]}...")
172
+
173
+ # Test with rerank disabled
174
+ print("\n📊 Testing with enable_rerank=False:")
175
+ result_without_rerank = await rag.aquery(
176
+ query,
177
+ param=QueryParam(
178
+ mode="naive",
179
+ top_k=10,
180
+ chunk_top_k=5,
181
+ enable_rerank=False, # Disable rerank
182
+ ),
183
+ )
184
+ print(f" Result length: {len(result_without_rerank)} characters")
185
+ print(f" Preview: {result_without_rerank[:100]}...")
186
 
187
+ # Test with default settings (enable_rerank defaults to True)
188
+ print("\n📊 Testing with default settings (enable_rerank defaults to True):")
189
+ result_default = await rag.aquery(
190
+ query, param=QueryParam(mode="naive", top_k=10, chunk_top_k=5)
191
+ )
192
+ print(f" Result length: {len(result_default)} characters")
193
+ print(f" Preview: {result_default[:100]}...")
194
 
195
 
196
  async def test_direct_rerank():
 
236
  print("=" * 60)
237
 
238
  try:
239
+ # Test rerank with different enable_rerank settings
240
+ await test_rerank_with_different_settings()
241
 
242
  # Test direct rerank
243
  await test_direct_rerank()
244
 
245
  print("\n✅ Example completed successfully!")
246
  print("\n💡 Key Points:")
247
+ print(" ✓ Rerank is now controlled per query via 'enable_rerank' parameter")
248
+ print(" ✓ Default value for enable_rerank is True")
249
+ print(" ✓ Rerank function is configured at LightRAG initialization")
250
+ print(" ✓ Per-query enable_rerank setting overrides default behavior")
251
+ print(
252
+ " ✓ If enable_rerank=True but no rerank model is configured, a warning is issued"
253
+ )
254
  print(" ✓ Monitor API usage and costs when using rerank services")
255
 
256
  except Exception as e:
lightrag/api/config.py CHANGED
@@ -11,6 +11,14 @@ from lightrag.utils import get_env_value
11
  from lightrag.constants import (
12
  DEFAULT_WOKERS,
13
  DEFAULT_TIMEOUT,
 
 
 
 
 
 
 
 
14
  )
15
 
16
  # use the .env that is inside the current folder
@@ -151,45 +159,6 @@ def parse_args() -> argparse.Namespace:
151
  help="Path to SSL private key file (required if --ssl is enabled)",
152
  )
153
 
154
- parser.add_argument(
155
- "--history-turns",
156
- type=int,
157
- default=get_env_value("HISTORY_TURNS", 3, int),
158
- help="Number of conversation history turns to include (default: from env or 3)",
159
- )
160
-
161
- # Search parameters
162
- parser.add_argument(
163
- "--top-k",
164
- type=int,
165
- default=get_env_value("TOP_K", 60, int),
166
- help="Number of most similar results to return (default: from env or 60)",
167
- )
168
- parser.add_argument(
169
- "--chunk-top-k",
170
- type=int,
171
- default=get_env_value("CHUNK_TOP_K", 15, int),
172
- help="Number of text chunks to retrieve initially from vector search (default: from env or 15)",
173
- )
174
- parser.add_argument(
175
- "--chunk-rerank-top-k",
176
- type=int,
177
- default=get_env_value("CHUNK_RERANK_TOP_K", 5, int),
178
- help="Number of text chunks to keep after reranking (default: from env or 5)",
179
- )
180
- parser.add_argument(
181
- "--enable-rerank",
182
- action="store_true",
183
- default=get_env_value("ENABLE_RERANK", False, bool),
184
- help="Enable rerank functionality (default: from env or False)",
185
- )
186
- parser.add_argument(
187
- "--cosine-threshold",
188
- type=float,
189
- default=get_env_value("COSINE_THRESHOLD", 0.2, float),
190
- help="Cosine similarity threshold (default: from env or 0.4)",
191
- )
192
-
193
  # Ollama model name
194
  parser.add_argument(
195
  "--simulated-model-name",
@@ -321,6 +290,26 @@ def parse_args() -> argparse.Namespace:
321
  args.rerank_binding_host = get_env_value("RERANK_BINDING_HOST", None)
322
  args.rerank_binding_api_key = get_env_value("RERANK_BINDING_API_KEY", None)
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
325
 
326
  return args
 
11
  from lightrag.constants import (
12
  DEFAULT_WOKERS,
13
  DEFAULT_TIMEOUT,
14
+ DEFAULT_TOP_K,
15
+ DEFAULT_CHUNK_TOP_K,
16
+ DEFAULT_HISTORY_TURNS,
17
+ DEFAULT_MAX_ENTITY_TOKENS,
18
+ DEFAULT_MAX_RELATION_TOKENS,
19
+ DEFAULT_MAX_TOTAL_TOKENS,
20
+ DEFAULT_COSINE_THRESHOLD,
21
+ DEFAULT_RELATED_CHUNK_NUMBER,
22
  )
23
 
24
  # use the .env that is inside the current folder
 
159
  help="Path to SSL private key file (required if --ssl is enabled)",
160
  )
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  # Ollama model name
163
  parser.add_argument(
164
  "--simulated-model-name",
 
290
  args.rerank_binding_host = get_env_value("RERANK_BINDING_HOST", None)
291
  args.rerank_binding_api_key = get_env_value("RERANK_BINDING_API_KEY", None)
292
 
293
+ # Query configuration
294
+ args.history_turns = get_env_value("HISTORY_TURNS", DEFAULT_HISTORY_TURNS, int)
295
+ args.top_k = get_env_value("TOP_K", DEFAULT_TOP_K, int)
296
+ args.chunk_top_k = get_env_value("CHUNK_TOP_K", DEFAULT_CHUNK_TOP_K, int)
297
+ args.max_entity_tokens = get_env_value(
298
+ "MAX_ENTITY_TOKENS", DEFAULT_MAX_ENTITY_TOKENS, int
299
+ )
300
+ args.max_relation_tokens = get_env_value(
301
+ "MAX_RELATION_TOKENS", DEFAULT_MAX_RELATION_TOKENS, int
302
+ )
303
+ args.max_total_tokens = get_env_value(
304
+ "MAX_TOTAL_TOKENS", DEFAULT_MAX_TOTAL_TOKENS, int
305
+ )
306
+ args.cosine_threshold = get_env_value(
307
+ "COSINE_THRESHOLD", DEFAULT_COSINE_THRESHOLD, float
308
+ )
309
+ args.related_chunk_number = get_env_value(
310
+ "RELATED_CHUNK_NUMBER", DEFAULT_RELATED_CHUNK_NUMBER, int
311
+ )
312
+
313
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
314
 
315
  return args
lightrag/api/lightrag_server.py CHANGED
@@ -292,9 +292,9 @@ def create_app(args):
292
  ),
293
  )
294
 
295
- # Configure rerank function if enabled
296
  rerank_model_func = None
297
- if args.enable_rerank and args.rerank_binding_api_key and args.rerank_binding_host:
298
  from lightrag.rerank import custom_rerank
299
 
300
  async def server_rerank_func(
@@ -312,10 +312,12 @@ def create_app(args):
312
  )
313
 
314
  rerank_model_func = server_rerank_func
315
- logger.info(f"Rerank enabled with model: {args.rerank_model}")
316
- elif args.enable_rerank:
317
- logger.warning(
318
- "Rerank enabled but RERANK_BINDING_API_KEY or RERANK_BINDING_HOST not configured. Rerank will be disabled."
 
 
319
  )
320
 
321
  # Initialize RAG
@@ -351,7 +353,6 @@ def create_app(args):
351
  },
352
  enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
353
  enable_llm_cache=args.enable_llm_cache,
354
- enable_rerank=args.enable_rerank,
355
  rerank_model_func=rerank_model_func,
356
  auto_manage_storages_states=False,
357
  max_parallel_insert=args.max_parallel_insert,
@@ -381,7 +382,6 @@ def create_app(args):
381
  },
382
  enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
383
  enable_llm_cache=args.enable_llm_cache,
384
- enable_rerank=args.enable_rerank,
385
  rerank_model_func=rerank_model_func,
386
  auto_manage_storages_states=False,
387
  max_parallel_insert=args.max_parallel_insert,
@@ -512,11 +512,13 @@ def create_app(args):
512
  "enable_llm_cache": args.enable_llm_cache,
513
  "workspace": args.workspace,
514
  "max_graph_nodes": args.max_graph_nodes,
515
- # Rerank configuration
516
- "enable_rerank": args.enable_rerank,
517
- "rerank_model": args.rerank_model if args.enable_rerank else None,
 
 
518
  "rerank_binding_host": args.rerank_binding_host
519
- if args.enable_rerank
520
  else None,
521
  },
522
  "auth_mode": auth_mode,
 
292
  ),
293
  )
294
 
295
+ # Configure rerank function if model and API are configured
296
  rerank_model_func = None
297
+ if args.rerank_binding_api_key and args.rerank_binding_host:
298
  from lightrag.rerank import custom_rerank
299
 
300
  async def server_rerank_func(
 
312
  )
313
 
314
  rerank_model_func = server_rerank_func
315
+ logger.info(
316
+ f"Rerank model configured: {args.rerank_model} (can be enabled per query)"
317
+ )
318
+ else:
319
+ logger.info(
320
+ "Rerank model not configured. Set RERANK_BINDING_API_KEY and RERANK_BINDING_HOST to enable reranking."
321
  )
322
 
323
  # Initialize RAG
 
353
  },
354
  enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
355
  enable_llm_cache=args.enable_llm_cache,
 
356
  rerank_model_func=rerank_model_func,
357
  auto_manage_storages_states=False,
358
  max_parallel_insert=args.max_parallel_insert,
 
382
  },
383
  enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
384
  enable_llm_cache=args.enable_llm_cache,
 
385
  rerank_model_func=rerank_model_func,
386
  auto_manage_storages_states=False,
387
  max_parallel_insert=args.max_parallel_insert,
 
512
  "enable_llm_cache": args.enable_llm_cache,
513
  "workspace": args.workspace,
514
  "max_graph_nodes": args.max_graph_nodes,
515
+ # Rerank configuration (based on whether rerank model is configured)
516
+ "enable_rerank": rerank_model_func is not None,
517
+ "rerank_model": args.rerank_model
518
+ if rerank_model_func is not None
519
+ else None,
520
  "rerank_binding_host": args.rerank_binding_host
521
+ if rerank_model_func is not None
522
  else None,
523
  },
524
  "auth_mode": auth_mode,
lightrag/api/routers/query_routes.py CHANGED
@@ -52,31 +52,25 @@ class QueryRequest(BaseModel):
52
  chunk_top_k: Optional[int] = Field(
53
  ge=1,
54
  default=None,
55
- description="Number of text chunks to retrieve initially from vector search.",
56
  )
57
 
58
- chunk_rerank_top_k: Optional[int] = Field(
59
- ge=1,
60
  default=None,
61
- description="Number of text chunks to keep after reranking.",
62
- )
63
-
64
- max_token_for_text_unit: Optional[int] = Field(
65
- gt=1,
66
- default=None,
67
- description="Maximum number of tokens allowed for each retrieved text chunk.",
68
  )
69
 
70
- max_token_for_global_context: Optional[int] = Field(
71
- gt=1,
72
  default=None,
73
- description="Maximum number of tokens allocated for relationship descriptions in global retrieval.",
 
74
  )
75
 
76
- max_token_for_local_context: Optional[int] = Field(
77
- gt=1,
78
  default=None,
79
- description="Maximum number of tokens allocated for entity descriptions in local retrieval.",
 
80
  )
81
 
82
  conversation_history: Optional[List[Dict[str, Any]]] = Field(
@@ -99,6 +93,11 @@ class QueryRequest(BaseModel):
99
  description="User-provided prompt for the query. If provided, this will be used instead of the default value from prompt template.",
100
  )
101
 
 
 
 
 
 
102
  @field_validator("query", mode="after")
103
  @classmethod
104
  def query_strip_after(cls, query: str) -> str:
 
52
  chunk_top_k: Optional[int] = Field(
53
  ge=1,
54
  default=None,
55
+ description="Number of text chunks to retrieve initially from vector search and keep after reranking.",
56
  )
57
 
58
+ max_entity_tokens: Optional[int] = Field(
 
59
  default=None,
60
+ description="Maximum number of tokens allocated for entity context in unified token control system.",
61
+ ge=1,
 
 
 
 
 
62
  )
63
 
64
+ max_relation_tokens: Optional[int] = Field(
 
65
  default=None,
66
+ description="Maximum number of tokens allocated for relationship context in unified token control system.",
67
+ ge=1,
68
  )
69
 
70
+ max_total_tokens: Optional[int] = Field(
 
71
  default=None,
72
+ description="Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt).",
73
+ ge=1,
74
  )
75
 
76
  conversation_history: Optional[List[Dict[str, Any]]] = Field(
 
93
  description="User-provided prompt for the query. If provided, this will be used instead of the default value from prompt template.",
94
  )
95
 
96
+ enable_rerank: Optional[bool] = Field(
97
+ default=None,
98
+ description="Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True.",
99
+ )
100
+
101
  @field_validator("query", mode="after")
102
  @classmethod
103
  def query_strip_after(cls, query: str) -> str:
lightrag/api/webui/assets/{_basePickBy-D3PHsJjq.js → _basePickBy-DV1dBXEu.js} RENAMED
Binary files a/lightrag/api/webui/assets/_basePickBy-D3PHsJjq.js and b/lightrag/api/webui/assets/_basePickBy-DV1dBXEu.js differ
 
lightrag/api/webui/assets/{_baseUniq-CtAZZJ8e.js → _baseUniq-BZ3hvks1.js} RENAMED
Binary files a/lightrag/api/webui/assets/_baseUniq-CtAZZJ8e.js and b/lightrag/api/webui/assets/_baseUniq-BZ3hvks1.js differ
 
lightrag/api/webui/assets/{architectureDiagram-IEHRJDOE-Bou3pEJo.js → architectureDiagram-IEHRJDOE-0ddCq26Q.js} RENAMED
Binary files a/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-Bou3pEJo.js and b/lightrag/api/webui/assets/architectureDiagram-IEHRJDOE-0ddCq26Q.js differ
 
lightrag/api/webui/assets/{blockDiagram-JOT3LUYC-BxXXNv1O.js → blockDiagram-JOT3LUYC-DezBiNXY.js} RENAMED
Binary files a/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-BxXXNv1O.js and b/lightrag/api/webui/assets/blockDiagram-JOT3LUYC-DezBiNXY.js differ
 
lightrag/api/webui/assets/{c4Diagram-VJAJSXHY-BpY1T-jk.js → c4Diagram-VJAJSXHY-BMYcCHQE.js} RENAMED
Binary files a/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BpY1T-jk.js and b/lightrag/api/webui/assets/c4Diagram-VJAJSXHY-BMYcCHQE.js differ
 
lightrag/api/webui/assets/{chunk-4BMEZGHF-CAhtCpmT.js → chunk-4BMEZGHF-DM9xX3Iw.js} RENAMED
Binary files a/lightrag/api/webui/assets/chunk-4BMEZGHF-CAhtCpmT.js and b/lightrag/api/webui/assets/chunk-4BMEZGHF-DM9xX3Iw.js differ
 
lightrag/api/webui/assets/{chunk-A2AXSNBT-B91iiasA.js → chunk-A2AXSNBT-CRex3-yW.js} RENAMED
Binary files a/lightrag/api/webui/assets/chunk-A2AXSNBT-B91iiasA.js and b/lightrag/api/webui/assets/chunk-A2AXSNBT-CRex3-yW.js differ
 
lightrag/api/webui/assets/{chunk-AEK57VVT-gQ4j2jcG.js → chunk-AEK57VVT-DlsJi6tH.js} RENAMED
Binary files a/lightrag/api/webui/assets/chunk-AEK57VVT-gQ4j2jcG.js and b/lightrag/api/webui/assets/chunk-AEK57VVT-DlsJi6tH.js differ
 
lightrag/api/webui/assets/{chunk-D6G4REZN-CGaqGId9.js → chunk-D6G4REZN-5j_Vlndu.js} RENAMED
Binary files a/lightrag/api/webui/assets/chunk-D6G4REZN-CGaqGId9.js and b/lightrag/api/webui/assets/chunk-D6G4REZN-5j_Vlndu.js differ
 
lightrag/api/webui/assets/{chunk-RZ5BOZE2-B615FLH4.js → chunk-RZ5BOZE2-CdnIs5Fb.js} RENAMED
Binary files a/lightrag/api/webui/assets/chunk-RZ5BOZE2-B615FLH4.js and b/lightrag/api/webui/assets/chunk-RZ5BOZE2-CdnIs5Fb.js differ
 
lightrag/api/webui/assets/{chunk-XZIHB7SX-c4P7PYPk.js → chunk-XZIHB7SX-gcyrJN2U.js} RENAMED
Binary files a/lightrag/api/webui/assets/chunk-XZIHB7SX-c4P7PYPk.js and b/lightrag/api/webui/assets/chunk-XZIHB7SX-gcyrJN2U.js differ
 
lightrag/api/webui/assets/{classDiagram-GIVACNV2-DBTA8XwB.js → classDiagram-GIVACNV2-DZXU66uW.js} RENAMED
Binary files a/lightrag/api/webui/assets/classDiagram-GIVACNV2-DBTA8XwB.js and b/lightrag/api/webui/assets/classDiagram-GIVACNV2-DZXU66uW.js differ
 
lightrag/api/webui/assets/{classDiagram-v2-COTLJTTW-DBTA8XwB.js → classDiagram-v2-COTLJTTW-DZXU66uW.js} RENAMED
Binary files a/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DBTA8XwB.js and b/lightrag/api/webui/assets/classDiagram-v2-COTLJTTW-DZXU66uW.js differ
 
lightrag/api/webui/assets/{clone-Dm5jEAXQ.js → clone-eVzB-9-f.js} RENAMED
Binary files a/lightrag/api/webui/assets/clone-Dm5jEAXQ.js and b/lightrag/api/webui/assets/clone-eVzB-9-f.js differ
 
lightrag/api/webui/assets/{dagre-OKDRZEBW-CqR4Poz4.js → dagre-OKDRZEBW-Cas2IJD5.js} RENAMED
Binary files a/lightrag/api/webui/assets/dagre-OKDRZEBW-CqR4Poz4.js and b/lightrag/api/webui/assets/dagre-OKDRZEBW-Cas2IJD5.js differ
 
lightrag/api/webui/assets/{diagram-SSKATNLV-pBYsrik-.js → diagram-SSKATNLV-CA9pCZ-g.js} RENAMED
Binary files a/lightrag/api/webui/assets/diagram-SSKATNLV-pBYsrik-.js and b/lightrag/api/webui/assets/diagram-SSKATNLV-CA9pCZ-g.js differ
 
lightrag/api/webui/assets/{diagram-VNBRO52H-Bu64Jus9.js → diagram-VNBRO52H-B9-Mlqta.js} RENAMED
Binary files a/lightrag/api/webui/assets/diagram-VNBRO52H-Bu64Jus9.js and b/lightrag/api/webui/assets/diagram-VNBRO52H-B9-Mlqta.js differ
 
lightrag/api/webui/assets/{erDiagram-Q7BY3M3F-BTmP3B4h.js → erDiagram-Q7BY3M3F-CX4Di1zm.js} RENAMED
Binary files a/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-BTmP3B4h.js and b/lightrag/api/webui/assets/erDiagram-Q7BY3M3F-CX4Di1zm.js differ
 
lightrag/api/webui/assets/{feature-documents-oks3sUnM.js → feature-documents-DZY3tMAq.js} RENAMED
Binary files a/lightrag/api/webui/assets/feature-documents-oks3sUnM.js and b/lightrag/api/webui/assets/feature-documents-DZY3tMAq.js differ
 
lightrag/api/webui/assets/{feature-graph-NODQb6qW.js → feature-graph-wF7LCIjH.js} RENAMED
Binary files a/lightrag/api/webui/assets/feature-graph-NODQb6qW.js and b/lightrag/api/webui/assets/feature-graph-wF7LCIjH.js differ
 
lightrag/api/webui/assets/feature-retrieval-DalFy9WB.js DELETED
Binary file (184 kB)
 
lightrag/api/webui/assets/feature-retrieval-DdCvVec9.js ADDED
Binary file (185 kB). View file
 
lightrag/api/webui/assets/{flowDiagram-4HSFHLVR-DZNySYxV.js → flowDiagram-4HSFHLVR-BDwWKjb6.js} RENAMED
Binary files a/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-DZNySYxV.js and b/lightrag/api/webui/assets/flowDiagram-4HSFHLVR-BDwWKjb6.js differ
 
lightrag/api/webui/assets/{ganttDiagram-APWFNJXF-GWTNv7FR.js → ganttDiagram-APWFNJXF-Du3IUDRk.js} RENAMED
Binary files a/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-GWTNv7FR.js and b/lightrag/api/webui/assets/ganttDiagram-APWFNJXF-Du3IUDRk.js differ
 
lightrag/api/webui/assets/{gitGraphDiagram-7IBYFJ6S-BXUpvPAf.js → gitGraphDiagram-7IBYFJ6S-CD8MAiok.js} RENAMED
Binary files a/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-BXUpvPAf.js and b/lightrag/api/webui/assets/gitGraphDiagram-7IBYFJ6S-CD8MAiok.js differ
 
lightrag/api/webui/assets/{graph-BLnbmvfZ.js → graph-DJgPOSDl.js} RENAMED
Binary files a/lightrag/api/webui/assets/graph-BLnbmvfZ.js and b/lightrag/api/webui/assets/graph-DJgPOSDl.js differ
 
lightrag/api/webui/assets/{index-yRRg2BZk.js → index-D3V9EKqf.js} RENAMED
Binary files a/lightrag/api/webui/assets/index-yRRg2BZk.js and b/lightrag/api/webui/assets/index-D3V9EKqf.js differ
 
lightrag/api/webui/assets/{index-1Hy45NwC.js → index-DB3D3pNI.js} RENAMED
Binary files a/lightrag/api/webui/assets/index-1Hy45NwC.js and b/lightrag/api/webui/assets/index-DB3D3pNI.js differ
 
lightrag/api/webui/assets/{infoDiagram-PH2N3AL5-DAtlRRqj.js → infoDiagram-PH2N3AL5-DYkrQwoL.js} RENAMED
Binary files a/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DAtlRRqj.js and b/lightrag/api/webui/assets/infoDiagram-PH2N3AL5-DYkrQwoL.js differ
 
lightrag/api/webui/assets/{journeyDiagram-U35MCT3I-BscxFTBa.js → journeyDiagram-U35MCT3I-CZecBGFk.js} RENAMED
Binary files a/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-BscxFTBa.js and b/lightrag/api/webui/assets/journeyDiagram-U35MCT3I-CZecBGFk.js differ
 
lightrag/api/webui/assets/{kanban-definition-NDS4AKOZ-QESEl0tA.js → kanban-definition-NDS4AKOZ-CD8vwi41.js} RENAMED
Binary files a/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-QESEl0tA.js and b/lightrag/api/webui/assets/kanban-definition-NDS4AKOZ-CD8vwi41.js differ
 
lightrag/api/webui/assets/{layout-DsT4215v.js → layout-D_MnvYWV.js} RENAMED
Binary files a/lightrag/api/webui/assets/layout-DsT4215v.js and b/lightrag/api/webui/assets/layout-D_MnvYWV.js differ
 
lightrag/api/webui/assets/{markdown-vendor-DmIvJdn7.js → markdown-vendor-ZbbHR4ge.js} RENAMED
Binary files a/lightrag/api/webui/assets/markdown-vendor-DmIvJdn7.js and b/lightrag/api/webui/assets/markdown-vendor-ZbbHR4ge.js differ
 
lightrag/api/webui/assets/{mermaid-vendor-D0f_SE0h.js → mermaid-vendor-CR44n-lC.js} RENAMED
Binary files a/lightrag/api/webui/assets/mermaid-vendor-D0f_SE0h.js and b/lightrag/api/webui/assets/mermaid-vendor-CR44n-lC.js differ
 
lightrag/api/webui/assets/{mindmap-definition-ALO5MXBD-aQwMTShx.js → mindmap-definition-ALO5MXBD-CEOit9vG.js} RENAMED
Binary files a/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-aQwMTShx.js and b/lightrag/api/webui/assets/mindmap-definition-ALO5MXBD-CEOit9vG.js differ
 
lightrag/api/webui/assets/{pieDiagram-IB7DONF6-D6N6SEu_.js → pieDiagram-IB7DONF6-Ca5AV9bY.js} RENAMED
Binary files a/lightrag/api/webui/assets/pieDiagram-IB7DONF6-D6N6SEu_.js and b/lightrag/api/webui/assets/pieDiagram-IB7DONF6-Ca5AV9bY.js differ
 
lightrag/api/webui/assets/{quadrantDiagram-7GDLP6J5-COkzo7lS.js → quadrantDiagram-7GDLP6J5-D5ZAOmhC.js} RENAMED
Binary files a/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-COkzo7lS.js and b/lightrag/api/webui/assets/quadrantDiagram-7GDLP6J5-D5ZAOmhC.js differ
 
lightrag/api/webui/assets/{radar-MK3ICKWK-DOAXm8cx.js → radar-MK3ICKWK-B97XRKGx.js} RENAMED
Binary files a/lightrag/api/webui/assets/radar-MK3ICKWK-DOAXm8cx.js and b/lightrag/api/webui/assets/radar-MK3ICKWK-B97XRKGx.js differ
 
lightrag/api/webui/assets/{requirementDiagram-KVF5MWMF-lKW1n5a1.js → requirementDiagram-KVF5MWMF-BzPWhOZW.js} RENAMED
Binary files a/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-lKW1n5a1.js and b/lightrag/api/webui/assets/requirementDiagram-KVF5MWMF-BzPWhOZW.js differ
 
lightrag/api/webui/assets/{sankeyDiagram-QLVOVGJD-BqECU7xS.js → sankeyDiagram-QLVOVGJD-DYZFDO6U.js} RENAMED
Binary files a/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-BqECU7xS.js and b/lightrag/api/webui/assets/sankeyDiagram-QLVOVGJD-DYZFDO6U.js differ
 
lightrag/api/webui/assets/{sequenceDiagram-X6HHIX6F-ByOWqALm.js → sequenceDiagram-X6HHIX6F-GAQ6Ejep.js} RENAMED
Binary files a/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-ByOWqALm.js and b/lightrag/api/webui/assets/sequenceDiagram-X6HHIX6F-GAQ6Ejep.js differ
 
lightrag/api/webui/assets/{stateDiagram-DGXRK772-DjKMsne-.js → stateDiagram-DGXRK772-pI_aBJdi.js} RENAMED
Binary files a/lightrag/api/webui/assets/stateDiagram-DGXRK772-DjKMsne-.js and b/lightrag/api/webui/assets/stateDiagram-DGXRK772-pI_aBJdi.js differ