zrguo commited on
Commit
ce07957
·
unverified ·
2 Parent(s): bf73d58 0cd5542

Merge pull request #674 from danielaskdd/topk-cosine-threshold-env-config

Browse files
.env.example CHANGED
@@ -6,6 +6,17 @@ PORT=9621
6
  WORKING_DIR=/app/data/rag_storage
7
  INPUT_DIR=/app/data/inputs
8
 
 
 
 
 
 
 
 
 
 
 
 
9
  # LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
10
  # Ollama example
11
  LLM_BINDING=ollama
@@ -38,15 +49,6 @@ EMBEDDING_MODEL=bge-m3:latest
38
  # EMBEDDING_BINDING_HOST=http://host.docker.internal:9600
39
  # EMBEDDING_MODEL=bge-m3:latest
40
 
41
- # RAG Configuration
42
- MAX_ASYNC=4
43
- MAX_TOKENS=32768
44
- EMBEDDING_DIM=1024
45
- MAX_EMBED_TOKENS=8192
46
- #HISTORY_TURNS=3
47
- #CHUNK_SIZE=1200
48
- #CHUNK_OVERLAP_SIZE=100
49
-
50
  # Security (empty for no key)
51
  LIGHTRAG_API_KEY=your-secure-api-key-here
52
 
 
6
  WORKING_DIR=/app/data/rag_storage
7
  INPUT_DIR=/app/data/inputs
8
 
9
+ # RAG Configuration
10
+ MAX_ASYNC=4
11
+ MAX_TOKENS=32768
12
+ EMBEDDING_DIM=1024
13
+ MAX_EMBED_TOKENS=8192
14
+ #HISTORY_TURNS=3
15
+ #CHUNK_SIZE=1200
16
+ #CHUNK_OVERLAP_SIZE=100
17
+ #COSINE_THRESHOLD=0.4 # 0.2 while not running API server
18
+ #TOP_K=50 # 60 while not running API server
19
+
20
  # LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
21
  # Ollama example
22
  LLM_BINDING=ollama
 
49
  # EMBEDDING_BINDING_HOST=http://host.docker.internal:9600
50
  # EMBEDDING_MODEL=bge-m3:latest
51
 
 
 
 
 
 
 
 
 
 
52
  # Security (empty for no key)
53
  LIGHTRAG_API_KEY=your-secure-api-key-here
54
 
README.md CHANGED
@@ -360,6 +360,8 @@ class QueryParam:
360
  max_token_for_local_context: int = 4000
361
  ```
362
 
 
 
363
  ### Batch Insert
364
 
365
  ```python
@@ -730,10 +732,10 @@ if __name__ == "__main__":
730
  | **embedding\_func\_max\_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
731
  | **llm\_model\_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
732
  | **llm\_model\_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
733
- | **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` |
734
- | **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` |
735
  | **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | |
736
- | **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for vector database (currently not used) | |
737
  | **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
738
  | **enable\_llm\_cache\_for\_entity\_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
739
  | **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` |
@@ -741,6 +743,7 @@ if __name__ == "__main__":
741
  | **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:<br>- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.<br>- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.<br>- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
742
 
743
  ### Error Handling
 
744
  <details>
745
  <summary>Click to view error handling details</summary>
746
 
 
360
  max_token_for_local_context: int = 4000
361
  ```
362
 
363
 + > The default value of top_k can be changed via the environment variable TOP_K.
364
+
365
  ### Batch Insert
366
 
367
  ```python
 
732
  | **embedding\_func\_max\_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
733
  | **llm\_model\_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
734
  | **llm\_model\_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
735
 + | **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768`(default value can be changed by env var MAX_TOKENS) |
736
 + | **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16`(default value can be changed by env var MAX_ASYNC) |
737
  | **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | |
738
 + | **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for the vector database, such as setting the threshold for node and relation retrieval. | cosine_better_than_threshold: 0.2(default value can be changed by env var COSINE_THRESHOLD) |
739
  | **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
740
  | **enable\_llm\_cache\_for\_entity\_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
741
  | **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` |
 
743
  | **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:<br>- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.<br>- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.<br>- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
744
 
745
  ### Error Handling
746
+
747
  <details>
748
  <summary>Click to view error handling details</summary>
749
 
lightrag/api/README.md CHANGED
@@ -98,6 +98,8 @@ After starting the lightrag-server, you can add an Ollama-type connection in the
98
 
99
  LightRAG can be configured using either command-line arguments or environment variables. When both are provided, command-line arguments take precedence over environment variables.
100
 
 
 
101
  ### Environment Variables
102
 
103
  You can configure LightRAG using environment variables by creating a `.env` file in your project root directory. Here's a complete example of available environment variables:
@@ -111,6 +113,17 @@ PORT=9621
111
  WORKING_DIR=/app/data/rag_storage
112
  INPUT_DIR=/app/data/inputs
113
 
 
 
 
 
 
 
 
 
 
 
 
114
  # LLM Configuration
115
  LLM_BINDING=ollama
116
  LLM_BINDING_HOST=http://localhost:11434
@@ -124,14 +137,8 @@ EMBEDDING_BINDING=ollama
124
  EMBEDDING_BINDING_HOST=http://localhost:11434
125
  EMBEDDING_MODEL=bge-m3:latest
126
 
127
- # RAG Configuration
128
- MAX_ASYNC=4
129
- MAX_TOKENS=32768
130
- EMBEDDING_DIM=1024
131
- MAX_EMBED_TOKENS=8192
132
-
133
  # Security
134
- LIGHTRAG_API_KEY=
135
 
136
  # Logging
137
  LOG_LEVEL=INFO
@@ -186,10 +193,9 @@ PORT=7000 python lightrag.py
186
  | --ssl | False | Enable HTTPS |
187
  | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
188
  | --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) |
 
 
189
 
190
-
191
-
192
- For protecting the server using an authentication key, you can also use an environment variable named `LIGHTRAG_API_KEY`.
193
  ### Example Usage
194
 
195
  #### Running a Lightrag server with ollama default local server as llm and embedding backends
 
98
 
99
  LightRAG can be configured using either command-line arguments or environment variables. When both are provided, command-line arguments take precedence over environment variables.
100
 
101
+ For better performance, the API server's default values for TOP_K and COSINE_THRESHOLD are set to 50 and 0.4 respectively. If COSINE_THRESHOLD remains at its default value of 0.2 in LightRAG, many irrelevant entities and relations would be retrieved and sent to the LLM.
102
+
103
  ### Environment Variables
104
 
105
  You can configure LightRAG using environment variables by creating a `.env` file in your project root directory. Here's a complete example of available environment variables:
 
113
  WORKING_DIR=/app/data/rag_storage
114
  INPUT_DIR=/app/data/inputs
115
 
116
+ # RAG Configuration
117
+ MAX_ASYNC=4
118
+ MAX_TOKENS=32768
119
+ EMBEDDING_DIM=1024
120
+ MAX_EMBED_TOKENS=8192
121
+ #HISTORY_TURNS=3
122
+ #CHUNK_SIZE=1200
123
+ #CHUNK_OVERLAP_SIZE=100
124
+ #COSINE_THRESHOLD=0.4
125
+ #TOP_K=50
126
+
127
  # LLM Configuration
128
  LLM_BINDING=ollama
129
  LLM_BINDING_HOST=http://localhost:11434
 
137
  EMBEDDING_BINDING_HOST=http://localhost:11434
138
  EMBEDDING_MODEL=bge-m3:latest
139
 
 
 
 
 
 
 
140
  # Security
141
 + #LIGHTRAG_API_KEY=your-api-key-for-accessing-LightRAG
142
 
143
  # Logging
144
  LOG_LEVEL=INFO
 
193
  | --ssl | False | Enable HTTPS |
194
  | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
195
  | --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) |
196
+ | --top-k | 50 | Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. |
197
 + | --cosine-threshold | 0.4 | The cosine threshold for node and relation retrieval; works together with top-k to control the retrieval of nodes and relations. |
198
 
 
 
 
199
  ### Example Usage
200
 
201
  #### Running a Lightrag server with ollama default local server as llm and embedding backends
lightrag/api/lightrag_server.py CHANGED
@@ -212,8 +212,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
212
  ASCIIColors.yellow(f"{args.chunk_size}")
213
  ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
214
  ASCIIColors.yellow(f"{args.chunk_overlap_size}")
215
- ASCIIColors.white(" └─ History Turns: ", end="")
216
  ASCIIColors.yellow(f"{args.history_turns}")
 
 
 
 
217
 
218
  # System Configuration
219
  ASCIIColors.magenta("\n🛠️ System Configuration:")
@@ -489,6 +493,20 @@ def parse_args() -> argparse.Namespace:
489
  help="Number of conversation history turns to include (default: from env or 3)",
490
  )
491
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  parser.add_argument(
493
  "--simulated-model-name",
494
  type=str,
@@ -862,6 +880,9 @@ def create_app(args):
862
  graph_storage=ollama_server_infos.GRAPH_STORAGE,
863
  vector_storage=ollama_server_infos.VECTOR_STORAGE,
864
  doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
 
 
 
865
  )
866
  else:
867
  rag = LightRAG(
@@ -871,6 +892,9 @@ def create_app(args):
871
  else openai_alike_model_complete,
872
  chunk_token_size=int(args.chunk_size),
873
  chunk_overlap_token_size=int(args.chunk_overlap_size),
 
 
 
874
  llm_model_name=args.llm_model,
875
  llm_model_max_async=args.max_async,
876
  llm_model_max_token_size=args.max_tokens,
@@ -879,6 +903,9 @@ def create_app(args):
879
  graph_storage=ollama_server_infos.GRAPH_STORAGE,
880
  vector_storage=ollama_server_infos.VECTOR_STORAGE,
881
  doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
 
 
 
882
  )
883
 
884
  async def index_file(file_path: Union[str, Path]) -> None:
@@ -1068,6 +1095,7 @@ def create_app(args):
1068
  mode=request.mode,
1069
  stream=request.stream,
1070
  only_need_context=request.only_need_context,
 
1071
  ),
1072
  )
1073
 
@@ -1109,6 +1137,7 @@ def create_app(args):
1109
  mode=request.mode,
1110
  stream=True,
1111
  only_need_context=request.only_need_context,
 
1112
  ),
1113
  )
1114
 
@@ -1648,6 +1677,7 @@ def create_app(args):
1648
  "stream": request.stream,
1649
  "only_need_context": False,
1650
  "conversation_history": conversation_history,
 
1651
  }
1652
 
1653
  if args.history_turns is not None:
 
212
  ASCIIColors.yellow(f"{args.chunk_size}")
213
  ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
214
  ASCIIColors.yellow(f"{args.chunk_overlap_size}")
215
+ ASCIIColors.white(" ├─ History Turns: ", end="")
216
  ASCIIColors.yellow(f"{args.history_turns}")
217
+ ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
218
+ ASCIIColors.yellow(f"{args.cosine_threshold}")
219
+ ASCIIColors.white(" └─ Top-K: ", end="")
220
+ ASCIIColors.yellow(f"{args.top_k}")
221
 
222
  # System Configuration
223
  ASCIIColors.magenta("\n🛠️ System Configuration:")
 
493
  help="Number of conversation history turns to include (default: from env or 3)",
494
  )
495
 
496
+ # Search parameters
497
+ parser.add_argument(
498
+ "--top-k",
499
+ type=int,
500
+ default=get_env_value("TOP_K", 50, int),
501
+ help="Number of most similar results to return (default: from env or 50)",
502
+ )
503
+ parser.add_argument(
504
+ "--cosine-threshold",
505
+ type=float,
506
+ default=get_env_value("COSINE_THRESHOLD", 0.4, float),
507
+ help="Cosine similarity threshold (default: from env or 0.4)",
508
+ )
509
+
510
  parser.add_argument(
511
  "--simulated-model-name",
512
  type=str,
 
880
  graph_storage=ollama_server_infos.GRAPH_STORAGE,
881
  vector_storage=ollama_server_infos.VECTOR_STORAGE,
882
  doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
883
+ vector_db_storage_cls_kwargs={
884
+ "cosine_better_than_threshold": args.cosine_threshold
885
+ },
886
  )
887
  else:
888
  rag = LightRAG(
 
892
  else openai_alike_model_complete,
893
  chunk_token_size=int(args.chunk_size),
894
  chunk_overlap_token_size=int(args.chunk_overlap_size),
895
+ llm_model_kwargs={
896
+ "timeout": args.timeout,
897
+ },
898
  llm_model_name=args.llm_model,
899
  llm_model_max_async=args.max_async,
900
  llm_model_max_token_size=args.max_tokens,
 
903
  graph_storage=ollama_server_infos.GRAPH_STORAGE,
904
  vector_storage=ollama_server_infos.VECTOR_STORAGE,
905
  doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
906
+ vector_db_storage_cls_kwargs={
907
+ "cosine_better_than_threshold": args.cosine_threshold
908
+ },
909
  )
910
 
911
  async def index_file(file_path: Union[str, Path]) -> None:
 
1095
  mode=request.mode,
1096
  stream=request.stream,
1097
  only_need_context=request.only_need_context,
1098
+ top_k=args.top_k,
1099
  ),
1100
  )
1101
 
 
1137
  mode=request.mode,
1138
  stream=True,
1139
  only_need_context=request.only_need_context,
1140
+ top_k=args.top_k,
1141
  ),
1142
  )
1143
 
 
1677
  "stream": request.stream,
1678
  "only_need_context": False,
1679
  "conversation_history": conversation_history,
1680
+ "top_k": args.top_k,
1681
  }
1682
 
1683
  if args.history_turns is not None:
lightrag/base.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from dataclasses import dataclass, field
2
  from typing import (
3
  TypedDict,
@@ -32,7 +33,7 @@ class QueryParam:
32
  response_type: str = "Multiple Paragraphs"
33
  stream: bool = False
34
  # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
35
- top_k: int = 60
36
  # Number of document chunks to retrieve.
37
  # top_n: int = 10
38
  # Number of tokens for the original chunks.
 
1
+ import os
2
  from dataclasses import dataclass, field
3
  from typing import (
4
  TypedDict,
 
33
  response_type: str = "Multiple Paragraphs"
34
  stream: bool = False
35
  # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
36
+ top_k: int = int(os.getenv("TOP_K", "60"))
37
  # Number of document chunks to retrieve.
38
  # top_n: int = 10
39
  # Number of tokens for the original chunks.
lightrag/kg/chroma_impl.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import asyncio
2
  from dataclasses import dataclass
3
  from typing import Union
@@ -12,16 +13,16 @@ from lightrag.utils import logger
12
  class ChromaVectorDBStorage(BaseVectorStorage):
13
  """ChromaDB vector storage implementation."""
14
 
15
- cosine_better_than_threshold: float = 0.2
16
 
17
  def __post_init__(self):
18
  try:
19
  # Use global config value if specified, otherwise use default
20
- self.cosine_better_than_threshold = self.global_config.get(
 
21
  "cosine_better_than_threshold", self.cosine_better_than_threshold
22
  )
23
 
24
- config = self.global_config.get("vector_db_storage_cls_kwargs", {})
25
  user_collection_settings = config.get("collection_settings", {})
26
  # Default HNSW index settings for ChromaDB
27
  default_collection_settings = {
 
1
+ import os
2
  import asyncio
3
  from dataclasses import dataclass
4
  from typing import Union
 
13
  class ChromaVectorDBStorage(BaseVectorStorage):
14
  """ChromaDB vector storage implementation."""
15
 
16
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
17
 
18
  def __post_init__(self):
19
  try:
20
  # Use global config value if specified, otherwise use default
21
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
22
+ self.cosine_better_than_threshold = config.get(
23
  "cosine_better_than_threshold", self.cosine_better_than_threshold
24
  )
25
 
 
26
  user_collection_settings = config.get("collection_settings", {})
27
  # Default HNSW index settings for ChromaDB
28
  default_collection_settings = {
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -73,9 +73,15 @@ from lightrag.base import (
73
 
74
  @dataclass
75
  class NanoVectorDBStorage(BaseVectorStorage):
76
- cosine_better_than_threshold: float = 0.2
77
 
78
  def __post_init__(self):
 
 
 
 
 
 
79
  self._client_file_name = os.path.join(
80
  self.global_config["working_dir"], f"vdb_{self.namespace}.json"
81
  )
@@ -83,9 +89,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
83
  self._client = NanoVectorDB(
84
  self.embedding_func.embedding_dim, storage_file=self._client_file_name
85
  )
86
- self.cosine_better_than_threshold = self.global_config.get(
87
- "cosine_better_than_threshold", self.cosine_better_than_threshold
88
- )
89
 
90
  async def upsert(self, data: dict[str, dict]):
91
  logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
@@ -134,6 +137,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
134
  async def query(self, query: str, top_k=5):
135
  embedding = await self.embedding_func([query])
136
  embedding = embedding[0]
 
 
 
137
  results = self._client.query(
138
  query=embedding,
139
  top_k=top_k,
 
73
 
74
  @dataclass
75
  class NanoVectorDBStorage(BaseVectorStorage):
76
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
77
 
78
  def __post_init__(self):
79
+ # Use global config value if specified, otherwise use default
80
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
81
+ self.cosine_better_than_threshold = config.get(
82
+ "cosine_better_than_threshold", self.cosine_better_than_threshold
83
+ )
84
+
85
  self._client_file_name = os.path.join(
86
  self.global_config["working_dir"], f"vdb_{self.namespace}.json"
87
  )
 
89
  self._client = NanoVectorDB(
90
  self.embedding_func.embedding_dim, storage_file=self._client_file_name
91
  )
 
 
 
92
 
93
  async def upsert(self, data: dict[str, dict]):
94
  logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
 
137
  async def query(self, query: str, top_k=5):
138
  embedding = await self.embedding_func([query])
139
  embedding = embedding[0]
140
+ logger.info(
141
+ f"Query: {query}, top_k: {top_k}, cosine_better_than_threshold: {self.cosine_better_than_threshold}"
142
+ )
143
  results = self._client.query(
144
  query=embedding,
145
  top_k=top_k,
lightrag/kg/oracle_impl.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import asyncio
2
 
3
  # import html
@@ -341,10 +342,14 @@ class OracleKVStorage(BaseKVStorage):
341
  class OracleVectorDBStorage(BaseVectorStorage):
342
  # should pass db object to self.db
343
  db: OracleDB = None
344
- cosine_better_than_threshold: float = 0.2
345
 
346
  def __post_init__(self):
347
- pass
 
 
 
 
348
 
349
  async def upsert(self, data: dict[str, dict]):
350
  """向向量数据库中插入数据"""
 
1
+ import os
2
  import asyncio
3
 
4
  # import html
 
342
  class OracleVectorDBStorage(BaseVectorStorage):
343
  # should pass db object to self.db
344
  db: OracleDB = None
345
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
346
 
347
  def __post_init__(self):
348
+ # Use global config value if specified, otherwise use default
349
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
350
+ self.cosine_better_than_threshold = config.get(
351
+ "cosine_better_than_threshold", self.cosine_better_than_threshold
352
+ )
353
 
354
  async def upsert(self, data: dict[str, dict]):
355
  """向向量数据库中插入数据"""
lightrag/kg/postgres_impl.py CHANGED
@@ -301,12 +301,14 @@ class PGKVStorage(BaseKVStorage):
301
 
302
  @dataclass
303
  class PGVectorStorage(BaseVectorStorage):
304
- cosine_better_than_threshold: float = 0.2
305
  db: PostgreSQLDB = None
306
 
307
  def __post_init__(self):
308
  self._max_batch_size = self.global_config["embedding_batch_num"]
309
- self.cosine_better_than_threshold = self.global_config.get(
 
 
310
  "cosine_better_than_threshold", self.cosine_better_than_threshold
311
  )
312
 
 
301
 
302
  @dataclass
303
  class PGVectorStorage(BaseVectorStorage):
304
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
305
  db: PostgreSQLDB = None
306
 
307
  def __post_init__(self):
308
  self._max_batch_size = self.global_config["embedding_batch_num"]
309
+ # Use global config value if specified, otherwise use default
310
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
311
+ self.cosine_better_than_threshold = config.get(
312
  "cosine_better_than_threshold", self.cosine_better_than_threshold
313
  )
314
 
lightrag/kg/tidb_impl.py CHANGED
@@ -217,14 +217,16 @@ class TiDBKVStorage(BaseKVStorage):
217
 
218
  @dataclass
219
  class TiDBVectorDBStorage(BaseVectorStorage):
220
- cosine_better_than_threshold: float = 0.2
221
 
222
  def __post_init__(self):
223
  self._client_file_name = os.path.join(
224
  self.global_config["working_dir"], f"vdb_{self.namespace}.json"
225
  )
226
  self._max_batch_size = self.global_config["embedding_batch_num"]
227
- self.cosine_better_than_threshold = self.global_config.get(
 
 
228
  "cosine_better_than_threshold", self.cosine_better_than_threshold
229
  )
230
 
 
217
 
218
  @dataclass
219
  class TiDBVectorDBStorage(BaseVectorStorage):
220
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
221
 
222
  def __post_init__(self):
223
  self._client_file_name = os.path.join(
224
  self.global_config["working_dir"], f"vdb_{self.namespace}.json"
225
  )
226
  self._max_batch_size = self.global_config["embedding_batch_num"]
227
+ # Use global config value if specified, otherwise use default
228
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
229
+ self.cosine_better_than_threshold = config.get(
230
  "cosine_better_than_threshold", self.cosine_better_than_threshold
231
  )
232
 
lightrag/lightrag.py CHANGED
@@ -158,8 +158,8 @@ class LightRAG:
158
  # LLM
159
  llm_model_func: callable = None # This must be set (we do want to separate llm from the corte, so no more default initialization)
160
  llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" # 'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
161
- llm_model_max_token_size: int = 32768
162
- llm_model_max_async: int = 16
163
  llm_model_kwargs: dict = field(default_factory=dict)
164
 
165
  # storage
 
158
  # LLM
159
  llm_model_func: callable = None # This must be set (we do want to separate llm from the corte, so no more default initialization)
160
  llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" # 'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
161
+ llm_model_max_token_size: int = int(os.getenv("MAX_TOKENS", "32768"))
162
+ llm_model_max_async: int = int(os.getenv("MAX_ASYNC", "16"))
163
  llm_model_kwargs: dict = field(default_factory=dict)
164
 
165
  # storage
lightrag/operate.py CHANGED
@@ -590,8 +590,8 @@ async def kg_query(
590
  query, query_param, global_config, hashing_kv
591
  )
592
 
593
- logger.info(f"High-level keywords: {hl_keywords}")
594
- logger.info(f"Low-level keywords: {ll_keywords}")
595
 
596
  # Handle empty keywords
597
  if hl_keywords == [] and ll_keywords == []:
@@ -1025,6 +1025,10 @@ async def _build_query_context(
1025
  [hl_relations_context, ll_relations_context],
1026
  [hl_text_units_context, ll_text_units_context],
1027
  )
 
 
 
 
1028
  return f"""
1029
  -----Entities-----
1030
  ```csv
 
590
  query, query_param, global_config, hashing_kv
591
  )
592
 
593
+ logger.debug(f"High-level keywords: {hl_keywords}")
594
+ logger.debug(f"Low-level keywords: {ll_keywords}")
595
 
596
  # Handle empty keywords
597
  if hl_keywords == [] and ll_keywords == []:
 
1025
  [hl_relations_context, ll_relations_context],
1026
  [hl_text_units_context, ll_text_units_context],
1027
  )
1028
+ # not necessary to use LLM to generate a response
1029
+ if not entities_context.strip() and not relations_context.strip():
1030
+ return None
1031
+
1032
  return f"""
1033
  -----Entities-----
1034
  ```csv