yangdx committed on
Commit
d46e3aa
·
1 Parent(s): a76ad0a

Add RAG configuration options and enhance parameter configurability

Browse files

- Add top-k and cosine-threshold parameters for the API server
- Update .env and CLI parameter handling with new parameters
- Improve splash screen display
- Update base and storage classes to read new parameters from the .env file.

.env.example CHANGED
@@ -6,6 +6,17 @@ PORT=9621
6
  WORKING_DIR=/app/data/rag_storage
7
  INPUT_DIR=/app/data/inputs
8
 
 
 
 
 
 
 
 
 
 
 
 
9
  # LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
10
  # Ollama example
11
  LLM_BINDING=ollama
@@ -38,15 +49,6 @@ EMBEDDING_MODEL=bge-m3:latest
38
  # EMBEDDING_BINDING_HOST=http://host.docker.internal:9600
39
  # EMBEDDING_MODEL=bge-m3:latest
40
 
41
- # RAG Configuration
42
- MAX_ASYNC=4
43
- MAX_TOKENS=32768
44
- EMBEDDING_DIM=1024
45
- MAX_EMBED_TOKENS=8192
46
- #HISTORY_TURNS=3
47
- #CHUNK_SIZE=1200
48
- #CHUNK_OVERLAP_SIZE=100
49
-
50
  # Security (empty for no key)
51
  LIGHTRAG_API_KEY=your-secure-api-key-here
52
 
 
6
  WORKING_DIR=/app/data/rag_storage
7
  INPUT_DIR=/app/data/inputs
8
 
9
+ # RAG Configuration
10
+ MAX_ASYNC=4
11
+ MAX_TOKENS=32768
12
+ EMBEDDING_DIM=1024
13
+ MAX_EMBED_TOKENS=8192
14
+ #HISTORY_TURNS=3
15
+ #CHUNK_SIZE=1200
16
+ #CHUNK_OVERLAP_SIZE=100
17
+ #COSINE_THRESHOLD=0.2
18
+ #TOP_K=50
19
+
20
  # LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
21
  # Ollama example
22
  LLM_BINDING=ollama
 
49
  # EMBEDDING_BINDING_HOST=http://host.docker.internal:9600
50
  # EMBEDDING_MODEL=bge-m3:latest
51
 
 
 
 
 
 
 
 
 
 
52
  # Security (empty for no key)
53
  LIGHTRAG_API_KEY=your-secure-api-key-here
54
 
lightrag/api/lightrag_server.py CHANGED
@@ -207,8 +207,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
207
  ASCIIColors.yellow(f"{args.chunk_size}")
208
  ASCIIColors.white(" β”œβ”€ Chunk Overlap Size: ", end="")
209
  ASCIIColors.yellow(f"{args.chunk_overlap_size}")
210
- ASCIIColors.white(" └─ History Turns: ", end="")
211
  ASCIIColors.yellow(f"{args.history_turns}")
 
 
 
 
212
 
213
  # System Configuration
214
  ASCIIColors.magenta("\nπŸ› οΈ System Configuration:")
@@ -484,6 +488,20 @@ def parse_args() -> argparse.Namespace:
484
  help="Number of conversation history turns to include (default: from env or 3)",
485
  )
486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  args = parser.parse_args()
488
 
489
  return args
@@ -846,6 +864,9 @@ def create_app(args):
846
  graph_storage=GRAPH_STORAGE,
847
  vector_storage=VECTOR_STORAGE,
848
  doc_status_storage=DOC_STATUS_STORAGE,
 
 
 
849
  )
850
  else:
851
  rag = LightRAG(
@@ -863,6 +884,9 @@ def create_app(args):
863
  graph_storage=GRAPH_STORAGE,
864
  vector_storage=VECTOR_STORAGE,
865
  doc_status_storage=DOC_STATUS_STORAGE,
 
 
 
866
  )
867
 
868
  async def index_file(file_path: Union[str, Path]) -> None:
@@ -1052,6 +1076,7 @@ def create_app(args):
1052
  mode=request.mode,
1053
  stream=request.stream,
1054
  only_need_context=request.only_need_context,
 
1055
  ),
1056
  )
1057
 
@@ -1093,6 +1118,7 @@ def create_app(args):
1093
  mode=request.mode,
1094
  stream=True,
1095
  only_need_context=request.only_need_context,
 
1096
  ),
1097
  )
1098
 
@@ -1632,6 +1658,7 @@ def create_app(args):
1632
  "stream": request.stream,
1633
  "only_need_context": False,
1634
  "conversation_history": conversation_history,
 
1635
  }
1636
 
1637
  if args.history_turns is not None:
 
207
  ASCIIColors.yellow(f"{args.chunk_size}")
208
  ASCIIColors.white(" β”œβ”€ Chunk Overlap Size: ", end="")
209
  ASCIIColors.yellow(f"{args.chunk_overlap_size}")
210
+ ASCIIColors.white(" β”œβ”€ History Turns: ", end="")
211
  ASCIIColors.yellow(f"{args.history_turns}")
212
+ ASCIIColors.white(" β”œβ”€ Cosine Threshold: ", end="")
213
+ ASCIIColors.yellow(f"{args.cosine_threshold}")
214
+ ASCIIColors.white(" └─ Top-K: ", end="")
215
+ ASCIIColors.yellow(f"{args.top_k}")
216
 
217
  # System Configuration
218
  ASCIIColors.magenta("\nπŸ› οΈ System Configuration:")
 
488
  help="Number of conversation history turns to include (default: from env or 3)",
489
  )
490
 
491
+ # Search parameters
492
+ parser.add_argument(
493
+ "--top-k",
494
+ type=int,
495
+ default=get_env_value("TOP_K", 50, int),
496
+ help="Number of most similar results to return (default: from env or 50)",
497
+ )
498
+ parser.add_argument(
499
+ "--cosine-threshold",
500
+ type=float,
501
+ default=get_env_value("COSINE_THRESHOLD", 0.4, float),
502
+ help="Cosine similarity threshold (default: from env or 0.4)",
503
+ )
504
+
505
  args = parser.parse_args()
506
 
507
  return args
 
864
  graph_storage=GRAPH_STORAGE,
865
  vector_storage=VECTOR_STORAGE,
866
  doc_status_storage=DOC_STATUS_STORAGE,
867
+ vector_db_storage_cls_kwargs={
868
+ "cosine_better_than_threshold": args.cosine_threshold
869
+ },
870
  )
871
  else:
872
  rag = LightRAG(
 
884
  graph_storage=GRAPH_STORAGE,
885
  vector_storage=VECTOR_STORAGE,
886
  doc_status_storage=DOC_STATUS_STORAGE,
887
+ vector_db_storage_cls_kwargs={
888
+ "cosine_better_than_threshold": args.cosine_threshold
889
+ },
890
  )
891
 
892
  async def index_file(file_path: Union[str, Path]) -> None:
 
1076
  mode=request.mode,
1077
  stream=request.stream,
1078
  only_need_context=request.only_need_context,
1079
+ top_k=args.top_k,
1080
  ),
1081
  )
1082
 
 
1118
  mode=request.mode,
1119
  stream=True,
1120
  only_need_context=request.only_need_context,
1121
+ top_k=args.top_k,
1122
  ),
1123
  )
1124
 
 
1658
  "stream": request.stream,
1659
  "only_need_context": False,
1660
  "conversation_history": conversation_history,
1661
+ "top_k": args.top_k,
1662
  }
1663
 
1664
  if args.history_turns is not None:
lightrag/base.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from dataclasses import dataclass, field
2
  from typing import (
3
  TypedDict,
@@ -32,7 +33,7 @@ class QueryParam:
32
  response_type: str = "Multiple Paragraphs"
33
  stream: bool = False
34
  # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
35
- top_k: int = 60
36
  # Number of document chunks to retrieve.
37
  # top_n: int = 10
38
  # Number of tokens for the original chunks.
 
1
+ import os
2
  from dataclasses import dataclass, field
3
  from typing import (
4
  TypedDict,
 
33
  response_type: str = "Multiple Paragraphs"
34
  stream: bool = False
35
  # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
36
+ top_k: int = int(os.getenv("TOP_K", "60"))
37
  # Number of document chunks to retrieve.
38
  # top_n: int = 10
39
  # Number of tokens for the original chunks.
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -73,7 +73,7 @@ from lightrag.base import (
73
 
74
  @dataclass
75
  class NanoVectorDBStorage(BaseVectorStorage):
76
- cosine_better_than_threshold: float = 0.2
77
 
78
  def __post_init__(self):
79
  self._client_file_name = os.path.join(
 
73
 
74
  @dataclass
75
  class NanoVectorDBStorage(BaseVectorStorage):
76
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
77
 
78
  def __post_init__(self):
79
  self._client_file_name = os.path.join(