Merge pull request #674 from danielaskdd/topk-cosine-threshold-env-config
Browse files- .env.example +11 -9
- README.md +6 -3
- lightrag/api/README.md +16 -10
- lightrag/api/lightrag_server.py +31 -1
- lightrag/base.py +2 -1
- lightrag/kg/chroma_impl.py +4 -3
- lightrag/kg/nano_vector_db_impl.py +10 -4
- lightrag/kg/oracle_impl.py +7 -2
- lightrag/kg/postgres_impl.py +4 -2
- lightrag/kg/tidb_impl.py +4 -2
- lightrag/lightrag.py +2 -2
- lightrag/operate.py +6 -2
.env.example
CHANGED
@@ -6,6 +6,17 @@ PORT=9621
|
|
6 |
WORKING_DIR=/app/data/rag_storage
|
7 |
INPUT_DIR=/app/data/inputs
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
|
10 |
# Ollama example
|
11 |
LLM_BINDING=ollama
|
@@ -38,15 +49,6 @@ EMBEDDING_MODEL=bge-m3:latest
|
|
38 |
# EMBEDDING_BINDING_HOST=http://host.docker.internal:9600
|
39 |
# EMBEDDING_MODEL=bge-m3:latest
|
40 |
|
41 |
-
# RAG Configuration
|
42 |
-
MAX_ASYNC=4
|
43 |
-
MAX_TOKENS=32768
|
44 |
-
EMBEDDING_DIM=1024
|
45 |
-
MAX_EMBED_TOKENS=8192
|
46 |
-
#HISTORY_TURNS=3
|
47 |
-
#CHUNK_SIZE=1200
|
48 |
-
#CHUNK_OVERLAP_SIZE=100
|
49 |
-
|
50 |
# Security (empty for no key)
|
51 |
LIGHTRAG_API_KEY=your-secure-api-key-here
|
52 |
|
|
|
6 |
WORKING_DIR=/app/data/rag_storage
|
7 |
INPUT_DIR=/app/data/inputs
|
8 |
|
9 |
+
# RAG Configuration
|
10 |
+
MAX_ASYNC=4
|
11 |
+
MAX_TOKENS=32768
|
12 |
+
EMBEDDING_DIM=1024
|
13 |
+
MAX_EMBED_TOKENS=8192
|
14 |
+
#HISTORY_TURNS=3
|
15 |
+
#CHUNK_SIZE=1200
|
16 |
+
#CHUNK_OVERLAP_SIZE=100
|
17 |
+
#COSINE_THRESHOLD=0.4 # 0.2 while not running API server
|
18 |
+
#TOP_K=50 # 60 while not running API server
|
19 |
+
|
20 |
# LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
|
21 |
# Ollama example
|
22 |
LLM_BINDING=ollama
|
|
|
49 |
# EMBEDDING_BINDING_HOST=http://host.docker.internal:9600
|
50 |
# EMBEDDING_MODEL=bge-m3:latest
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
# Security (empty for no key)
|
53 |
LIGHTRAG_API_KEY=your-secure-api-key-here
|
54 |
|
README.md
CHANGED
@@ -360,6 +360,8 @@ class QueryParam:
|
|
360 |
max_token_for_local_context: int = 4000
|
361 |
```
|
362 |
|
|
|
|
|
363 |
### Batch Insert
|
364 |
|
365 |
```python
|
@@ -730,10 +732,10 @@ if __name__ == "__main__":
|
|
730 |
| **embedding\_func\_max\_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
|
731 |
| **llm\_model\_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
|
732 |
| **llm\_model\_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
|
733 |
-
| **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` |
|
734 |
-
| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` |
|
735 |
| **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | |
|
736 |
-
| **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for vector database
|
737 |
| **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
|
738 |
| **enable\_llm\_cache\_for\_entity\_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
|
739 |
| **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` |
|
@@ -741,6 +743,7 @@ if __name__ == "__main__":
|
|
741 |
| **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:<br>- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.<br>- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.<br>- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
|
742 |
|
743 |
### Error Handling
|
|
|
744 |
<details>
|
745 |
<summary>Click to view error handling details</summary>
|
746 |
|
|
|
360 |
max_token_for_local_context: int = 4000
|
361 |
```
|
362 |
|
363 |
+
> The default value of `top_k` can be changed via the environment variable `TOP_K`.
|
364 |
+
|
365 |
### Batch Insert
|
366 |
|
367 |
```python
|
|
|
732 |
| **embedding\_func\_max\_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
|
733 |
| **llm\_model\_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
|
734 |
| **llm\_model\_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
|
735 |
+
| **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` (default can be overridden by env var MAX_TOKENS) |
|
736 |
+
| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` (default can be overridden by env var MAX_ASYNC) |
|
737 |
| **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | |
|
738 |
+
| **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for the vector database, such as the threshold for node and relation retrieval. | `cosine_better_than_threshold: 0.2` (default can be overridden by env var COSINE_THRESHOLD) |
|
739 |
| **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
|
740 |
| **enable\_llm\_cache\_for\_entity\_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
|
741 |
| **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` |
|
|
|
743 |
| **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:<br>- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.<br>- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.<br>- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
|
744 |
|
745 |
### Error Handling
|
746 |
+
|
747 |
<details>
|
748 |
<summary>Click to view error handling details</summary>
|
749 |
|
lightrag/api/README.md
CHANGED
@@ -98,6 +98,8 @@ After starting the lightrag-server, you can add an Ollama-type connection in the
|
|
98 |
|
99 |
LightRAG can be configured using either command-line arguments or environment variables. When both are provided, command-line arguments take precedence over environment variables.
|
100 |
|
|
|
|
|
101 |
### Environment Variables
|
102 |
|
103 |
You can configure LightRAG using environment variables by creating a `.env` file in your project root directory. Here's a complete example of available environment variables:
|
@@ -111,6 +113,17 @@ PORT=9621
|
|
111 |
WORKING_DIR=/app/data/rag_storage
|
112 |
INPUT_DIR=/app/data/inputs
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
# LLM Configuration
|
115 |
LLM_BINDING=ollama
|
116 |
LLM_BINDING_HOST=http://localhost:11434
|
@@ -124,14 +137,8 @@ EMBEDDING_BINDING=ollama
|
|
124 |
EMBEDDING_BINDING_HOST=http://localhost:11434
|
125 |
EMBEDDING_MODEL=bge-m3:latest
|
126 |
|
127 |
-
# RAG Configuration
|
128 |
-
MAX_ASYNC=4
|
129 |
-
MAX_TOKENS=32768
|
130 |
-
EMBEDDING_DIM=1024
|
131 |
-
MAX_EMBED_TOKENS=8192
|
132 |
-
|
133 |
# Security
|
134 |
-
LIGHTRAG_API_KEY=
|
135 |
|
136 |
# Logging
|
137 |
LOG_LEVEL=INFO
|
@@ -186,10 +193,9 @@ PORT=7000 python lightrag.py
|
|
186 |
| --ssl | False | Enable HTTPS |
|
187 |
| --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
|
188 |
| --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) |
|
|
|
|
|
189 |
|
190 |
-
|
191 |
-
|
192 |
-
For protecting the server using an authentication key, you can also use an environment variable named `LIGHTRAG_API_KEY`.
|
193 |
### Example Usage
|
194 |
|
195 |
#### Running a Lightrag server with ollama default local server as llm and embedding backends
|
|
|
98 |
|
99 |
LightRAG can be configured using either command-line arguments or environment variables. When both are provided, command-line arguments take precedence over environment variables.
|
100 |
|
101 |
+
For better performance, the API server's default values for TOP_K and COSINE_THRESHOLD are set to 50 and 0.4 respectively. If COSINE_THRESHOLD remains at its default value of 0.2 in LightRAG, many irrelevant entities and relations would be retrieved and sent to the LLM.
|
102 |
+
|
103 |
### Environment Variables
|
104 |
|
105 |
You can configure LightRAG using environment variables by creating a `.env` file in your project root directory. Here's a complete example of available environment variables:
|
|
|
113 |
WORKING_DIR=/app/data/rag_storage
|
114 |
INPUT_DIR=/app/data/inputs
|
115 |
|
116 |
+
# RAG Configuration
|
117 |
+
MAX_ASYNC=4
|
118 |
+
MAX_TOKENS=32768
|
119 |
+
EMBEDDING_DIM=1024
|
120 |
+
MAX_EMBED_TOKENS=8192
|
121 |
+
#HISTORY_TURNS=3
|
122 |
+
#CHUNK_SIZE=1200
|
123 |
+
#CHUNK_OVERLAP_SIZE=100
|
124 |
+
#COSINE_THRESHOLD=0.4
|
125 |
+
#TOP_K=50
|
126 |
+
|
127 |
# LLM Configuration
|
128 |
LLM_BINDING=ollama
|
129 |
LLM_BINDING_HOST=http://localhost:11434
|
|
|
137 |
EMBEDDING_BINDING_HOST=http://localhost:11434
|
138 |
EMBEDDING_MODEL=bge-m3:latest
|
139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
# Security
|
141 |
+
#LIGHTRAG_API_KEY=your-api-key-for-accessing-LightRAG
|
142 |
|
143 |
# Logging
|
144 |
LOG_LEVEL=INFO
|
|
|
193 |
| --ssl | False | Enable HTTPS |
|
194 |
| --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
|
195 |
| --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) |
|
196 |
+
| --top-k | 50 | Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. |
|
197 |
+
| --cosine-threshold | 0.4 | The cosine threshold for node and relation retrieval; works together with top-k to control how many nodes and relations are retrieved. |
|
198 |
|
|
|
|
|
|
|
199 |
### Example Usage
|
200 |
|
201 |
#### Running a Lightrag server with ollama default local server as llm and embedding backends
|
lightrag/api/lightrag_server.py
CHANGED
@@ -212,8 +212,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
|
|
212 |
ASCIIColors.yellow(f"{args.chunk_size}")
|
213 |
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
|
214 |
ASCIIColors.yellow(f"{args.chunk_overlap_size}")
|
215 |
-
ASCIIColors.white("
|
216 |
ASCIIColors.yellow(f"{args.history_turns}")
|
|
|
|
|
|
|
|
|
217 |
|
218 |
# System Configuration
|
219 |
ASCIIColors.magenta("\n🛠️ System Configuration:")
|
@@ -489,6 +493,20 @@ def parse_args() -> argparse.Namespace:
|
|
489 |
help="Number of conversation history turns to include (default: from env or 3)",
|
490 |
)
|
491 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
492 |
parser.add_argument(
|
493 |
"--simulated-model-name",
|
494 |
type=str,
|
@@ -862,6 +880,9 @@ def create_app(args):
|
|
862 |
graph_storage=ollama_server_infos.GRAPH_STORAGE,
|
863 |
vector_storage=ollama_server_infos.VECTOR_STORAGE,
|
864 |
doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
|
|
|
|
|
|
|
865 |
)
|
866 |
else:
|
867 |
rag = LightRAG(
|
@@ -871,6 +892,9 @@ def create_app(args):
|
|
871 |
else openai_alike_model_complete,
|
872 |
chunk_token_size=int(args.chunk_size),
|
873 |
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
|
|
|
|
|
|
874 |
llm_model_name=args.llm_model,
|
875 |
llm_model_max_async=args.max_async,
|
876 |
llm_model_max_token_size=args.max_tokens,
|
@@ -879,6 +903,9 @@ def create_app(args):
|
|
879 |
graph_storage=ollama_server_infos.GRAPH_STORAGE,
|
880 |
vector_storage=ollama_server_infos.VECTOR_STORAGE,
|
881 |
doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
|
|
|
|
|
|
|
882 |
)
|
883 |
|
884 |
async def index_file(file_path: Union[str, Path]) -> None:
|
@@ -1068,6 +1095,7 @@ def create_app(args):
|
|
1068 |
mode=request.mode,
|
1069 |
stream=request.stream,
|
1070 |
only_need_context=request.only_need_context,
|
|
|
1071 |
),
|
1072 |
)
|
1073 |
|
@@ -1109,6 +1137,7 @@ def create_app(args):
|
|
1109 |
mode=request.mode,
|
1110 |
stream=True,
|
1111 |
only_need_context=request.only_need_context,
|
|
|
1112 |
),
|
1113 |
)
|
1114 |
|
@@ -1648,6 +1677,7 @@ def create_app(args):
|
|
1648 |
"stream": request.stream,
|
1649 |
"only_need_context": False,
|
1650 |
"conversation_history": conversation_history,
|
|
|
1651 |
}
|
1652 |
|
1653 |
if args.history_turns is not None:
|
|
|
212 |
ASCIIColors.yellow(f"{args.chunk_size}")
|
213 |
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
|
214 |
ASCIIColors.yellow(f"{args.chunk_overlap_size}")
|
215 |
+
ASCIIColors.white(" ├─ History Turns: ", end="")
|
216 |
ASCIIColors.yellow(f"{args.history_turns}")
|
217 |
+
ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
|
218 |
+
ASCIIColors.yellow(f"{args.cosine_threshold}")
|
219 |
+
ASCIIColors.white(" └─ Top-K: ", end="")
|
220 |
+
ASCIIColors.yellow(f"{args.top_k}")
|
221 |
|
222 |
# System Configuration
|
223 |
ASCIIColors.magenta("\n🛠️ System Configuration:")
|
|
|
493 |
help="Number of conversation history turns to include (default: from env or 3)",
|
494 |
)
|
495 |
|
496 |
+
# Search parameters
|
497 |
+
parser.add_argument(
|
498 |
+
"--top-k",
|
499 |
+
type=int,
|
500 |
+
default=get_env_value("TOP_K", 50, int),
|
501 |
+
help="Number of most similar results to return (default: from env or 50)",
|
502 |
+
)
|
503 |
+
parser.add_argument(
|
504 |
+
"--cosine-threshold",
|
505 |
+
type=float,
|
506 |
+
default=get_env_value("COSINE_THRESHOLD", 0.4, float),
|
507 |
+
help="Cosine similarity threshold (default: from env or 0.4)",
|
508 |
+
)
|
509 |
+
|
510 |
parser.add_argument(
|
511 |
"--simulated-model-name",
|
512 |
type=str,
|
|
|
880 |
graph_storage=ollama_server_infos.GRAPH_STORAGE,
|
881 |
vector_storage=ollama_server_infos.VECTOR_STORAGE,
|
882 |
doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
|
883 |
+
vector_db_storage_cls_kwargs={
|
884 |
+
"cosine_better_than_threshold": args.cosine_threshold
|
885 |
+
},
|
886 |
)
|
887 |
else:
|
888 |
rag = LightRAG(
|
|
|
892 |
else openai_alike_model_complete,
|
893 |
chunk_token_size=int(args.chunk_size),
|
894 |
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
895 |
+
llm_model_kwargs={
|
896 |
+
"timeout": args.timeout,
|
897 |
+
},
|
898 |
llm_model_name=args.llm_model,
|
899 |
llm_model_max_async=args.max_async,
|
900 |
llm_model_max_token_size=args.max_tokens,
|
|
|
903 |
graph_storage=ollama_server_infos.GRAPH_STORAGE,
|
904 |
vector_storage=ollama_server_infos.VECTOR_STORAGE,
|
905 |
doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
|
906 |
+
vector_db_storage_cls_kwargs={
|
907 |
+
"cosine_better_than_threshold": args.cosine_threshold
|
908 |
+
},
|
909 |
)
|
910 |
|
911 |
async def index_file(file_path: Union[str, Path]) -> None:
|
|
|
1095 |
mode=request.mode,
|
1096 |
stream=request.stream,
|
1097 |
only_need_context=request.only_need_context,
|
1098 |
+
top_k=args.top_k,
|
1099 |
),
|
1100 |
)
|
1101 |
|
|
|
1137 |
mode=request.mode,
|
1138 |
stream=True,
|
1139 |
only_need_context=request.only_need_context,
|
1140 |
+
top_k=args.top_k,
|
1141 |
),
|
1142 |
)
|
1143 |
|
|
|
1677 |
"stream": request.stream,
|
1678 |
"only_need_context": False,
|
1679 |
"conversation_history": conversation_history,
|
1680 |
+
"top_k": args.top_k,
|
1681 |
}
|
1682 |
|
1683 |
if args.history_turns is not None:
|
lightrag/base.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from dataclasses import dataclass, field
|
2 |
from typing import (
|
3 |
TypedDict,
|
@@ -32,7 +33,7 @@ class QueryParam:
|
|
32 |
response_type: str = "Multiple Paragraphs"
|
33 |
stream: bool = False
|
34 |
# Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
|
35 |
-
top_k: int = 60
|
36 |
# Number of document chunks to retrieve.
|
37 |
# top_n: int = 10
|
38 |
# Number of tokens for the original chunks.
|
|
|
1 |
+
import os
|
2 |
from dataclasses import dataclass, field
|
3 |
from typing import (
|
4 |
TypedDict,
|
|
|
33 |
response_type: str = "Multiple Paragraphs"
|
34 |
stream: bool = False
|
35 |
# Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
|
36 |
+
top_k: int = int(os.getenv("TOP_K", "60"))
|
37 |
# Number of document chunks to retrieve.
|
38 |
# top_n: int = 10
|
39 |
# Number of tokens for the original chunks.
|
lightrag/kg/chroma_impl.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import asyncio
|
2 |
from dataclasses import dataclass
|
3 |
from typing import Union
|
@@ -12,16 +13,16 @@ from lightrag.utils import logger
|
|
12 |
class ChromaVectorDBStorage(BaseVectorStorage):
|
13 |
"""ChromaDB vector storage implementation."""
|
14 |
|
15 |
-
cosine_better_than_threshold: float = 0.2
|
16 |
|
17 |
def __post_init__(self):
|
18 |
try:
|
19 |
# Use global config value if specified, otherwise use default
|
20 |
-
|
|
|
21 |
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
22 |
)
|
23 |
|
24 |
-
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
25 |
user_collection_settings = config.get("collection_settings", {})
|
26 |
# Default HNSW index settings for ChromaDB
|
27 |
default_collection_settings = {
|
|
|
1 |
+
import os
|
2 |
import asyncio
|
3 |
from dataclasses import dataclass
|
4 |
from typing import Union
|
|
|
13 |
class ChromaVectorDBStorage(BaseVectorStorage):
|
14 |
"""ChromaDB vector storage implementation."""
|
15 |
|
16 |
+
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
17 |
|
18 |
def __post_init__(self):
|
19 |
try:
|
20 |
# Use global config value if specified, otherwise use default
|
21 |
+
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
22 |
+
self.cosine_better_than_threshold = config.get(
|
23 |
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
24 |
)
|
25 |
|
|
|
26 |
user_collection_settings = config.get("collection_settings", {})
|
27 |
# Default HNSW index settings for ChromaDB
|
28 |
default_collection_settings = {
|
lightrag/kg/nano_vector_db_impl.py
CHANGED
@@ -73,9 +73,15 @@ from lightrag.base import (
|
|
73 |
|
74 |
@dataclass
|
75 |
class NanoVectorDBStorage(BaseVectorStorage):
|
76 |
-
cosine_better_than_threshold: float = 0.2
|
77 |
|
78 |
def __post_init__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
self._client_file_name = os.path.join(
|
80 |
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
|
81 |
)
|
@@ -83,9 +89,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
83 |
self._client = NanoVectorDB(
|
84 |
self.embedding_func.embedding_dim, storage_file=self._client_file_name
|
85 |
)
|
86 |
-
self.cosine_better_than_threshold = self.global_config.get(
|
87 |
-
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
88 |
-
)
|
89 |
|
90 |
async def upsert(self, data: dict[str, dict]):
|
91 |
logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
|
@@ -134,6 +137,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
134 |
async def query(self, query: str, top_k=5):
|
135 |
embedding = await self.embedding_func([query])
|
136 |
embedding = embedding[0]
|
|
|
|
|
|
|
137 |
results = self._client.query(
|
138 |
query=embedding,
|
139 |
top_k=top_k,
|
|
|
73 |
|
74 |
@dataclass
|
75 |
class NanoVectorDBStorage(BaseVectorStorage):
|
76 |
+
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
77 |
|
78 |
def __post_init__(self):
|
79 |
+
# Use global config value if specified, otherwise use default
|
80 |
+
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
81 |
+
self.cosine_better_than_threshold = config.get(
|
82 |
+
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
83 |
+
)
|
84 |
+
|
85 |
self._client_file_name = os.path.join(
|
86 |
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
|
87 |
)
|
|
|
89 |
self._client = NanoVectorDB(
|
90 |
self.embedding_func.embedding_dim, storage_file=self._client_file_name
|
91 |
)
|
|
|
|
|
|
|
92 |
|
93 |
async def upsert(self, data: dict[str, dict]):
|
94 |
logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
|
|
|
137 |
async def query(self, query: str, top_k=5):
|
138 |
embedding = await self.embedding_func([query])
|
139 |
embedding = embedding[0]
|
140 |
+
logger.info(
|
141 |
+
f"Query: {query}, top_k: {top_k}, cosine_better_than_threshold: {self.cosine_better_than_threshold}"
|
142 |
+
)
|
143 |
results = self._client.query(
|
144 |
query=embedding,
|
145 |
top_k=top_k,
|
lightrag/kg/oracle_impl.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import asyncio
|
2 |
|
3 |
# import html
|
@@ -341,10 +342,14 @@ class OracleKVStorage(BaseKVStorage):
|
|
341 |
class OracleVectorDBStorage(BaseVectorStorage):
|
342 |
# should pass db object to self.db
|
343 |
db: OracleDB = None
|
344 |
-
cosine_better_than_threshold: float = 0.2
|
345 |
|
346 |
def __post_init__(self):
|
347 |
-
|
|
|
|
|
|
|
|
|
348 |
|
349 |
async def upsert(self, data: dict[str, dict]):
|
350 |
"""向向量数据库中插入数据"""
|
|
|
1 |
+
import os
|
2 |
import asyncio
|
3 |
|
4 |
# import html
|
|
|
342 |
class OracleVectorDBStorage(BaseVectorStorage):
|
343 |
# should pass db object to self.db
|
344 |
db: OracleDB = None
|
345 |
+
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
346 |
|
347 |
def __post_init__(self):
|
348 |
+
# Use global config value if specified, otherwise use default
|
349 |
+
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
350 |
+
self.cosine_better_than_threshold = config.get(
|
351 |
+
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
352 |
+
)
|
353 |
|
354 |
async def upsert(self, data: dict[str, dict]):
|
355 |
"""向向量数据库中插入数据"""
|
lightrag/kg/postgres_impl.py
CHANGED
@@ -301,12 +301,14 @@ class PGKVStorage(BaseKVStorage):
|
|
301 |
|
302 |
@dataclass
|
303 |
class PGVectorStorage(BaseVectorStorage):
|
304 |
-
cosine_better_than_threshold: float = 0.2
|
305 |
db: PostgreSQLDB = None
|
306 |
|
307 |
def __post_init__(self):
|
308 |
self._max_batch_size = self.global_config["embedding_batch_num"]
|
309 |
-
|
|
|
|
|
310 |
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
311 |
)
|
312 |
|
|
|
301 |
|
302 |
@dataclass
|
303 |
class PGVectorStorage(BaseVectorStorage):
|
304 |
+
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
305 |
db: PostgreSQLDB = None
|
306 |
|
307 |
def __post_init__(self):
|
308 |
self._max_batch_size = self.global_config["embedding_batch_num"]
|
309 |
+
# Use global config value if specified, otherwise use default
|
310 |
+
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
311 |
+
self.cosine_better_than_threshold = config.get(
|
312 |
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
313 |
)
|
314 |
|
lightrag/kg/tidb_impl.py
CHANGED
@@ -217,14 +217,16 @@ class TiDBKVStorage(BaseKVStorage):
|
|
217 |
|
218 |
@dataclass
|
219 |
class TiDBVectorDBStorage(BaseVectorStorage):
|
220 |
-
cosine_better_than_threshold: float = 0.2
|
221 |
|
222 |
def __post_init__(self):
|
223 |
self._client_file_name = os.path.join(
|
224 |
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
|
225 |
)
|
226 |
self._max_batch_size = self.global_config["embedding_batch_num"]
|
227 |
-
|
|
|
|
|
228 |
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
229 |
)
|
230 |
|
|
|
217 |
|
218 |
@dataclass
|
219 |
class TiDBVectorDBStorage(BaseVectorStorage):
|
220 |
+
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
221 |
|
222 |
def __post_init__(self):
|
223 |
self._client_file_name = os.path.join(
|
224 |
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
|
225 |
)
|
226 |
self._max_batch_size = self.global_config["embedding_batch_num"]
|
227 |
+
# Use global config value if specified, otherwise use default
|
228 |
+
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
229 |
+
self.cosine_better_than_threshold = config.get(
|
230 |
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
231 |
)
|
232 |
|
lightrag/lightrag.py
CHANGED
@@ -158,8 +158,8 @@ class LightRAG:
|
|
158 |
# LLM
|
159 |
llm_model_func: callable = None # This must be set (we do want to separate llm from the corte, so no more default initialization)
|
160 |
llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" # 'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
|
161 |
-
llm_model_max_token_size: int = 32768
|
162 |
-
llm_model_max_async: int = 16
|
163 |
llm_model_kwargs: dict = field(default_factory=dict)
|
164 |
|
165 |
# storage
|
|
|
158 |
# LLM
|
159 |
llm_model_func: callable = None # This must be set (we do want to separate llm from the corte, so no more default initialization)
|
160 |
llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" # 'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
|
161 |
+
llm_model_max_token_size: int = int(os.getenv("MAX_TOKENS", "32768"))
|
162 |
+
llm_model_max_async: int = int(os.getenv("MAX_ASYNC", "16"))
|
163 |
llm_model_kwargs: dict = field(default_factory=dict)
|
164 |
|
165 |
# storage
|
lightrag/operate.py
CHANGED
@@ -590,8 +590,8 @@ async def kg_query(
|
|
590 |
query, query_param, global_config, hashing_kv
|
591 |
)
|
592 |
|
593 |
-
logger.
|
594 |
-
logger.
|
595 |
|
596 |
# Handle empty keywords
|
597 |
if hl_keywords == [] and ll_keywords == []:
|
@@ -1025,6 +1025,10 @@ async def _build_query_context(
|
|
1025 |
[hl_relations_context, ll_relations_context],
|
1026 |
[hl_text_units_context, ll_text_units_context],
|
1027 |
)
|
|
|
|
|
|
|
|
|
1028 |
return f"""
|
1029 |
-----Entities-----
|
1030 |
```csv
|
|
|
590 |
query, query_param, global_config, hashing_kv
|
591 |
)
|
592 |
|
593 |
+
logger.debug(f"High-level keywords: {hl_keywords}")
|
594 |
+
logger.debug(f"Low-level keywords: {ll_keywords}")
|
595 |
|
596 |
# Handle empty keywords
|
597 |
if hl_keywords == [] and ll_keywords == []:
|
|
|
1025 |
[hl_relations_context, ll_relations_context],
|
1026 |
[hl_text_units_context, ll_text_units_context],
|
1027 |
)
|
1028 |
+
# not necessary to use LLM to generate a response
|
1029 |
+
if not entities_context.strip() and not relations_context.strip():
|
1030 |
+
return None
|
1031 |
+
|
1032 |
return f"""
|
1033 |
-----Entities-----
|
1034 |
```csv
|