yangdx committed · Commit 82c47ab
Parent(s): 91c5685

Added temperature parameter for LLM

Files changed:
- README.md +2 -2
- env.example +7 -5
- lightrag/api/lightrag_server.py +4 -0
- lightrag/api/routers/document_routes.py +2 -2
- lightrag/api/utils_api.py +16 -7
README.md CHANGED

@@ -630,11 +630,11 @@ rag.insert(["TEXT1", "TEXT2",...])
 rag = LightRAG(
     working_dir=WORKING_DIR,
     addon_params={
-        "insert_batch_size":
+        "insert_batch_size": 4  # Process 4 documents per batch
     }
 )
 
-rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Documents will be processed in batches of
+rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Documents will be processed in batches of 4
 ```
 
 The `insert_batch_size` parameter in `addon_params` controls how many documents are processed in each batch during insertion. This is useful for:
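The batching behaviour described in that README paragraph is easy to picture with a small sketch. The `chunked` helper below is illustrative only, not LightRAG's internal code:

```python
from typing import Iterable, List


def chunked(items: List[str], batch_size: int) -> Iterable[List[str]]:
    """Yield successive batches of at most batch_size documents."""
    for i in range(0, len(items), batch_size):
        yield items[i : i + batch_size]


docs = ["TEXT1", "TEXT2", "TEXT3", "TEXT4", "TEXT5"]
for batch in chunked(docs, batch_size=4):
    print(len(batch))  # prints 4, then 1 -- each batch would be inserted together
```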
env.example CHANGED

@@ -39,21 +39,23 @@
 # MAX_TOKEN_ENTITY_DESC=4000
 
 ### Settings for document indexing
-#
+ENABLE_LLM_CACHE_FOR_EXTRACT=true  # Enable LLM cache for entity extraction
+SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
 # MAX_TOKEN_SUMMARY=500  # Max tokens for entity or relations summary
 # MAX_PARALLEL_INSERT=2  # Number of parallel processing documents in one patch
-# MAX_ASYNC=4  # Max concurrency requests of LLM
-# ENABLE_LLM_CACHE_FOR_EXTRACT=true  # Enable LLM cache for entity extraction
 
 # EMBEDDING_BATCH_NUM=32  # num of chunks send to Embedding in one request
 # EMBEDDING_FUNC_MAX_ASYNC=16  # Max concurrency requests for Embedding
 # MAX_EMBED_TOKENS=8192
 
 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
-
-
+TIMEOUT=150  # Time out in seconds for LLM, None for infinite timeout
+TEMPERATURE=0.5
+MAX_ASYNC=4  # Max concurrency requests of LLM
+MAX_TOKENS=32768  # Max tokens send to LLM (less than context size of the model)
+
 LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_API_KEY=your_api_key
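For orientation, a minimal sketch of how settings such as TIMEOUT, TEMPERATURE, MAX_ASYNC and MAX_TOKENS can be read on the Python side. The standalone parsing below only mirrors the idea; the API server does this through its own get_env_value helper (see utils_api.py further down):

```python
import os

from dotenv import load_dotenv

load_dotenv()  # pick up values from a local .env file if present

timeout_raw = os.getenv("TIMEOUT", "150")
timeout = None if timeout_raw in ("", "None") else int(timeout_raw)  # None => infinite
temperature = float(os.getenv("TEMPERATURE", "0.5"))
max_async = int(os.getenv("MAX_ASYNC", "4"))
max_tokens = int(os.getenv("MAX_TOKENS", "32768"))

print(timeout, temperature, max_async, max_tokens)
```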
lightrag/api/lightrag_server.py CHANGED

@@ -207,6 +207,7 @@ def create_app(args):
     history_messages=history_messages,
     base_url=args.llm_binding_host,
     api_key=args.llm_binding_api_key,
+    temperature=args.temperature,
     **kwargs,
 )
 
@@ -230,6 +231,7 @@ def create_app(args):
     base_url=args.llm_binding_host,
     api_key=os.getenv("AZURE_OPENAI_API_KEY"),
     api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
+    temperature=args.temperature,
     **kwargs,
 )
 
@@ -302,6 +304,7 @@ def create_app(args):
     },
     namespace_prefix=args.namespace_prefix,
     auto_manage_storages_states=False,
+    max_parallel_insert=args.max_parallel_insert,
 )
 else:  # azure_openai
     rag = LightRAG(
@@ -331,6 +334,7 @@ def create_app(args):
     },
     namespace_prefix=args.namespace_prefix,
     auto_manage_storages_states=False,
+    max_parallel_insert=args.max_parallel_insert,
 )
 
 # Add routes
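To show where the new keyword argument ends up, here is a hedged sketch of an OpenAI-compatible model function that accepts temperature. The function name, default model, and client wiring are assumptions for illustration, not the exact code in lightrag_server.py:

```python
from openai import AsyncOpenAI  # assumed OpenAI-compatible async client


async def llm_model_func(
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list | None = None,
    base_url: str | None = None,
    api_key: str | None = None,
    temperature: float = 0.5,  # supplied from args.temperature / TEMPERATURE
    **kwargs,
) -> str:
    # Hypothetical wrapper, not the function used by create_app().
    client = AsyncOpenAI(base_url=base_url, api_key=api_key)
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages or [])
    messages.append({"role": "user", "content": prompt})
    response = await client.chat.completions.create(
        model=kwargs.pop("model", "mistral-nemo:latest"),  # placeholder model name
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message.content
```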
lightrag/api/routers/document_routes.py CHANGED

@@ -475,8 +475,8 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
 if not new_files:
     return
 
-# Get MAX_PARALLEL_INSERT from global_args
-max_parallel = global_args["
+# Get MAX_PARALLEL_INSERT from global_args["main_args"]
+max_parallel = global_args["main_args"].max_parallel_insert
 # Calculate batch size as 2 * MAX_PARALLEL_INSERT
 batch_size = 2 * max_parallel
 
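A short sketch of the batching rule this hunk relies on (batch size = 2 * MAX_PARALLEL_INSERT). Here index_file is a hypothetical placeholder for the real per-file pipeline, which is not shown:

```python
import asyncio


async def index_file(path: str) -> None:
    # Placeholder for the actual per-file indexing work.
    await asyncio.sleep(0)


async def scan(new_files: list[str], max_parallel: int = 2) -> None:
    batch_size = 2 * max_parallel  # same rule as in run_scanning_process
    for i in range(0, len(new_files), batch_size):
        batch = new_files[i : i + batch_size]
        await asyncio.gather(*(index_file(path) for path in batch))


asyncio.run(scan([f"doc_{n}.txt" for n in range(10)], max_parallel=2))
```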
lightrag/api/utils_api.py CHANGED

@@ -14,6 +14,7 @@ from dotenv import load_dotenv
 from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
 from starlette.status import HTTP_403_FORBIDDEN
 from .auth import auth_handler
+from ..prompt import PROMPTS
 
 # Load environment variables
 load_dotenv()
@@ -364,9 +365,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
 args.vector_storage = get_env_value(
     "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
 )
-
+
 # Get MAX_PARALLEL_INSERT from environment
-
+args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)
 
 # Handle openai-ollama special case
 if args.llm_binding == "openai-ollama":
@@ -396,6 +397,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
 args.enable_llm_cache_for_extract = get_env_value(
     "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
 )
+
+# Inject LLM temperature configuration
+args.temperature = get_env_value("TEMPERATURE", 0.5, float)
 
 # Select Document loading tool (DOCLING, DEFAULT)
 args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
@@ -464,6 +468,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
 ASCIIColors.yellow(f"{args.llm_binding_host}")
 ASCIIColors.white("    ├─ Model: ", end="")
 ASCIIColors.yellow(f"{args.llm_model}")
+ASCIIColors.white("    ├─ Temperature: ", end="")
+ASCIIColors.yellow(f"{args.temperature}")
+ASCIIColors.white("    ├─ Max Async for LLM: ", end="")
+ASCIIColors.yellow(f"{args.max_async}")
+ASCIIColors.white("    ├─ Max Tokens: ", end="")
+ASCIIColors.yellow(f"{args.max_tokens}")
 ASCIIColors.white("    ├─ Timeout: ", end="")
 ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
 
@@ -479,13 +489,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
 ASCIIColors.yellow(f"{args.embedding_dim}")
 
 # RAG Configuration
+summary_language = os.getenv("SUMMARY_LANGUAGE", PROMPTS["DEFAULT_LANGUAGE"])
 ASCIIColors.magenta("\n⚙️ RAG Configuration:")
-ASCIIColors.white("    ├─
-ASCIIColors.yellow(f"{
+ASCIIColors.white("    ├─ Summary Language: ", end="")
+ASCIIColors.yellow(f"{summary_language}")
 ASCIIColors.white("    ├─ Max Parallel Insert: ", end="")
-ASCIIColors.yellow(f"{
-ASCIIColors.white("    ├─ Max Tokens: ", end="")
-ASCIIColors.yellow(f"{args.max_tokens}")
+ASCIIColors.yellow(f"{args.max_parallel_insert}")
 ASCIIColors.white("    ├─ Max Embed Tokens: ", end="")
 ASCIIColors.yellow(f"{args.max_embed_tokens}")
 ASCIIColors.white("    ├─ Chunk Size: ", end="")
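Finally, a simplified stand-in for the typed environment lookup that args.temperature = get_env_value("TEMPERATURE", 0.5, float) depends on. The real helper in lightrag/api/utils_api.py may differ in details such as bool handling; this sketch only shows the pattern:

```python
import os
from typing import Any, Callable


def get_env_value(name: str, default: Any, value_type: Callable[[str], Any] = str) -> Any:
    # Return the typed env value, falling back to the default when the
    # variable is unset, empty, or fails to parse.
    raw = os.environ.get(name)
    if raw is None or raw == "":
        return default
    try:
        return value_type(raw)
    except ValueError:
        return default


temperature = get_env_value("TEMPERATURE", 0.5, float)
max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)
print(temperature, max_parallel_insert)
```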