yangdx committed on
Commit 82c47ab · 1 Parent(s): 91c5685

Added temperature parameter for LLM

README.md CHANGED
@@ -630,11 +630,11 @@ rag.insert(["TEXT1", "TEXT2",...])
 rag = LightRAG(
     working_dir=WORKING_DIR,
     addon_params={
-        "insert_batch_size": 20  # Process 20 documents per batch
+        "insert_batch_size": 4   # Process 4 documents per batch
     }
 )

-rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Documents will be processed in batches of 20
+rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Documents will be processed in batches of 4
 ```

 The `insert_batch_size` parameter in `addon_params` controls how many documents are processed in each batch during insertion. This is useful for:
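
As a quick illustration of what "processed in batches of 4" means, here is a minimal sketch of batch-wise insertion. The `chunked` helper is made up for this example and is not part of LightRAG's API; only `rag.insert` and `addon_params["insert_batch_size"]` come from the README above.

```python
def chunked(items, batch_size):
    """Yield successive slices of at most `batch_size` items."""
    for start in range(0, len(items), batch_size):
        yield items[start:start + batch_size]

docs = [f"TEXT{i}" for i in range(1, 11)]  # ten documents to insert

# With insert_batch_size=4 the ten documents are split into batches of
# 4, 4 and 2; LightRAG does the equivalent internally when rag.insert(docs)
# is called with this addon_params setting.
for batch in chunked(docs, batch_size=4):
    print(len(batch), batch)
```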
env.example CHANGED
@@ -39,21 +39,23 @@
 # MAX_TOKEN_ENTITY_DESC=4000

 ### Settings for document indexing
-# SUMMARY_LANGUAGE=English
+ENABLE_LLM_CACHE_FOR_EXTRACT=true  # Enable LLM cache for entity extraction
+SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
 # MAX_TOKEN_SUMMARY=500  # Max tokens for entity or relations summary
 # MAX_PARALLEL_INSERT=2  # Number of parallel processing documents in one patch
-# MAX_ASYNC=4  # Max concurrency requests of LLM
-# ENABLE_LLM_CACHE_FOR_EXTRACT=true  # Enable LLM cache for entity extraction

 # EMBEDDING_BATCH_NUM=32  # num of chunks send to Embedding in one request
 # EMBEDDING_FUNC_MAX_ASYNC=16  # Max concurrency requests for Embedding
 # MAX_EMBED_TOKENS=8192

 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
-# MAX_TOKENS=32768  # Max tokens send to LLM (less than context size of the model)
-# TIMEOUT=150  # Time out in seconds for LLM, None for infinite timeout
+TIMEOUT=150  # Time out in seconds for LLM, None for infinite timeout
+TEMPERATURE=0.5
+MAX_ASYNC=4  # Max concurrency requests of LLM
+MAX_TOKENS=32768  # Max tokens send to LLM (less than context size of the model)
+
 LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_API_KEY=your_api_key
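
The headline change in env.example is the new `TEMPERATURE` setting (default 0.5), alongside previously commented-out options that are now enabled by default. Below is a minimal sketch of reading these values with plain `os.getenv` and the defaults shown above; the server itself uses its own `get_env_value` helper from utils_api.py, not this code.

```python
import os

# Defaults mirror the values written into env.example by this commit;
# this loader is illustrative only, not the server's actual parser.
TIMEOUT = int(os.getenv("TIMEOUT", "150"))            # seconds per LLM request
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.5"))  # new LLM sampling temperature
MAX_ASYNC = int(os.getenv("MAX_ASYNC", "4"))          # max concurrent LLM requests
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "32768"))    # max tokens sent to the LLM

print(f"timeout={TIMEOUT}s temperature={TEMPERATURE} "
      f"max_async={MAX_ASYNC} max_tokens={MAX_TOKENS}")
```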
lightrag/api/lightrag_server.py CHANGED
@@ -207,6 +207,7 @@ def create_app(args):
             history_messages=history_messages,
             base_url=args.llm_binding_host,
             api_key=args.llm_binding_api_key,
+            temperature=args.temperature,
             **kwargs,
         )

@@ -230,6 +231,7 @@ def create_app(args):
             base_url=args.llm_binding_host,
             api_key=os.getenv("AZURE_OPENAI_API_KEY"),
             api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
+            temperature=args.temperature,
             **kwargs,
         )

@@ -302,6 +304,7 @@ def create_app(args):
             },
             namespace_prefix=args.namespace_prefix,
             auto_manage_storages_states=False,
+            max_parallel_insert=args.max_parallel_insert,
         )
     else:  # azure_openai
         rag = LightRAG(
@@ -331,6 +334,7 @@ def create_app(args):
             },
             namespace_prefix=args.namespace_prefix,
             auto_manage_storages_states=False,
+            max_parallel_insert=args.max_parallel_insert,
         )

     # Add routes
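
Both the OpenAI-compatible and the Azure OpenAI branches now forward `args.temperature` into the completion call, and both LightRAG constructors receive `max_parallel_insert` directly. The sketch below shows the temperature plumbing with a hypothetical `llm_complete` stand-in; the actual binding functions called by `create_app` are outside this hunk.

```python
import asyncio
from types import SimpleNamespace

async def llm_complete(prompt: str, **kwargs) -> str:
    # Hypothetical stand-in for an OpenAI/Azure-style completion binding;
    # it only reports the sampling temperature it received.
    return f"'{prompt}' answered with temperature={kwargs.get('temperature')}"

# Stand-in for the parsed server arguments.
args = SimpleNamespace(
    temperature=0.5,
    llm_binding_host="http://localhost:11434",
    llm_binding_api_key="your_api_key",
)

async def llm_model_func(prompt: str, **kwargs) -> str:
    # Mirrors the diff: fixed connection settings plus the new temperature
    # are merged with whatever per-call kwargs the caller supplies.
    return await llm_complete(
        prompt,
        base_url=args.llm_binding_host,
        api_key=args.llm_binding_api_key,
        temperature=args.temperature,
        **kwargs,
    )

print(asyncio.run(llm_model_func("hello")))
```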
lightrag/api/routers/document_routes.py CHANGED
@@ -475,8 +475,8 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
     if not new_files:
         return

-    # Get MAX_PARALLEL_INSERT from global_args
-    max_parallel = global_args["max_parallel_insert"]
+    # Get MAX_PARALLEL_INSERT from global_args["main_args"]
+    max_parallel = global_args["main_args"].max_parallel_insert
     # Calculate batch size as 2 * MAX_PARALLEL_INSERT
     batch_size = 2 * max_parallel
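
The batch-size derivation itself is unchanged; only the source of `max_parallel` moved, from a plain dict entry to the parsed args object stored under `global_args["main_args"]`. For reference, a small sketch of the resulting batching arithmetic (illustrative only; `run_scanning_process` is not reproduced here):

```python
# MAX_PARALLEL_INSERT=2 (the env.example default) gives batches of 4 files.
max_parallel = 2
batch_size = 2 * max_parallel

new_files = [f"doc_{i}.txt" for i in range(10)]

for i in range(0, len(new_files), batch_size):
    batch = new_files[i:i + batch_size]
    print(f"indexing batch of {len(batch)}: {batch}")
```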
 
lightrag/api/utils_api.py CHANGED
@@ -14,6 +14,7 @@ from dotenv import load_dotenv
 from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
 from starlette.status import HTTP_403_FORBIDDEN
 from .auth import auth_handler
+from ..prompt import PROMPTS

 # Load environment variables
 load_dotenv()
@@ -364,9 +365,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
     args.vector_storage = get_env_value(
         "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
     )

     # Get MAX_PARALLEL_INSERT from environment
-    global_args["max_parallel_insert"] = get_env_value("MAX_PARALLEL_INSERT", 2, int)
+    args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)

     # Handle openai-ollama special case
     if args.llm_binding == "openai-ollama":
@@ -396,6 +397,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
+
+    # Inject LLM temperature configuration
+    args.temperature = get_env_value("TEMPERATURE", 0.5, float)

     # Select Document loading tool (DOCLING, DEFAULT)
     args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
@@ -464,6 +468,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.llm_binding_host}")
     ASCIIColors.white(" ├─ Model: ", end="")
     ASCIIColors.yellow(f"{args.llm_model}")
+    ASCIIColors.white(" ├─ Temperature: ", end="")
+    ASCIIColors.yellow(f"{args.temperature}")
+    ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
+    ASCIIColors.yellow(f"{args.max_async}")
+    ASCIIColors.white(" ├─ Max Tokens: ", end="")
+    ASCIIColors.yellow(f"{args.max_tokens}")
     ASCIIColors.white(" └─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")

@@ -479,13 +489,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.embedding_dim}")

     # RAG Configuration
+    summary_language = os.getenv("SUMMARY_LANGUAGE", PROMPTS["DEFAULT_LANGUAGE"])
     ASCIIColors.magenta("\n⚙️ RAG Configuration:")
-    ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
-    ASCIIColors.yellow(f"{args.max_async}")
+    ASCIIColors.white(" ├─ Summary Language: ", end="")
+    ASCIIColors.yellow(f"{summary_language}")
     ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
-    ASCIIColors.yellow(f"{global_args['max_parallel_insert']}")
-    ASCIIColors.white(" ├─ Max Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_tokens}")
+    ASCIIColors.yellow(f"{args.max_parallel_insert}")
     ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_embed_tokens}")
     ASCIIColors.white(" ├─ Chunk Size: ", end="")
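
Taken together, `parse_args` now owns both `max_parallel_insert` and the new `temperature`, and the splash screen reports them along with the summary language. A quick way to sanity-check the values without starting the server is to mimic just those lookups; the sketch below uses a simplified stand-in for `get_env_value`, since the real helper and `PROMPTS["DEFAULT_LANGUAGE"]` live in the package and are not reproduced in this diff.

```python
import os
from argparse import Namespace

def env_value(name, default, cast=str):
    """Simplified stand-in for utils_api.get_env_value(name, default, type)."""
    raw = os.getenv(name)
    if raw is None:
        return default
    if cast is bool:
        return raw.strip().lower() in ("1", "true", "yes", "on")
    return cast(raw)

args = Namespace()
args.max_parallel_insert = env_value("MAX_PARALLEL_INSERT", 2, int)
args.temperature = env_value("TEMPERATURE", 0.5, float)      # new in this commit
summary_language = os.getenv("SUMMARY_LANGUAGE", "English")  # real code falls back to PROMPTS["DEFAULT_LANGUAGE"]

print(f"Temperature:         {args.temperature}")
print(f"Max Parallel Insert: {args.max_parallel_insert}")
print(f"Summary Language:    {summary_language}")
```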