yangdx committed on
Commit 82c47ab · 1 Parent(s): 91c5685

Added temperature parameter for LLM

README.md CHANGED
@@ -630,11 +630,11 @@ rag.insert(["TEXT1", "TEXT2",...])
 rag = LightRAG(
     working_dir=WORKING_DIR,
     addon_params={
-        "insert_batch_size": 20  # Process 20 documents per batch
+        "insert_batch_size": 4   # Process 4 documents per batch
     }
 )

-rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Documents will be processed in batches of 20
+rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Documents will be processed in batches of 4
 ```

 The `insert_batch_size` parameter in `addon_params` controls how many documents are processed in each batch during insertion. This is useful for:
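
As a quick illustration of what "processed in batches of 4" means, here is a minimal sketch of batch-wise insertion. The `chunked` helper is made up for this example and is not part of LightRAG's API; only `rag.insert` and `addon_params["insert_batch_size"]` come from the README above.

```python
def chunked(items, batch_size):
    """Yield successive slices of at most `batch_size` items."""
    for start in range(0, len(items), batch_size):
        yield items[start:start + batch_size]

docs = [f"TEXT{i}" for i in range(1, 11)]  # ten documents to insert

# With insert_batch_size=4 the ten documents are split into batches of
# 4, 4 and 2; LightRAG does the equivalent internally when rag.insert(docs)
# is called with this addon_params setting.
for batch in chunked(docs, batch_size=4):
    print(len(batch), batch)
```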
env.example CHANGED
@@ -39,21 +39,23 @@
 # MAX_TOKEN_ENTITY_DESC=4000

 ### Settings for document indexing
-# SUMMARY_LANGUAGE=English
+ENABLE_LLM_CACHE_FOR_EXTRACT=true  # Enable LLM cache for entity extraction
+SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
 # MAX_TOKEN_SUMMARY=500  # Max tokens for entity or relations summary
 # MAX_PARALLEL_INSERT=2  # Number of parallel processing documents in one patch
-# MAX_ASYNC=4  # Max concurrency requests of LLM
-# ENABLE_LLM_CACHE_FOR_EXTRACT=true  # Enable LLM cache for entity extraction

 # EMBEDDING_BATCH_NUM=32  # num of chunks send to Embedding in one request
 # EMBEDDING_FUNC_MAX_ASYNC=16  # Max concurrency requests for Embedding
 # MAX_EMBED_TOKENS=8192

 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
-# MAX_TOKENS=32768  # Max tokens send to LLM (less than context size of the model)
-# TIMEOUT=150  # Time out in seconds for LLM, None for infinite timeout
+TIMEOUT=150  # Time out in seconds for LLM, None for infinite timeout
+TEMPERATURE=0.5
+MAX_ASYNC=4  # Max concurrency requests of LLM
+MAX_TOKENS=32768  # Max tokens send to LLM (less than context size of the model)
+
 LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_API_KEY=your_api_key
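
The headline change in env.example is the new `TEMPERATURE` setting (default 0.5), alongside previously commented-out options that are now enabled by default. Below is a minimal sketch of reading these values with plain `os.getenv` and the defaults shown above; the server itself uses its own `get_env_value` helper from utils_api.py, not this code.

```python
import os

# Defaults mirror the values written into env.example by this commit;
# this loader is illustrative only, not the server's actual parser.
TIMEOUT = int(os.getenv("TIMEOUT", "150"))            # seconds per LLM request
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.5"))  # new LLM sampling temperature
MAX_ASYNC = int(os.getenv("MAX_ASYNC", "4"))          # max concurrent LLM requests
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "32768"))    # max tokens sent to the LLM

print(f"timeout={TIMEOUT}s temperature={TEMPERATURE} "
      f"max_async={MAX_ASYNC} max_tokens={MAX_TOKENS}")
```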
lightrag/api/lightrag_server.py CHANGED
@@ -207,6 +207,7 @@ def create_app(args):
             history_messages=history_messages,
             base_url=args.llm_binding_host,
             api_key=args.llm_binding_api_key,
+            temperature=args.temperature,
             **kwargs,
         )

@@ -230,6 +231,7 @@ def create_app(args):
             base_url=args.llm_binding_host,
             api_key=os.getenv("AZURE_OPENAI_API_KEY"),
             api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
+            temperature=args.temperature,
             **kwargs,
         )

@@ -302,6 +304,7 @@ def create_app(args):
             },
             namespace_prefix=args.namespace_prefix,
             auto_manage_storages_states=False,
+            max_parallel_insert=args.max_parallel_insert,
         )
     else:  # azure_openai
         rag = LightRAG(
@@ -331,6 +334,7 @@ def create_app(args):
             },
             namespace_prefix=args.namespace_prefix,
             auto_manage_storages_states=False,
+            max_parallel_insert=args.max_parallel_insert,
         )

     # Add routes
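
Both the OpenAI-compatible and the Azure OpenAI branches now forward `args.temperature` into the completion call, and both LightRAG constructors receive `max_parallel_insert` directly. The sketch below shows the temperature plumbing with a hypothetical `llm_complete` stand-in; the actual binding functions called by `create_app` are outside this hunk.

```python
import asyncio
from types import SimpleNamespace

async def llm_complete(prompt: str, **kwargs) -> str:
    # Hypothetical stand-in for an OpenAI/Azure-style completion binding;
    # it only reports the sampling temperature it received.
    return f"'{prompt}' answered with temperature={kwargs.get('temperature')}"

# Stand-in for the parsed server arguments.
args = SimpleNamespace(
    temperature=0.5,
    llm_binding_host="http://localhost:11434",
    llm_binding_api_key="your_api_key",
)

async def llm_model_func(prompt: str, **kwargs) -> str:
    # Mirrors the diff: fixed connection settings plus the new temperature
    # are merged with whatever per-call kwargs the caller supplies.
    return await llm_complete(
        prompt,
        base_url=args.llm_binding_host,
        api_key=args.llm_binding_api_key,
        temperature=args.temperature,
        **kwargs,
    )

print(asyncio.run(llm_model_func("hello")))
```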
lightrag/api/routers/document_routes.py CHANGED
@@ -475,8 +475,8 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
     if not new_files:
         return

-    # Get MAX_PARALLEL_INSERT from global_args
-    max_parallel = global_args["max_parallel_insert"]
+    # Get MAX_PARALLEL_INSERT from global_args["main_args"]
+    max_parallel = global_args["main_args"].max_parallel_insert
     # Calculate batch size as 2 * MAX_PARALLEL_INSERT
     batch_size = 2 * max_parallel
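
The batch-size derivation itself is unchanged; only the source of `max_parallel` moved, from a plain dict entry to the parsed args object stored under `global_args["main_args"]`. For reference, a small sketch of the resulting batching arithmetic (illustrative only; `run_scanning_process` is not reproduced here):

```python
# MAX_PARALLEL_INSERT=2 (the env.example default) gives batches of 4 files.
max_parallel = 2
batch_size = 2 * max_parallel

new_files = [f"doc_{i}.txt" for i in range(10)]

for i in range(0, len(new_files), batch_size):
    batch = new_files[i:i + batch_size]
    print(f"indexing batch of {len(batch)}: {batch}")
```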
 
lightrag/api/utils_api.py CHANGED
@@ -14,6 +14,7 @@ from dotenv import load_dotenv
 from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
 from starlette.status import HTTP_403_FORBIDDEN
 from .auth import auth_handler
+from ..prompt import PROMPTS

 # Load environment variables
 load_dotenv()
@@ -364,9 +365,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
     args.vector_storage = get_env_value(
         "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
     )

     # Get MAX_PARALLEL_INSERT from environment
-    global_args["max_parallel_insert"] = get_env_value("MAX_PARALLEL_INSERT", 2, int)
+    args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)

     # Handle openai-ollama special case
     if args.llm_binding == "openai-ollama":
@@ -396,6 +397,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
+
+    # Inject LLM temperature configuration
+    args.temperature = get_env_value("TEMPERATURE", 0.5, float)

     # Select Document loading tool (DOCLING, DEFAULT)
     args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
@@ -464,6 +468,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.llm_binding_host}")
     ASCIIColors.white(" ├─ Model: ", end="")
     ASCIIColors.yellow(f"{args.llm_model}")
+    ASCIIColors.white(" ├─ Temperature: ", end="")
+    ASCIIColors.yellow(f"{args.temperature}")
+    ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
+    ASCIIColors.yellow(f"{args.max_async}")
+    ASCIIColors.white(" ├─ Max Tokens: ", end="")
+    ASCIIColors.yellow(f"{args.max_tokens}")
     ASCIIColors.white(" └─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")

@@ -479,13 +489,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.embedding_dim}")

     # RAG Configuration
+    summary_language = os.getenv("SUMMARY_LANGUAGE", PROMPTS["DEFAULT_LANGUAGE"])
     ASCIIColors.magenta("\n⚙️ RAG Configuration:")
-    ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
-    ASCIIColors.yellow(f"{args.max_async}")
+    ASCIIColors.white(" ├─ Summary Language: ", end="")
+    ASCIIColors.yellow(f"{summary_language}")
     ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
-    ASCIIColors.yellow(f"{global_args['max_parallel_insert']}")
-    ASCIIColors.white(" ├─ Max Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_tokens}")
+    ASCIIColors.yellow(f"{args.max_parallel_insert}")
     ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_embed_tokens}")
     ASCIIColors.white(" ├─ Chunk Size: ", end="")
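
Taken together, `parse_args` now owns both `max_parallel_insert` and the new `temperature`, and the splash screen reports them along with the summary language. A quick way to sanity-check the values without starting the server is to mimic just those lookups; the sketch below uses a simplified stand-in for `get_env_value`, since the real helper and `PROMPTS["DEFAULT_LANGUAGE"]` live in the package and are not reproduced in this diff.

```python
import os
from argparse import Namespace

def env_value(name, default, cast=str):
    """Simplified stand-in for utils_api.get_env_value(name, default, type)."""
    raw = os.getenv(name)
    if raw is None:
        return default
    if cast is bool:
        return raw.strip().lower() in ("1", "true", "yes", "on")
    return cast(raw)

args = Namespace()
args.max_parallel_insert = env_value("MAX_PARALLEL_INSERT", 2, int)
args.temperature = env_value("TEMPERATURE", 0.5, float)      # new in this commit
summary_language = os.getenv("SUMMARY_LANGUAGE", "English")  # real code falls back to PROMPTS["DEFAULT_LANGUAGE"]

print(f"Temperature:         {args.temperature}")
print(f"Max Parallel Insert: {args.max_parallel_insert}")
print(f"Summary Language:    {summary_language}")
```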