Roy committed

Commit 888b9e4 · 2 parents: 8bbd053, 4e9e345

main_merge
Files changed (38)
  1. README.md +70 -0
  2. env.example +9 -1
  3. examples/lightrag_openai_neo4j_milvus_redis_demo.py +2 -2
  4. examples/test_postgres.py +51 -0
  5. lightrag/api/README.md +18 -0
  6. lightrag/api/auth.py +41 -0
  7. lightrag/api/lightrag_server.py +29 -9
  8. lightrag/api/requirements.txt +10 -0
  9. lightrag/api/routers/document_routes.py +6 -3
  10. lightrag/api/routers/graph_routes.py +17 -8
  11. lightrag/api/routers/query_routes.py +2 -2
  12. lightrag/api/utils_api.py +21 -3
  13. lightrag/api/webui/assets/{index-rP-YlyR1.css → index-CH-3l4_Z.css} +0 -0
  14. lightrag/api/webui/assets/{index-DbuMPJAD.js → index-CJz72b6Q.js} +0 -0
  15. lightrag/api/webui/index.html +0 -0
  16. lightrag/base.py +1 -1
  17. lightrag/kg/chroma_impl.py +40 -0
  18. lightrag/kg/faiss_impl.py +21 -0
  19. lightrag/kg/milvus_impl.py +25 -0
  20. lightrag/kg/mongo_impl.py +26 -0
  21. lightrag/kg/nano_vector_db_impl.py +20 -0
  22. lightrag/kg/networkx_impl.py +47 -20
  23. lightrag/kg/oracle_impl.py +35 -0
  24. lightrag/kg/postgres_impl.py +145 -41
  25. lightrag/kg/qdrant_impl.py +41 -1
  26. lightrag/kg/tidb_impl.py +65 -0
  27. lightrag/lightrag.py +489 -5
  28. lightrag/operate.py +4 -3
  29. lightrag/prompt.py +80 -42
  30. lightrag_webui/src/api/lightrag.ts +6 -2
  31. lightrag_webui/src/components/graph/GraphControl.tsx +10 -7
  32. lightrag_webui/src/components/graph/Settings.tsx +17 -2
  33. lightrag_webui/src/components/ui/Input.tsx +1 -1
  34. lightrag_webui/src/hooks/useLightragGraph.tsx +10 -6
  35. lightrag_webui/src/stores/settings.ts +6 -0
  36. lightrag_webui/src/vite-env.d.ts +10 -0
  37. lightrag_webui/tsconfig.json +1 -1
  38. lightrag_webui/vite.config.ts +16 -1
README.md CHANGED
@@ -849,6 +849,76 @@ All operations are available in both synchronous and asynchronous versions. The
849
 
850
  These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent.
851
 
852
  ## Cache
853
 
854
  <details>
 
849
 
850
  These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent.
851
 
852
+ ## Entity Merging
853
+
854
+ <details>
855
+ <summary> <b>Merge Entities and Their Relationships</b> </summary>
856
+
857
+ LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships:
858
+
859
+ ```python
860
+ # Basic entity merging
861
+ rag.merge_entities(
862
+ source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"],
863
+ target_entity="AI Technology"
864
+ )
865
+ ```
866
+
867
+ With custom merge strategy:
868
+
869
+ ```python
870
+ # Define custom merge strategy for different fields
871
+ rag.merge_entities(
872
+ source_entities=["John Smith", "Dr. Smith", "J. Smith"],
873
+ target_entity="John Smith",
874
+ merge_strategy={
875
+ "description": "concatenate", # Combine all descriptions
876
+ "entity_type": "keep_first", # Keep the entity type from the first entity
877
+ "source_id": "join_unique" # Combine all unique source IDs
878
+ }
879
+ )
880
+ ```
881
+
882
+ With custom target entity data:
883
+
884
+ ```python
885
+ # Specify exact values for the merged entity
886
+ rag.merge_entities(
887
+ source_entities=["New York", "NYC", "Big Apple"],
888
+ target_entity="New York City",
889
+ target_entity_data={
890
+ "entity_type": "LOCATION",
891
+ "description": "New York City is the most populous city in the United States.",
892
+ }
893
+ )
894
+ ```
895
+
896
+ Advanced usage combining both approaches:
897
+
898
+ ```python
899
+ # Merge company entities with both strategy and custom data
900
+ rag.merge_entities(
901
+ source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"],
902
+ target_entity="Microsoft",
903
+ merge_strategy={
904
+ "description": "concatenate", # Combine all descriptions
905
+ "source_id": "join_unique" # Combine source IDs
906
+ },
907
+ target_entity_data={
908
+ "entity_type": "ORGANIZATION",
909
+ }
910
+ )
911
+ ```
912
+
913
+ When merging entities:
914
+ * All relationships from source entities are redirected to the target entity
915
+ * Duplicate relationships are intelligently merged
916
+ * Self-relationships (loops) are prevented
917
+ * Source entities are removed after merging
918
+ * Relationship weights and attributes are preserved
919
+
920
+ </details>
921
+
922
  ## Cache
923
 
924
  <details>
env.example CHANGED
@@ -48,8 +48,9 @@
48
  # CHUNK_OVERLAP_SIZE=100
49
  # MAX_TOKENS=32768 # Max tokens sent to LLM for summarization
50
  # MAX_TOKEN_SUMMARY=500 # Max tokens for entity or relations summary
51
- # LANGUAGE=English
52
  # MAX_EMBED_TOKENS=8192
 
53
 
54
  ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
55
  LLM_BINDING=ollama
@@ -148,3 +149,10 @@ QDRANT_URL=http://localhost:16333
148
 
149
  ### Redis
150
  REDIS_URI=redis://localhost:6379
48
  # CHUNK_OVERLAP_SIZE=100
49
  # MAX_TOKENS=32768 # Max tokens sent to LLM for summarization
50
  # MAX_TOKEN_SUMMARY=500 # Max tokens for entity or relations summary
51
+ # SUMMARY_LANGUAGE=English
52
  # MAX_EMBED_TOKENS=8192
53
+ # ENABLE_LLM_CACHE_FOR_EXTRACT=false # Enable LLM cache for entity extraction, defaults to false
54
 
55
  ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
56
  LLM_BINDING=ollama
 
149
 
150
  ### Redis
151
  REDIS_URI=redis://localhost:6379
152
+
153
+ # For jwt auth
154
+ AUTH_USERNAME=admin # login name
155
+ AUTH_PASSWORD=admin123 # password
156
+ TOKEN_SECRET=your-key # JWT key
157
+ TOKEN_EXPIRE_HOURS=4 # expire duration
158
+ WHITELIST_PATHS=/login,/health # white list
examples/lightrag_openai_neo4j_milvus_redis_demo.py CHANGED
@@ -37,8 +37,8 @@ async def llm_model_func(
37
  prompt,
38
  system_prompt=system_prompt,
39
  history_messages=history_messages,
40
- api_key="sk-91d0b59f25554251aa813ed756d79a6d",
41
- base_url="https://api.deepseek.com",
42
  **kwargs,
43
  )
44
 
 
37
  prompt,
38
  system_prompt=system_prompt,
39
  history_messages=history_messages,
40
+ api_key="",
41
+ base_url="",
42
  **kwargs,
43
  )
44
 
examples/test_postgres.py ADDED
@@ -0,0 +1,51 @@
1
+ import os
2
+ import asyncio
3
+ from lightrag.kg.postgres_impl import PGGraphStorage
4
+ from lightrag.llm.ollama import ollama_embedding
5
+ from lightrag.utils import EmbeddingFunc
6
+
7
+ #########
8
+ # Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
9
+ # import nest_asyncio
10
+ # nest_asyncio.apply()
11
+ #########
12
+
13
+ WORKING_DIR = "./local_neo4jWorkDir"
14
+
15
+ if not os.path.exists(WORKING_DIR):
16
+ os.mkdir(WORKING_DIR)
17
+
18
+ # AGE
19
+ os.environ["AGE_GRAPH_NAME"] = "dickens"
20
+
21
+ os.environ["POSTGRES_HOST"] = "localhost"
22
+ os.environ["POSTGRES_PORT"] = "15432"
23
+ os.environ["POSTGRES_USER"] = "rag"
24
+ os.environ["POSTGRES_PASSWORD"] = "rag"
25
+ os.environ["POSTGRES_DATABASE"] = "rag"
26
+
27
+
28
+ async def main():
29
+ graph_db = PGGraphStorage(
30
+ namespace="dickens",
31
+ embedding_func=EmbeddingFunc(
32
+ embedding_dim=1024,
33
+ max_token_size=8192,
34
+ func=lambda texts: ollama_embedding(
35
+ texts, embed_model="bge-m3", host="http://localhost:11434"
36
+ ),
37
+ ),
38
+ global_config={},
39
+ )
40
+ await graph_db.initialize()
41
+ labels = await graph_db.get_all_labels()
42
+ print("all labels", labels)
43
+
44
+ res = await graph_db.get_knowledge_graph("FEZZIWIG")
45
+ print("knowledge graphs", res)
46
+
47
+ await graph_db.finalize()
48
+
49
+
50
+ if __name__ == "__main__":
51
+ asyncio.run(main())
lightrag/api/README.md CHANGED
@@ -223,6 +223,11 @@ LightRAG supports binding to various LLM/Embedding backends:
223
 
224
  Use environment variable `LLM_BINDING` or CLI argument `--llm-binding` to select the LLM backend type. Use environment variable `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select the embedding backend type.
225
 
226
  ### Storage Types Supported
227
 
228
  LightRAG uses 4 types of storage for different purposes:
@@ -387,6 +392,19 @@ Note: If you don't need the API functionality, you can install the base package
387
  pip install lightrag-hku
388
  ```
389
 
390
  ## API Endpoints
391
 
392
  All servers (LoLLMs, Ollama, OpenAI and Azure OpenAI) provide the same REST API endpoints for RAG functionality. When API Server is running, visit:
 
223
 
224
  Use environment variable `LLM_BINDING` or CLI argument `--llm-binding` to select the LLM backend type. Use environment variable `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select the embedding backend type.
225
 
226
+ ### Entity Extraction Configuration
227
+ * ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: false)
228
+
229
+ It's very common to set `ENABLE_LLM_CACHE_FOR_EXTRACT` to true in test environments to reduce the cost of LLM calls.
230
+
231
  ### Storage Types Supported
232
 
233
  LightRAG uses 4 types of storage for different purposes:
 
392
  pip install lightrag-hku
393
  ```
394
 
395
+ ## Authentication Endpoints
396
+
397
+ ### JWT Authentication Mechanism
398
+ LightRAG API Server implements JWT-based authentication using HS256 algorithm. To enable secure access control, the following environment variables are required:
399
+ ```bash
400
+ # For jwt auth
401
+ AUTH_USERNAME=admin # login name
402
+ AUTH_PASSWORD=admin123 # password
403
+ TOKEN_SECRET=your-key # JWT key
404
+ TOKEN_EXPIRE_HOURS=4 # expire duration
405
+ WHITELIST_PATHS=/api1,/api2 # white list. /login,/health,/docs,/redoc,/openapi.json are whitelisted by default.
406
+ ```
407
+
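With these variables set, a client first obtains a JWT from `/login` (form-encoded credentials) and then sends it as a Bearer token on later requests. A minimal sketch using `httpx` (which this commit adds to the API requirements); the port `9621` and the `/query` payload are illustrative assumptions:

```python
import httpx

BASE = "http://localhost:9621"  # assumption: address of the running API server

# /login expects OAuth2 form fields and returns {"access_token": ..., "token_type": "bearer"}
resp = httpx.post(f"{BASE}/login", data={"username": "admin", "password": "admin123"})
token = resp.json()["access_token"]

# Protected routes (documents, graph, query) accept the JWT as a Bearer token
headers = {"Authorization": f"Bearer {token}"}
answer = httpx.post(
    f"{BASE}/query",
    headers=headers,
    json={"query": "What is LightRAG?", "mode": "hybrid"},
)
print(answer.json())
```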
408
  ## API Endpoints
409
 
410
  All servers (LoLLMs, Ollama, OpenAI and Azure OpenAI) provide the same REST API endpoints for RAG functionality. When API Server is running, visit:
lightrag/api/auth.py ADDED
@@ -0,0 +1,41 @@
1
+ import os
2
+ from datetime import datetime, timedelta
3
+ import jwt
4
+ from fastapi import HTTPException, status
5
+ from pydantic import BaseModel
6
+
7
+
8
+ class TokenPayload(BaseModel):
9
+ sub: str
10
+ exp: datetime
11
+
12
+
13
+ class AuthHandler:
14
+ def __init__(self):
15
+ self.secret = os.getenv("TOKEN_SECRET", "4f85ds4f56dsf46")
16
+ self.algorithm = "HS256"
17
+ self.expire_hours = int(os.getenv("TOKEN_EXPIRE_HOURS", 4))
18
+
19
+ def create_token(self, username: str) -> str:
20
+ expire = datetime.utcnow() + timedelta(hours=self.expire_hours)
21
+ payload = TokenPayload(sub=username, exp=expire)
22
+ return jwt.encode(payload.dict(), self.secret, algorithm=self.algorithm)
23
+
24
+ def validate_token(self, token: str) -> str:
25
+ try:
26
+ payload = jwt.decode(token, self.secret, algorithms=[self.algorithm])
27
+ expire_timestamp = payload["exp"]
28
+ expire_time = datetime.utcfromtimestamp(expire_timestamp)
29
+
30
+ if datetime.utcnow() > expire_time:
31
+ raise HTTPException(
32
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Token expired"
33
+ )
34
+ return payload["sub"]
35
+ except jwt.PyJWTError:
36
+ raise HTTPException(
37
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
38
+ )
39
+
40
+
41
+ auth_handler = AuthHandler()
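The handler above reads its secret and expiry from the environment when constructed; a quick illustrative round trip (not part of the commit) might look like this:

```python
# Illustrative sketch: exercise AuthHandler on its own
import os

os.environ["TOKEN_SECRET"] = "test-secret"       # assumption: any non-default secret
os.environ["TOKEN_EXPIRE_HOURS"] = "1"

from lightrag.api.auth import AuthHandler

handler = AuthHandler()
token = handler.create_token("admin")            # encode a JWT whose subject is "admin"
assert handler.validate_token(token) == "admin"  # decode it and recover the subject
```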
lightrag/api/lightrag_server.py CHANGED
@@ -2,10 +2,7 @@
2
  LightRAG FastAPI Server
3
  """
4
 
5
- from fastapi import (
6
- FastAPI,
7
- Depends,
8
- )
9
  import asyncio
10
  import os
11
  import logging
@@ -45,12 +42,17 @@ from lightrag.kg.shared_storage import (
45
  initialize_pipeline_status,
46
  get_all_update_flags_status,
47
  )
 
 
48
 
49
  # Load environment variables
50
  # Updated to use the .env that is inside the current folder
51
  # This update allows the user to put a different .env file for each lightrag folder
52
  load_dotenv(".env", override=True)
53
 
 
 
 
54
  # Initialize config parser
55
  config = configparser.ConfigParser()
56
  config.read("config.ini")
@@ -324,16 +326,13 @@ def create_app(args):
324
  vector_db_storage_cls_kwargs={
325
  "cosine_better_than_threshold": args.cosine_threshold
326
  },
327
- enable_llm_cache_for_entity_extract=False, # set to True for debuging to reduce llm fee
328
  embedding_cache_config={
329
  "enabled": True,
330
  "similarity_threshold": 0.95,
331
  "use_llm_check": False,
332
  },
333
  namespace_prefix=args.namespace_prefix,
334
- addon_params={
335
- "language": args.language,
336
- },
337
  auto_manage_storages_states=False,
338
  )
339
  else: # azure_openai
@@ -356,7 +355,7 @@ def create_app(args):
356
  vector_db_storage_cls_kwargs={
357
  "cosine_better_than_threshold": args.cosine_threshold
358
  },
359
- enable_llm_cache_for_entity_extract=False, # set to True for debuging to reduce llm fee
360
  embedding_cache_config={
361
  "enabled": True,
362
  "similarity_threshold": 0.95,
@@ -375,6 +374,27 @@ def create_app(args):
375
  ollama_api = OllamaAPI(rag, top_k=args.top_k)
376
  app.include_router(ollama_api.router, prefix="/api")
377
 
378
  @app.get("/health", dependencies=[Depends(optional_api_key)])
379
  async def get_status():
380
  """Get current system status"""
 
2
  LightRAG FastAPI Server
3
  """
4
 
5
+ from fastapi import FastAPI, Depends, HTTPException, status
 
 
 
6
  import asyncio
7
  import os
8
  import logging
 
42
  initialize_pipeline_status,
43
  get_all_update_flags_status,
44
  )
45
+ from fastapi.security import OAuth2PasswordRequestForm
46
+ from .auth import auth_handler
47
 
48
  # Load environment variables
49
  # Updated to use the .env that is inside the current folder
50
  # This update allows the user to put a different .env file for each lightrag folder
51
  load_dotenv(".env", override=True)
52
 
53
+ # Read entity extraction cache config
54
+ enable_llm_cache = os.getenv("ENABLE_LLM_CACHE_FOR_EXTRACT", "false").lower() == "true"
55
+
56
  # Initialize config parser
57
  config = configparser.ConfigParser()
58
  config.read("config.ini")
 
326
  vector_db_storage_cls_kwargs={
327
  "cosine_better_than_threshold": args.cosine_threshold
328
  },
329
+ enable_llm_cache_for_entity_extract=enable_llm_cache, # Read from environment variable
330
  embedding_cache_config={
331
  "enabled": True,
332
  "similarity_threshold": 0.95,
333
  "use_llm_check": False,
334
  },
335
  namespace_prefix=args.namespace_prefix,
 
 
 
336
  auto_manage_storages_states=False,
337
  )
338
  else: # azure_openai
 
355
  vector_db_storage_cls_kwargs={
356
  "cosine_better_than_threshold": args.cosine_threshold
357
  },
358
+ enable_llm_cache_for_entity_extract=enable_llm_cache, # Read from environment variable
359
  embedding_cache_config={
360
  "enabled": True,
361
  "similarity_threshold": 0.95,
 
374
  ollama_api = OllamaAPI(rag, top_k=args.top_k)
375
  app.include_router(ollama_api.router, prefix="/api")
376
 
377
+ @app.post("/login")
378
+ async def login(form_data: OAuth2PasswordRequestForm = Depends()):
379
+ username = os.getenv("AUTH_USERNAME")
380
+ password = os.getenv("AUTH_PASSWORD")
381
+
382
+ if not (username and password):
383
+ raise HTTPException(
384
+ status_code=status.HTTP_501_NOT_IMPLEMENTED,
385
+ detail="Authentication not configured",
386
+ )
387
+
388
+ if form_data.username != username or form_data.password != password:
389
+ raise HTTPException(
390
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Incorrect credentials"
391
+ )
392
+
393
+ return {
394
+ "access_token": auth_handler.create_token(username),
395
+ "token_type": "bearer",
396
+ }
397
+
398
  @app.get("/health", dependencies=[Depends(optional_api_key)])
399
  async def get_status():
400
  """Get current system status"""
lightrag/api/requirements.txt CHANGED
@@ -1,10 +1,20 @@
1
  aiofiles
2
  ascii_colors
 
 
3
  fastapi
 
 
 
4
  numpy
 
 
5
  pipmaster
 
6
  python-dotenv
 
7
  python-multipart
 
8
  tenacity
9
  tiktoken
10
  uvicorn
 
1
  aiofiles
2
  ascii_colors
3
+ asyncpg
4
+ distro
5
  fastapi
6
+ httpcore
7
+ httpx
8
+ jiter
9
  numpy
10
+ openai
11
+ passlib[bcrypt]
12
  pipmaster
13
+ PyJWT
14
  python-dotenv
15
+ python-jose[cryptography]
16
  python-multipart
17
+ pytz
18
  tenacity
19
  tiktoken
20
  uvicorn
lightrag/api/routers/document_routes.py CHANGED
@@ -16,10 +16,13 @@ from pydantic import BaseModel, Field, field_validator
16
 
17
  from lightrag import LightRAG
18
  from lightrag.base import DocProcessingStatus, DocStatus
19
- from ..utils_api import get_api_key_dependency
20
 
21
-
22
- router = APIRouter(prefix="/documents", tags=["documents"])
 
 
 
23
 
24
  # Temporary file prefix
25
  temp_prefix = "__tmp__"
 
16
 
17
  from lightrag import LightRAG
18
  from lightrag.base import DocProcessingStatus, DocStatus
19
+ from ..utils_api import get_api_key_dependency, get_auth_dependency
20
 
21
+ router = APIRouter(
22
+ prefix="/documents",
23
+ tags=["documents"],
24
+ dependencies=[Depends(get_auth_dependency())],
25
+ )
26
 
27
  # Temporary file prefix
28
  temp_prefix = "__tmp__"
lightrag/api/routers/graph_routes.py CHANGED
@@ -3,12 +3,11 @@ This module contains all graph-related routes for the LightRAG API.
3
  """
4
 
5
  from typing import Optional
6
-
7
  from fastapi import APIRouter, Depends
8
 
9
- from ..utils_api import get_api_key_dependency
10
 
11
- router = APIRouter(tags=["graph"])
12
 
13
 
14
  def create_graph_routes(rag, api_key: Optional[str] = None):
@@ -25,23 +24,33 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
25
  return await rag.get_graph_labels()
26
 
27
  @router.get("/graphs", dependencies=[Depends(optional_api_key)])
28
- async def get_knowledge_graph(label: str, max_depth: int = 3):
 
 
29
  """
30
  Retrieve a connected subgraph of nodes where the label includes the specified label.
31
  Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
32
  When reducing the number of nodes, the prioritization criteria are as follows:
33
- 1. Label matching nodes take precedence
34
- 2. Followed by nodes directly connected to the matching nodes
35
- 3. Finally, the degree of the nodes
 
36
  Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000)
37
 
38
  Args:
39
  label (str): Label to get knowledge graph for
40
  max_depth (int, optional): Maximum depth of graph. Defaults to 3.
 
 
41
 
42
  Returns:
43
  Dict[str, List[str]]: Knowledge graph for label
44
  """
45
- return await rag.get_knowledge_graph(node_label=label, max_depth=max_depth)
46
 
47
  return router
 
3
  """
4
 
5
  from typing import Optional
 
6
  from fastapi import APIRouter, Depends
7
 
8
+ from ..utils_api import get_api_key_dependency, get_auth_dependency
9
 
10
+ router = APIRouter(tags=["graph"], dependencies=[Depends(get_auth_dependency())])
11
 
12
 
13
  def create_graph_routes(rag, api_key: Optional[str] = None):
 
24
  return await rag.get_graph_labels()
25
 
26
  @router.get("/graphs", dependencies=[Depends(optional_api_key)])
27
+ async def get_knowledge_graph(
28
+ label: str, max_depth: int = 3, min_degree: int = 0, inclusive: bool = False
29
+ ):
30
  """
31
  Retrieve a connected subgraph of nodes where the label includes the specified label.
32
  Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
33
  When reducing the number of nodes, the prioritization criteria are as follows:
34
+ 1. min_degree does not affect nodes directly connected to the matching nodes
35
+ 2. Label matching nodes take precedence
36
+ 3. Followed by nodes directly connected to the matching nodes
37
+ 4. Finally, the degree of the nodes
38
  Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000)
39
 
40
  Args:
41
  label (str): Label to get knowledge graph for
42
  max_depth (int, optional): Maximum depth of graph. Defaults to 3.
43
+ inclusive (bool, optional): If True, match nodes whose label includes the given label (partial match). Defaults to False.
44
+ min_degree (int, optional): Minimum degree of nodes. Defaults to 0.
45
 
46
  Returns:
47
  Dict[str, List[str]]: Knowledge graph for label
48
  """
49
+ return await rag.get_knowledge_graph(
50
+ node_label=label,
51
+ max_depth=max_depth,
52
+ inclusive=inclusive,
53
+ min_degree=min_degree,
54
+ )
55
 
56
  return router
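With the extra query parameters, a client can request an exact-match subgraph and filter out low-degree nodes. An illustrative request (the server address and entity label are assumptions; the Bearer token is only needed when JWT auth is configured):

```python
import httpx

resp = httpx.get(
    "http://localhost:9621/graphs",             # assumption: default API server address
    params={"label": "Microsoft", "max_depth": 3, "min_degree": 1, "inclusive": True},
    headers={"Authorization": "Bearer <jwt>"},  # placeholder token
)
print(resp.json())
```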
lightrag/api/routers/query_routes.py CHANGED
@@ -8,12 +8,12 @@ from typing import Any, Dict, List, Literal, Optional
8
 
9
  from fastapi import APIRouter, Depends, HTTPException
10
  from lightrag.base import QueryParam
11
- from ..utils_api import get_api_key_dependency
12
  from pydantic import BaseModel, Field, field_validator
13
 
14
  from ascii_colors import trace_exception
15
 
16
- router = APIRouter(tags=["query"])
17
 
18
 
19
  class QueryRequest(BaseModel):
 
8
 
9
  from fastapi import APIRouter, Depends, HTTPException
10
  from lightrag.base import QueryParam
11
+ from ..utils_api import get_api_key_dependency, get_auth_dependency
12
  from pydantic import BaseModel, Field, field_validator
13
 
14
  from ascii_colors import trace_exception
15
 
16
+ router = APIRouter(tags=["query"], dependencies=[Depends(get_auth_dependency())])
17
 
18
 
19
  class QueryRequest(BaseModel):
lightrag/api/utils_api.py CHANGED
@@ -9,10 +9,11 @@ import sys
9
  import logging
10
  from ascii_colors import ASCIIColors
11
  from lightrag.api import __api_version__
12
- from fastapi import HTTPException, Security
13
  from dotenv import load_dotenv
14
- from fastapi.security import APIKeyHeader
15
  from starlette.status import HTTP_403_FORBIDDEN
 
16
 
17
  # Load environment variables
18
  load_dotenv(override=True)
@@ -31,6 +32,24 @@ class OllamaServerInfos:
31
  ollama_server_infos = OllamaServerInfos()
32
 
33
 
34
  def get_api_key_dependency(api_key: Optional[str]):
35
  """
36
  Create an API key dependency for route protection.
@@ -340,7 +359,6 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
340
  # Inject chunk configuration
341
  args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
342
  args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
343
- args.language = get_env_value("LANGUAGE", "English")
344
 
345
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
346
 
 
9
  import logging
10
  from ascii_colors import ASCIIColors
11
  from lightrag.api import __api_version__
12
+ from fastapi import HTTPException, Security, Depends, Request
13
  from dotenv import load_dotenv
14
+ from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
15
  from starlette.status import HTTP_403_FORBIDDEN
16
+ from .auth import auth_handler
17
 
18
  # Load environment variables
19
  load_dotenv(override=True)
 
32
  ollama_server_infos = OllamaServerInfos()
33
 
34
 
35
+ def get_auth_dependency():
36
+ whitelist = os.getenv("WHITELIST_PATHS", "").split(",")
37
+
38
+ async def dependency(
39
+ request: Request,
40
+ token: str = Depends(OAuth2PasswordBearer(tokenUrl="login", auto_error=False)),
41
+ ):
42
+ if request.url.path in whitelist:
43
+ return
44
+
45
+ if not (os.getenv("AUTH_USERNAME") and os.getenv("AUTH_PASSWORD")):
46
+ return
47
+
48
+ auth_handler.validate_token(token)
49
+
50
+ return dependency
51
+
52
+
53
  def get_api_key_dependency(api_key: Optional[str]):
54
  """
55
  Create an API key dependency for route protection.
 
359
  # Inject chunk configuration
360
  args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
361
  args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
 
362
 
363
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
364
 
lightrag/api/webui/assets/{index-rP-YlyR1.css → index-CH-3l4_Z.css} RENAMED
Binary files a/lightrag/api/webui/assets/index-rP-YlyR1.css and b/lightrag/api/webui/assets/index-CH-3l4_Z.css differ
 
lightrag/api/webui/assets/{index-DbuMPJAD.js → index-CJz72b6Q.js} RENAMED
Binary files a/lightrag/api/webui/assets/index-DbuMPJAD.js and b/lightrag/api/webui/assets/index-CJz72b6Q.js differ
 
lightrag/api/webui/index.html CHANGED
Binary files a/lightrag/api/webui/index.html and b/lightrag/api/webui/index.html differ
 
lightrag/base.py CHANGED
@@ -206,7 +206,7 @@ class BaseGraphStorage(StorageNameSpace, ABC):
206
 
207
  @abstractmethod
208
  async def get_knowledge_graph(
209
- self, node_label: str, max_depth: int = 5
210
  ) -> KnowledgeGraph:
211
  """Retrieve a subgraph of the knowledge graph starting from a given node."""
212
 
 
206
 
207
  @abstractmethod
208
  async def get_knowledge_graph(
209
+ self, node_label: str, max_depth: int = 3
210
  ) -> KnowledgeGraph:
211
  """Retrieve a subgraph of the knowledge graph starting from a given node."""
212
 
lightrag/kg/chroma_impl.py CHANGED
@@ -229,3 +229,43 @@ class ChromaVectorDBStorage(BaseVectorStorage):
229
  except Exception as e:
230
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
231
  raise
229
  except Exception as e:
230
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
231
  raise
232
+
233
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
234
+ """Search for records with IDs starting with a specific prefix.
235
+
236
+ Args:
237
+ prefix: The prefix to search for in record IDs
238
+
239
+ Returns:
240
+ List of records with matching ID prefixes
241
+ """
242
+ try:
243
+ # Get all records from the collection
244
+ # Since ChromaDB doesn't directly support prefix search on IDs,
245
+ # we'll get all records and filter in Python
246
+ results = self._collection.get(
247
+ include=["metadatas", "documents", "embeddings"]
248
+ )
249
+
250
+ matching_records = []
251
+
252
+ # Filter records where ID starts with the prefix
253
+ for i, record_id in enumerate(results["ids"]):
254
+ if record_id.startswith(prefix):
255
+ matching_records.append(
256
+ {
257
+ "id": record_id,
258
+ "content": results["documents"][i],
259
+ "vector": results["embeddings"][i],
260
+ **results["metadatas"][i],
261
+ }
262
+ )
263
+
264
+ logger.debug(
265
+ f"Found {len(matching_records)} records with prefix '{prefix}'"
266
+ )
267
+ return matching_records
268
+
269
+ except Exception as e:
270
+ logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
271
+ raise
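Every vector storage backend touched by this commit gains the same `search_by_prefix` signature, so callers can look up records by ID prefix regardless of the configured backend. A usage sketch (the `ent-` prefix follows the `compute_mdhash_id(..., prefix="ent-")` convention used for entity IDs elsewhere in LightRAG):

```python
# Illustrative: list all entity records in any BaseVectorStorage backend
async def list_entity_records(storage):
    records = await storage.search_by_prefix("ent-")  # entity IDs start with "ent-"
    for rec in records:
        # every backend returns at least an "id" field; other keys depend on meta_fields
        print(rec["id"], rec.get("entity_name"))
    return records
```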
lightrag/kg/faiss_impl.py CHANGED
@@ -371,3 +371,24 @@ class FaissVectorDBStorage(BaseVectorStorage):
371
  return False # Return error
372
 
373
  return True # Return success
371
  return False # Return error
372
 
373
  return True # Return success
374
+
375
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
376
+ """Search for records with IDs starting with a specific prefix.
377
+
378
+ Args:
379
+ prefix: The prefix to search for in record IDs
380
+
381
+ Returns:
382
+ List of records with matching ID prefixes
383
+ """
384
+ matching_records = []
385
+
386
+ # Search for records with IDs starting with the prefix
387
+ for faiss_id, meta in self._id_to_meta.items():
388
+ if "__id__" in meta and meta["__id__"].startswith(prefix):
389
+ # Create a copy of all metadata and add "id" field
390
+ record = {**meta, "id": meta["__id__"]}
391
+ matching_records.append(record)
392
+
393
+ logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
394
+ return matching_records
lightrag/kg/milvus_impl.py CHANGED
@@ -206,3 +206,28 @@ class MilvusVectorDBStorage(BaseVectorStorage):
206
 
207
  except Exception as e:
208
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
206
 
207
  except Exception as e:
208
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
209
+
210
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
211
+ """Search for records with IDs starting with a specific prefix.
212
+
213
+ Args:
214
+ prefix: The prefix to search for in record IDs
215
+
216
+ Returns:
217
+ List of records with matching ID prefixes
218
+ """
219
+ try:
220
+ # Use Milvus query with expression to find IDs with the given prefix
221
+ expression = f'id like "{prefix}%"'
222
+ results = self._client.query(
223
+ collection_name=self.namespace,
224
+ filter=expression,
225
+ output_fields=list(self.meta_fields) + ["id"],
226
+ )
227
+
228
+ logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
229
+ return results
230
+
231
+ except Exception as e:
232
+ logger.error(f"Error searching for records with prefix '{prefix}': {e}")
233
+ return []
lightrag/kg/mongo_impl.py CHANGED
@@ -1045,6 +1045,32 @@ class MongoVectorDBStorage(BaseVectorStorage):
1045
  except PyMongoError as e:
1046
  logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
1047
 
1048
 
1049
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
1050
  collection_names = await db.list_collection_names()
 
1045
  except PyMongoError as e:
1046
  logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
1047
 
1048
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
1049
+ """Search for records with IDs starting with a specific prefix.
1050
+
1051
+ Args:
1052
+ prefix: The prefix to search for in record IDs
1053
+
1054
+ Returns:
1055
+ List of records with matching ID prefixes
1056
+ """
1057
+ try:
1058
+ # Use MongoDB regex to find documents where _id starts with the prefix
1059
+ cursor = self._data.find({"_id": {"$regex": f"^{prefix}"}})
1060
+ matching_records = await cursor.to_list(length=None)
1061
+
1062
+ # Format results
1063
+ results = [{**doc, "id": doc["_id"]} for doc in matching_records]
1064
+
1065
+ logger.debug(
1066
+ f"Found {len(results)} records with prefix '{prefix}' in {self.namespace}"
1067
+ )
1068
+ return results
1069
+
1070
+ except PyMongoError as e:
1071
+ logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
1072
+ return []
1073
+
1074
 
1075
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
1076
  collection_names = await db.list_collection_names()
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -236,3 +236,23 @@ class NanoVectorDBStorage(BaseVectorStorage):
236
  return False # Return error
237
 
238
  return True # Return success
236
  return False # Return error
237
 
238
  return True # Return success
239
+
240
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
241
+ """Search for records with IDs starting with a specific prefix.
242
+
243
+ Args:
244
+ prefix: The prefix to search for in record IDs
245
+
246
+ Returns:
247
+ List of records with matching ID prefixes
248
+ """
249
+ storage = await self.client_storage
250
+ matching_records = []
251
+
252
+ # Search for records with IDs starting with the prefix
253
+ for record in storage["data"]:
254
+ if "__id__" in record and record["__id__"].startswith(prefix):
255
+ matching_records.append({**record, "id": record["__id__"]})
256
+
257
+ logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
258
+ return matching_records
lightrag/kg/networkx_impl.py CHANGED
@@ -232,19 +232,26 @@ class NetworkXStorage(BaseGraphStorage):
232
  return sorted(list(labels))
233
 
234
  async def get_knowledge_graph(
235
- self, node_label: str, max_depth: int = 5
 
 
 
 
236
  ) -> KnowledgeGraph:
237
  """
238
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
239
  Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
240
  When reducing the number of nodes, the prioritization criteria are as follows:
241
- 1. Label matching nodes take precedence
242
- 2. Followed by nodes directly connected to the matching nodes
243
- 3. Finally, the degree of the nodes
 
244
 
245
  Args:
246
  node_label: Label of the starting node
247
  max_depth: Maximum depth of the subgraph
 
 
248
 
249
  Returns:
250
  KnowledgeGraph object containing nodes and edges
@@ -255,6 +262,10 @@ class NetworkXStorage(BaseGraphStorage):
255
 
256
  graph = await self._get_graph()
257
 
 
 
 
 
258
  # Handle special case for "*" label
259
  if node_label == "*":
260
  # For "*", return the entire graph including all nodes and edges
@@ -262,11 +273,16 @@ class NetworkXStorage(BaseGraphStorage):
262
  graph.copy()
263
  ) # Create a copy to avoid modifying the original graph
264
  else:
265
- # Find nodes with matching node id (partial match)
266
  nodes_to_explore = []
267
  for n, attr in graph.nodes(data=True):
268
- if node_label in str(n): # Use partial matching
269
- nodes_to_explore.append(n)
270
 
271
  if not nodes_to_explore:
272
  logger.warning(f"No nodes found with label {node_label}")
@@ -277,26 +293,37 @@ class NetworkXStorage(BaseGraphStorage):
277
  for start_node in nodes_to_explore:
278
  node_subgraph = nx.ego_graph(graph, start_node, radius=max_depth)
279
  combined_subgraph = nx.compose(combined_subgraph, node_subgraph)
280
- subgraph = combined_subgraph
281
-
282
- # Check if number of nodes exceeds max_graph_nodes
283
- if len(subgraph.nodes()) > MAX_GRAPH_NODES:
284
- origin_nodes = len(subgraph.nodes())
285
-
286
- node_degrees = dict(subgraph.degree())
287
-
288
- start_nodes = set()
289
- direct_connected_nodes = set()
290
 
291
- if node_label != "*" and nodes_to_explore:
 
292
  start_nodes = set(nodes_to_explore)
293
  # Get nodes directly connected to all start nodes
294
  for start_node in start_nodes:
295
- direct_connected_nodes.update(subgraph.neighbors(start_node))
 
 
296
 
297
  # Remove start nodes from directly connected nodes (avoid duplicates)
298
  direct_connected_nodes -= start_nodes
299
 
 
300
  def priority_key(node_item):
301
  node, degree = node_item
302
  # Priority order: start(2) > directly connected(1) > other nodes(0)
@@ -356,7 +383,7 @@ class NetworkXStorage(BaseGraphStorage):
356
  result.edges.append(
357
  KnowledgeGraphEdge(
358
  id=edge_id,
359
- type="RELATED",
360
  source=str(source),
361
  target=str(target),
362
  properties=edge_data,
 
232
  return sorted(list(labels))
233
 
234
  async def get_knowledge_graph(
235
+ self,
236
+ node_label: str,
237
+ max_depth: int = 3,
238
+ min_degree: int = 0,
239
+ inclusive: bool = False,
240
  ) -> KnowledgeGraph:
241
  """
242
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
243
  Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
244
  When reducing the number of nodes, the prioritization criteria are as follows:
245
+ 1. min_degree does not affect nodes directly connected to the matching nodes
246
+ 2. Label matching nodes take precedence
247
+ 3. Followed by nodes directly connected to the matching nodes
248
+ 4. Finally, the degree of the nodes
249
 
250
  Args:
251
  node_label: Label of the starting node
252
  max_depth: Maximum depth of the subgraph
253
+ min_degree: Minimum degree of nodes to include. Defaults to 0
254
+ inclusive: Do an inclusive search if true
255
 
256
  Returns:
257
  KnowledgeGraph object containing nodes and edges
 
262
 
263
  graph = await self._get_graph()
264
 
265
+ # Initialize sets for start nodes and direct connected nodes
266
+ start_nodes = set()
267
+ direct_connected_nodes = set()
268
+
269
  # Handle special case for "*" label
270
  if node_label == "*":
271
  # For "*", return the entire graph including all nodes and edges
 
273
  graph.copy()
274
  ) # Create a copy to avoid modifying the original graph
275
  else:
276
+ # Find nodes with matching node id based on search_mode
277
  nodes_to_explore = []
278
  for n, attr in graph.nodes(data=True):
279
+ node_str = str(n)
280
+ if not inclusive:
281
+ if node_label == node_str: # Use exact matching
282
+ nodes_to_explore.append(n)
283
+ else: # inclusive mode
284
+ if node_label in node_str: # Use partial matching
285
+ nodes_to_explore.append(n)
286
 
287
  if not nodes_to_explore:
288
  logger.warning(f"No nodes found with label {node_label}")
 
293
  for start_node in nodes_to_explore:
294
  node_subgraph = nx.ego_graph(graph, start_node, radius=max_depth)
295
  combined_subgraph = nx.compose(combined_subgraph, node_subgraph)
296
 
297
+ # Get start nodes and direct connected nodes
298
+ if nodes_to_explore:
299
  start_nodes = set(nodes_to_explore)
300
  # Get nodes directly connected to all start nodes
301
  for start_node in start_nodes:
302
+ direct_connected_nodes.update(
303
+ combined_subgraph.neighbors(start_node)
304
+ )
305
 
306
  # Remove start nodes from directly connected nodes (avoid duplicates)
307
  direct_connected_nodes -= start_nodes
308
 
309
+ subgraph = combined_subgraph
310
+
311
+ # Filter nodes based on min_degree, but keep start nodes and direct connected nodes
312
+ if min_degree > 0:
313
+ nodes_to_keep = [
314
+ node
315
+ for node, degree in subgraph.degree()
316
+ if node in start_nodes
317
+ or node in direct_connected_nodes
318
+ or degree >= min_degree
319
+ ]
320
+ subgraph = subgraph.subgraph(nodes_to_keep)
321
+
322
+ # Check if number of nodes exceeds max_graph_nodes
323
+ if len(subgraph.nodes()) > MAX_GRAPH_NODES:
324
+ origin_nodes = len(subgraph.nodes())
325
+ node_degrees = dict(subgraph.degree())
326
+
327
  def priority_key(node_item):
328
  node, degree = node_item
329
  # Priority order: start(2) > directly connected(1) > other nodes(0)
 
383
  result.edges.append(
384
  KnowledgeGraphEdge(
385
  id=edge_id,
386
+ type="DIRECTED",
387
  source=str(source),
388
  target=str(target),
389
  properties=edge_data,
lightrag/kg/oracle_impl.py CHANGED
@@ -494,6 +494,41 @@ class OracleVectorDBStorage(BaseVectorStorage):
494
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
495
  raise
496
 
497
 
498
  @final
499
  @dataclass
 
494
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
495
  raise
496
 
497
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
498
+ """Search for records with IDs starting with a specific prefix.
499
+
500
+ Args:
501
+ prefix: The prefix to search for in record IDs
502
+
503
+ Returns:
504
+ List of records with matching ID prefixes
505
+ """
506
+ try:
507
+ # Determine the appropriate table based on namespace
508
+ table_name = namespace_to_table_name(self.namespace)
509
+
510
+ # Create SQL query to find records with IDs starting with prefix
511
+ search_sql = f"""
512
+ SELECT * FROM {table_name}
513
+ WHERE workspace = :workspace
514
+ AND id LIKE :prefix_pattern
515
+ ORDER BY id
516
+ """
517
+
518
+ params = {"workspace": self.db.workspace, "prefix_pattern": f"{prefix}%"}
519
+
520
+ # Execute query and get results
521
+ results = await self.db.query(search_sql, params, multirows=True)
522
+
523
+ logger.debug(
524
+ f"Found {len(results) if results else 0} records with prefix '{prefix}'"
525
+ )
526
+ return results or []
527
+
528
+ except Exception as e:
529
+ logger.error(f"Error searching records with prefix '{prefix}': {e}")
530
+ return []
531
+
532
 
533
  @final
534
  @dataclass
lightrag/kg/postgres_impl.py CHANGED
@@ -585,6 +585,41 @@ class PGVectorStorage(BaseVectorStorage):
585
  except Exception as e:
586
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
587
 
588
 
589
  @final
590
  @dataclass
@@ -785,42 +820,85 @@ class PGGraphStorage(BaseGraphStorage):
785
  v = record[k]
786
  # agtype comes back '{key: value}::type' which must be parsed
787
  if isinstance(v, str) and "::" in v:
788
- dtype = v.split("::")[-1]
789
- v = v.split("::")[0]
790
- if dtype == "vertex":
791
- vertex = json.loads(v)
792
- vertices[vertex["id"]] = vertex.get("properties")
793
 
794
  # iterate returned fields and parse appropriately
795
  for k in record.keys():
796
  v = record[k]
797
  if isinstance(v, str) and "::" in v:
798
- dtype = v.split("::")[-1]
799
- v = v.split("::")[0]
800
- else:
801
- dtype = ""
802
-
803
- if dtype == "vertex":
804
- vertex = json.loads(v)
805
- field = vertex.get("properties")
806
- if not field:
807
- field = {}
808
- field["label"] = PGGraphStorage._decode_graph_label(field["node_id"])
809
- d[k] = field
810
- # convert edge from id-label->id by replacing id with node information
811
- # we only do this if the vertex was also returned in the query
812
- # this is an attempt to be consistent with neo4j implementation
813
- elif dtype == "edge":
814
- edge = json.loads(v)
815
- d[k] = (
816
- vertices.get(edge["start_id"], {}),
817
- edge[
818
- "label"
819
- ], # we don't use decode_graph_label(), since edge label is always "DIRECTED"
820
- vertices.get(edge["end_id"], {}),
821
- )
822
  else:
823
- d[k] = json.loads(v) if isinstance(v, str) else v
 
 
 
824
 
825
  return d
826
 
@@ -1294,7 +1372,7 @@ class PGGraphStorage(BaseGraphStorage):
1294
  OPTIONAL MATCH p = (n)-[*..%d]-(m)
1295
  RETURN nodes(p) AS nodes, relationships(p) AS relationships
1296
  LIMIT %d
1297
- $$) AS (nodes agtype[], relationships agtype[])""" % (
1298
  self.graph_name,
1299
  encoded_node_label,
1300
  max_depth,
@@ -1303,17 +1381,23 @@ class PGGraphStorage(BaseGraphStorage):
1303
 
1304
  results = await self._query(query)
1305
 
1306
- nodes = set()
1307
  edges = []
 
1308
 
1309
  for result in results:
1310
  if node_label == "*":
1311
  if result["n"]:
1312
  node = result["n"]
1313
- nodes.add(self._decode_graph_label(node["node_id"]))
 
 
 
1314
  if result["m"]:
1315
  node = result["m"]
1316
- nodes.add(self._decode_graph_label(node["node_id"]))
 
 
1317
  if result["r"]:
1318
  edge = result["r"]
1319
  src_id = self._decode_graph_label(edge["start_id"])
@@ -1322,16 +1406,36 @@ class PGGraphStorage(BaseGraphStorage):
1322
  else:
1323
  if result["nodes"]:
1324
  for node in result["nodes"]:
1325
- nodes.add(self._decode_graph_label(node["node_id"]))
 
 
 
1326
  if result["relationships"]:
1327
- for edge in result["relationships"]:
1328
- src_id = self._decode_graph_label(edge["start_id"])
1329
- tgt_id = self._decode_graph_label(edge["end_id"])
1330
- edges.append((src_id, tgt_id))
1331
 
1332
  kg = KnowledgeGraph(
1333
- nodes=[KnowledgeGraphNode(id=node_id) for node_id in nodes],
1334
- edges=[KnowledgeGraphEdge(source=src, target=tgt) for src, tgt in edges],
1335
  )
1336
 
1337
  return kg
 
585
  except Exception as e:
586
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
587
 
588
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
589
+ """Search for records with IDs starting with a specific prefix.
590
+
591
+ Args:
592
+ prefix: The prefix to search for in record IDs
593
+
594
+ Returns:
595
+ List of records with matching ID prefixes
596
+ """
597
+ table_name = namespace_to_table_name(self.namespace)
598
+ if not table_name:
599
+ logger.error(f"Unknown namespace for prefix search: {self.namespace}")
600
+ return []
601
+
602
+ search_sql = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id LIKE $2"
603
+ params = {"workspace": self.db.workspace, "prefix": f"{prefix}%"}
604
+
605
+ try:
606
+ results = await self.db.query(search_sql, params, multirows=True)
607
+ logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
608
+
609
+ # Format results to match the expected return format
610
+ formatted_results = []
611
+ for record in results:
612
+ formatted_record = dict(record)
613
+ # Ensure id field is available (for consistency with NanoVectorDB implementation)
614
+ if "id" not in formatted_record:
615
+ formatted_record["id"] = record["id"]
616
+ formatted_results.append(formatted_record)
617
+
618
+ return formatted_results
619
+ except Exception as e:
620
+ logger.error(f"Error during prefix search for '{prefix}': {e}")
621
+ return []
622
+
623
 
624
  @final
625
  @dataclass
 
820
  v = record[k]
821
  # agtype comes back '{key: value}::type' which must be parsed
822
  if isinstance(v, str) and "::" in v:
823
+ if v.startswith("[") and v.endswith("]"):
824
+ if "::vertex" not in v:
825
+ continue
826
+ v = v.replace("::vertex", "")
827
+ vertexes = json.loads(v)
828
+ for vertex in vertexes:
829
+ vertices[vertex["id"]] = vertex.get("properties")
830
+ else:
831
+ dtype = v.split("::")[-1]
832
+ v = v.split("::")[0]
833
+ if dtype == "vertex":
834
+ vertex = json.loads(v)
835
+ vertices[vertex["id"]] = vertex.get("properties")
836
 
837
  # iterate returned fields and parse appropriately
838
  for k in record.keys():
839
  v = record[k]
840
  if isinstance(v, str) and "::" in v:
841
+ if v.startswith("[") and v.endswith("]"):
842
+ if "::vertex" in v:
843
+ v = v.replace("::vertex", "")
844
+ vertexes = json.loads(v)
845
+ dl = []
846
+ for vertex in vertexes:
847
+ prop = vertex.get("properties")
848
+ if not prop:
849
+ prop = {}
850
+ prop["label"] = PGGraphStorage._decode_graph_label(
851
+ prop["node_id"]
852
+ )
853
+ dl.append(prop)
854
+ d[k] = dl
855
+
856
+ elif "::edge" in v:
857
+ v = v.replace("::edge", "")
858
+ edges = json.loads(v)
859
+ dl = []
860
+ for edge in edges:
861
+ dl.append(
862
+ (
863
+ vertices[edge["start_id"]],
864
+ edge["label"],
865
+ vertices[edge["end_id"]],
866
+ )
867
+ )
868
+ d[k] = dl
869
+ else:
870
+ print("WARNING: unsupported type")
871
+ continue
872
+
873
+ else:
874
+ dtype = v.split("::")[-1]
875
+ v = v.split("::")[0]
876
+ if dtype == "vertex":
877
+ vertex = json.loads(v)
878
+ field = vertex.get("properties")
879
+ if not field:
880
+ field = {}
881
+ field["label"] = PGGraphStorage._decode_graph_label(
882
+ field["node_id"]
883
+ )
884
+ d[k] = field
885
+ # convert edge from id-label->id by replacing id with node information
886
+ # we only do this if the vertex was also returned in the query
887
+ # this is an attempt to be consistent with neo4j implementation
888
+ elif dtype == "edge":
889
+ edge = json.loads(v)
890
+ d[k] = (
891
+ vertices.get(edge["start_id"], {}),
892
+ edge[
893
+ "label"
894
+ ], # we don't use decode_graph_label(), since edge label is always "DIRECTED"
895
+ vertices.get(edge["end_id"], {}),
896
+ )
897
  else:
898
+ if v is None or (v.count("{") < 1 and v.count("[") < 1):
899
+ d[k] = v
900
+ else:
901
+ d[k] = json.loads(v) if isinstance(v, str) else v
902
 
903
  return d
904
 
 
1372
  OPTIONAL MATCH p = (n)-[*..%d]-(m)
1373
  RETURN nodes(p) AS nodes, relationships(p) AS relationships
1374
  LIMIT %d
1375
+ $$) AS (nodes agtype, relationships agtype)""" % (
1376
  self.graph_name,
1377
  encoded_node_label,
1378
  max_depth,
 
1381
 
1382
  results = await self._query(query)
1383
 
1384
+ nodes = {}
1385
  edges = []
1386
+ unique_edge_ids = set()
1387
 
1388
  for result in results:
1389
  if node_label == "*":
1390
  if result["n"]:
1391
  node = result["n"]
1392
+ node_id = self._decode_graph_label(node["node_id"])
1393
+ if node_id not in nodes:
1394
+ nodes[node_id] = node
1395
+
1396
  if result["m"]:
1397
  node = result["m"]
1398
+ node_id = self._decode_graph_label(node["node_id"])
1399
+ if node_id not in nodes:
1400
+ nodes[node_id] = node
1401
  if result["r"]:
1402
  edge = result["r"]
1403
  src_id = self._decode_graph_label(edge["start_id"])
 
1406
  else:
1407
  if result["nodes"]:
1408
  for node in result["nodes"]:
1409
+ node_id = self._decode_graph_label(node["node_id"])
1410
+ if node_id not in nodes:
1411
+ nodes[node_id] = node
1412
+
1413
  if result["relationships"]:
1414
+ for edge in result["relationships"]: # src --DIRECTED--> target
1415
+ src_id = self._decode_graph_label(edge[0]["node_id"])
1416
+ tgt_id = self._decode_graph_label(edge[2]["node_id"])
1417
+ id = src_id + "," + tgt_id
1418
+ if id in unique_edge_ids:
1419
+ continue
1420
+ else:
1421
+ unique_edge_ids.add(id)
1422
+ edges.append(
1423
+ (id, src_id, tgt_id, {"source": edge[0], "target": edge[2]})
1424
+ )
1425
 
1426
  kg = KnowledgeGraph(
1427
+ nodes=[
1428
+ KnowledgeGraphNode(
1429
+ id=node_id, labels=[node_id], properties=nodes[node_id]
1430
+ )
1431
+ for node_id in nodes
1432
+ ],
1433
+ edges=[
1434
+ KnowledgeGraphEdge(
1435
+ id=id, type="DIRECTED", source=src, target=tgt, properties=props
1436
+ )
1437
+ for id, src, tgt, props in edges
1438
+ ],
1439
  )
1440
 
1441
  return kg
lightrag/kg/qdrant_impl.py CHANGED
@@ -135,7 +135,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
135
 
136
  logger.debug(f"query result: {results}")
137
 
138
- return [{**dp.payload, "id": dp.id, "distance": dp.score} for dp in results]
139
 
140
  async def index_done_callback(self) -> None:
141
  # Qdrant handles persistence automatically
@@ -233,3 +233,43 @@ class QdrantVectorDBStorage(BaseVectorStorage):
233
  logger.debug(f"No relations found for entity {entity_name}")
234
  except Exception as e:
235
  logger.error(f"Error deleting relations for {entity_name}: {e}")
135
 
136
  logger.debug(f"query result: {results}")
137
 
138
+ return [{**dp.payload, "distance": dp.score} for dp in results]
139
 
140
  async def index_done_callback(self) -> None:
141
  # Qdrant handles persistence automatically
 
233
  logger.debug(f"No relations found for entity {entity_name}")
234
  except Exception as e:
235
  logger.error(f"Error deleting relations for {entity_name}: {e}")
236
+
237
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
238
+ """Search for records with IDs starting with a specific prefix.
239
+
240
+ Args:
241
+ prefix: The prefix to search for in record IDs
242
+
243
+ Returns:
244
+ List of records with matching ID prefixes
245
+ """
246
+ try:
247
+ # Use scroll method to find records with IDs starting with the prefix
248
+ results = self._client.scroll(
249
+ collection_name=self.namespace,
250
+ scroll_filter=models.Filter(
251
+ must=[
252
+ models.FieldCondition(
253
+ key="id", match=models.MatchText(text=prefix, prefix=True)
254
+ )
255
+ ]
256
+ ),
257
+ with_payload=True,
258
+ with_vectors=False,
259
+ limit=1000, # Adjust as needed for your use case
260
+ )
261
+
262
+ # Extract matching points
263
+ matching_records = results[0]
264
+
265
+ # Format the results to match expected return format
266
+ formatted_results = [{**point.payload} for point in matching_records]
267
+
268
+ logger.debug(
269
+ f"Found {len(formatted_results)} records with prefix '{prefix}'"
270
+ )
271
+ return formatted_results
272
+
273
+ except Exception as e:
274
+ logger.error(f"Error searching for prefix '{prefix}': {e}")
275
+ return []
lightrag/kg/tidb_impl.py CHANGED
@@ -414,6 +414,55 @@ class TiDBVectorDBStorage(BaseVectorStorage):
414
  # Ti handles persistence automatically
415
  pass
416
 
417
 
418
  @final
419
  @dataclass
@@ -968,4 +1017,20 @@ SQL_TEMPLATES = {
968
  WHERE (source_name = :source AND target_name = :target)
969
  AND workspace = :workspace
970
  """,
971
  }
 
414
  # Ti handles persistence automatically
415
  pass
416
 
417
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
418
+ """Search for records with IDs starting with a specific prefix.
419
+
420
+ Args:
421
+ prefix: The prefix to search for in record IDs
422
+
423
+ Returns:
424
+ List of records with matching ID prefixes
425
+ """
426
+ # Determine which table to query based on namespace
427
+ if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
428
+ sql_template = """
429
+ SELECT entity_id as id, name as entity_name, entity_type, description, content
430
+ FROM LIGHTRAG_GRAPH_NODES
431
+ WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
432
+ """
433
+ elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
434
+ sql_template = """
435
+ SELECT relation_id as id, source_name as src_id, target_name as tgt_id,
436
+ keywords, description, content
437
+ FROM LIGHTRAG_GRAPH_EDGES
438
+ WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
439
+ """
440
+ elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
441
+ sql_template = """
442
+ SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
443
+ FROM LIGHTRAG_DOC_CHUNKS
444
+ WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
445
+ """
446
+ else:
447
+ logger.warning(
448
+ f"Namespace {self.namespace} not supported for prefix search"
449
+ )
450
+ return []
451
+
452
+ # Add prefix pattern parameter with % for SQL LIKE
453
+ prefix_pattern = f"{prefix}%"
454
+ params = {"prefix_pattern": prefix_pattern, "workspace": self.db.workspace}
455
+
456
+ try:
457
+ results = await self.db.query(sql_template, params=params, multirows=True)
458
+ logger.debug(
459
+ f"Found {len(results) if results else 0} records with prefix '{prefix}'"
460
+ )
461
+ return results if results else []
462
+ except Exception as e:
463
+ logger.error(f"Error searching records with prefix '{prefix}': {e}")
464
+ return []
465
+
466
 
467
  @final
468
  @dataclass
 
1017
  WHERE (source_name = :source AND target_name = :target)
1018
  AND workspace = :workspace
1019
  """,
1020
+ # Search by prefix SQL templates
1021
+ "search_entity_by_prefix": """
1022
+ SELECT entity_id as id, name as entity_name, entity_type, description, content
1023
+ FROM LIGHTRAG_GRAPH_NODES
1024
+ WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
1025
+ """,
1026
+ "search_relationship_by_prefix": """
1027
+ SELECT relation_id as id, source_name as src_id, target_name as tgt_id, keywords, description, content
1028
+ FROM LIGHTRAG_GRAPH_EDGES
1029
+ WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
1030
+ """,
1031
+ "search_chunk_by_prefix": """
1032
+ SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
1033
+ FROM LIGHTRAG_DOC_CHUNKS
1034
+ WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
1035
+ """,
1036
  }
lightrag/lightrag.py CHANGED
@@ -504,11 +504,39 @@ class LightRAG:
504
  return text
505
 
506
  async def get_knowledge_graph(
507
- self, node_label: str, max_depth: int
 
 
 
 
508
  ) -> KnowledgeGraph:
509
- return await self.chunk_entity_relation_graph.get_knowledge_graph(
510
- node_label=node_label, max_depth=max_depth
511
- )
512
 
513
  def _get_storage_class(self, storage_name: str) -> Callable[..., Any]:
514
  import_path = STORAGES[storage_name]
@@ -1981,6 +2009,9 @@ class LightRAG:
1981
  new_entity_name, new_node_data
1982
  )
1983
 
 
 
 
1984
  # Get all edges related to the original entity
1985
  edges = await self.chunk_entity_relation_graph.get_node_edges(
1986
  entity_name
@@ -1996,10 +2027,16 @@ class LightRAG:
1996
  await self.chunk_entity_relation_graph.upsert_edge(
1997
  new_entity_name, target, edge_data
1998
  )
 
 
 
1999
  else: # target == entity_name
2000
  await self.chunk_entity_relation_graph.upsert_edge(
2001
  source, new_entity_name, edge_data
2002
  )
 
 
 
2003
 
2004
  # Delete old entity
2005
  await self.chunk_entity_relation_graph.delete_node(entity_name)
@@ -2007,6 +2044,38 @@ class LightRAG:
2007
  # Delete old entity record from vector database
2008
  old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2009
  await self.entities_vdb.delete([old_entity_id])
2010
 
2011
  # Update working entity name to new name
2012
  entity_name = new_entity_name
@@ -2105,6 +2174,15 @@ class LightRAG:
2105
  f"Relation from '{source_entity}' to '{target_entity}' does not exist"
2106
  )
2107
 
 
2108
  # 2. Update relation information in the graph
2109
  new_edge_data = {**edge_data, **updated_data}
2110
  await self.chunk_entity_relation_graph.upsert_edge(
@@ -2118,7 +2196,7 @@ class LightRAG:
2118
  weight = float(new_edge_data.get("weight", 1.0))
2119
 
2120
  # Create content for embedding
2121
- content = f"{keywords}\t{source_entity}\n{target_entity}\n{description}"
2122
 
2123
  # Calculate relation ID
2124
  relation_id = compute_mdhash_id(
@@ -2382,3 +2460,409 @@ class LightRAG:
2382
  return loop.run_until_complete(
2383
  self.acreate_relation(source_entity, target_entity, relation_data)
2384
  )
504
  return text
505
 
506
  async def get_knowledge_graph(
507
+ self,
508
+ node_label: str,
509
+ max_depth: int = 3,
510
+ min_degree: int = 0,
511
+ inclusive: bool = False,
512
  ) -> KnowledgeGraph:
513
+ """Get knowledge graph for a given label
514
+
515
+ Args:
516
+ node_label (str): Label to get knowledge graph for
517
+ max_depth (int): Maximum depth of graph
518
+ min_degree (int, optional): Minimum degree of nodes to include. Defaults to 0.
519
+ inclusive (bool, optional): Whether to use inclusive search mode. Defaults to False.
520
+
521
+ Returns:
522
+ KnowledgeGraph: Knowledge graph containing nodes and edges
523
+ """
524
+ # get params supported by get_knowledge_graph of specified storage
525
+ import inspect
526
+
527
+ storage_params = inspect.signature(
528
+ self.chunk_entity_relation_graph.get_knowledge_graph
529
+ ).parameters
530
+
531
+ kwargs = {"node_label": node_label, "max_depth": max_depth}
532
+
533
+ if "min_degree" in storage_params and min_degree > 0:
534
+ kwargs["min_degree"] = min_degree
535
+
536
+ if "inclusive" in storage_params:
537
+ kwargs["inclusive"] = inclusive
538
+
539
+ return await self.chunk_entity_relation_graph.get_knowledge_graph(**kwargs)
540
 
541
  def _get_storage_class(self, storage_name: str) -> Callable[..., Any]:
542
  import_path = STORAGES[storage_name]
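
Because the wrapper inspects the backend's `get_knowledge_graph` signature, `min_degree` and `inclusive` are only forwarded to storages that accept them, so older graph storage implementations keep working unchanged. A usage sketch, assuming an initialized `LightRAG` instance and an example label (neither is part of this commit):

```python
async def fetch_subgraph(rag):
    """Illustrative only: pull a small filtered neighborhood."""
    kg = await rag.get_knowledge_graph(
        node_label="Artificial Intelligence",  # example label
        max_depth=2,     # at most two hops out from the matching node(s)
        min_degree=1,    # silently dropped for backends that do not accept it
        inclusive=True,  # likewise only forwarded when the backend supports it
    )
    print(f"{len(kg.nodes)} nodes, {len(kg.edges)} edges")
    return kg
```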
 
2009
  new_entity_name, new_node_data
2010
  )
2011
 
2012
+ # Store relationships that need to be updated
2013
+ relations_to_update = []
2014
+
2015
  # Get all edges related to the original entity
2016
  edges = await self.chunk_entity_relation_graph.get_node_edges(
2017
  entity_name
 
2027
  await self.chunk_entity_relation_graph.upsert_edge(
2028
  new_entity_name, target, edge_data
2029
  )
2030
+ relations_to_update.append(
2031
+ (new_entity_name, target, edge_data)
2032
+ )
2033
  else: # target == entity_name
2034
  await self.chunk_entity_relation_graph.upsert_edge(
2035
  source, new_entity_name, edge_data
2036
  )
2037
+ relations_to_update.append(
2038
+ (source, new_entity_name, edge_data)
2039
+ )
2040
 
2041
  # Delete old entity
2042
  await self.chunk_entity_relation_graph.delete_node(entity_name)
 
2044
  # Delete old entity record from vector database
2045
  old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2046
  await self.entities_vdb.delete([old_entity_id])
2047
+ logger.info(
2048
+ f"Deleted old entity '{entity_name}' and its vector embedding from database"
2049
+ )
2050
+
2051
+ # Update relationship vector representations
2052
+ for src, tgt, edge_data in relations_to_update:
2053
+ description = edge_data.get("description", "")
2054
+ keywords = edge_data.get("keywords", "")
2055
+ source_id = edge_data.get("source_id", "")
2056
+ weight = float(edge_data.get("weight", 1.0))
2057
+
2058
+ # Create new content for embedding
2059
+ content = f"{src}\t{tgt}\n{keywords}\n{description}"
2060
+
2061
+ # Calculate relationship ID
2062
+ relation_id = compute_mdhash_id(src + tgt, prefix="rel-")
2063
+
2064
+ # Prepare data for vector database update
2065
+ relation_data = {
2066
+ relation_id: {
2067
+ "content": content,
2068
+ "src_id": src,
2069
+ "tgt_id": tgt,
2070
+ "source_id": source_id,
2071
+ "description": description,
2072
+ "keywords": keywords,
2073
+ "weight": weight,
2074
+ }
2075
+ }
2076
+
2077
+ # Update vector database
2078
+ await self.relationships_vdb.upsert(relation_data)
2079
 
2080
  # Update working entity name to new name
2081
  entity_name = new_entity_name
 
2174
  f"Relation from '{source_entity}' to '{target_entity}' does not exist"
2175
  )
2176
 
2177
+ # Important: First delete the old relation record from the vector database
2178
+ old_relation_id = compute_mdhash_id(
2179
+ source_entity + target_entity, prefix="rel-"
2180
+ )
2181
+ await self.relationships_vdb.delete([old_relation_id])
2182
+ logger.info(
2183
+ f"Deleted old relation record from vector database for relation {source_entity} -> {target_entity}"
2184
+ )
2185
+
2186
  # 2. Update relation information in the graph
2187
  new_edge_data = {**edge_data, **updated_data}
2188
  await self.chunk_entity_relation_graph.upsert_edge(
 
2196
  weight = float(new_edge_data.get("weight", 1.0))
2197
 
2198
  # Create content for embedding
2199
+ content = f"{source_entity}\t{target_entity}\n{keywords}\n{description}"
2200
 
2201
  # Calculate relation ID
2202
  relation_id = compute_mdhash_id(
 
2460
  return loop.run_until_complete(
2461
  self.acreate_relation(source_entity, target_entity, relation_data)
2462
  )
2463
+
2464
+ async def amerge_entities(
2465
+ self,
2466
+ source_entities: list[str],
2467
+ target_entity: str,
2468
+ merge_strategy: dict[str, str] = None,
2469
+ target_entity_data: dict[str, Any] = None,
2470
+ ) -> dict[str, Any]:
2471
+ """Asynchronously merge multiple entities into one entity.
2472
+
2473
+ Merges multiple source entities into a target entity, handling all relationships,
2474
+ and updating both the knowledge graph and vector database.
2475
+
2476
+ Args:
2477
+ source_entities: List of source entity names to merge
2478
+ target_entity: Name of the target entity after merging
2479
+ merge_strategy: Merge strategy configuration, e.g. {"description": "concatenate", "entity_type": "keep_first"}
2480
+ Supported strategies:
2481
+ - "concatenate": Concatenate all values (for text fields)
2482
+ - "keep_first": Keep the first non-empty value
2483
+ - "keep_last": Keep the last non-empty value
2484
+ - "join_unique": Join all unique values (for fields separated by delimiter)
2485
+ target_entity_data: Dictionary of specific values to set for the target entity,
2486
+ overriding any merged values, e.g. {"description": "custom description", "entity_type": "PERSON"}
2487
+
2488
+ Returns:
2489
+ Dictionary containing the merged entity information
2490
+ """
2491
+ try:
2492
+ # Default merge strategy
2493
+ default_strategy = {
2494
+ "description": "concatenate",
2495
+ "entity_type": "keep_first",
2496
+ "source_id": "join_unique",
2497
+ }
2498
+
2499
+ merge_strategy = (
2500
+ default_strategy
2501
+ if merge_strategy is None
2502
+ else {**default_strategy, **merge_strategy}
2503
+ )
2504
+ target_entity_data = (
2505
+ {} if target_entity_data is None else target_entity_data
2506
+ )
2507
+
2508
+ # 1. Check if all source entities exist
2509
+ source_entities_data = {}
2510
+ for entity_name in source_entities:
2511
+ node_data = await self.chunk_entity_relation_graph.get_node(entity_name)
2512
+ if not node_data:
2513
+ raise ValueError(f"Source entity '{entity_name}' does not exist")
2514
+ source_entities_data[entity_name] = node_data
2515
+
2516
+ # 2. Check if target entity exists and get its data if it does
2517
+ target_exists = await self.chunk_entity_relation_graph.has_node(
2518
+ target_entity
2519
+ )
2520
+ existing_target_data = {}
2521
+ if target_exists:
2522
+ existing_target_data = await self.chunk_entity_relation_graph.get_node(
2523
+ target_entity
2524
+ )
2525
+ logger.info(
2526
+ f"Target entity '{target_entity}' already exists, will merge data"
2527
+ )
2528
+
2529
+ # 3. Merge entity data
2530
+ merged_entity_data = self._merge_entity_attributes(
2531
+ list(source_entities_data.values())
2532
+ + ([existing_target_data] if target_exists else []),
2533
+ merge_strategy,
2534
+ )
2535
+
2536
+ # Apply any explicitly provided target entity data (overrides merged data)
2537
+ for key, value in target_entity_data.items():
2538
+ merged_entity_data[key] = value
2539
+
2540
+ # 4. Get all relationships of the source entities
2541
+ all_relations = []
2542
+ for entity_name in source_entities:
2543
+ # Get all relationships where this entity is the source
2544
+ outgoing_edges = await self.chunk_entity_relation_graph.get_node_edges(
2545
+ entity_name
2546
+ )
2547
+ if outgoing_edges:
2548
+ for src, tgt in outgoing_edges:
2549
+ # Ensure src is the current entity
2550
+ if src == entity_name:
2551
+ edge_data = await self.chunk_entity_relation_graph.get_edge(
2552
+ src, tgt
2553
+ )
2554
+ all_relations.append(("outgoing", src, tgt, edge_data))
2555
+
2556
+ # Get all relationships where this entity is the target
2557
+ incoming_edges = []
2558
+ all_labels = await self.chunk_entity_relation_graph.get_all_labels()
2559
+ for label in all_labels:
2560
+ if label == entity_name:
2561
+ continue
2562
+ node_edges = await self.chunk_entity_relation_graph.get_node_edges(
2563
+ label
2564
+ )
2565
+ for src, tgt in node_edges or []:
2566
+ if tgt == entity_name:
2567
+ incoming_edges.append((src, tgt))
2568
+
2569
+ for src, tgt in incoming_edges:
2570
+ edge_data = await self.chunk_entity_relation_graph.get_edge(
2571
+ src, tgt
2572
+ )
2573
+ all_relations.append(("incoming", src, tgt, edge_data))
2574
+
2575
+ # 5. Create or update the target entity
2576
+ if not target_exists:
2577
+ await self.chunk_entity_relation_graph.upsert_node(
2578
+ target_entity, merged_entity_data
2579
+ )
2580
+ logger.info(f"Created new target entity '{target_entity}'")
2581
+ else:
2582
+ await self.chunk_entity_relation_graph.upsert_node(
2583
+ target_entity, merged_entity_data
2584
+ )
2585
+ logger.info(f"Updated existing target entity '{target_entity}'")
2586
+
2587
+ # 6. Recreate all relationships, pointing to the target entity
2588
+ relation_updates = {} # Track relationships that need to be merged
2589
+
2590
+ for rel_type, src, tgt, edge_data in all_relations:
2591
+ new_src = target_entity if src in source_entities else src
2592
+ new_tgt = target_entity if tgt in source_entities else tgt
2593
+
2594
+ # Skip relationships between source entities to avoid self-loops
2595
+ if new_src == new_tgt:
2596
+ logger.info(
2597
+ f"Skipping relationship between source entities: {src} -> {tgt} to avoid self-loop"
2598
+ )
2599
+ continue
2600
+
2601
+ # Check if the same relationship already exists
2602
+ relation_key = f"{new_src}|{new_tgt}"
2603
+ if relation_key in relation_updates:
2604
+ # Merge relationship data
2605
+ existing_data = relation_updates[relation_key]["data"]
2606
+ merged_relation = self._merge_relation_attributes(
2607
+ [existing_data, edge_data],
2608
+ {
2609
+ "description": "concatenate",
2610
+ "keywords": "join_unique",
2611
+ "source_id": "join_unique",
2612
+ "weight": "max",
2613
+ },
2614
+ )
2615
+ relation_updates[relation_key]["data"] = merged_relation
2616
+ logger.info(
2617
+ f"Merged duplicate relationship: {new_src} -> {new_tgt}"
2618
+ )
2619
+ else:
2620
+ relation_updates[relation_key] = {
2621
+ "src": new_src,
2622
+ "tgt": new_tgt,
2623
+ "data": edge_data.copy(),
2624
+ }
2625
+
2626
+ # Apply relationship updates
2627
+ for rel_data in relation_updates.values():
2628
+ await self.chunk_entity_relation_graph.upsert_edge(
2629
+ rel_data["src"], rel_data["tgt"], rel_data["data"]
2630
+ )
2631
+ logger.info(
2632
+ f"Created or updated relationship: {rel_data['src']} -> {rel_data['tgt']}"
2633
+ )
2634
+
2635
+ # 7. Update entity vector representation
2636
+ description = merged_entity_data.get("description", "")
2637
+ source_id = merged_entity_data.get("source_id", "")
2638
+ entity_type = merged_entity_data.get("entity_type", "")
2639
+ content = target_entity + "\n" + description
2640
+
2641
+ entity_id = compute_mdhash_id(target_entity, prefix="ent-")
2642
+ entity_data_for_vdb = {
2643
+ entity_id: {
2644
+ "content": content,
2645
+ "entity_name": target_entity,
2646
+ "source_id": source_id,
2647
+ "description": description,
2648
+ "entity_type": entity_type,
2649
+ }
2650
+ }
2651
+
2652
+ await self.entities_vdb.upsert(entity_data_for_vdb)
2653
+
2654
+ # 8. Update relationship vector representations
2655
+ for rel_data in relation_updates.values():
2656
+ src = rel_data["src"]
2657
+ tgt = rel_data["tgt"]
2658
+ edge_data = rel_data["data"]
2659
+
2660
+ description = edge_data.get("description", "")
2661
+ keywords = edge_data.get("keywords", "")
2662
+ source_id = edge_data.get("source_id", "")
2663
+ weight = float(edge_data.get("weight", 1.0))
2664
+
2665
+ content = f"{keywords}\t{src}\n{tgt}\n{description}"
2666
+ relation_id = compute_mdhash_id(src + tgt, prefix="rel-")
2667
+
2668
+ relation_data_for_vdb = {
2669
+ relation_id: {
2670
+ "content": content,
2671
+ "src_id": src,
2672
+ "tgt_id": tgt,
2673
+ "source_id": source_id,
2674
+ "description": description,
2675
+ "keywords": keywords,
2676
+ "weight": weight,
2677
+ }
2678
+ }
2679
+
2680
+ await self.relationships_vdb.upsert(relation_data_for_vdb)
2681
+
2682
+ # 9. Delete source entities
2683
+ for entity_name in source_entities:
2684
+ # Delete entity node from knowledge graph
2685
+ await self.chunk_entity_relation_graph.delete_node(entity_name)
2686
+
2687
+ # Delete entity record from vector database
2688
+ entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2689
+ await self.entities_vdb.delete([entity_id])
2690
+
2691
+ # Also ensure any relationships specific to this entity are deleted from vector DB
2692
+ # This is a safety check, as these should have been transformed to the target entity already
2693
+ entity_relation_prefix = compute_mdhash_id(entity_name, prefix="rel-")
2694
+ relations_with_entity = await self.relationships_vdb.search_by_prefix(
2695
+ entity_relation_prefix
2696
+ )
2697
+ if relations_with_entity:
2698
+ relation_ids = [r["id"] for r in relations_with_entity]
2699
+ await self.relationships_vdb.delete(relation_ids)
2700
+ logger.info(
2701
+ f"Deleted {len(relation_ids)} relation records for entity '{entity_name}' from vector database"
2702
+ )
2703
+
2704
+ logger.info(
2705
+ f"Deleted source entity '{entity_name}' and its vector embedding from database"
2706
+ )
2707
+
2708
+ # 10. Save changes
2709
+ await self._merge_entities_done()
2710
+
2711
+ logger.info(
2712
+ f"Successfully merged {len(source_entities)} entities into '{target_entity}'"
2713
+ )
2714
+ return await self.get_entity_info(target_entity, include_vector_data=True)
2715
+
2716
+ except Exception as e:
2717
+ logger.error(f"Error merging entities: {e}")
2718
+ raise
2719
+
2720
+ def merge_entities(
2721
+ self,
2722
+ source_entities: list[str],
2723
+ target_entity: str,
2724
+ merge_strategy: dict[str, str] = None,
2725
+ target_entity_data: dict[str, Any] = None,
2726
+ ) -> dict[str, Any]:
2727
+ """Synchronously merge multiple entities into one entity.
2728
+
2729
+ Merges multiple source entities into a target entity, handling all relationships,
2730
+ and updating both the knowledge graph and vector database.
2731
+
2732
+ Args:
2733
+ source_entities: List of source entity names to merge
2734
+ target_entity: Name of the target entity after merging
2735
+ merge_strategy: Merge strategy configuration, e.g. {"description": "concatenate", "entity_type": "keep_first"}
2736
+ target_entity_data: Dictionary of specific values to set for the target entity,
2737
+ overriding any merged values, e.g. {"description": "custom description", "entity_type": "PERSON"}
2738
+
2739
+ Returns:
2740
+ Dictionary containing the merged entity information
2741
+ """
2742
+ loop = always_get_an_event_loop()
2743
+ return loop.run_until_complete(
2744
+ self.amerge_entities(
2745
+ source_entities, target_entity, merge_strategy, target_entity_data
2746
+ )
2747
+ )
2748
+
2749
+ def _merge_entity_attributes(
2750
+ self, entity_data_list: list[dict[str, Any]], merge_strategy: dict[str, str]
2751
+ ) -> dict[str, Any]:
2752
+ """Merge attributes from multiple entities.
2753
+
2754
+ Args:
2755
+ entity_data_list: List of dictionaries containing entity data
2756
+ merge_strategy: Merge strategy for each field
2757
+
2758
+ Returns:
2759
+ Dictionary containing merged entity data
2760
+ """
2761
+ merged_data = {}
2762
+
2763
+ # Collect all possible keys
2764
+ all_keys = set()
2765
+ for data in entity_data_list:
2766
+ all_keys.update(data.keys())
2767
+
2768
+ # Merge values for each key
2769
+ for key in all_keys:
2770
+ # Get all values for this key
2771
+ values = [data.get(key) for data in entity_data_list if data.get(key)]
2772
+
2773
+ if not values:
2774
+ continue
2775
+
2776
+ # Merge values according to strategy
2777
+ strategy = merge_strategy.get(key, "keep_first")
2778
+
2779
+ if strategy == "concatenate":
2780
+ merged_data[key] = "\n\n".join(values)
2781
+ elif strategy == "keep_first":
2782
+ merged_data[key] = values[0]
2783
+ elif strategy == "keep_last":
2784
+ merged_data[key] = values[-1]
2785
+ elif strategy == "join_unique":
2786
+ # Handle fields separated by GRAPH_FIELD_SEP
2787
+ unique_items = set()
2788
+ for value in values:
2789
+ items = value.split(GRAPH_FIELD_SEP)
2790
+ unique_items.update(items)
2791
+ merged_data[key] = GRAPH_FIELD_SEP.join(unique_items)
2792
+ else:
2793
+ # Default strategy
2794
+ merged_data[key] = values[0]
2795
+
2796
+ return merged_data
2797
+
2798
+ def _merge_relation_attributes(
2799
+ self, relation_data_list: list[dict[str, Any]], merge_strategy: dict[str, str]
2800
+ ) -> dict[str, Any]:
2801
+ """Merge attributes from multiple relationships.
2802
+
2803
+ Args:
2804
+ relation_data_list: List of dictionaries containing relationship data
2805
+ merge_strategy: Merge strategy for each field
2806
+
2807
+ Returns:
2808
+ Dictionary containing merged relationship data
2809
+ """
2810
+ merged_data = {}
2811
+
2812
+ # Collect all possible keys
2813
+ all_keys = set()
2814
+ for data in relation_data_list:
2815
+ all_keys.update(data.keys())
2816
+
2817
+ # Merge values for each key
2818
+ for key in all_keys:
2819
+ # Get all values for this key
2820
+ values = [
2821
+ data.get(key)
2822
+ for data in relation_data_list
2823
+ if data.get(key) is not None
2824
+ ]
2825
+
2826
+ if not values:
2827
+ continue
2828
+
2829
+ # Merge values according to strategy
2830
+ strategy = merge_strategy.get(key, "keep_first")
2831
+
2832
+ if strategy == "concatenate":
2833
+ merged_data[key] = "\n\n".join(str(v) for v in values)
2834
+ elif strategy == "keep_first":
2835
+ merged_data[key] = values[0]
2836
+ elif strategy == "keep_last":
2837
+ merged_data[key] = values[-1]
2838
+ elif strategy == "join_unique":
2839
+ # Handle fields separated by GRAPH_FIELD_SEP
2840
+ unique_items = set()
2841
+ for value in values:
2842
+ items = str(value).split(GRAPH_FIELD_SEP)
2843
+ unique_items.update(items)
2844
+ merged_data[key] = GRAPH_FIELD_SEP.join(unique_items)
2845
+ elif strategy == "max":
2846
+ # For numeric fields like weight
2847
+ try:
2848
+ merged_data[key] = max(float(v) for v in values)
2849
+ except (ValueError, TypeError):
2850
+ merged_data[key] = values[0]
2851
+ else:
2852
+ # Default strategy
2853
+ merged_data[key] = values[0]
2854
+
2855
+ return merged_data
2856
+
2857
+ async def _merge_entities_done(self) -> None:
2858
+ """Callback after entity merging is complete, ensures updates are persisted"""
2859
+ await asyncio.gather(
2860
+ *[
2861
+ cast(StorageNameSpace, storage_inst).index_done_callback()
2862
+ for storage_inst in [ # type: ignore
2863
+ self.entities_vdb,
2864
+ self.relationships_vdb,
2865
+ self.chunk_entity_relation_graph,
2866
+ ]
2867
+ ]
2868
+ )
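
For reference, this is how the default field strategies used by `_merge_entity_attributes` resolve two overlapping records; the records are invented and `<SEP>` stands for `GRAPH_FIELD_SEP`:

```python
# Illustrative only; both entity records are made up.
a = {"description": "A research lab.", "entity_type": "organization", "source_id": "chunk-1"}
b = {"description": "Focuses on graph-based RAG.", "entity_type": "company", "source_id": "chunk-1<SEP>chunk-2"}

# With the defaults {"description": "concatenate", "entity_type": "keep_first", "source_id": "join_unique"}:
#   description -> "A research lab.\n\nFocuses on graph-based RAG."  (values joined by blank lines)
#   entity_type -> "organization"                                    (first non-empty value wins)
#   source_id   -> "chunk-1<SEP>chunk-2"                             (duplicates removed; item order is not
#                                                                     guaranteed because a set is used)
```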
lightrag/operate.py CHANGED
@@ -384,8 +384,8 @@ async def extract_entities(
384
  language=language,
385
  )
386
 
387
- continue_prompt = PROMPTS["entiti_continue_extraction"]
388
- if_loop_prompt = PROMPTS["entiti_if_loop_extraction"]
389
 
390
  processed_chunks = 0
391
  total_chunks = len(ordered_chunks)
@@ -1156,7 +1156,8 @@ async def _get_node_data(
1156
  "entity",
1157
  "type",
1158
  "description",
1159
- "rank" "created_at",
 
1160
  ]
1161
  ]
1162
  for i, n in enumerate(node_datas):
 
384
  language=language,
385
  )
386
 
387
+ continue_prompt = PROMPTS["entity_continue_extraction"]
388
+ if_loop_prompt = PROMPTS["entity_if_loop_extraction"]
389
 
390
  processed_chunks = 0
391
  total_chunks = len(ordered_chunks)
 
1156
  "entity",
1157
  "type",
1158
  "description",
1159
+ "rank",
1160
+ "created_at",
1161
  ]
1162
  ]
1163
  for i, n in enumerate(node_datas):
lightrag/prompt.py CHANGED
@@ -58,14 +58,16 @@ PROMPTS["entity_extraction_examples"] = [
58
 
59
  Entity_types: [person, technology, mission, organization, location]
60
  Text:
 
61
  while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
62
 
63
- Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. "If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us."
64
 
65
  The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
66
 
67
  It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
68
- ################
 
69
  Output:
70
  ("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is a character who experiences frustration and is observant of the dynamics among other characters."){record_delimiter}
71
  ("entity"{tuple_delimiter}"Taylor"{tuple_delimiter}"person"{tuple_delimiter}"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective."){record_delimiter}
@@ -81,48 +83,52 @@ Output:
81
  #############################""",
82
  """Example 2:
83
 
84
- Entity_types: [person, technology, mission, organization, location]
85
  Text:
86
- They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols—it demanded a new perspective, a new resolve.
 
 
 
 
 
87
 
88
- Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.
 
89
 
90
- Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence— the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly
91
- #############
92
  Output:
93
- ("entity"{tuple_delimiter}"Washington"{tuple_delimiter}"location"{tuple_delimiter}"Washington is a location where communications are being received, indicating its importance in the decision-making process."){record_delimiter}
94
- ("entity"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"mission"{tuple_delimiter}"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and activities."){record_delimiter}
95
- ("entity"{tuple_delimiter}"The team"{tuple_delimiter}"organization"{tuple_delimiter}"The team is portrayed as a group of individuals who have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role."){record_delimiter}
96
- ("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Washington"{tuple_delimiter}"The team receives communications from Washington, which influences their decision-making process."{tuple_delimiter}"decision-making, external influence"{tuple_delimiter}7){record_delimiter}
97
- ("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."{tuple_delimiter}"mission evolution, active participation"{tuple_delimiter}9){record_delimiter}
98
- ("content_keywords"{tuple_delimiter}"mission evolution, decision-making, active participation, cosmic significance"){completion_delimiter}
 
 
 
 
 
 
99
  #############################""",
100
  """Example 3:
101
 
102
- Entity_types: [person, role, technology, organization, event, location, concept]
103
  Text:
104
- their voice slicing through the buzz of activity. "Control may be an illusion when facing an intelligence that literally writes its own rules," they stated stoically, casting a watchful eye over the flurry of data.
 
 
105
 
106
- "It's like it's learning to communicate," offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. "This gives talking to strangers' a whole new meaning."
107
-
108
- Alex surveyed his team—each face a study in concentration, determination, and not a small measure of trepidation. "This might well be our first contact," he acknowledged, "And we need to be ready for whatever answers back."
109
-
110
- Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable—a collective introspection about their role in this grand cosmic play, one that could rewrite human history.
111
-
112
- The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation
113
- #############
114
  Output:
115
- ("entity"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"person"{tuple_delimiter}"Sam Rivera is a member of a team working on communicating with an unknown intelligence, showing a mix of awe and anxiety."){record_delimiter}
116
- ("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is the leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their task."){record_delimiter}
117
- ("entity"{tuple_delimiter}"Control"{tuple_delimiter}"concept"{tuple_delimiter}"Control refers to the ability to manage or govern, which is challenged by an intelligence that writes its own rules."){record_delimiter}
118
- ("entity"{tuple_delimiter}"Intelligence"{tuple_delimiter}"concept"{tuple_delimiter}"Intelligence here refers to an unknown entity capable of writing its own rules and learning to communicate."){record_delimiter}
119
- ("entity"{tuple_delimiter}"First Contact"{tuple_delimiter}"event"{tuple_delimiter}"First Contact is the potential initial communication between humanity and an unknown intelligence."){record_delimiter}
120
- ("entity"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"event"{tuple_delimiter}"Humanity's Response is the collective action taken by Alex's team in response to a message from an unknown intelligence."){record_delimiter}
121
- ("relationship"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"Intelligence"{tuple_delimiter}"Sam Rivera is directly involved in the process of learning to communicate with the unknown intelligence."{tuple_delimiter}"communication, learning process"{tuple_delimiter}9){record_delimiter}
122
- ("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"First Contact"{tuple_delimiter}"Alex leads the team that might be making the First Contact with the unknown intelligence."{tuple_delimiter}"leadership, exploration"{tuple_delimiter}10){record_delimiter}
123
- ("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."{tuple_delimiter}"collective action, cosmic significance"{tuple_delimiter}8){record_delimiter}
124
- ("relationship"{tuple_delimiter}"Control"{tuple_delimiter}"Intelligence"{tuple_delimiter}"The concept of Control is challenged by the Intelligence that writes its own rules."{tuple_delimiter}"power dynamics, autonomy"{tuple_delimiter}7){record_delimiter}
125
- ("content_keywords"{tuple_delimiter}"first contact, control, communication, cosmic significance"){completion_delimiter}
126
  #############################""",
127
  ]
128
 
@@ -143,15 +149,47 @@ Description List: {description_list}
143
  Output:
144
  """
145
 
146
- PROMPTS[
147
- "entiti_continue_extraction"
148
- ] = """MANY entities were missed in the last extraction. Add them below using the same format:
149
- """
150
 
151
- PROMPTS[
152
- "entiti_if_loop_extraction"
153
- ] = """It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.
154
- """
155
 
156
  PROMPTS["fail_response"] = (
157
  "Sorry, I'm not able to provide an answer to that question.[no-context]"
 
58
 
59
  Entity_types: [person, technology, mission, organization, location]
60
  Text:
61
+ ```
62
  while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
63
 
64
+ Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. "If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us."
65
 
66
  The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
67
 
68
  It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
69
+ ```
70
+
71
  Output:
72
  ("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is a character who experiences frustration and is observant of the dynamics among other characters."){record_delimiter}
73
  ("entity"{tuple_delimiter}"Taylor"{tuple_delimiter}"person"{tuple_delimiter}"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective."){record_delimiter}
 
83
  #############################""",
84
  """Example 2:
85
 
86
+ Entity_types: [company, index, commodity, market_trend, economic_policy, biological]
87
  Text:
88
+ ```
89
+ Stock markets faced a sharp downturn today as tech giants saw significant declines, with the Global Tech Index dropping by 3.4% in midday trading. Analysts attribute the selloff to investor concerns over rising interest rates and regulatory uncertainty.
90
+
91
+ Among the hardest hit, Nexon Technologies saw its stock plummet by 7.8% after reporting lower-than-expected quarterly earnings. In contrast, Omega Energy posted a modest 2.1% gain, driven by rising oil prices.
92
+
93
+ Meanwhile, commodity markets reflected a mixed sentiment. Gold futures rose by 1.5%, reaching $2,080 per ounce, as investors sought safe-haven assets. Crude oil prices continued their rally, climbing to $87.60 per barrel, supported by supply constraints and strong demand.
94
 
95
+ Financial experts are closely watching the Federal Reserve’s next move, as speculation grows over potential rate hikes. The upcoming policy announcement is expected to influence investor confidence and overall market stability.
96
+ ```
97
 
 
 
98
  Output:
99
+ ("entity"{tuple_delimiter}"Global Tech Index"{tuple_delimiter}"index"{tuple_delimiter}"The Global Tech Index tracks the performance of major technology stocks and experienced a 3.4% decline today."){record_delimiter}
100
+ ("entity"{tuple_delimiter}"Nexon Technologies"{tuple_delimiter}"company"{tuple_delimiter}"Nexon Technologies is a tech company that saw its stock decline by 7.8% after disappointing earnings."){record_delimiter}
101
+ ("entity"{tuple_delimiter}"Omega Energy"{tuple_delimiter}"company"{tuple_delimiter}"Omega Energy is an energy company that gained 2.1% in stock value due to rising oil prices."){record_delimiter}
102
+ ("entity"{tuple_delimiter}"Gold Futures"{tuple_delimiter}"commodity"{tuple_delimiter}"Gold futures rose by 1.5%, indicating increased investor interest in safe-haven assets."){record_delimiter}
103
+ ("entity"{tuple_delimiter}"Crude Oil"{tuple_delimiter}"commodity"{tuple_delimiter}"Crude oil prices rose to $87.60 per barrel due to supply constraints and strong demand."){record_delimiter}
104
+ ("entity"{tuple_delimiter}"Market Selloff"{tuple_delimiter}"market_trend"{tuple_delimiter}"Market selloff refers to the significant decline in stock values due to investor concerns over interest rates and regulations."){record_delimiter}
105
+ ("entity"{tuple_delimiter}"Federal Reserve Policy Announcement"{tuple_delimiter}"economic_policy"{tuple_delimiter}"The Federal Reserve's upcoming policy announcement is expected to impact investor confidence and market stability."){record_delimiter}
106
+ ("relationship"{tuple_delimiter}"Global Tech Index"{tuple_delimiter}"Market Selloff"{tuple_delimiter}"The decline in the Global Tech Index is part of the broader market selloff driven by investor concerns."{tuple_delimiter}"market performance, investor sentiment"{tuple_delimiter}9){record_delimiter}
107
+ ("relationship"{tuple_delimiter}"Nexon Technologies"{tuple_delimiter}"Global Tech Index"{tuple_delimiter}"Nexon Technologies' stock decline contributed to the overall drop in the Global Tech Index."{tuple_delimiter}"company impact, index movement"{tuple_delimiter}8){record_delimiter}
108
+ ("relationship"{tuple_delimiter}"Gold Futures"{tuple_delimiter}"Market Selloff"{tuple_delimiter}"Gold prices rose as investors sought safe-haven assets during the market selloff."{tuple_delimiter}"market reaction, safe-haven investment"{tuple_delimiter}10){record_delimiter}
109
+ ("relationship"{tuple_delimiter}"Federal Reserve Policy Announcement"{tuple_delimiter}"Market Selloff"{tuple_delimiter}"Speculation over Federal Reserve policy changes contributed to market volatility and investor selloff."{tuple_delimiter}"interest rate impact, financial regulation"{tuple_delimiter}7){record_delimiter}
110
+ ("content_keywords"{tuple_delimiter}"market downturn, investor sentiment, commodities, Federal Reserve, stock performance"){completion_delimiter}
111
  #############################""",
112
  """Example 3:
113
 
114
+ Entity_types: [economic_policy, athlete, event, location, record, organization, equipment]
115
  Text:
116
+ ```
117
+ At the World Athletics Championship in Tokyo, Noah Carter broke the 100m sprint record using cutting-edge carbon-fiber spikes.
118
+ ```
119
 
120
  Output:
121
+ ("entity"{tuple_delimiter}"World Athletics Championship"{tuple_delimiter}"event"{tuple_delimiter}"The World Athletics Championship is a global sports competition featuring top athletes in track and field."){record_delimiter}
122
+ ("entity"{tuple_delimiter}"Tokyo"{tuple_delimiter}"location"{tuple_delimiter}"Tokyo is the host city of the World Athletics Championship."){record_delimiter}
123
+ ("entity"{tuple_delimiter}"Noah Carter"{tuple_delimiter}"athlete"{tuple_delimiter}"Noah Carter is a sprinter who set a new record in the 100m sprint at the World Athletics Championship."){record_delimiter}
124
+ ("entity"{tuple_delimiter}"100m Sprint Record"{tuple_delimiter}"record"{tuple_delimiter}"The 100m sprint record is a benchmark in athletics, recently broken by Noah Carter."){record_delimiter}
125
+ ("entity"{tuple_delimiter}"Carbon-Fiber Spikes"{tuple_delimiter}"equipment"{tuple_delimiter}"Carbon-fiber spikes are advanced sprinting shoes that provide enhanced speed and traction."){record_delimiter}
126
+ ("entity"{tuple_delimiter}"World Athletics Federation"{tuple_delimiter}"organization"{tuple_delimiter}"The World Athletics Federation is the governing body overseeing the World Athletics Championship and record validations."){record_delimiter}
127
+ ("relationship"{tuple_delimiter}"World Athletics Championship"{tuple_delimiter}"Tokyo"{tuple_delimiter}"The World Athletics Championship is being hosted in Tokyo."{tuple_delimiter}"event location, international competition"{tuple_delimiter}8){record_delimiter}
128
+ ("relationship"{tuple_delimiter}"Noah Carter"{tuple_delimiter}"100m Sprint Record"{tuple_delimiter}"Noah Carter set a new 100m sprint record at the championship."{tuple_delimiter}"athlete achievement, record-breaking"{tuple_delimiter}10){record_delimiter}
129
+ ("relationship"{tuple_delimiter}"Noah Carter"{tuple_delimiter}"Carbon-Fiber Spikes"{tuple_delimiter}"Noah Carter used carbon-fiber spikes to enhance performance during the race."{tuple_delimiter}"athletic equipment, performance boost"{tuple_delimiter}7){record_delimiter}
130
+ ("relationship"{tuple_delimiter}"World Athletics Federation"{tuple_delimiter}"100m Sprint Record"{tuple_delimiter}"The World Athletics Federation is responsible for validating and recognizing new sprint records."{tuple_delimiter}"sports regulation, record certification"{tuple_delimiter}9){record_delimiter}
131
+ ("content_keywords"{tuple_delimiter}"athletics, sprinting, record-breaking, sports technology, competition"){completion_delimiter}
132
  #############################""",
133
  ]
134
 
 
149
  Output:
150
  """
151
 
152
+ PROMPTS["entity_continue_extraction"] = """
153
+ MANY entities and relationships were missed in the last extraction.
 
 
154
 
155
+ ---Remember Steps---
156
+
157
+ 1. Identify all entities. For each identified entity, extract the following information:
158
+ - entity_name: Name of the entity, use same language as input text. If English, capitalize the name.
159
+ - entity_type: One of the following types: [{entity_types}]
160
+ - entity_description: Comprehensive description of the entity's attributes and activities
161
+ Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)
162
+
163
+ 2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
164
+ For each pair of related entities, extract the following information:
165
+ - source_entity: name of the source entity, as identified in step 1
166
+ - target_entity: name of the target entity, as identified in step 1
167
+ - relationship_description: explanation as to why you think the source entity and the target entity are related to each other
168
+ - relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
169
+ - relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details
170
+ Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_keywords>{tuple_delimiter}<relationship_strength>)
171
+
172
+ 3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
173
+ Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_level_keywords>)
174
+
175
+ 4. Return output in {language} as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
176
+
177
+ 5. When finished, output {completion_delimiter}
178
+
179
+ ---Output---
180
+
181
+ Add them below using the same format:\n
182
+ """.strip()
183
+
184
+ PROMPTS["entity_if_loop_extraction"] = """
185
+ ---Goal---
186
+
187
+ It appears some entities may have still been missed.
188
+
189
+ ---Output---
190
+
191
+ Answer ONLY by `YES` OR `NO` if there are still entities that need to be added.
192
+ """.strip()
193
 
194
  PROMPTS["fail_response"] = (
195
  "Sorry, I'm not able to provide an answer to that question.[no-context]"
lightrag_webui/src/api/lightrag.ts CHANGED
@@ -161,8 +161,12 @@ axiosInstance.interceptors.response.use(
161
  )
162
 
163
  // API methods
164
- export const queryGraphs = async (label: string, maxDepth: number): Promise<LightragGraphType> => {
165
- const response = await axiosInstance.get(`/graphs?label=${label}&max_depth=${maxDepth}`)
 
 
 
 
166
  return response.data
167
  }
168
 
 
161
  )
162
 
163
  // API methods
164
+ export const queryGraphs = async (
165
+ label: string,
166
+ maxDepth: number,
167
+ minDegree: number
168
+ ): Promise<LightragGraphType> => {
169
+ const response = await axiosInstance.get(`/graphs?label=${encodeURIComponent(label)}&max_depth=${maxDepth}&min_degree=${minDegree}`)
170
  return response.data
171
  }
172
 
lightrag_webui/src/components/graph/GraphControl.tsx CHANGED
@@ -40,18 +40,21 @@ const GraphControl = ({ disableHoverEffect }: { disableHoverEffect?: boolean })
40
  const focusedEdge = useGraphStore.use.focusedEdge()
41
 
42
  /**
43
- * When component mount
44
- * => load the graph
45
  */
46
  useEffect(() => {
47
  // Create & load the graph
48
  const graph = lightrageGraph()
49
  loadGraph(graph)
50
- if (!(graph as any).__force_applied) {
51
- assignLayout()
52
- Object.assign(graph, { __force_applied: true })
53
- }
54
 
 
 
 
 
 
55
  const { setFocusedNode, setSelectedNode, setFocusedEdge, setSelectedEdge, clearSelection } =
56
  useGraphStore.getState()
57
 
@@ -87,7 +90,7 @@ const GraphControl = ({ disableHoverEffect }: { disableHoverEffect?: boolean })
87
  },
88
  clickStage: () => clearSelection()
89
  })
90
- }, [assignLayout, loadGraph, registerEvents, lightrageGraph])
91
 
92
  /**
93
  * When component mount or hovered node change
 
40
  const focusedEdge = useGraphStore.use.focusedEdge()
41
 
42
  /**
43
+ * When component mount or maxIterations changes
44
+ * => load the graph and apply layout
45
  */
46
  useEffect(() => {
47
  // Create & load the graph
48
  const graph = lightrageGraph()
49
  loadGraph(graph)
50
+ assignLayout()
51
+ }, [assignLayout, loadGraph, lightrageGraph, maxIterations])
 
 
52
 
53
+ /**
54
+ * When component mount
55
+ * => register events
56
+ */
57
+ useEffect(() => {
58
  const { setFocusedNode, setSelectedNode, setFocusedEdge, setSelectedEdge, clearSelection } =
59
  useGraphStore.getState()
60
 
 
90
  },
91
  clickStage: () => clearSelection()
92
  })
93
+ }, [registerEvents])
94
 
95
  /**
96
  * When component mount or hovered node change
lightrag_webui/src/components/graph/Settings.tsx CHANGED
@@ -90,9 +90,12 @@ const LabeledNumberInput = ({
90
  {label}
91
  </label>
92
  <Input
93
- value={currentValue || ''}
 
94
  onChange={onValueChange}
95
- className="h-6 w-full min-w-0"
 
 
96
  onBlur={onBlur}
97
  onKeyDown={(e) => {
98
  if (e.key === 'Enter') {
@@ -119,6 +122,7 @@ export default function Settings() {
119
  const enableHideUnselectedEdges = useSettingsStore.use.enableHideUnselectedEdges()
120
  const showEdgeLabel = useSettingsStore.use.showEdgeLabel()
121
  const graphQueryMaxDepth = useSettingsStore.use.graphQueryMaxDepth()
 
122
  const graphLayoutMaxIterations = useSettingsStore.use.graphLayoutMaxIterations()
123
 
124
  const enableHealthCheck = useSettingsStore.use.enableHealthCheck()
@@ -177,6 +181,11 @@ export default function Settings() {
177
  useSettingsStore.setState({ graphQueryMaxDepth: depth })
178
  }, [])
179
 
 
 
 
 
 
180
  const setGraphLayoutMaxIterations = useCallback((iterations: number) => {
181
  if (iterations < 1) return
182
  useSettingsStore.setState({ graphLayoutMaxIterations: iterations })
@@ -266,6 +275,12 @@ export default function Settings() {
266
  value={graphQueryMaxDepth}
267
  onEditFinished={setGraphQueryMaxDepth}
268
  />
 
 
 
 
 
 
269
  <LabeledNumberInput
270
  label="Max Layout Iterations"
271
  min={1}
 
90
  {label}
91
  </label>
92
  <Input
93
+ type="number"
94
+ value={currentValue === null ? '' : currentValue}
95
  onChange={onValueChange}
96
+ className="h-6 w-full min-w-0 pr-1"
97
+ min={min}
98
+ max={max}
99
  onBlur={onBlur}
100
  onKeyDown={(e) => {
101
  if (e.key === 'Enter') {
 
122
  const enableHideUnselectedEdges = useSettingsStore.use.enableHideUnselectedEdges()
123
  const showEdgeLabel = useSettingsStore.use.showEdgeLabel()
124
  const graphQueryMaxDepth = useSettingsStore.use.graphQueryMaxDepth()
125
+ const graphMinDegree = useSettingsStore.use.graphMinDegree()
126
  const graphLayoutMaxIterations = useSettingsStore.use.graphLayoutMaxIterations()
127
 
128
  const enableHealthCheck = useSettingsStore.use.enableHealthCheck()
 
181
  useSettingsStore.setState({ graphQueryMaxDepth: depth })
182
  }, [])
183
 
184
+ const setGraphMinDegree = useCallback((degree: number) => {
185
+ if (degree < 0) return
186
+ useSettingsStore.setState({ graphMinDegree: degree })
187
+ }, [])
188
+
189
  const setGraphLayoutMaxIterations = useCallback((iterations: number) => {
190
  if (iterations < 1) return
191
  useSettingsStore.setState({ graphLayoutMaxIterations: iterations })
 
275
  value={graphQueryMaxDepth}
276
  onEditFinished={setGraphQueryMaxDepth}
277
  />
278
+ <LabeledNumberInput
279
+ label="Minimum Degree"
280
+ min={0}
281
+ value={graphMinDegree}
282
+ onEditFinished={setGraphMinDegree}
283
+ />
284
  <LabeledNumberInput
285
  label="Max Layout Iterations"
286
  min={1}
lightrag_webui/src/components/ui/Input.tsx CHANGED
@@ -7,7 +7,7 @@ const Input = React.forwardRef<HTMLInputElement, React.ComponentProps<'input'>>(
7
  <input
8
  type={type}
9
  className={cn(
10
- 'border-input file:text-foreground placeholder:text-muted-foreground focus-visible:ring-ring flex h-9 rounded-md border bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium focus-visible:ring-1 focus-visible:outline-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm',
11
  className
12
  )}
13
  ref={ref}
 
7
  <input
8
  type={type}
9
  className={cn(
10
+ 'border-input file:text-foreground placeholder:text-muted-foreground focus-visible:ring-ring flex h-9 rounded-md border bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium focus-visible:ring-1 focus-visible:outline-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm [&::-webkit-inner-spin-button]:opacity-100 [&::-webkit-outer-spin-button]:opacity-100',
11
  className
12
  )}
13
  ref={ref}
lightrag_webui/src/hooks/useLightragGraph.tsx CHANGED
@@ -50,11 +50,11 @@ export type NodeType = {
50
  }
51
  export type EdgeType = { label: string }
52
 
53
- const fetchGraph = async (label: string, maxDepth: number) => {
54
  let rawData: any = null
55
 
56
  try {
57
- rawData = await queryGraphs(label, maxDepth)
58
  } catch (e) {
59
  useBackendState.getState().setErrorMessage(errorMessage(e), 'Query Graphs Error!')
60
  return null
@@ -161,13 +161,14 @@ const createSigmaGraph = (rawGraph: RawGraph | null) => {
161
  return graph
162
  }
163
 
164
- const lastQueryLabel = { label: '', maxQueryDepth: 0 }
165
 
166
  const useLightrangeGraph = () => {
167
  const queryLabel = useSettingsStore.use.queryLabel()
168
  const rawGraph = useGraphStore.use.rawGraph()
169
  const sigmaGraph = useGraphStore.use.sigmaGraph()
170
  const maxQueryDepth = useSettingsStore.use.graphQueryMaxDepth()
 
171
 
172
  const getNode = useCallback(
173
  (nodeId: string) => {
@@ -185,13 +186,16 @@ const useLightrangeGraph = () => {
185
 
186
  useEffect(() => {
187
  if (queryLabel) {
188
- if (lastQueryLabel.label !== queryLabel || lastQueryLabel.maxQueryDepth !== maxQueryDepth) {
 
 
189
  lastQueryLabel.label = queryLabel
190
  lastQueryLabel.maxQueryDepth = maxQueryDepth
 
191
 
192
  const state = useGraphStore.getState()
193
  state.reset()
194
- fetchGraph(queryLabel, maxQueryDepth).then((data) => {
195
  // console.debug('Query label: ' + queryLabel)
196
  state.setSigmaGraph(createSigmaGraph(data))
197
  data?.buildDynamicMap()
@@ -203,7 +207,7 @@ const useLightrangeGraph = () => {
203
  state.reset()
204
  state.setSigmaGraph(new DirectedGraph())
205
  }
206
- }, [queryLabel, maxQueryDepth])
207
 
208
  const lightrageGraph = useCallback(() => {
209
  if (sigmaGraph) {
 
50
  }
51
  export type EdgeType = { label: string }
52
 
53
+ const fetchGraph = async (label: string, maxDepth: number, minDegree: number) => {
54
  let rawData: any = null
55
 
56
  try {
57
+ rawData = await queryGraphs(label, maxDepth, minDegree)
58
  } catch (e) {
59
  useBackendState.getState().setErrorMessage(errorMessage(e), 'Query Graphs Error!')
60
  return null
 
161
  return graph
162
  }
163
 
164
+ const lastQueryLabel = { label: '', maxQueryDepth: 0, minDegree: 0 }
165
 
166
  const useLightrangeGraph = () => {
167
  const queryLabel = useSettingsStore.use.queryLabel()
168
  const rawGraph = useGraphStore.use.rawGraph()
169
  const sigmaGraph = useGraphStore.use.sigmaGraph()
170
  const maxQueryDepth = useSettingsStore.use.graphQueryMaxDepth()
171
+ const minDegree = useSettingsStore.use.graphMinDegree()
172
 
173
  const getNode = useCallback(
174
  (nodeId: string) => {
 
186
 
187
  useEffect(() => {
188
  if (queryLabel) {
189
+ if (lastQueryLabel.label !== queryLabel ||
190
+ lastQueryLabel.maxQueryDepth !== maxQueryDepth ||
191
+ lastQueryLabel.minDegree !== minDegree) {
192
  lastQueryLabel.label = queryLabel
193
  lastQueryLabel.maxQueryDepth = maxQueryDepth
194
+ lastQueryLabel.minDegree = minDegree
195
 
196
  const state = useGraphStore.getState()
197
  state.reset()
198
+ fetchGraph(queryLabel, maxQueryDepth, minDegree).then((data) => {
199
  // console.debug('Query label: ' + queryLabel)
200
  state.setSigmaGraph(createSigmaGraph(data))
201
  data?.buildDynamicMap()
 
207
  state.reset()
208
  state.setSigmaGraph(new DirectedGraph())
209
  }
210
+ }, [queryLabel, maxQueryDepth, minDegree])
211
 
212
  const lightrageGraph = useCallback(() => {
213
  if (sigmaGraph) {
lightrag_webui/src/stores/settings.ts CHANGED
@@ -22,6 +22,9 @@ interface SettingsState {
22
  graphQueryMaxDepth: number
23
  setGraphQueryMaxDepth: (depth: number) => void
24
 
 
 
 
25
  graphLayoutMaxIterations: number
26
  setGraphLayoutMaxIterations: (iterations: number) => void
27
 
@@ -66,6 +69,7 @@ const useSettingsStoreBase = create<SettingsState>()(
66
  enableEdgeEvents: false,
67
 
68
  graphQueryMaxDepth: 3,
 
69
  graphLayoutMaxIterations: 10,
70
 
71
  queryLabel: defaultQueryLabel,
@@ -107,6 +111,8 @@ const useSettingsStoreBase = create<SettingsState>()(
107
 
108
  setGraphQueryMaxDepth: (depth: number) => set({ graphQueryMaxDepth: depth }),
109
 
 
 
110
  setEnableHealthCheck: (enable: boolean) => set({ enableHealthCheck: enable }),
111
 
112
  setApiKey: (apiKey: string | null) => set({ apiKey }),
 
22
  graphQueryMaxDepth: number
23
  setGraphQueryMaxDepth: (depth: number) => void
24
 
25
+ graphMinDegree: number
26
+ setGraphMinDegree: (degree: number) => void
27
+
28
  graphLayoutMaxIterations: number
29
  setGraphLayoutMaxIterations: (iterations: number) => void
30
 
 
69
  enableEdgeEvents: false,
70
 
71
  graphQueryMaxDepth: 3,
72
+ graphMinDegree: 0,
73
  graphLayoutMaxIterations: 10,
74
 
75
  queryLabel: defaultQueryLabel,
 
111
 
112
  setGraphQueryMaxDepth: (depth: number) => set({ graphQueryMaxDepth: depth }),
113
 
114
+ setGraphMinDegree: (degree: number) => set({ graphMinDegree: degree }),
115
+
116
  setEnableHealthCheck: (enable: boolean) => set({ enableHealthCheck: enable }),
117
 
118
  setApiKey: (apiKey: string | null) => set({ apiKey }),
lightrag_webui/src/vite-env.d.ts CHANGED
@@ -1 +1,11 @@
1
  /// <reference types="vite/client" />
1
  /// <reference types="vite/client" />
2
+
3
+ interface ImportMetaEnv {
4
+ readonly VITE_API_PROXY: string
5
+ readonly VITE_API_ENDPOINTS: string
6
+ readonly VITE_BACKEND_URL: string
7
+ }
8
+
9
+ interface ImportMeta {
10
+ readonly env: ImportMetaEnv
11
+ }
lightrag_webui/tsconfig.json CHANGED
@@ -26,5 +26,5 @@
26
  "@/*": ["./src/*"]
27
  }
28
  },
29
- "include": ["src"]
30
  }
 
26
  "@/*": ["./src/*"]
27
  }
28
  },
29
+ "include": ["src", "vite.config.ts"]
30
  }
lightrag_webui/vite.config.ts CHANGED
@@ -14,6 +14,21 @@ export default defineConfig({
14
  },
15
  base: './',
16
  build: {
17
- outDir: path.resolve(__dirname, '../lightrag/api/webui')
18
  }
19
  })
 
14
  },
15
  base: './',
16
  build: {
17
+ outDir: path.resolve(__dirname, '../lightrag/api/webui'),
18
+ emptyOutDir: true
19
+ },
20
+ server: {
21
+ proxy: import.meta.env.VITE_API_PROXY === 'true' && import.meta.env.VITE_API_ENDPOINTS ?
22
+ Object.fromEntries(
23
+ import.meta.env.VITE_API_ENDPOINTS.split(',').map(endpoint => [
24
+ endpoint,
25
+ {
26
+ target: import.meta.env.VITE_BACKEND_URL || 'http://localhost:9621',
27
+ changeOrigin: true,
28
+ rewrite: endpoint === '/api' ?
29
+ (path) => path.replace(/^\/api/, '') : undefined
30
+ }
31
+ ])
32
+ ) : {}
33
  }
34
  })
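
The new `server.proxy` block is driven by the three variables declared in `vite-env.d.ts` above. A hypothetical `.env` for local development is shown below; the endpoint list is only an example, and the port matches the fallback already in the config. If these values need to be available while `vite.config.ts` itself is evaluated, Vite's `loadEnv` helper is the usual way to read them.

```
# Example .env values (illustrative, not part of this commit)
VITE_API_PROXY=true
VITE_API_ENDPOINTS=/api,/documents,/graphs,/query,/health
VITE_BACKEND_URL=http://localhost:9621
```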