Roy committed

Commit 888b9e4 · 2 parents: 8bbd053, 4e9e345

main_merge
Files changed (38)
  1. README.md +70 -0
  2. env.example +9 -1
  3. examples/lightrag_openai_neo4j_milvus_redis_demo.py +2 -2
  4. examples/test_postgres.py +51 -0
  5. lightrag/api/README.md +18 -0
  6. lightrag/api/auth.py +41 -0
  7. lightrag/api/lightrag_server.py +29 -9
  8. lightrag/api/requirements.txt +10 -0
  9. lightrag/api/routers/document_routes.py +6 -3
  10. lightrag/api/routers/graph_routes.py +17 -8
  11. lightrag/api/routers/query_routes.py +2 -2
  12. lightrag/api/utils_api.py +21 -3
  13. lightrag/api/webui/assets/{index-rP-YlyR1.css → index-CH-3l4_Z.css} +0 -0
  14. lightrag/api/webui/assets/{index-DbuMPJAD.js → index-CJz72b6Q.js} +0 -0
  15. lightrag/api/webui/index.html +0 -0
  16. lightrag/base.py +1 -1
  17. lightrag/kg/chroma_impl.py +40 -0
  18. lightrag/kg/faiss_impl.py +21 -0
  19. lightrag/kg/milvus_impl.py +25 -0
  20. lightrag/kg/mongo_impl.py +26 -0
  21. lightrag/kg/nano_vector_db_impl.py +20 -0
  22. lightrag/kg/networkx_impl.py +47 -20
  23. lightrag/kg/oracle_impl.py +35 -0
  24. lightrag/kg/postgres_impl.py +145 -41
  25. lightrag/kg/qdrant_impl.py +41 -1
  26. lightrag/kg/tidb_impl.py +65 -0
  27. lightrag/lightrag.py +489 -5
  28. lightrag/operate.py +4 -3
  29. lightrag/prompt.py +80 -42
  30. lightrag_webui/src/api/lightrag.ts +6 -2
  31. lightrag_webui/src/components/graph/GraphControl.tsx +10 -7
  32. lightrag_webui/src/components/graph/Settings.tsx +17 -2
  33. lightrag_webui/src/components/ui/Input.tsx +1 -1
  34. lightrag_webui/src/hooks/useLightragGraph.tsx +10 -6
  35. lightrag_webui/src/stores/settings.ts +6 -0
  36. lightrag_webui/src/vite-env.d.ts +10 -0
  37. lightrag_webui/tsconfig.json +1 -1
  38. lightrag_webui/vite.config.ts +16 -1
README.md CHANGED
@@ -849,6 +849,76 @@ All operations are available in both synchronous and asynchronous versions. The
849
 
850
  These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent.
851
 
852
  ## Cache
853
 
854
  <details>
 
849
 
850
  These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent.
851
 
852
+ ## Entity Merging
853
+
854
+ <details>
855
+ <summary> <b>Merge Entities and Their Relationships</b> </summary>
856
+
857
+ LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships:
858
+
859
+ ```python
860
+ # Basic entity merging
861
+ rag.merge_entities(
862
+ source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"],
863
+ target_entity="AI Technology"
864
+ )
865
+ ```
866
+
867
+ With custom merge strategy:
868
+
869
+ ```python
870
+ # Define custom merge strategy for different fields
871
+ rag.merge_entities(
872
+ source_entities=["John Smith", "Dr. Smith", "J. Smith"],
873
+ target_entity="John Smith",
874
+ merge_strategy={
875
+ "description": "concatenate", # Combine all descriptions
876
+ "entity_type": "keep_first", # Keep the entity type from the first entity
877
+ "source_id": "join_unique" # Combine all unique source IDs
878
+ }
879
+ )
880
+ ```
881
+
882
+ With custom target entity data:
883
+
884
+ ```python
885
+ # Specify exact values for the merged entity
886
+ rag.merge_entities(
887
+ source_entities=["New York", "NYC", "Big Apple"],
888
+ target_entity="New York City",
889
+ target_entity_data={
890
+ "entity_type": "LOCATION",
891
+ "description": "New York City is the most populous city in the United States.",
892
+ }
893
+ )
894
+ ```
895
+
896
+ Advanced usage combining both approaches:
897
+
898
+ ```python
899
+ # Merge company entities with both strategy and custom data
900
+ rag.merge_entities(
901
+ source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"],
902
+ target_entity="Microsoft",
903
+ merge_strategy={
904
+ "description": "concatenate", # Combine all descriptions
905
+ "source_id": "join_unique" # Combine source IDs
906
+ },
907
+ target_entity_data={
908
+ "entity_type": "ORGANIZATION",
909
+ }
910
+ )
911
+ ```
912
+
913
+ When merging entities:
914
+ * All relationships from source entities are redirected to the target entity
915
+ * Duplicate relationships are intelligently merged
916
+ * Self-relationships (loops) are prevented
917
+ * Source entities are removed after merging
918
+ * Relationship weights and attributes are preserved
919
+
920
+ </details>
921
+
922
  ## Cache
923
 
924
  <details>
env.example CHANGED
@@ -48,8 +48,9 @@
48
  # CHUNK_OVERLAP_SIZE=100
49
  # MAX_TOKENS=32768 # Max tokens sent to LLM for summarization
50
  # MAX_TOKEN_SUMMARY=500 # Max tokens for entity or relations summary
51
- # LANGUAGE=English
52
  # MAX_EMBED_TOKENS=8192
 
53
 
54
  ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
55
  LLM_BINDING=ollama
@@ -148,3 +149,10 @@ QDRANT_URL=http://localhost:16333
148
 
149
  ### Redis
150
  REDIS_URI=redis://localhost:6379
48
  # CHUNK_OVERLAP_SIZE=100
49
  # MAX_TOKENS=32768 # Max tokens sent to LLM for summarization
50
  # MAX_TOKEN_SUMMARY=500 # Max tokens for entity or relations summary
51
+ # SUMMARY_LANGUAGE=English
52
  # MAX_EMBED_TOKENS=8192
53
+ # ENABLE_LLM_CACHE_FOR_EXTRACT=false # Enable LLM cache for entity extraction, defaults to false
54
 
55
  ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
56
  LLM_BINDING=ollama
 
149
 
150
  ### Redis
151
  REDIS_URI=redis://localhost:6379
152
+
153
+ # For jwt auth
154
+ AUTH_USERNAME=admin # login name
155
+ AUTH_PASSWORD=admin123 # password
156
+ TOKEN_SECRET=your-key # JWT key
157
+ TOKEN_EXPIRE_HOURS=4 # expire duration
158
+ WHITELIST_PATHS=/login,/health # white list
examples/lightrag_openai_neo4j_milvus_redis_demo.py CHANGED
@@ -37,8 +37,8 @@ async def llm_model_func(
37
  prompt,
38
  system_prompt=system_prompt,
39
  history_messages=history_messages,
40
- api_key="sk-91d0b59f25554251aa813ed756d79a6d",
41
- base_url="https://api.deepseek.com",
42
  **kwargs,
43
  )
44
 
 
37
  prompt,
38
  system_prompt=system_prompt,
39
  history_messages=history_messages,
40
+ api_key="",
41
+ base_url="",
42
  **kwargs,
43
  )
44
 
examples/test_postgres.py ADDED
@@ -0,0 +1,51 @@
1
+ import os
2
+ import asyncio
3
+ from lightrag.kg.postgres_impl import PGGraphStorage
4
+ from lightrag.llm.ollama import ollama_embedding
5
+ from lightrag.utils import EmbeddingFunc
6
+
7
+ #########
8
+ # Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
9
+ # import nest_asyncio
10
+ # nest_asyncio.apply()
11
+ #########
12
+
13
+ WORKING_DIR = "./local_neo4jWorkDir"
14
+
15
+ if not os.path.exists(WORKING_DIR):
16
+ os.mkdir(WORKING_DIR)
17
+
18
+ # AGE
19
+ os.environ["AGE_GRAPH_NAME"] = "dickens"
20
+
21
+ os.environ["POSTGRES_HOST"] = "localhost"
22
+ os.environ["POSTGRES_PORT"] = "15432"
23
+ os.environ["POSTGRES_USER"] = "rag"
24
+ os.environ["POSTGRES_PASSWORD"] = "rag"
25
+ os.environ["POSTGRES_DATABASE"] = "rag"
26
+
27
+
28
+ async def main():
29
+ graph_db = PGGraphStorage(
30
+ namespace="dickens",
31
+ embedding_func=EmbeddingFunc(
32
+ embedding_dim=1024,
33
+ max_token_size=8192,
34
+ func=lambda texts: ollama_embedding(
35
+ texts, embed_model="bge-m3", host="http://localhost:11434"
36
+ ),
37
+ ),
38
+ global_config={},
39
+ )
40
+ await graph_db.initialize()
41
+ labels = await graph_db.get_all_labels()
42
+ print("all labels", labels)
43
+
44
+ res = await graph_db.get_knowledge_graph("FEZZIWIG")
45
+ print("knowledge graphs", res)
46
+
47
+ await graph_db.finalize()
48
+
49
+
50
+ if __name__ == "__main__":
51
+ asyncio.run(main())
lightrag/api/README.md CHANGED
@@ -223,6 +223,11 @@ LightRAG supports binding to various LLM/Embedding backends:
223
 
224
  Use environment variable `LLM_BINDING` or CLI argument `--llm-binding` to select the LLM backend type. Use environment variable `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select the embedding backend type.
225
 
226
  ### Storage Types Supported
227
 
228
  LightRAG uses 4 types of storage for different purposes:
@@ -387,6 +392,19 @@ Note: If you don't need the API functionality, you can install the base package
387
  pip install lightrag-hku
388
  ```
389
 
390
  ## API Endpoints
391
 
392
  All servers (LoLLMs, Ollama, OpenAI and Azure OpenAI) provide the same REST API endpoints for RAG functionality. When API Server is running, visit:
 
223
 
224
  Use environment variable `LLM_BINDING` or CLI argument `--llm-binding` to select the LLM backend type. Use environment variable `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select the embedding backend type.
225
 
226
+ ### Entity Extraction Configuration
227
+ * ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: false)
228
+
229
+ It's very common to set `ENABLE_LLM_CACHE_FOR_EXTRACT` to true in test environments to reduce the cost of LLM calls.
230
+
231
  ### Storage Types Supported
232
 
233
  LightRAG uses 4 types of storage for different purposes:
 
392
  pip install lightrag-hku
393
  ```
394
 
395
+ ## Authentication Endpoints
396
+
397
+ ### JWT Authentication Mechanism
398
+ LightRAG API Server implements JWT-based authentication using HS256 algorithm. To enable secure access control, the following environment variables are required:
399
+ ```bash
400
+ # For jwt auth
401
+ AUTH_USERNAME=admin # login name
402
+ AUTH_PASSWORD=admin123 # password
403
+ TOKEN_SECRET=your-key # JWT key
404
+ TOKEN_EXPIRE_HOURS=4 # expire duration
405
+ WHITELIST_PATHS=/api1,/api2 # white list. /login,/health,/docs,/redoc,/openapi.json are whitelisted by default.
406
+ ```
407
+
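With these variables set, a client first obtains a JWT from `/login` (form-encoded credentials) and then sends it as a Bearer token on later requests. A minimal sketch using `httpx` (which this commit adds to the API requirements); the port `9621` and the `/query` payload are illustrative assumptions:

```python
import httpx

BASE = "http://localhost:9621"  # assumption: address of the running API server

# /login expects OAuth2 form fields and returns {"access_token": ..., "token_type": "bearer"}
resp = httpx.post(f"{BASE}/login", data={"username": "admin", "password": "admin123"})
token = resp.json()["access_token"]

# Protected routes (documents, graph, query) accept the JWT as a Bearer token
headers = {"Authorization": f"Bearer {token}"}
answer = httpx.post(
    f"{BASE}/query",
    headers=headers,
    json={"query": "What is LightRAG?", "mode": "hybrid"},
)
print(answer.json())
```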
408
  ## API Endpoints
409
 
410
  All servers (LoLLMs, Ollama, OpenAI and Azure OpenAI) provide the same REST API endpoints for RAG functionality. When API Server is running, visit:
lightrag/api/auth.py ADDED
@@ -0,0 +1,41 @@
1
+ import os
2
+ from datetime import datetime, timedelta
3
+ import jwt
4
+ from fastapi import HTTPException, status
5
+ from pydantic import BaseModel
6
+
7
+
8
+ class TokenPayload(BaseModel):
9
+ sub: str
10
+ exp: datetime
11
+
12
+
13
+ class AuthHandler:
14
+ def __init__(self):
15
+ self.secret = os.getenv("TOKEN_SECRET", "4f85ds4f56dsf46")
16
+ self.algorithm = "HS256"
17
+ self.expire_hours = int(os.getenv("TOKEN_EXPIRE_HOURS", 4))
18
+
19
+ def create_token(self, username: str) -> str:
20
+ expire = datetime.utcnow() + timedelta(hours=self.expire_hours)
21
+ payload = TokenPayload(sub=username, exp=expire)
22
+ return jwt.encode(payload.dict(), self.secret, algorithm=self.algorithm)
23
+
24
+ def validate_token(self, token: str) -> str:
25
+ try:
26
+ payload = jwt.decode(token, self.secret, algorithms=[self.algorithm])
27
+ expire_timestamp = payload["exp"]
28
+ expire_time = datetime.utcfromtimestamp(expire_timestamp)
29
+
30
+ if datetime.utcnow() > expire_time:
31
+ raise HTTPException(
32
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Token expired"
33
+ )
34
+ return payload["sub"]
35
+ except jwt.PyJWTError:
36
+ raise HTTPException(
37
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
38
+ )
39
+
40
+
41
+ auth_handler = AuthHandler()
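The handler above reads its secret and expiry from the environment when constructed; a quick illustrative round trip (not part of the commit) might look like this:

```python
# Illustrative sketch: exercise AuthHandler on its own
import os

os.environ["TOKEN_SECRET"] = "test-secret"       # assumption: any non-default secret
os.environ["TOKEN_EXPIRE_HOURS"] = "1"

from lightrag.api.auth import AuthHandler

handler = AuthHandler()
token = handler.create_token("admin")            # encode a JWT whose subject is "admin"
assert handler.validate_token(token) == "admin"  # decode it and recover the subject
```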
lightrag/api/lightrag_server.py CHANGED
@@ -2,10 +2,7 @@
2
  LightRAG FastAPI Server
3
  """
4
 
5
- from fastapi import (
6
- FastAPI,
7
- Depends,
8
- )
9
  import asyncio
10
  import os
11
  import logging
@@ -45,12 +42,17 @@ from lightrag.kg.shared_storage import (
45
  initialize_pipeline_status,
46
  get_all_update_flags_status,
47
  )
 
 
48
 
49
  # Load environment variables
50
  # Updated to use the .env that is inside the current folder
51
  # This update allows the user to put a different .env file for each lightrag folder
52
  load_dotenv(".env", override=True)
53
 
 
 
 
54
  # Initialize config parser
55
  config = configparser.ConfigParser()
56
  config.read("config.ini")
@@ -324,16 +326,13 @@ def create_app(args):
324
  vector_db_storage_cls_kwargs={
325
  "cosine_better_than_threshold": args.cosine_threshold
326
  },
327
- enable_llm_cache_for_entity_extract=False, # set to True for debuging to reduce llm fee
328
  embedding_cache_config={
329
  "enabled": True,
330
  "similarity_threshold": 0.95,
331
  "use_llm_check": False,
332
  },
333
  namespace_prefix=args.namespace_prefix,
334
- addon_params={
335
- "language": args.language,
336
- },
337
  auto_manage_storages_states=False,
338
  )
339
  else: # azure_openai
@@ -356,7 +355,7 @@ def create_app(args):
356
  vector_db_storage_cls_kwargs={
357
  "cosine_better_than_threshold": args.cosine_threshold
358
  },
359
- enable_llm_cache_for_entity_extract=False, # set to True for debuging to reduce llm fee
360
  embedding_cache_config={
361
  "enabled": True,
362
  "similarity_threshold": 0.95,
@@ -375,6 +374,27 @@ def create_app(args):
375
  ollama_api = OllamaAPI(rag, top_k=args.top_k)
376
  app.include_router(ollama_api.router, prefix="/api")
377
 
378
  @app.get("/health", dependencies=[Depends(optional_api_key)])
379
  async def get_status():
380
  """Get current system status"""
 
2
  LightRAG FastAPI Server
3
  """
4
 
5
+ from fastapi import FastAPI, Depends, HTTPException, status
 
 
 
6
  import asyncio
7
  import os
8
  import logging
 
42
  initialize_pipeline_status,
43
  get_all_update_flags_status,
44
  )
45
+ from fastapi.security import OAuth2PasswordRequestForm
46
+ from .auth import auth_handler
47
 
48
  # Load environment variables
49
  # Updated to use the .env that is inside the current folder
50
  # This update allows the user to put a different .env file for each lightrag folder
51
  load_dotenv(".env", override=True)
52
 
53
+ # Read entity extraction cache config
54
+ enable_llm_cache = os.getenv("ENABLE_LLM_CACHE_FOR_EXTRACT", "false").lower() == "true"
55
+
56
  # Initialize config parser
57
  config = configparser.ConfigParser()
58
  config.read("config.ini")
 
326
  vector_db_storage_cls_kwargs={
327
  "cosine_better_than_threshold": args.cosine_threshold
328
  },
329
+ enable_llm_cache_for_entity_extract=enable_llm_cache, # Read from environment variable
330
  embedding_cache_config={
331
  "enabled": True,
332
  "similarity_threshold": 0.95,
333
  "use_llm_check": False,
334
  },
335
  namespace_prefix=args.namespace_prefix,
 
 
 
336
  auto_manage_storages_states=False,
337
  )
338
  else: # azure_openai
 
355
  vector_db_storage_cls_kwargs={
356
  "cosine_better_than_threshold": args.cosine_threshold
357
  },
358
+ enable_llm_cache_for_entity_extract=enable_llm_cache, # Read from environment variable
359
  embedding_cache_config={
360
  "enabled": True,
361
  "similarity_threshold": 0.95,
 
374
  ollama_api = OllamaAPI(rag, top_k=args.top_k)
375
  app.include_router(ollama_api.router, prefix="/api")
376
 
377
+ @app.post("/login")
378
+ async def login(form_data: OAuth2PasswordRequestForm = Depends()):
379
+ username = os.getenv("AUTH_USERNAME")
380
+ password = os.getenv("AUTH_PASSWORD")
381
+
382
+ if not (username and password):
383
+ raise HTTPException(
384
+ status_code=status.HTTP_501_NOT_IMPLEMENTED,
385
+ detail="Authentication not configured",
386
+ )
387
+
388
+ if form_data.username != username or form_data.password != password:
389
+ raise HTTPException(
390
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Incorrect credentials"
391
+ )
392
+
393
+ return {
394
+ "access_token": auth_handler.create_token(username),
395
+ "token_type": "bearer",
396
+ }
397
+
398
  @app.get("/health", dependencies=[Depends(optional_api_key)])
399
  async def get_status():
400
  """Get current system status"""
lightrag/api/requirements.txt CHANGED
@@ -1,10 +1,20 @@
1
  aiofiles
2
  ascii_colors
 
 
3
  fastapi
 
 
 
4
  numpy
 
 
5
  pipmaster
 
6
  python-dotenv
 
7
  python-multipart
 
8
  tenacity
9
  tiktoken
10
  uvicorn
 
1
  aiofiles
2
  ascii_colors
3
+ asyncpg
4
+ distro
5
  fastapi
6
+ httpcore
7
+ httpx
8
+ jiter
9
  numpy
10
+ openai
11
+ passlib[bcrypt]
12
  pipmaster
13
+ PyJWT
14
  python-dotenv
15
+ python-jose[cryptography]
16
  python-multipart
17
+ pytz
18
  tenacity
19
  tiktoken
20
  uvicorn
lightrag/api/routers/document_routes.py CHANGED
@@ -16,10 +16,13 @@ from pydantic import BaseModel, Field, field_validator
16
 
17
  from lightrag import LightRAG
18
  from lightrag.base import DocProcessingStatus, DocStatus
19
- from ..utils_api import get_api_key_dependency
20
 
21
-
22
- router = APIRouter(prefix="/documents", tags=["documents"])
 
 
 
23
 
24
  # Temporary file prefix
25
  temp_prefix = "__tmp__"
 
16
 
17
  from lightrag import LightRAG
18
  from lightrag.base import DocProcessingStatus, DocStatus
19
+ from ..utils_api import get_api_key_dependency, get_auth_dependency
20
 
21
+ router = APIRouter(
22
+ prefix="/documents",
23
+ tags=["documents"],
24
+ dependencies=[Depends(get_auth_dependency())],
25
+ )
26
 
27
  # Temporary file prefix
28
  temp_prefix = "__tmp__"
lightrag/api/routers/graph_routes.py CHANGED
@@ -3,12 +3,11 @@ This module contains all graph-related routes for the LightRAG API.
3
  """
4
 
5
  from typing import Optional
6
-
7
  from fastapi import APIRouter, Depends
8
 
9
- from ..utils_api import get_api_key_dependency
10
 
11
- router = APIRouter(tags=["graph"])
12
 
13
 
14
  def create_graph_routes(rag, api_key: Optional[str] = None):
@@ -25,23 +24,33 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
25
  return await rag.get_graph_labels()
26
 
27
  @router.get("/graphs", dependencies=[Depends(optional_api_key)])
28
- async def get_knowledge_graph(label: str, max_depth: int = 3):
 
 
29
  """
30
  Retrieve a connected subgraph of nodes where the label includes the specified label.
31
  Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
32
  When reducing the number of nodes, the prioritization criteria are as follows:
33
- 1. Label matching nodes take precedence
34
- 2. Followed by nodes directly connected to the matching nodes
35
- 3. Finally, the degree of the nodes
 
36
  Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000)
37
 
38
  Args:
39
  label (str): Label to get knowledge graph for
40
  max_depth (int, optional): Maximum depth of graph. Defaults to 3.
 
 
41
 
42
  Returns:
43
  Dict[str, List[str]]: Knowledge graph for label
44
  """
45
- return await rag.get_knowledge_graph(node_label=label, max_depth=max_depth)
46
 
47
  return router
 
3
  """
4
 
5
  from typing import Optional
 
6
  from fastapi import APIRouter, Depends
7
 
8
+ from ..utils_api import get_api_key_dependency, get_auth_dependency
9
 
10
+ router = APIRouter(tags=["graph"], dependencies=[Depends(get_auth_dependency())])
11
 
12
 
13
  def create_graph_routes(rag, api_key: Optional[str] = None):
 
24
  return await rag.get_graph_labels()
25
 
26
  @router.get("/graphs", dependencies=[Depends(optional_api_key)])
27
+ async def get_knowledge_graph(
28
+ label: str, max_depth: int = 3, min_degree: int = 0, inclusive: bool = False
29
+ ):
30
  """
31
  Retrieve a connected subgraph of nodes where the label includes the specified label.
32
  Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
33
  When reducing the number of nodes, the prioritization criteria are as follows:
34
+ 1. min_degree does not affect nodes directly connected to the matching nodes
35
+ 2. Label matching nodes take precedence
36
+ 3. Followed by nodes directly connected to the matching nodes
37
+ 4. Finally, the degree of the nodes
38
  Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000)
39
 
40
  Args:
41
  label (str): Label to get knowledge graph for
42
  max_depth (int, optional): Maximum depth of graph. Defaults to 3.
43
+ inclusive (bool, optional): If True, match nodes whose label includes the given label (partial match). Defaults to False.
44
+ min_degree (int, optional): Minimum degree of nodes. Defaults to 0.
45
 
46
  Returns:
47
  Dict[str, List[str]]: Knowledge graph for label
48
  """
49
+ return await rag.get_knowledge_graph(
50
+ node_label=label,
51
+ max_depth=max_depth,
52
+ inclusive=inclusive,
53
+ min_degree=min_degree,
54
+ )
55
 
56
  return router
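With the extra query parameters, a client can request an exact-match subgraph and filter out low-degree nodes. An illustrative request (the server address and entity label are assumptions; the Bearer token is only needed when JWT auth is configured):

```python
import httpx

resp = httpx.get(
    "http://localhost:9621/graphs",             # assumption: default API server address
    params={"label": "Microsoft", "max_depth": 3, "min_degree": 1, "inclusive": True},
    headers={"Authorization": "Bearer <jwt>"},  # placeholder token
)
print(resp.json())
```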
lightrag/api/routers/query_routes.py CHANGED
@@ -8,12 +8,12 @@ from typing import Any, Dict, List, Literal, Optional
8
 
9
  from fastapi import APIRouter, Depends, HTTPException
10
  from lightrag.base import QueryParam
11
- from ..utils_api import get_api_key_dependency
12
  from pydantic import BaseModel, Field, field_validator
13
 
14
  from ascii_colors import trace_exception
15
 
16
- router = APIRouter(tags=["query"])
17
 
18
 
19
  class QueryRequest(BaseModel):
 
8
 
9
  from fastapi import APIRouter, Depends, HTTPException
10
  from lightrag.base import QueryParam
11
+ from ..utils_api import get_api_key_dependency, get_auth_dependency
12
  from pydantic import BaseModel, Field, field_validator
13
 
14
  from ascii_colors import trace_exception
15
 
16
+ router = APIRouter(tags=["query"], dependencies=[Depends(get_auth_dependency())])
17
 
18
 
19
  class QueryRequest(BaseModel):
lightrag/api/utils_api.py CHANGED
@@ -9,10 +9,11 @@ import sys
9
  import logging
10
  from ascii_colors import ASCIIColors
11
  from lightrag.api import __api_version__
12
- from fastapi import HTTPException, Security
13
  from dotenv import load_dotenv
14
- from fastapi.security import APIKeyHeader
15
  from starlette.status import HTTP_403_FORBIDDEN
 
16
 
17
  # Load environment variables
18
  load_dotenv(override=True)
@@ -31,6 +32,24 @@ class OllamaServerInfos:
31
  ollama_server_infos = OllamaServerInfos()
32
 
33
 
34
  def get_api_key_dependency(api_key: Optional[str]):
35
  """
36
  Create an API key dependency for route protection.
@@ -340,7 +359,6 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
340
  # Inject chunk configuration
341
  args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
342
  args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
343
- args.language = get_env_value("LANGUAGE", "English")
344
 
345
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
346
 
 
9
  import logging
10
  from ascii_colors import ASCIIColors
11
  from lightrag.api import __api_version__
12
+ from fastapi import HTTPException, Security, Depends, Request
13
  from dotenv import load_dotenv
14
+ from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
15
  from starlette.status import HTTP_403_FORBIDDEN
16
+ from .auth import auth_handler
17
 
18
  # Load environment variables
19
  load_dotenv(override=True)
 
32
  ollama_server_infos = OllamaServerInfos()
33
 
34
 
35
+ def get_auth_dependency():
36
+ whitelist = os.getenv("WHITELIST_PATHS", "").split(",")
37
+
38
+ async def dependency(
39
+ request: Request,
40
+ token: str = Depends(OAuth2PasswordBearer(tokenUrl="login", auto_error=False)),
41
+ ):
42
+ if request.url.path in whitelist:
43
+ return
44
+
45
+ if not (os.getenv("AUTH_USERNAME") and os.getenv("AUTH_PASSWORD")):
46
+ return
47
+
48
+ auth_handler.validate_token(token)
49
+
50
+ return dependency
51
+
52
+
53
  def get_api_key_dependency(api_key: Optional[str]):
54
  """
55
  Create an API key dependency for route protection.
 
359
  # Inject chunk configuration
360
  args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
361
  args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
 
362
 
363
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
364
 
lightrag/api/webui/assets/{index-rP-YlyR1.css → index-CH-3l4_Z.css} RENAMED
Binary files a/lightrag/api/webui/assets/index-rP-YlyR1.css and b/lightrag/api/webui/assets/index-CH-3l4_Z.css differ
 
lightrag/api/webui/assets/{index-DbuMPJAD.js → index-CJz72b6Q.js} RENAMED
Binary files a/lightrag/api/webui/assets/index-DbuMPJAD.js and b/lightrag/api/webui/assets/index-CJz72b6Q.js differ
 
lightrag/api/webui/index.html CHANGED
Binary files a/lightrag/api/webui/index.html and b/lightrag/api/webui/index.html differ
 
lightrag/base.py CHANGED
@@ -206,7 +206,7 @@ class BaseGraphStorage(StorageNameSpace, ABC):
206
 
207
  @abstractmethod
208
  async def get_knowledge_graph(
209
- self, node_label: str, max_depth: int = 5
210
  ) -> KnowledgeGraph:
211
  """Retrieve a subgraph of the knowledge graph starting from a given node."""
212
 
 
206
 
207
  @abstractmethod
208
  async def get_knowledge_graph(
209
+ self, node_label: str, max_depth: int = 3
210
  ) -> KnowledgeGraph:
211
  """Retrieve a subgraph of the knowledge graph starting from a given node."""
212
 
lightrag/kg/chroma_impl.py CHANGED
@@ -229,3 +229,43 @@ class ChromaVectorDBStorage(BaseVectorStorage):
229
  except Exception as e:
230
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
231
  raise
229
  except Exception as e:
230
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
231
  raise
232
+
233
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
234
+ """Search for records with IDs starting with a specific prefix.
235
+
236
+ Args:
237
+ prefix: The prefix to search for in record IDs
238
+
239
+ Returns:
240
+ List of records with matching ID prefixes
241
+ """
242
+ try:
243
+ # Get all records from the collection
244
+ # Since ChromaDB doesn't directly support prefix search on IDs,
245
+ # we'll get all records and filter in Python
246
+ results = self._collection.get(
247
+ include=["metadatas", "documents", "embeddings"]
248
+ )
249
+
250
+ matching_records = []
251
+
252
+ # Filter records where ID starts with the prefix
253
+ for i, record_id in enumerate(results["ids"]):
254
+ if record_id.startswith(prefix):
255
+ matching_records.append(
256
+ {
257
+ "id": record_id,
258
+ "content": results["documents"][i],
259
+ "vector": results["embeddings"][i],
260
+ **results["metadatas"][i],
261
+ }
262
+ )
263
+
264
+ logger.debug(
265
+ f"Found {len(matching_records)} records with prefix '{prefix}'"
266
+ )
267
+ return matching_records
268
+
269
+ except Exception as e:
270
+ logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
271
+ raise
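Every vector storage backend touched by this commit gains the same `search_by_prefix` signature, so callers can look up records by ID prefix regardless of the configured backend. A usage sketch (the `ent-` prefix follows the `compute_mdhash_id(..., prefix="ent-")` convention used for entity IDs elsewhere in LightRAG):

```python
# Illustrative: list all entity records in any BaseVectorStorage backend
async def list_entity_records(storage):
    records = await storage.search_by_prefix("ent-")  # entity IDs start with "ent-"
    for rec in records:
        # every backend returns at least an "id" field; other keys depend on meta_fields
        print(rec["id"], rec.get("entity_name"))
    return records
```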
lightrag/kg/faiss_impl.py CHANGED
@@ -371,3 +371,24 @@ class FaissVectorDBStorage(BaseVectorStorage):
371
  return False # Return error
372
 
373
  return True # Return success
371
  return False # Return error
372
 
373
  return True # Return success
374
+
375
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
376
+ """Search for records with IDs starting with a specific prefix.
377
+
378
+ Args:
379
+ prefix: The prefix to search for in record IDs
380
+
381
+ Returns:
382
+ List of records with matching ID prefixes
383
+ """
384
+ matching_records = []
385
+
386
+ # Search for records with IDs starting with the prefix
387
+ for faiss_id, meta in self._id_to_meta.items():
388
+ if "__id__" in meta and meta["__id__"].startswith(prefix):
389
+ # Create a copy of all metadata and add "id" field
390
+ record = {**meta, "id": meta["__id__"]}
391
+ matching_records.append(record)
392
+
393
+ logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
394
+ return matching_records
lightrag/kg/milvus_impl.py CHANGED
@@ -206,3 +206,28 @@ class MilvusVectorDBStorage(BaseVectorStorage):
206
 
207
  except Exception as e:
208
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
206
 
207
  except Exception as e:
208
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
209
+
210
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
211
+ """Search for records with IDs starting with a specific prefix.
212
+
213
+ Args:
214
+ prefix: The prefix to search for in record IDs
215
+
216
+ Returns:
217
+ List of records with matching ID prefixes
218
+ """
219
+ try:
220
+ # Use Milvus query with expression to find IDs with the given prefix
221
+ expression = f'id like "{prefix}%"'
222
+ results = self._client.query(
223
+ collection_name=self.namespace,
224
+ filter=expression,
225
+ output_fields=list(self.meta_fields) + ["id"],
226
+ )
227
+
228
+ logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
229
+ return results
230
+
231
+ except Exception as e:
232
+ logger.error(f"Error searching for records with prefix '{prefix}': {e}")
233
+ return []
lightrag/kg/mongo_impl.py CHANGED
@@ -1045,6 +1045,32 @@ class MongoVectorDBStorage(BaseVectorStorage):
1045
  except PyMongoError as e:
1046
  logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
1047
 
1048
 
1049
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
1050
  collection_names = await db.list_collection_names()
 
1045
  except PyMongoError as e:
1046
  logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
1047
 
1048
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
1049
+ """Search for records with IDs starting with a specific prefix.
1050
+
1051
+ Args:
1052
+ prefix: The prefix to search for in record IDs
1053
+
1054
+ Returns:
1055
+ List of records with matching ID prefixes
1056
+ """
1057
+ try:
1058
+ # Use MongoDB regex to find documents where _id starts with the prefix
1059
+ cursor = self._data.find({"_id": {"$regex": f"^{prefix}"}})
1060
+ matching_records = await cursor.to_list(length=None)
1061
+
1062
+ # Format results
1063
+ results = [{**doc, "id": doc["_id"]} for doc in matching_records]
1064
+
1065
+ logger.debug(
1066
+ f"Found {len(results)} records with prefix '{prefix}' in {self.namespace}"
1067
+ )
1068
+ return results
1069
+
1070
+ except PyMongoError as e:
1071
+ logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
1072
+ return []
1073
+
1074
 
1075
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
1076
  collection_names = await db.list_collection_names()
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -236,3 +236,23 @@ class NanoVectorDBStorage(BaseVectorStorage):
236
  return False # Return error
237
 
238
  return True # Return success
236
  return False # Return error
237
 
238
  return True # Return success
239
+
240
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
241
+ """Search for records with IDs starting with a specific prefix.
242
+
243
+ Args:
244
+ prefix: The prefix to search for in record IDs
245
+
246
+ Returns:
247
+ List of records with matching ID prefixes
248
+ """
249
+ storage = await self.client_storage
250
+ matching_records = []
251
+
252
+ # Search for records with IDs starting with the prefix
253
+ for record in storage["data"]:
254
+ if "__id__" in record and record["__id__"].startswith(prefix):
255
+ matching_records.append({**record, "id": record["__id__"]})
256
+
257
+ logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
258
+ return matching_records
lightrag/kg/networkx_impl.py CHANGED
@@ -232,19 +232,26 @@ class NetworkXStorage(BaseGraphStorage):
232
  return sorted(list(labels))
233
 
234
  async def get_knowledge_graph(
235
- self, node_label: str, max_depth: int = 5
 
 
 
 
236
  ) -> KnowledgeGraph:
237
  """
238
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
239
  Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
240
  When reducing the number of nodes, the prioritization criteria are as follows:
241
- 1. Label matching nodes take precedence
242
- 2. Followed by nodes directly connected to the matching nodes
243
- 3. Finally, the degree of the nodes
 
244
 
245
  Args:
246
  node_label: Label of the starting node
247
  max_depth: Maximum depth of the subgraph
 
 
248
 
249
  Returns:
250
  KnowledgeGraph object containing nodes and edges
@@ -255,6 +262,10 @@ class NetworkXStorage(BaseGraphStorage):
255
 
256
  graph = await self._get_graph()
257
 
 
 
 
 
258
  # Handle special case for "*" label
259
  if node_label == "*":
260
  # For "*", return the entire graph including all nodes and edges
@@ -262,11 +273,16 @@ class NetworkXStorage(BaseGraphStorage):
262
  graph.copy()
263
  ) # Create a copy to avoid modifying the original graph
264
  else:
265
- # Find nodes with matching node id (partial match)
266
  nodes_to_explore = []
267
  for n, attr in graph.nodes(data=True):
268
- if node_label in str(n): # Use partial matching
269
- nodes_to_explore.append(n)
270
 
271
  if not nodes_to_explore:
272
  logger.warning(f"No nodes found with label {node_label}")
@@ -277,26 +293,37 @@ class NetworkXStorage(BaseGraphStorage):
277
  for start_node in nodes_to_explore:
278
  node_subgraph = nx.ego_graph(graph, start_node, radius=max_depth)
279
  combined_subgraph = nx.compose(combined_subgraph, node_subgraph)
280
- subgraph = combined_subgraph
281
-
282
- # Check if number of nodes exceeds max_graph_nodes
283
- if len(subgraph.nodes()) > MAX_GRAPH_NODES:
284
- origin_nodes = len(subgraph.nodes())
285
-
286
- node_degrees = dict(subgraph.degree())
287
-
288
- start_nodes = set()
289
- direct_connected_nodes = set()
290
 
291
- if node_label != "*" and nodes_to_explore:
 
292
  start_nodes = set(nodes_to_explore)
293
  # Get nodes directly connected to all start nodes
294
  for start_node in start_nodes:
295
- direct_connected_nodes.update(subgraph.neighbors(start_node))
 
 
296
 
297
  # Remove start nodes from directly connected nodes (avoid duplicates)
298
  direct_connected_nodes -= start_nodes
299
 
 
300
  def priority_key(node_item):
301
  node, degree = node_item
302
  # Priority order: start(2) > directly connected(1) > other nodes(0)
@@ -356,7 +383,7 @@ class NetworkXStorage(BaseGraphStorage):
356
  result.edges.append(
357
  KnowledgeGraphEdge(
358
  id=edge_id,
359
- type="RELATED",
360
  source=str(source),
361
  target=str(target),
362
  properties=edge_data,
 
232
  return sorted(list(labels))
233
 
234
  async def get_knowledge_graph(
235
+ self,
236
+ node_label: str,
237
+ max_depth: int = 3,
238
+ min_degree: int = 0,
239
+ inclusive: bool = False,
240
  ) -> KnowledgeGraph:
241
  """
242
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
243
  Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
244
  When reducing the number of nodes, the prioritization criteria are as follows:
245
+ 1. min_degree does not affect nodes directly connected to the matching nodes
246
+ 2. Label matching nodes take precedence
247
+ 3. Followed by nodes directly connected to the matching nodes
248
+ 4. Finally, the degree of the nodes
249
 
250
  Args:
251
  node_label: Label of the starting node
252
  max_depth: Maximum depth of the subgraph
253
+ min_degree: Minimum degree of nodes to include. Defaults to 0
254
+ inclusive: Do an inclusive search if true
255
 
256
  Returns:
257
  KnowledgeGraph object containing nodes and edges
 
262
 
263
  graph = await self._get_graph()
264
 
265
+ # Initialize sets for start nodes and direct connected nodes
266
+ start_nodes = set()
267
+ direct_connected_nodes = set()
268
+
269
  # Handle special case for "*" label
270
  if node_label == "*":
271
  # For "*", return the entire graph including all nodes and edges
 
273
  graph.copy()
274
  ) # Create a copy to avoid modifying the original graph
275
  else:
276
+ # Find nodes with matching node id based on search_mode
277
  nodes_to_explore = []
278
  for n, attr in graph.nodes(data=True):
279
+ node_str = str(n)
280
+ if not inclusive:
281
+ if node_label == node_str: # Use exact matching
282
+ nodes_to_explore.append(n)
283
+ else: # inclusive mode
284
+ if node_label in node_str: # Use partial matching
285
+ nodes_to_explore.append(n)
286
 
287
  if not nodes_to_explore:
288
  logger.warning(f"No nodes found with label {node_label}")
 
293
  for start_node in nodes_to_explore:
294
  node_subgraph = nx.ego_graph(graph, start_node, radius=max_depth)
295
  combined_subgraph = nx.compose(combined_subgraph, node_subgraph)
296
 
297
+ # Get start nodes and direct connected nodes
298
+ if nodes_to_explore:
299
  start_nodes = set(nodes_to_explore)
300
  # Get nodes directly connected to all start nodes
301
  for start_node in start_nodes:
302
+ direct_connected_nodes.update(
303
+ combined_subgraph.neighbors(start_node)
304
+ )
305
 
306
  # Remove start nodes from directly connected nodes (avoid duplicates)
307
  direct_connected_nodes -= start_nodes
308
 
309
+ subgraph = combined_subgraph
310
+
311
+ # Filter nodes based on min_degree, but keep start nodes and direct connected nodes
312
+ if min_degree > 0:
313
+ nodes_to_keep = [
314
+ node
315
+ for node, degree in subgraph.degree()
316
+ if node in start_nodes
317
+ or node in direct_connected_nodes
318
+ or degree >= min_degree
319
+ ]
320
+ subgraph = subgraph.subgraph(nodes_to_keep)
321
+
322
+ # Check if number of nodes exceeds max_graph_nodes
323
+ if len(subgraph.nodes()) > MAX_GRAPH_NODES:
324
+ origin_nodes = len(subgraph.nodes())
325
+ node_degrees = dict(subgraph.degree())
326
+
327
  def priority_key(node_item):
328
  node, degree = node_item
329
  # Priority order: start(2) > directly connected(1) > other nodes(0)
 
383
  result.edges.append(
384
  KnowledgeGraphEdge(
385
  id=edge_id,
386
+ type="DIRECTED",
387
  source=str(source),
388
  target=str(target),
389
  properties=edge_data,
lightrag/kg/oracle_impl.py CHANGED
@@ -494,6 +494,41 @@ class OracleVectorDBStorage(BaseVectorStorage):
494
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
495
  raise
496
 
497
 
498
  @final
499
  @dataclass
 
494
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
495
  raise
496
 
497
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
498
+ """Search for records with IDs starting with a specific prefix.
499
+
500
+ Args:
501
+ prefix: The prefix to search for in record IDs
502
+
503
+ Returns:
504
+ List of records with matching ID prefixes
505
+ """
506
+ try:
507
+ # Determine the appropriate table based on namespace
508
+ table_name = namespace_to_table_name(self.namespace)
509
+
510
+ # Create SQL query to find records with IDs starting with prefix
511
+ search_sql = f"""
512
+ SELECT * FROM {table_name}
513
+ WHERE workspace = :workspace
514
+ AND id LIKE :prefix_pattern
515
+ ORDER BY id
516
+ """
517
+
518
+ params = {"workspace": self.db.workspace, "prefix_pattern": f"{prefix}%"}
519
+
520
+ # Execute query and get results
521
+ results = await self.db.query(search_sql, params, multirows=True)
522
+
523
+ logger.debug(
524
+ f"Found {len(results) if results else 0} records with prefix '{prefix}'"
525
+ )
526
+ return results or []
527
+
528
+ except Exception as e:
529
+ logger.error(f"Error searching records with prefix '{prefix}': {e}")
530
+ return []
531
+
532
 
533
  @final
534
  @dataclass
lightrag/kg/postgres_impl.py CHANGED
@@ -585,6 +585,41 @@ class PGVectorStorage(BaseVectorStorage):
585
  except Exception as e:
586
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
587
 
588
 
589
  @final
590
  @dataclass
@@ -785,42 +820,85 @@ class PGGraphStorage(BaseGraphStorage):
785
  v = record[k]
786
  # agtype comes back '{key: value}::type' which must be parsed
787
  if isinstance(v, str) and "::" in v:
788
- dtype = v.split("::")[-1]
789
- v = v.split("::")[0]
790
- if dtype == "vertex":
791
- vertex = json.loads(v)
792
- vertices[vertex["id"]] = vertex.get("properties")
793
 
794
  # iterate returned fields and parse appropriately
795
  for k in record.keys():
796
  v = record[k]
797
  if isinstance(v, str) and "::" in v:
798
- dtype = v.split("::")[-1]
799
- v = v.split("::")[0]
800
- else:
801
- dtype = ""
802
-
803
- if dtype == "vertex":
804
- vertex = json.loads(v)
805
- field = vertex.get("properties")
806
- if not field:
807
- field = {}
808
- field["label"] = PGGraphStorage._decode_graph_label(field["node_id"])
809
- d[k] = field
810
- # convert edge from id-label->id by replacing id with node information
811
- # we only do this if the vertex was also returned in the query
812
- # this is an attempt to be consistent with neo4j implementation
813
- elif dtype == "edge":
814
- edge = json.loads(v)
815
- d[k] = (
816
- vertices.get(edge["start_id"], {}),
817
- edge[
818
- "label"
819
- ], # we don't use decode_graph_label(), since edge label is always "DIRECTED"
820
- vertices.get(edge["end_id"], {}),
821
- )
822
  else:
823
- d[k] = json.loads(v) if isinstance(v, str) else v
 
 
 
824
 
825
  return d
826
 
@@ -1294,7 +1372,7 @@ class PGGraphStorage(BaseGraphStorage):
1294
  OPTIONAL MATCH p = (n)-[*..%d]-(m)
1295
  RETURN nodes(p) AS nodes, relationships(p) AS relationships
1296
  LIMIT %d
1297
- $$) AS (nodes agtype[], relationships agtype[])""" % (
1298
  self.graph_name,
1299
  encoded_node_label,
1300
  max_depth,
@@ -1303,17 +1381,23 @@ class PGGraphStorage(BaseGraphStorage):
1303
 
1304
  results = await self._query(query)
1305
 
1306
- nodes = set()
1307
  edges = []
 
1308
 
1309
  for result in results:
1310
  if node_label == "*":
1311
  if result["n"]:
1312
  node = result["n"]
1313
- nodes.add(self._decode_graph_label(node["node_id"]))
 
 
 
1314
  if result["m"]:
1315
  node = result["m"]
1316
- nodes.add(self._decode_graph_label(node["node_id"]))
 
 
1317
  if result["r"]:
1318
  edge = result["r"]
1319
  src_id = self._decode_graph_label(edge["start_id"])
@@ -1322,16 +1406,36 @@ class PGGraphStorage(BaseGraphStorage):
1322
  else:
1323
  if result["nodes"]:
1324
  for node in result["nodes"]:
1325
- nodes.add(self._decode_graph_label(node["node_id"]))
 
 
 
1326
  if result["relationships"]:
1327
- for edge in result["relationships"]:
1328
- src_id = self._decode_graph_label(edge["start_id"])
1329
- tgt_id = self._decode_graph_label(edge["end_id"])
1330
- edges.append((src_id, tgt_id))
1331
 
1332
  kg = KnowledgeGraph(
1333
- nodes=[KnowledgeGraphNode(id=node_id) for node_id in nodes],
1334
- edges=[KnowledgeGraphEdge(source=src, target=tgt) for src, tgt in edges],
1335
  )
1336
 
1337
  return kg
 
585
  except Exception as e:
586
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
587
 
588
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
589
+ """Search for records with IDs starting with a specific prefix.
590
+
591
+ Args:
592
+ prefix: The prefix to search for in record IDs
593
+
594
+ Returns:
595
+ List of records with matching ID prefixes
596
+ """
597
+ table_name = namespace_to_table_name(self.namespace)
598
+ if not table_name:
599
+ logger.error(f"Unknown namespace for prefix search: {self.namespace}")
600
+ return []
601
+
602
+ search_sql = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id LIKE $2"
603
+ params = {"workspace": self.db.workspace, "prefix": f"{prefix}%"}
604
+
605
+ try:
606
+ results = await self.db.query(search_sql, params, multirows=True)
607
+ logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
608
+
609
+ # Format results to match the expected return format
610
+ formatted_results = []
611
+ for record in results:
612
+ formatted_record = dict(record)
613
+ # Ensure id field is available (for consistency with NanoVectorDB implementation)
614
+ if "id" not in formatted_record:
615
+ formatted_record["id"] = record["id"]
616
+ formatted_results.append(formatted_record)
617
+
618
+ return formatted_results
619
+ except Exception as e:
620
+ logger.error(f"Error during prefix search for '{prefix}': {e}")
621
+ return []
622
+
623
 
624
  @final
625
  @dataclass
 
820
  v = record[k]
821
  # agtype comes back '{key: value}::type' which must be parsed
822
  if isinstance(v, str) and "::" in v:
823
+ if v.startswith("[") and v.endswith("]"):
824
+ if "::vertex" not in v:
825
+ continue
826
+ v = v.replace("::vertex", "")
827
+ vertexes = json.loads(v)
828
+ for vertex in vertexes:
829
+ vertices[vertex["id"]] = vertex.get("properties")
830
+ else:
831
+ dtype = v.split("::")[-1]
832
+ v = v.split("::")[0]
833
+ if dtype == "vertex":
834
+ vertex = json.loads(v)
835
+ vertices[vertex["id"]] = vertex.get("properties")
836
 
837
  # iterate returned fields and parse appropriately
838
  for k in record.keys():
839
  v = record[k]
840
  if isinstance(v, str) and "::" in v:
841
+ if v.startswith("[") and v.endswith("]"):
842
+ if "::vertex" in v:
843
+ v = v.replace("::vertex", "")
844
+ vertexes = json.loads(v)
845
+ dl = []
846
+ for vertex in vertexes:
847
+ prop = vertex.get("properties")
848
+ if not prop:
849
+ prop = {}
850
+ prop["label"] = PGGraphStorage._decode_graph_label(
851
+ prop["node_id"]
852
+ )
853
+ dl.append(prop)
854
+ d[k] = dl
855
+
856
+ elif "::edge" in v:
857
+ v = v.replace("::edge", "")
858
+ edges = json.loads(v)
859
+ dl = []
860
+ for edge in edges:
861
+ dl.append(
862
+ (
863
+ vertices[edge["start_id"]],
864
+ edge["label"],
865
+ vertices[edge["end_id"]],
866
+ )
867
+ )
868
+ d[k] = dl
869
+ else:
870
+ print("WARNING: unsupported type")
871
+ continue
872
+
873
+ else:
874
+ dtype = v.split("::")[-1]
875
+ v = v.split("::")[0]
876
+ if dtype == "vertex":
877
+ vertex = json.loads(v)
878
+ field = vertex.get("properties")
879
+ if not field:
880
+ field = {}
881
+ field["label"] = PGGraphStorage._decode_graph_label(
882
+ field["node_id"]
883
+ )
884
+ d[k] = field
885
+ # convert edge from id-label->id by replacing id with node information
886
+ # we only do this if the vertex was also returned in the query
887
+ # this is an attempt to be consistent with neo4j implementation
888
+ elif dtype == "edge":
889
+ edge = json.loads(v)
890
+ d[k] = (
891
+ vertices.get(edge["start_id"], {}),
892
+ edge[
893
+ "label"
894
+ ], # we don't use decode_graph_label(), since edge label is always "DIRECTED"
895
+ vertices.get(edge["end_id"], {}),
896
+ )
897
  else:
898
+ if v is None or (v.count("{") < 1 and v.count("[") < 1):
899
+ d[k] = v
900
+ else:
901
+ d[k] = json.loads(v) if isinstance(v, str) else v
902
 
903
  return d
904
 
 
1372
  OPTIONAL MATCH p = (n)-[*..%d]-(m)
1373
  RETURN nodes(p) AS nodes, relationships(p) AS relationships
1374
  LIMIT %d
1375
+ $$) AS (nodes agtype, relationships agtype)""" % (
1376
  self.graph_name,
1377
  encoded_node_label,
1378
  max_depth,
 
1381
 
1382
  results = await self._query(query)
1383
 
1384
+ nodes = {}
1385
  edges = []
1386
+ unique_edge_ids = set()
1387
 
1388
  for result in results:
1389
  if node_label == "*":
1390
  if result["n"]:
1391
  node = result["n"]
1392
+ node_id = self._decode_graph_label(node["node_id"])
1393
+ if node_id not in nodes:
1394
+ nodes[node_id] = node
1395
+
1396
  if result["m"]:
1397
  node = result["m"]
1398
+ node_id = self._decode_graph_label(node["node_id"])
1399
+ if node_id not in nodes:
1400
+ nodes[node_id] = node
1401
  if result["r"]:
1402
  edge = result["r"]
1403
  src_id = self._decode_graph_label(edge["start_id"])
 
1406
  else:
1407
  if result["nodes"]:
1408
  for node in result["nodes"]:
1409
+ node_id = self._decode_graph_label(node["node_id"])
1410
+ if node_id not in nodes:
1411
+ nodes[node_id] = node
1412
+
1413
  if result["relationships"]:
1414
+ for edge in result["relationships"]: # src --DIRECTED--> target
1415
+ src_id = self._decode_graph_label(edge[0]["node_id"])
1416
+ tgt_id = self._decode_graph_label(edge[2]["node_id"])
1417
+ id = src_id + "," + tgt_id
1418
+ if id in unique_edge_ids:
1419
+ continue
1420
+ else:
1421
+ unique_edge_ids.add(id)
1422
+ edges.append(
1423
+ (id, src_id, tgt_id, {"source": edge[0], "target": edge[2]})
1424
+ )
1425
 
1426
  kg = KnowledgeGraph(
1427
+ nodes=[
1428
+ KnowledgeGraphNode(
1429
+ id=node_id, labels=[node_id], properties=nodes[node_id]
1430
+ )
1431
+ for node_id in nodes
1432
+ ],
1433
+ edges=[
1434
+ KnowledgeGraphEdge(
1435
+ id=id, type="DIRECTED", source=src, target=tgt, properties=props
1436
+ )
1437
+ for id, src, tgt, props in edges
1438
+ ],
1439
  )
1440
 
1441
  return kg
lightrag/kg/qdrant_impl.py CHANGED
@@ -135,7 +135,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
135
 
136
  logger.debug(f"query result: {results}")
137
 
138
- return [{**dp.payload, "id": dp.id, "distance": dp.score} for dp in results]
139
 
140
  async def index_done_callback(self) -> None:
141
  # Qdrant handles persistence automatically
@@ -233,3 +233,43 @@ class QdrantVectorDBStorage(BaseVectorStorage):
233
  logger.debug(f"No relations found for entity {entity_name}")
234
  except Exception as e:
235
  logger.error(f"Error deleting relations for {entity_name}: {e}")
135
 
136
  logger.debug(f"query result: {results}")
137
 
138
+ return [{**dp.payload, "distance": dp.score} for dp in results]
139
 
140
  async def index_done_callback(self) -> None:
141
  # Qdrant handles persistence automatically
 
233
  logger.debug(f"No relations found for entity {entity_name}")
234
  except Exception as e:
235
  logger.error(f"Error deleting relations for {entity_name}: {e}")
236
+
237
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
238
+ """Search for records with IDs starting with a specific prefix.
239
+
240
+ Args:
241
+ prefix: The prefix to search for in record IDs
242
+
243
+ Returns:
244
+ List of records with matching ID prefixes
245
+ """
246
+ try:
247
+ # Use scroll method to find records with IDs starting with the prefix
248
+ results = self._client.scroll(
249
+ collection_name=self.namespace,
250
+ scroll_filter=models.Filter(
251
+ must=[
252
+ models.FieldCondition(
253
+ key="id", match=models.MatchText(text=prefix, prefix=True)
254
+ )
255
+ ]
256
+ ),
257
+ with_payload=True,
258
+ with_vectors=False,
259
+ limit=1000, # Adjust as needed for your use case
260
+ )
261
+
262
+ # Extract matching points
263
+ matching_records = results[0]
264
+
265
+ # Format the results to match expected return format
266
+ formatted_results = [{**point.payload} for point in matching_records]
267
+
268
+ logger.debug(
269
+ f"Found {len(formatted_results)} records with prefix '{prefix}'"
270
+ )
271
+ return formatted_results
272
+
273
+ except Exception as e:
274
+ logger.error(f"Error searching for prefix '{prefix}': {e}")
275
+ return []
lightrag/kg/tidb_impl.py CHANGED
@@ -414,6 +414,55 @@ class TiDBVectorDBStorage(BaseVectorStorage):
414
  # Ti handles persistence automatically
415
  pass
416
 
417
 
418
  @final
419
  @dataclass
@@ -968,4 +1017,20 @@ SQL_TEMPLATES = {
968
  WHERE (source_name = :source AND target_name = :target)
969
  AND workspace = :workspace
970
  """,
971
  }
 
414
  # Ti handles persistence automatically
415
  pass
416
 
417
+ async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
418
+ """Search for records with IDs starting with a specific prefix.
419
+
420
+ Args:
421
+ prefix: The prefix to search for in record IDs
422
+
423
+ Returns:
424
+ List of records with matching ID prefixes
425
+ """
426
+ # Determine which table to query based on namespace
427
+ if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
428
+ sql_template = """
429
+ SELECT entity_id as id, name as entity_name, entity_type, description, content
430
+ FROM LIGHTRAG_GRAPH_NODES
431
+ WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
432
+ """
433
+ elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
434
+ sql_template = """
435
+ SELECT relation_id as id, source_name as src_id, target_name as tgt_id,
436
+ keywords, description, content
437
+ FROM LIGHTRAG_GRAPH_EDGES
438
+ WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
439
+ """
440
+ elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
441
+ sql_template = """
442
+ SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
443
+ FROM LIGHTRAG_DOC_CHUNKS
444
+ WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
445
+ """
446
+ else:
447
+ logger.warning(
448
+ f"Namespace {self.namespace} not supported for prefix search"
449
+ )
450
+ return []
451
+
452
+ # Add prefix pattern parameter with % for SQL LIKE
453
+ prefix_pattern = f"{prefix}%"
454
+ params = {"prefix_pattern": prefix_pattern, "workspace": self.db.workspace}
455
+
456
+ try:
457
+ results = await self.db.query(sql_template, params=params, multirows=True)
458
+ logger.debug(
459
+ f"Found {len(results) if results else 0} records with prefix '{prefix}'"
460
+ )
461
+ return results if results else []
462
+ except Exception as e:
463
+ logger.error(f"Error searching records with prefix '{prefix}': {e}")
464
+ return []
465
+
466
 
467
  @final
468
  @dataclass
 
1017
  WHERE (source_name = :source AND target_name = :target)
1018
  AND workspace = :workspace
1019
  """,
1020
+ # Search by prefix SQL templates
1021
+ "search_entity_by_prefix": """
1022
+ SELECT entity_id as id, name as entity_name, entity_type, description, content
1023
+ FROM LIGHTRAG_GRAPH_NODES
1024
+ WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
1025
+ """,
1026
+ "search_relationship_by_prefix": """
1027
+ SELECT relation_id as id, source_name as src_id, target_name as tgt_id, keywords, description, content
1028
+ FROM LIGHTRAG_GRAPH_EDGES
1029
+ WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
1030
+ """,
1031
+ "search_chunk_by_prefix": """
1032
+ SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
1033
+ FROM LIGHTRAG_DOC_CHUNKS
1034
+ WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
1035
+ """,
1036
  }
lightrag/lightrag.py CHANGED
@@ -504,11 +504,39 @@ class LightRAG:
504
  return text
505
 
506
  async def get_knowledge_graph(
507
- self, node_label: str, max_depth: int
 
 
 
 
508
  ) -> KnowledgeGraph:
509
- return await self.chunk_entity_relation_graph.get_knowledge_graph(
510
- node_label=node_label, max_depth=max_depth
511
- )
512
 
513
  def _get_storage_class(self, storage_name: str) -> Callable[..., Any]:
514
  import_path = STORAGES[storage_name]
@@ -1981,6 +2009,9 @@ class LightRAG:
1981
  new_entity_name, new_node_data
1982
  )
1983
 
 
 
 
1984
  # Get all edges related to the original entity
1985
  edges = await self.chunk_entity_relation_graph.get_node_edges(
1986
  entity_name
@@ -1996,10 +2027,16 @@ class LightRAG:
1996
  await self.chunk_entity_relation_graph.upsert_edge(
1997
  new_entity_name, target, edge_data
1998
  )
 
 
 
1999
  else: # target == entity_name
2000
  await self.chunk_entity_relation_graph.upsert_edge(
2001
  source, new_entity_name, edge_data
2002
  )
 
 
 
2003
 
2004
  # Delete old entity
2005
  await self.chunk_entity_relation_graph.delete_node(entity_name)
@@ -2007,6 +2044,38 @@ class LightRAG:
2007
  # Delete old entity record from vector database
2008
  old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2009
  await self.entities_vdb.delete([old_entity_id])
2010
 
2011
  # Update working entity name to new name
2012
  entity_name = new_entity_name
@@ -2105,6 +2174,15 @@ class LightRAG:
2105
  f"Relation from '{source_entity}' to '{target_entity}' does not exist"
2106
  )
2107
 
 
2108
  # 2. Update relation information in the graph
2109
  new_edge_data = {**edge_data, **updated_data}
2110
  await self.chunk_entity_relation_graph.upsert_edge(
@@ -2118,7 +2196,7 @@ class LightRAG:
2118
  weight = float(new_edge_data.get("weight", 1.0))
2119
 
2120
  # Create content for embedding
2121
- content = f"{keywords}\t{source_entity}\n{target_entity}\n{description}"
2122
 
2123
  # Calculate relation ID
2124
  relation_id = compute_mdhash_id(
@@ -2382,3 +2460,409 @@ class LightRAG:
2382
  return loop.run_until_complete(
2383
  self.acreate_relation(source_entity, target_entity, relation_data)
2384
  )
504
  return text
505
 
506
  async def get_knowledge_graph(
507
+ self,
508
+ node_label: str,
509
+ max_depth: int = 3,
510
+ min_degree: int = 0,
511
+ inclusive: bool = False,
512
  ) -> KnowledgeGraph:
513
+ """Get knowledge graph for a given label
514
+
515
+ Args:
516
+ node_label (str): Label to get knowledge graph for
517
+ max_depth (int): Maximum depth of graph
518
+ min_degree (int, optional): Minimum degree of nodes to include. Defaults to 0.
519
+ inclusive (bool, optional): Whether to use inclusive search mode. Defaults to False.
520
+
521
+ Returns:
522
+ KnowledgeGraph: Knowledge graph containing nodes and edges
523
+ """
524
+ # get params supported by get_knowledge_graph of specified storage
525
+ import inspect
526
+
527
+ storage_params = inspect.signature(
528
+ self.chunk_entity_relation_graph.get_knowledge_graph
529
+ ).parameters
530
+
531
+ kwargs = {"node_label": node_label, "max_depth": max_depth}
532
+
533
+ if "min_degree" in storage_params and min_degree > 0:
534
+ kwargs["min_degree"] = min_degree
535
+
536
+ if "inclusive" in storage_params:
537
+ kwargs["inclusive"] = inclusive
538
+
539
+ return await self.chunk_entity_relation_graph.get_knowledge_graph(**kwargs)
540
 
541
  def _get_storage_class(self, storage_name: str) -> Callable[..., Any]:
542
  import_path = STORAGES[storage_name]
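
Because the wrapper inspects the backend's `get_knowledge_graph` signature, `min_degree` and `inclusive` are only forwarded to storages that accept them, so older graph storage implementations keep working unchanged. A usage sketch, assuming an initialized `LightRAG` instance and an example label (neither is part of this commit):

```python
async def fetch_subgraph(rag):
    """Illustrative only: pull a small filtered neighborhood."""
    kg = await rag.get_knowledge_graph(
        node_label="Artificial Intelligence",  # example label
        max_depth=2,     # at most two hops out from the matching node(s)
        min_degree=1,    # silently dropped for backends that do not accept it
        inclusive=True,  # likewise only forwarded when the backend supports it
    )
    print(f"{len(kg.nodes)} nodes, {len(kg.edges)} edges")
    return kg
```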
 
2009
  new_entity_name, new_node_data
2010
  )
2011
 
2012
+ # Store relationships that need to be updated
2013
+ relations_to_update = []
2014
+
2015
  # Get all edges related to the original entity
2016
  edges = await self.chunk_entity_relation_graph.get_node_edges(
2017
  entity_name
 
2027
  await self.chunk_entity_relation_graph.upsert_edge(
2028
  new_entity_name, target, edge_data
2029
  )
2030
+ relations_to_update.append(
2031
+ (new_entity_name, target, edge_data)
2032
+ )
2033
  else: # target == entity_name
2034
  await self.chunk_entity_relation_graph.upsert_edge(
2035
  source, new_entity_name, edge_data
2036
  )
2037
+ relations_to_update.append(
2038
+ (source, new_entity_name, edge_data)
2039
+ )
2040
 
2041
  # Delete old entity
2042
  await self.chunk_entity_relation_graph.delete_node(entity_name)
 
2044
  # Delete old entity record from vector database
2045
  old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2046
  await self.entities_vdb.delete([old_entity_id])
2047
+ logger.info(
2048
+ f"Deleted old entity '{entity_name}' and its vector embedding from database"
2049
+ )
2050
+
2051
+ # Update relationship vector representations
2052
+ for src, tgt, edge_data in relations_to_update:
2053
+ description = edge_data.get("description", "")
2054
+ keywords = edge_data.get("keywords", "")
2055
+ source_id = edge_data.get("source_id", "")
2056
+ weight = float(edge_data.get("weight", 1.0))
2057
+
2058
+ # Create new content for embedding
2059
+ content = f"{src}\t{tgt}\n{keywords}\n{description}"
2060
+
2061
+ # Calculate relationship ID
2062
+ relation_id = compute_mdhash_id(src + tgt, prefix="rel-")
2063
+
2064
+ # Prepare data for vector database update
2065
+ relation_data = {
2066
+ relation_id: {
2067
+ "content": content,
2068
+ "src_id": src,
2069
+ "tgt_id": tgt,
2070
+ "source_id": source_id,
2071
+ "description": description,
2072
+ "keywords": keywords,
2073
+ "weight": weight,
2074
+ }
2075
+ }
2076
+
2077
+ # Update vector database
2078
+ await self.relationships_vdb.upsert(relation_data)
2079
 
2080
  # Update working entity name to new name
2081
  entity_name = new_entity_name
 
2174
  f"Relation from '{source_entity}' to '{target_entity}' does not exist"
2175
  )
2176
 
2177
+ # Important: First delete the old relation record from the vector database
2178
+ old_relation_id = compute_mdhash_id(
2179
+ source_entity + target_entity, prefix="rel-"
2180
+ )
2181
+ await self.relationships_vdb.delete([old_relation_id])
2182
+ logger.info(
2183
+ f"Deleted old relation record from vector database for relation {source_entity} -> {target_entity}"
2184
+ )
2185
+
2186
  # 2. Update relation information in the graph
2187
  new_edge_data = {**edge_data, **updated_data}
2188
  await self.chunk_entity_relation_graph.upsert_edge(
 
2196
  weight = float(new_edge_data.get("weight", 1.0))
2197
 
2198
  # Create content for embedding
2199
+ content = f"{source_entity}\t{target_entity}\n{keywords}\n{description}"
2200
 
2201
  # Calculate relation ID
2202
  relation_id = compute_mdhash_id(
 
2460
  return loop.run_until_complete(
2461
  self.acreate_relation(source_entity, target_entity, relation_data)
2462
  )
2463
+
2464
+ async def amerge_entities(
2465
+ self,
2466
+ source_entities: list[str],
2467
+ target_entity: str,
2468
+ merge_strategy: dict[str, str] = None,
2469
+ target_entity_data: dict[str, Any] = None,
2470
+ ) -> dict[str, Any]:
2471
+ """Asynchronously merge multiple entities into one entity.
2472
+
2473
+ Merges multiple source entities into a target entity, handling all relationships,
2474
+ and updating both the knowledge graph and vector database.
2475
+
2476
+ Args:
2477
+ source_entities: List of source entity names to merge
2478
+ target_entity: Name of the target entity after merging
2479
+ merge_strategy: Merge strategy configuration, e.g. {"description": "concatenate", "entity_type": "keep_first"}
2480
+ Supported strategies:
2481
+ - "concatenate": Concatenate all values (for text fields)
2482
+ - "keep_first": Keep the first non-empty value
2483
+ - "keep_last": Keep the last non-empty value
2484
+ - "join_unique": Join all unique values (for fields separated by delimiter)
2485
+ target_entity_data: Dictionary of specific values to set for the target entity,
2486
+ overriding any merged values, e.g. {"description": "custom description", "entity_type": "PERSON"}
2487
+
2488
+ Returns:
2489
+ Dictionary containing the merged entity information
2490
+ """
2491
+ try:
2492
+ # Default merge strategy
2493
+ default_strategy = {
2494
+ "description": "concatenate",
2495
+ "entity_type": "keep_first",
2496
+ "source_id": "join_unique",
2497
+ }
2498
+
2499
+ merge_strategy = (
2500
+ default_strategy
2501
+ if merge_strategy is None
2502
+ else {**default_strategy, **merge_strategy}
2503
+ )
2504
+ target_entity_data = (
2505
+ {} if target_entity_data is None else target_entity_data
2506
+ )
2507
+
2508
+ # 1. Check if all source entities exist
2509
+ source_entities_data = {}
2510
+ for entity_name in source_entities:
2511
+ node_data = await self.chunk_entity_relation_graph.get_node(entity_name)
2512
+ if not node_data:
2513
+ raise ValueError(f"Source entity '{entity_name}' does not exist")
2514
+ source_entities_data[entity_name] = node_data
2515
+
2516
+ # 2. Check if target entity exists and get its data if it does
2517
+ target_exists = await self.chunk_entity_relation_graph.has_node(
2518
+ target_entity
2519
+ )
2520
+ existing_target_data = {}
2521
+ if target_exists:
2522
+ existing_target_data = await self.chunk_entity_relation_graph.get_node(
2523
+ target_entity
2524
+ )
2525
+ logger.info(
2526
+ f"Target entity '{target_entity}' already exists, will merge data"
2527
+ )
2528
+
2529
+ # 3. Merge entity data
2530
+ merged_entity_data = self._merge_entity_attributes(
2531
+ list(source_entities_data.values())
2532
+ + ([existing_target_data] if target_exists else []),
2533
+ merge_strategy,
2534
+ )
2535
+
2536
+ # Apply any explicitly provided target entity data (overrides merged data)
2537
+ for key, value in target_entity_data.items():
2538
+ merged_entity_data[key] = value
2539
+
2540
+ # 4. Get all relationships of the source entities
2541
+ all_relations = []
2542
+ for entity_name in source_entities:
2543
+ # Get all relationships where this entity is the source
2544
+ outgoing_edges = await self.chunk_entity_relation_graph.get_node_edges(
2545
+ entity_name
2546
+ )
2547
+ if outgoing_edges:
2548
+ for src, tgt in outgoing_edges:
2549
+ # Ensure src is the current entity
2550
+ if src == entity_name:
2551
+ edge_data = await self.chunk_entity_relation_graph.get_edge(
2552
+ src, tgt
2553
+ )
2554
+ all_relations.append(("outgoing", src, tgt, edge_data))
2555
+
2556
+ # Get all relationships where this entity is the target
2557
+ incoming_edges = []
2558
+ all_labels = await self.chunk_entity_relation_graph.get_all_labels()
2559
+ for label in all_labels:
2560
+ if label == entity_name:
2561
+ continue
2562
+ node_edges = await self.chunk_entity_relation_graph.get_node_edges(
2563
+ label
2564
+ )
2565
+ for src, tgt in node_edges or []:
2566
+ if tgt == entity_name:
2567
+ incoming_edges.append((src, tgt))
2568
+
2569
+ for src, tgt in incoming_edges:
2570
+ edge_data = await self.chunk_entity_relation_graph.get_edge(
2571
+ src, tgt
2572
+ )
2573
+ all_relations.append(("incoming", src, tgt, edge_data))
2574
+
2575
+ # 5. Create or update the target entity
2576
+ if not target_exists:
2577
+ await self.chunk_entity_relation_graph.upsert_node(
2578
+ target_entity, merged_entity_data
2579
+ )
2580
+ logger.info(f"Created new target entity '{target_entity}'")
2581
+ else:
2582
+ await self.chunk_entity_relation_graph.upsert_node(
2583
+ target_entity, merged_entity_data
2584
+ )
2585
+ logger.info(f"Updated existing target entity '{target_entity}'")
2586
+
2587
+ # 6. Recreate all relationships, pointing to the target entity
2588
+ relation_updates = {} # Track relationships that need to be merged
2589
+
2590
+ for rel_type, src, tgt, edge_data in all_relations:
2591
+ new_src = target_entity if src in source_entities else src
2592
+ new_tgt = target_entity if tgt in source_entities else tgt
2593
+
2594
+ # Skip relationships between source entities to avoid self-loops
2595
+ if new_src == new_tgt:
2596
+ logger.info(
2597
+ f"Skipping relationship between source entities: {src} -> {tgt} to avoid self-loop"
2598
+ )
2599
+ continue
2600
+
2601
+ # Check if the same relationship already exists
2602
+ relation_key = f"{new_src}|{new_tgt}"
2603
+ if relation_key in relation_updates:
2604
+ # Merge relationship data
2605
+ existing_data = relation_updates[relation_key]["data"]
2606
+ merged_relation = self._merge_relation_attributes(
2607
+ [existing_data, edge_data],
2608
+ {
2609
+ "description": "concatenate",
2610
+ "keywords": "join_unique",
2611
+ "source_id": "join_unique",
2612
+ "weight": "max",
2613
+ },
2614
+ )
2615
+ relation_updates[relation_key]["data"] = merged_relation
2616
+ logger.info(
2617
+ f"Merged duplicate relationship: {new_src} -> {new_tgt}"
2618
+ )
2619
+ else:
2620
+ relation_updates[relation_key] = {
2621
+ "src": new_src,
2622
+ "tgt": new_tgt,
2623
+ "data": edge_data.copy(),
2624
+ }
2625
+
2626
+ # Apply relationship updates
2627
+ for rel_data in relation_updates.values():
2628
+ await self.chunk_entity_relation_graph.upsert_edge(
2629
+ rel_data["src"], rel_data["tgt"], rel_data["data"]
2630
+ )
2631
+ logger.info(
2632
+ f"Created or updated relationship: {rel_data['src']} -> {rel_data['tgt']}"
2633
+ )
2634
+
2635
+ # 7. Update entity vector representation
2636
+ description = merged_entity_data.get("description", "")
2637
+ source_id = merged_entity_data.get("source_id", "")
2638
+ entity_type = merged_entity_data.get("entity_type", "")
2639
+ content = target_entity + "\n" + description
2640
+
2641
+ entity_id = compute_mdhash_id(target_entity, prefix="ent-")
2642
+ entity_data_for_vdb = {
2643
+ entity_id: {
2644
+ "content": content,
2645
+ "entity_name": target_entity,
2646
+ "source_id": source_id,
2647
+ "description": description,
2648
+ "entity_type": entity_type,
2649
+ }
2650
+ }
2651
+
2652
+ await self.entities_vdb.upsert(entity_data_for_vdb)
2653
+
2654
+ # 8. Update relationship vector representations
2655
+ for rel_data in relation_updates.values():
2656
+ src = rel_data["src"]
2657
+ tgt = rel_data["tgt"]
2658
+ edge_data = rel_data["data"]
2659
+
2660
+ description = edge_data.get("description", "")
2661
+ keywords = edge_data.get("keywords", "")
2662
+ source_id = edge_data.get("source_id", "")
2663
+ weight = float(edge_data.get("weight", 1.0))
2664
+
2665
+ content = f"{keywords}\t{src}\n{tgt}\n{description}"
2666
+ relation_id = compute_mdhash_id(src + tgt, prefix="rel-")
2667
+
2668
+ relation_data_for_vdb = {
2669
+ relation_id: {
2670
+ "content": content,
2671
+ "src_id": src,
2672
+ "tgt_id": tgt,
2673
+ "source_id": source_id,
2674
+ "description": description,
2675
+ "keywords": keywords,
2676
+ "weight": weight,
2677
+ }
2678
+ }
2679
+
2680
+ await self.relationships_vdb.upsert(relation_data_for_vdb)
2681
+
2682
+ # 9. Delete source entities
2683
+ for entity_name in source_entities:
2684
+ # Delete entity node from knowledge graph
2685
+ await self.chunk_entity_relation_graph.delete_node(entity_name)
2686
+
2687
+ # Delete entity record from vector database
2688
+ entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2689
+ await self.entities_vdb.delete([entity_id])
2690
+
2691
+ # Also ensure any relationships specific to this entity are deleted from vector DB
2692
+ # This is a safety check, as these should have been transformed to the target entity already
2693
+ entity_relation_prefix = compute_mdhash_id(entity_name, prefix="rel-")
2694
+ relations_with_entity = await self.relationships_vdb.search_by_prefix(
2695
+ entity_relation_prefix
2696
+ )
2697
+ if relations_with_entity:
2698
+ relation_ids = [r["id"] for r in relations_with_entity]
2699
+ await self.relationships_vdb.delete(relation_ids)
2700
+ logger.info(
2701
+ f"Deleted {len(relation_ids)} relation records for entity '{entity_name}' from vector database"
2702
+ )
2703
+
2704
+ logger.info(
2705
+ f"Deleted source entity '{entity_name}' and its vector embedding from database"
2706
+ )
2707
+
2708
+ # 10. Save changes
2709
+ await self._merge_entities_done()
2710
+
2711
+ logger.info(
2712
+ f"Successfully merged {len(source_entities)} entities into '{target_entity}'"
2713
+ )
2714
+ return await self.get_entity_info(target_entity, include_vector_data=True)
2715
+
2716
+ except Exception as e:
2717
+ logger.error(f"Error merging entities: {e}")
2718
+ raise
2719
+
2720
+ def merge_entities(
2721
+ self,
2722
+ source_entities: list[str],
2723
+ target_entity: str,
2724
+ merge_strategy: dict[str, str] = None,
2725
+ target_entity_data: dict[str, Any] = None,
2726
+ ) -> dict[str, Any]:
2727
+ """Synchronously merge multiple entities into one entity.
2728
+
2729
+ Merges multiple source entities into a target entity, handling all relationships,
2730
+ and updating both the knowledge graph and vector database.
2731
+
2732
+ Args:
2733
+ source_entities: List of source entity names to merge
2734
+ target_entity: Name of the target entity after merging
2735
+ merge_strategy: Merge strategy configuration, e.g. {"description": "concatenate", "entity_type": "keep_first"}
2736
+ target_entity_data: Dictionary of specific values to set for the target entity,
2737
+ overriding any merged values, e.g. {"description": "custom description", "entity_type": "PERSON"}
2738
+
2739
+ Returns:
2740
+ Dictionary containing the merged entity information
2741
+ """
2742
+ loop = always_get_an_event_loop()
2743
+ return loop.run_until_complete(
2744
+ self.amerge_entities(
2745
+ source_entities, target_entity, merge_strategy, target_entity_data
2746
+ )
2747
+ )
2748
+
2749
+ def _merge_entity_attributes(
2750
+ self, entity_data_list: list[dict[str, Any]], merge_strategy: dict[str, str]
2751
+ ) -> dict[str, Any]:
2752
+ """Merge attributes from multiple entities.
2753
+
2754
+ Args:
2755
+ entity_data_list: List of dictionaries containing entity data
2756
+ merge_strategy: Merge strategy for each field
2757
+
2758
+ Returns:
2759
+ Dictionary containing merged entity data
2760
+ """
2761
+ merged_data = {}
2762
+
2763
+ # Collect all possible keys
2764
+ all_keys = set()
2765
+ for data in entity_data_list:
2766
+ all_keys.update(data.keys())
2767
+
2768
+ # Merge values for each key
2769
+ for key in all_keys:
2770
+ # Get all values for this key
2771
+ values = [data.get(key) for data in entity_data_list if data.get(key)]
2772
+
2773
+ if not values:
2774
+ continue
2775
+
2776
+ # Merge values according to strategy
2777
+ strategy = merge_strategy.get(key, "keep_first")
2778
+
2779
+ if strategy == "concatenate":
2780
+ merged_data[key] = "\n\n".join(values)
2781
+ elif strategy == "keep_first":
2782
+ merged_data[key] = values[0]
2783
+ elif strategy == "keep_last":
2784
+ merged_data[key] = values[-1]
2785
+ elif strategy == "join_unique":
2786
+ # Handle fields separated by GRAPH_FIELD_SEP
2787
+ unique_items = set()
2788
+ for value in values:
2789
+ items = value.split(GRAPH_FIELD_SEP)
2790
+ unique_items.update(items)
2791
+ merged_data[key] = GRAPH_FIELD_SEP.join(unique_items)
2792
+ else:
2793
+ # Default strategy
2794
+ merged_data[key] = values[0]
2795
+
2796
+ return merged_data
2797
+
2798
+ def _merge_relation_attributes(
2799
+ self, relation_data_list: list[dict[str, Any]], merge_strategy: dict[str, str]
2800
+ ) -> dict[str, Any]:
2801
+ """Merge attributes from multiple relationships.
2802
+
2803
+ Args:
2804
+ relation_data_list: List of dictionaries containing relationship data
2805
+ merge_strategy: Merge strategy for each field
2806
+
2807
+ Returns:
2808
+ Dictionary containing merged relationship data
2809
+ """
2810
+ merged_data = {}
2811
+
2812
+ # Collect all possible keys
2813
+ all_keys = set()
2814
+ for data in relation_data_list:
2815
+ all_keys.update(data.keys())
2816
+
2817
+ # Merge values for each key
2818
+ for key in all_keys:
2819
+ # Get all values for this key
2820
+ values = [
2821
+ data.get(key)
2822
+ for data in relation_data_list
2823
+ if data.get(key) is not None
2824
+ ]
2825
+
2826
+ if not values:
2827
+ continue
2828
+
2829
+ # Merge values according to strategy
2830
+ strategy = merge_strategy.get(key, "keep_first")
2831
+
2832
+ if strategy == "concatenate":
2833
+ merged_data[key] = "\n\n".join(str(v) for v in values)
2834
+ elif strategy == "keep_first":
2835
+ merged_data[key] = values[0]
2836
+ elif strategy == "keep_last":
2837
+ merged_data[key] = values[-1]
2838
+ elif strategy == "join_unique":
2839
+ # Handle fields separated by GRAPH_FIELD_SEP
2840
+ unique_items = set()
2841
+ for value in values:
2842
+ items = str(value).split(GRAPH_FIELD_SEP)
2843
+ unique_items.update(items)
2844
+ merged_data[key] = GRAPH_FIELD_SEP.join(unique_items)
2845
+ elif strategy == "max":
2846
+ # For numeric fields like weight
2847
+ try:
2848
+ merged_data[key] = max(float(v) for v in values)
2849
+ except (ValueError, TypeError):
2850
+ merged_data[key] = values[0]
2851
+ else:
2852
+ # Default strategy
2853
+ merged_data[key] = values[0]
2854
+
2855
+ return merged_data
2856
+
2857
+ async def _merge_entities_done(self) -> None:
2858
+ """Callback after entity merging is complete, ensures updates are persisted"""
2859
+ await asyncio.gather(
2860
+ *[
2861
+ cast(StorageNameSpace, storage_inst).index_done_callback()
2862
+ for storage_inst in [ # type: ignore
2863
+ self.entities_vdb,
2864
+ self.relationships_vdb,
2865
+ self.chunk_entity_relation_graph,
2866
+ ]
2867
+ ]
2868
+ )
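
For reference, this is how the default field strategies used by `_merge_entity_attributes` resolve two overlapping records; the records are invented and `<SEP>` stands for `GRAPH_FIELD_SEP`:

```python
# Illustrative only; both entity records are made up.
a = {"description": "A research lab.", "entity_type": "organization", "source_id": "chunk-1"}
b = {"description": "Focuses on graph-based RAG.", "entity_type": "company", "source_id": "chunk-1<SEP>chunk-2"}

# With the defaults {"description": "concatenate", "entity_type": "keep_first", "source_id": "join_unique"}:
#   description -> "A research lab.\n\nFocuses on graph-based RAG."  (values joined by blank lines)
#   entity_type -> "organization"                                    (first non-empty value wins)
#   source_id   -> "chunk-1<SEP>chunk-2"                             (duplicates removed; item order is not
#                                                                     guaranteed because a set is used)
```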
lightrag/operate.py CHANGED
@@ -384,8 +384,8 @@ async def extract_entities(
384
  language=language,
385
  )
386
 
387
- continue_prompt = PROMPTS["entiti_continue_extraction"]
388
- if_loop_prompt = PROMPTS["entiti_if_loop_extraction"]
389
 
390
  processed_chunks = 0
391
  total_chunks = len(ordered_chunks)
@@ -1156,7 +1156,8 @@ async def _get_node_data(
1156
  "entity",
1157
  "type",
1158
  "description",
1159
- "rank" "created_at",
 
1160
  ]
1161
  ]
1162
  for i, n in enumerate(node_datas):
 
384
  language=language,
385
  )
386
 
387
+ continue_prompt = PROMPTS["entity_continue_extraction"]
388
+ if_loop_prompt = PROMPTS["entity_if_loop_extraction"]
389
 
390
  processed_chunks = 0
391
  total_chunks = len(ordered_chunks)
 
1156
  "entity",
1157
  "type",
1158
  "description",
1159
+ "rank",
1160
+ "created_at",
1161
  ]
1162
  ]
1163
  for i, n in enumerate(node_datas):
lightrag/prompt.py CHANGED
@@ -58,14 +58,16 @@ PROMPTS["entity_extraction_examples"] = [
58
 
59
  Entity_types: [person, technology, mission, organization, location]
60
  Text:
 
61
  while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
62
 
63
- Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. "If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us."
64
 
65
  The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
66
 
67
  It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
68
- ################
 
69
  Output:
70
  ("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is a character who experiences frustration and is observant of the dynamics among other characters."){record_delimiter}
71
  ("entity"{tuple_delimiter}"Taylor"{tuple_delimiter}"person"{tuple_delimiter}"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective."){record_delimiter}
@@ -81,48 +83,52 @@ Output:
81
  #############################""",
82
  """Example 2:
83
 
84
- Entity_types: [person, technology, mission, organization, location]
85
  Text:
86
- They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols—it demanded a new perspective, a new resolve.
 
 
 
 
 
87
 
88
- Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.
 
89
 
90
- Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence— the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly
91
- #############
92
  Output:
93
- ("entity"{tuple_delimiter}"Washington"{tuple_delimiter}"location"{tuple_delimiter}"Washington is a location where communications are being received, indicating its importance in the decision-making process."){record_delimiter}
94
- ("entity"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"mission"{tuple_delimiter}"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and activities."){record_delimiter}
95
- ("entity"{tuple_delimiter}"The team"{tuple_delimiter}"organization"{tuple_delimiter}"The team is portrayed as a group of individuals who have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role."){record_delimiter}
96
- ("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Washington"{tuple_delimiter}"The team receives communications from Washington, which influences their decision-making process."{tuple_delimiter}"decision-making, external influence"{tuple_delimiter}7){record_delimiter}
97
- ("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."{tuple_delimiter}"mission evolution, active participation"{tuple_delimiter}9){record_delimiter}
98
- ("content_keywords"{tuple_delimiter}"mission evolution, decision-making, active participation, cosmic significance"){completion_delimiter}
 
 
 
 
 
 
99
  #############################""",
100
  """Example 3:
101
 
102
- Entity_types: [person, role, technology, organization, event, location, concept]
103
  Text:
104
- their voice slicing through the buzz of activity. "Control may be an illusion when facing an intelligence that literally writes its own rules," they stated stoically, casting a watchful eye over the flurry of data.
 
 
105
 
106
- "It's like it's learning to communicate," offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. "This gives talking to strangers' a whole new meaning."
107
-
108
- Alex surveyed his team—each face a study in concentration, determination, and not a small measure of trepidation. "This might well be our first contact," he acknowledged, "And we need to be ready for whatever answers back."
109
-
110
- Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable—a collective introspection about their role in this grand cosmic play, one that could rewrite human history.
111
-
112
- The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation
113
- #############
114
  Output:
115
- ("entity"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"person"{tuple_delimiter}"Sam Rivera is a member of a team working on communicating with an unknown intelligence, showing a mix of awe and anxiety."){record_delimiter}
116
- ("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is the leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their task."){record_delimiter}
117
- ("entity"{tuple_delimiter}"Control"{tuple_delimiter}"concept"{tuple_delimiter}"Control refers to the ability to manage or govern, which is challenged by an intelligence that writes its own rules."){record_delimiter}
118
- ("entity"{tuple_delimiter}"Intelligence"{tuple_delimiter}"concept"{tuple_delimiter}"Intelligence here refers to an unknown entity capable of writing its own rules and learning to communicate."){record_delimiter}
119
- ("entity"{tuple_delimiter}"First Contact"{tuple_delimiter}"event"{tuple_delimiter}"First Contact is the potential initial communication between humanity and an unknown intelligence."){record_delimiter}
120
- ("entity"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"event"{tuple_delimiter}"Humanity's Response is the collective action taken by Alex's team in response to a message from an unknown intelligence."){record_delimiter}
121
- ("relationship"{tuple_delimiter}"Sam Rivera"{tuple_delimiter}"Intelligence"{tuple_delimiter}"Sam Rivera is directly involved in the process of learning to communicate with the unknown intelligence."{tuple_delimiter}"communication, learning process"{tuple_delimiter}9){record_delimiter}
122
- ("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"First Contact"{tuple_delimiter}"Alex leads the team that might be making the First Contact with the unknown intelligence."{tuple_delimiter}"leadership, exploration"{tuple_delimiter}10){record_delimiter}
123
- ("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."{tuple_delimiter}"collective action, cosmic significance"{tuple_delimiter}8){record_delimiter}
124
- ("relationship"{tuple_delimiter}"Control"{tuple_delimiter}"Intelligence"{tuple_delimiter}"The concept of Control is challenged by the Intelligence that writes its own rules."{tuple_delimiter}"power dynamics, autonomy"{tuple_delimiter}7){record_delimiter}
125
- ("content_keywords"{tuple_delimiter}"first contact, control, communication, cosmic significance"){completion_delimiter}
126
  #############################""",
127
  ]
128
 
@@ -143,15 +149,47 @@ Description List: {description_list}
143
  Output:
144
  """
145
 
146
- PROMPTS[
147
- "entiti_continue_extraction"
148
- ] = """MANY entities were missed in the last extraction. Add them below using the same format:
149
- """
150
 
151
- PROMPTS[
152
- "entiti_if_loop_extraction"
153
- ] = """It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.
154
- """
155
 
156
  PROMPTS["fail_response"] = (
157
  "Sorry, I'm not able to provide an answer to that question.[no-context]"
 
58
 
59
  Entity_types: [person, technology, mission, organization, location]
60
  Text:
61
+ ```
62
  while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
63
 
64
+ Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. "If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us."
65
 
66
  The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
67
 
68
  It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
69
+ ```
70
+
71
  Output:
72
  ("entity"{tuple_delimiter}"Alex"{tuple_delimiter}"person"{tuple_delimiter}"Alex is a character who experiences frustration and is observant of the dynamics among other characters."){record_delimiter}
73
  ("entity"{tuple_delimiter}"Taylor"{tuple_delimiter}"person"{tuple_delimiter}"Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective."){record_delimiter}
 
83
  #############################""",
84
  """Example 2:
85
 
86
+ Entity_types: [company, index, commodity, market_trend, economic_policy, biological]
87
  Text:
88
+ ```
89
+ Stock markets faced a sharp downturn today as tech giants saw significant declines, with the Global Tech Index dropping by 3.4% in midday trading. Analysts attribute the selloff to investor concerns over rising interest rates and regulatory uncertainty.
90
+
91
+ Among the hardest hit, Nexon Technologies saw its stock plummet by 7.8% after reporting lower-than-expected quarterly earnings. In contrast, Omega Energy posted a modest 2.1% gain, driven by rising oil prices.
92
+
93
+ Meanwhile, commodity markets reflected a mixed sentiment. Gold futures rose by 1.5%, reaching $2,080 per ounce, as investors sought safe-haven assets. Crude oil prices continued their rally, climbing to $87.60 per barrel, supported by supply constraints and strong demand.
94
 
95
+ Financial experts are closely watching the Federal Reserve’s next move, as speculation grows over potential rate hikes. The upcoming policy announcement is expected to influence investor confidence and overall market stability.
96
+ ```
97
 
 
 
98
  Output:
99
+ ("entity"{tuple_delimiter}"Global Tech Index"{tuple_delimiter}"index"{tuple_delimiter}"The Global Tech Index tracks the performance of major technology stocks and experienced a 3.4% decline today."){record_delimiter}
100
+ ("entity"{tuple_delimiter}"Nexon Technologies"{tuple_delimiter}"company"{tuple_delimiter}"Nexon Technologies is a tech company that saw its stock decline by 7.8% after disappointing earnings."){record_delimiter}
101
+ ("entity"{tuple_delimiter}"Omega Energy"{tuple_delimiter}"company"{tuple_delimiter}"Omega Energy is an energy company that gained 2.1% in stock value due to rising oil prices."){record_delimiter}
102
+ ("entity"{tuple_delimiter}"Gold Futures"{tuple_delimiter}"commodity"{tuple_delimiter}"Gold futures rose by 1.5%, indicating increased investor interest in safe-haven assets."){record_delimiter}
103
+ ("entity"{tuple_delimiter}"Crude Oil"{tuple_delimiter}"commodity"{tuple_delimiter}"Crude oil prices rose to $87.60 per barrel due to supply constraints and strong demand."){record_delimiter}
104
+ ("entity"{tuple_delimiter}"Market Selloff"{tuple_delimiter}"market_trend"{tuple_delimiter}"Market selloff refers to the significant decline in stock values due to investor concerns over interest rates and regulations."){record_delimiter}
105
+ ("entity"{tuple_delimiter}"Federal Reserve Policy Announcement"{tuple_delimiter}"economic_policy"{tuple_delimiter}"The Federal Reserve's upcoming policy announcement is expected to impact investor confidence and market stability."){record_delimiter}
106
+ ("relationship"{tuple_delimiter}"Global Tech Index"{tuple_delimiter}"Market Selloff"{tuple_delimiter}"The decline in the Global Tech Index is part of the broader market selloff driven by investor concerns."{tuple_delimiter}"market performance, investor sentiment"{tuple_delimiter}9){record_delimiter}
107
+ ("relationship"{tuple_delimiter}"Nexon Technologies"{tuple_delimiter}"Global Tech Index"{tuple_delimiter}"Nexon Technologies' stock decline contributed to the overall drop in the Global Tech Index."{tuple_delimiter}"company impact, index movement"{tuple_delimiter}8){record_delimiter}
108
+ ("relationship"{tuple_delimiter}"Gold Futures"{tuple_delimiter}"Market Selloff"{tuple_delimiter}"Gold prices rose as investors sought safe-haven assets during the market selloff."{tuple_delimiter}"market reaction, safe-haven investment"{tuple_delimiter}10){record_delimiter}
109
+ ("relationship"{tuple_delimiter}"Federal Reserve Policy Announcement"{tuple_delimiter}"Market Selloff"{tuple_delimiter}"Speculation over Federal Reserve policy changes contributed to market volatility and investor selloff."{tuple_delimiter}"interest rate impact, financial regulation"{tuple_delimiter}7){record_delimiter}
110
+ ("content_keywords"{tuple_delimiter}"market downturn, investor sentiment, commodities, Federal Reserve, stock performance"){completion_delimiter}
111
  #############################""",
112
  """Example 3:
113
 
114
+ Entity_types: [economic_policy, athlete, event, location, record, organization, equipment]
115
  Text:
116
+ ```
117
+ At the World Athletics Championship in Tokyo, Noah Carter broke the 100m sprint record using cutting-edge carbon-fiber spikes.
118
+ ```
119
 
120
  Output:
121
+ ("entity"{tuple_delimiter}"World Athletics Championship"{tuple_delimiter}"event"{tuple_delimiter}"The World Athletics Championship is a global sports competition featuring top athletes in track and field."){record_delimiter}
122
+ ("entity"{tuple_delimiter}"Tokyo"{tuple_delimiter}"location"{tuple_delimiter}"Tokyo is the host city of the World Athletics Championship."){record_delimiter}
123
+ ("entity"{tuple_delimiter}"Noah Carter"{tuple_delimiter}"athlete"{tuple_delimiter}"Noah Carter is a sprinter who set a new record in the 100m sprint at the World Athletics Championship."){record_delimiter}
124
+ ("entity"{tuple_delimiter}"100m Sprint Record"{tuple_delimiter}"record"{tuple_delimiter}"The 100m sprint record is a benchmark in athletics, recently broken by Noah Carter."){record_delimiter}
125
+ ("entity"{tuple_delimiter}"Carbon-Fiber Spikes"{tuple_delimiter}"equipment"{tuple_delimiter}"Carbon-fiber spikes are advanced sprinting shoes that provide enhanced speed and traction."){record_delimiter}
126
+ ("entity"{tuple_delimiter}"World Athletics Federation"{tuple_delimiter}"organization"{tuple_delimiter}"The World Athletics Federation is the governing body overseeing the World Athletics Championship and record validations."){record_delimiter}
127
+ ("relationship"{tuple_delimiter}"World Athletics Championship"{tuple_delimiter}"Tokyo"{tuple_delimiter}"The World Athletics Championship is being hosted in Tokyo."{tuple_delimiter}"event location, international competition"{tuple_delimiter}8){record_delimiter}
128
+ ("relationship"{tuple_delimiter}"Noah Carter"{tuple_delimiter}"100m Sprint Record"{tuple_delimiter}"Noah Carter set a new 100m sprint record at the championship."{tuple_delimiter}"athlete achievement, record-breaking"{tuple_delimiter}10){record_delimiter}
129
+ ("relationship"{tuple_delimiter}"Noah Carter"{tuple_delimiter}"Carbon-Fiber Spikes"{tuple_delimiter}"Noah Carter used carbon-fiber spikes to enhance performance during the race."{tuple_delimiter}"athletic equipment, performance boost"{tuple_delimiter}7){record_delimiter}
130
+ ("relationship"{tuple_delimiter}"World Athletics Federation"{tuple_delimiter}"100m Sprint Record"{tuple_delimiter}"The World Athletics Federation is responsible for validating and recognizing new sprint records."{tuple_delimiter}"sports regulation, record certification"{tuple_delimiter}9){record_delimiter}
131
+ ("content_keywords"{tuple_delimiter}"athletics, sprinting, record-breaking, sports technology, competition"){completion_delimiter}
132
  #############################""",
133
  ]
134
 
 
149
  Output:
150
  """
151
 
152
+ PROMPTS["entity_continue_extraction"] = """
153
+ MANY entities and relationships were missed in the last extraction.
 
 
154
 
155
+ ---Remember Steps---
156
+
157
+ 1. Identify all entities. For each identified entity, extract the following information:
158
+ - entity_name: Name of the entity, use same language as input text. If English, capitalize the name.
159
+ - entity_type: One of the following types: [{entity_types}]
160
+ - entity_description: Comprehensive description of the entity's attributes and activities
161
+ Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)
162
+
163
+ 2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
164
+ For each pair of related entities, extract the following information:
165
+ - source_entity: name of the source entity, as identified in step 1
166
+ - target_entity: name of the target entity, as identified in step 1
167
+ - relationship_description: explanation as to why you think the source entity and the target entity are related to each other
168
+ - relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
169
+ - relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details
170
+ Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_keywords>{tuple_delimiter}<relationship_strength>)
171
+
172
+ 3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
173
+ Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_level_keywords>)
174
+
175
+ 4. Return output in {language} as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
176
+
177
+ 5. When finished, output {completion_delimiter}
178
+
179
+ ---Output---
180
+
181
+ Add them below using the same format:\n
182
+ """.strip()
183
+
184
+ PROMPTS["entity_if_loop_extraction"] = """
185
+ ---Goal---
186
+
187
+ It appears some entities may have still been missed.
188
+
189
+ ---Output---
190
+
191
+ Answer ONLY by `YES` OR `NO` if there are still entities that need to be added.
192
+ """.strip()
193
 
194
  PROMPTS["fail_response"] = (
195
  "Sorry, I'm not able to provide an answer to that question.[no-context]"
lightrag_webui/src/api/lightrag.ts CHANGED
@@ -161,8 +161,12 @@ axiosInstance.interceptors.response.use(
161
  )
162
 
163
  // API methods
164
- export const queryGraphs = async (label: string, maxDepth: number): Promise<LightragGraphType> => {
165
- const response = await axiosInstance.get(`/graphs?label=${label}&max_depth=${maxDepth}`)
 
 
 
 
166
  return response.data
167
  }
168
 
 
161
  )
162
 
163
  // API methods
164
+ export const queryGraphs = async (
165
+ label: string,
166
+ maxDepth: number,
167
+ minDegree: number
168
+ ): Promise<LightragGraphType> => {
169
+ const response = await axiosInstance.get(`/graphs?label=${encodeURIComponent(label)}&max_depth=${maxDepth}&min_degree=${minDegree}`)
170
  return response.data
171
  }
172
 
lightrag_webui/src/components/graph/GraphControl.tsx CHANGED
@@ -40,18 +40,21 @@ const GraphControl = ({ disableHoverEffect }: { disableHoverEffect?: boolean })
40
  const focusedEdge = useGraphStore.use.focusedEdge()
41
 
42
  /**
43
- * When component mount
44
- * => load the graph
45
  */
46
  useEffect(() => {
47
  // Create & load the graph
48
  const graph = lightrageGraph()
49
  loadGraph(graph)
50
- if (!(graph as any).__force_applied) {
51
- assignLayout()
52
- Object.assign(graph, { __force_applied: true })
53
- }
54
 
 
 
 
 
 
55
  const { setFocusedNode, setSelectedNode, setFocusedEdge, setSelectedEdge, clearSelection } =
56
  useGraphStore.getState()
57
 
@@ -87,7 +90,7 @@ const GraphControl = ({ disableHoverEffect }: { disableHoverEffect?: boolean })
87
  },
88
  clickStage: () => clearSelection()
89
  })
90
- }, [assignLayout, loadGraph, registerEvents, lightrageGraph])
91
 
92
  /**
93
  * When component mount or hovered node change
 
40
  const focusedEdge = useGraphStore.use.focusedEdge()
41
 
42
  /**
43
+ * When component mount or maxIterations changes
44
+ * => load the graph and apply layout
45
  */
46
  useEffect(() => {
47
  // Create & load the graph
48
  const graph = lightrageGraph()
49
  loadGraph(graph)
50
+ assignLayout()
51
+ }, [assignLayout, loadGraph, lightrageGraph, maxIterations])
 
 
52
 
53
+ /**
54
+ * When component mount
55
+ * => register events
56
+ */
57
+ useEffect(() => {
58
  const { setFocusedNode, setSelectedNode, setFocusedEdge, setSelectedEdge, clearSelection } =
59
  useGraphStore.getState()
60
 
 
90
  },
91
  clickStage: () => clearSelection()
92
  })
93
+ }, [registerEvents])
94
 
95
  /**
96
  * When component mount or hovered node change
lightrag_webui/src/components/graph/Settings.tsx CHANGED
@@ -90,9 +90,12 @@ const LabeledNumberInput = ({
90
  {label}
91
  </label>
92
  <Input
93
- value={currentValue || ''}
 
94
  onChange={onValueChange}
95
- className="h-6 w-full min-w-0"
 
 
96
  onBlur={onBlur}
97
  onKeyDown={(e) => {
98
  if (e.key === 'Enter') {
@@ -119,6 +122,7 @@ export default function Settings() {
119
  const enableHideUnselectedEdges = useSettingsStore.use.enableHideUnselectedEdges()
120
  const showEdgeLabel = useSettingsStore.use.showEdgeLabel()
121
  const graphQueryMaxDepth = useSettingsStore.use.graphQueryMaxDepth()
 
122
  const graphLayoutMaxIterations = useSettingsStore.use.graphLayoutMaxIterations()
123
 
124
  const enableHealthCheck = useSettingsStore.use.enableHealthCheck()
@@ -177,6 +181,11 @@ export default function Settings() {
177
  useSettingsStore.setState({ graphQueryMaxDepth: depth })
178
  }, [])
179
 
 
 
 
 
 
180
  const setGraphLayoutMaxIterations = useCallback((iterations: number) => {
181
  if (iterations < 1) return
182
  useSettingsStore.setState({ graphLayoutMaxIterations: iterations })
@@ -266,6 +275,12 @@ export default function Settings() {
266
  value={graphQueryMaxDepth}
267
  onEditFinished={setGraphQueryMaxDepth}
268
  />
 
 
 
 
 
 
269
  <LabeledNumberInput
270
  label="Max Layout Iterations"
271
  min={1}
 
90
  {label}
91
  </label>
92
  <Input
93
+ type="number"
94
+ value={currentValue === null ? '' : currentValue}
95
  onChange={onValueChange}
96
+ className="h-6 w-full min-w-0 pr-1"
97
+ min={min}
98
+ max={max}
99
  onBlur={onBlur}
100
  onKeyDown={(e) => {
101
  if (e.key === 'Enter') {
 
122
  const enableHideUnselectedEdges = useSettingsStore.use.enableHideUnselectedEdges()
123
  const showEdgeLabel = useSettingsStore.use.showEdgeLabel()
124
  const graphQueryMaxDepth = useSettingsStore.use.graphQueryMaxDepth()
125
+ const graphMinDegree = useSettingsStore.use.graphMinDegree()
126
  const graphLayoutMaxIterations = useSettingsStore.use.graphLayoutMaxIterations()
127
 
128
  const enableHealthCheck = useSettingsStore.use.enableHealthCheck()
 
181
  useSettingsStore.setState({ graphQueryMaxDepth: depth })
182
  }, [])
183
 
184
+ const setGraphMinDegree = useCallback((degree: number) => {
185
+ if (degree < 0) return
186
+ useSettingsStore.setState({ graphMinDegree: degree })
187
+ }, [])
188
+
189
  const setGraphLayoutMaxIterations = useCallback((iterations: number) => {
190
  if (iterations < 1) return
191
  useSettingsStore.setState({ graphLayoutMaxIterations: iterations })
 
275
  value={graphQueryMaxDepth}
276
  onEditFinished={setGraphQueryMaxDepth}
277
  />
278
+ <LabeledNumberInput
279
+ label="Minimum Degree"
280
+ min={0}
281
+ value={graphMinDegree}
282
+ onEditFinished={setGraphMinDegree}
283
+ />
284
  <LabeledNumberInput
285
  label="Max Layout Iterations"
286
  min={1}
lightrag_webui/src/components/ui/Input.tsx CHANGED
@@ -7,7 +7,7 @@ const Input = React.forwardRef<HTMLInputElement, React.ComponentProps<'input'>>(
7
  <input
8
  type={type}
9
  className={cn(
10
- 'border-input file:text-foreground placeholder:text-muted-foreground focus-visible:ring-ring flex h-9 rounded-md border bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium focus-visible:ring-1 focus-visible:outline-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm',
11
  className
12
  )}
13
  ref={ref}
 
7
  <input
8
  type={type}
9
  className={cn(
10
+ 'border-input file:text-foreground placeholder:text-muted-foreground focus-visible:ring-ring flex h-9 rounded-md border bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium focus-visible:ring-1 focus-visible:outline-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm [&::-webkit-inner-spin-button]:opacity-100 [&::-webkit-outer-spin-button]:opacity-100',
11
  className
12
  )}
13
  ref={ref}
lightrag_webui/src/hooks/useLightragGraph.tsx CHANGED
@@ -50,11 +50,11 @@ export type NodeType = {
50
  }
51
  export type EdgeType = { label: string }
52
 
53
- const fetchGraph = async (label: string, maxDepth: number) => {
54
  let rawData: any = null
55
 
56
  try {
57
- rawData = await queryGraphs(label, maxDepth)
58
  } catch (e) {
59
  useBackendState.getState().setErrorMessage(errorMessage(e), 'Query Graphs Error!')
60
  return null
@@ -161,13 +161,14 @@ const createSigmaGraph = (rawGraph: RawGraph | null) => {
161
  return graph
162
  }
163
 
164
- const lastQueryLabel = { label: '', maxQueryDepth: 0 }
165
 
166
  const useLightrangeGraph = () => {
167
  const queryLabel = useSettingsStore.use.queryLabel()
168
  const rawGraph = useGraphStore.use.rawGraph()
169
  const sigmaGraph = useGraphStore.use.sigmaGraph()
170
  const maxQueryDepth = useSettingsStore.use.graphQueryMaxDepth()
 
171
 
172
  const getNode = useCallback(
173
  (nodeId: string) => {
@@ -185,13 +186,16 @@ const useLightrangeGraph = () => {
185
 
186
  useEffect(() => {
187
  if (queryLabel) {
188
- if (lastQueryLabel.label !== queryLabel || lastQueryLabel.maxQueryDepth !== maxQueryDepth) {
 
 
189
  lastQueryLabel.label = queryLabel
190
  lastQueryLabel.maxQueryDepth = maxQueryDepth
 
191
 
192
  const state = useGraphStore.getState()
193
  state.reset()
194
- fetchGraph(queryLabel, maxQueryDepth).then((data) => {
195
  // console.debug('Query label: ' + queryLabel)
196
  state.setSigmaGraph(createSigmaGraph(data))
197
  data?.buildDynamicMap()
@@ -203,7 +207,7 @@ const useLightrangeGraph = () => {
203
  state.reset()
204
  state.setSigmaGraph(new DirectedGraph())
205
  }
206
- }, [queryLabel, maxQueryDepth])
207
 
208
  const lightrageGraph = useCallback(() => {
209
  if (sigmaGraph) {
 
50
  }
51
  export type EdgeType = { label: string }
52
 
53
+ const fetchGraph = async (label: string, maxDepth: number, minDegree: number) => {
54
  let rawData: any = null
55
 
56
  try {
57
+ rawData = await queryGraphs(label, maxDepth, minDegree)
58
  } catch (e) {
59
  useBackendState.getState().setErrorMessage(errorMessage(e), 'Query Graphs Error!')
60
  return null
 
161
  return graph
162
  }
163
 
164
+ const lastQueryLabel = { label: '', maxQueryDepth: 0, minDegree: 0 }
165
 
166
  const useLightrangeGraph = () => {
167
  const queryLabel = useSettingsStore.use.queryLabel()
168
  const rawGraph = useGraphStore.use.rawGraph()
169
  const sigmaGraph = useGraphStore.use.sigmaGraph()
170
  const maxQueryDepth = useSettingsStore.use.graphQueryMaxDepth()
171
+ const minDegree = useSettingsStore.use.graphMinDegree()
172
 
173
  const getNode = useCallback(
174
  (nodeId: string) => {
 
186
 
187
  useEffect(() => {
188
  if (queryLabel) {
189
+ if (lastQueryLabel.label !== queryLabel ||
190
+ lastQueryLabel.maxQueryDepth !== maxQueryDepth ||
191
+ lastQueryLabel.minDegree !== minDegree) {
192
  lastQueryLabel.label = queryLabel
193
  lastQueryLabel.maxQueryDepth = maxQueryDepth
194
+ lastQueryLabel.minDegree = minDegree
195
 
196
  const state = useGraphStore.getState()
197
  state.reset()
198
+ fetchGraph(queryLabel, maxQueryDepth, minDegree).then((data) => {
199
  // console.debug('Query label: ' + queryLabel)
200
  state.setSigmaGraph(createSigmaGraph(data))
201
  data?.buildDynamicMap()
 
207
  state.reset()
208
  state.setSigmaGraph(new DirectedGraph())
209
  }
210
+ }, [queryLabel, maxQueryDepth, minDegree])
211
 
212
  const lightrageGraph = useCallback(() => {
213
  if (sigmaGraph) {
lightrag_webui/src/stores/settings.ts CHANGED
@@ -22,6 +22,9 @@ interface SettingsState {
22
  graphQueryMaxDepth: number
23
  setGraphQueryMaxDepth: (depth: number) => void
24
 
 
 
 
25
  graphLayoutMaxIterations: number
26
  setGraphLayoutMaxIterations: (iterations: number) => void
27
 
@@ -66,6 +69,7 @@ const useSettingsStoreBase = create<SettingsState>()(
66
  enableEdgeEvents: false,
67
 
68
  graphQueryMaxDepth: 3,
 
69
  graphLayoutMaxIterations: 10,
70
 
71
  queryLabel: defaultQueryLabel,
@@ -107,6 +111,8 @@ const useSettingsStoreBase = create<SettingsState>()(
107
 
108
  setGraphQueryMaxDepth: (depth: number) => set({ graphQueryMaxDepth: depth }),
109
 
 
 
110
  setEnableHealthCheck: (enable: boolean) => set({ enableHealthCheck: enable }),
111
 
112
  setApiKey: (apiKey: string | null) => set({ apiKey }),
 
22
  graphQueryMaxDepth: number
23
  setGraphQueryMaxDepth: (depth: number) => void
24
 
25
+ graphMinDegree: number
26
+ setGraphMinDegree: (degree: number) => void
27
+
28
  graphLayoutMaxIterations: number
29
  setGraphLayoutMaxIterations: (iterations: number) => void
30
 
 
69
  enableEdgeEvents: false,
70
 
71
  graphQueryMaxDepth: 3,
72
+ graphMinDegree: 0,
73
  graphLayoutMaxIterations: 10,
74
 
75
  queryLabel: defaultQueryLabel,
 
111
 
112
  setGraphQueryMaxDepth: (depth: number) => set({ graphQueryMaxDepth: depth }),
113
 
114
+ setGraphMinDegree: (degree: number) => set({ graphMinDegree: degree }),
115
+
116
  setEnableHealthCheck: (enable: boolean) => set({ enableHealthCheck: enable }),
117
 
118
  setApiKey: (apiKey: string | null) => set({ apiKey }),
lightrag_webui/src/vite-env.d.ts CHANGED
@@ -1 +1,11 @@
1
  /// <reference types="vite/client" />
1
  /// <reference types="vite/client" />
2
+
3
+ interface ImportMetaEnv {
4
+ readonly VITE_API_PROXY: string
5
+ readonly VITE_API_ENDPOINTS: string
6
+ readonly VITE_BACKEND_URL: string
7
+ }
8
+
9
+ interface ImportMeta {
10
+ readonly env: ImportMetaEnv
11
+ }
lightrag_webui/tsconfig.json CHANGED
@@ -26,5 +26,5 @@
26
  "@/*": ["./src/*"]
27
  }
28
  },
29
- "include": ["src"]
30
  }
 
26
  "@/*": ["./src/*"]
27
  }
28
  },
29
+ "include": ["src", "vite.config.ts"]
30
  }
lightrag_webui/vite.config.ts CHANGED
@@ -14,6 +14,21 @@ export default defineConfig({
14
  },
15
  base: './',
16
  build: {
17
- outDir: path.resolve(__dirname, '../lightrag/api/webui')
18
  }
19
  })
 
14
  },
15
  base: './',
16
  build: {
17
+ outDir: path.resolve(__dirname, '../lightrag/api/webui'),
18
+ emptyOutDir: true
19
+ },
20
+ server: {
21
+ proxy: import.meta.env.VITE_API_PROXY === 'true' && import.meta.env.VITE_API_ENDPOINTS ?
22
+ Object.fromEntries(
23
+ import.meta.env.VITE_API_ENDPOINTS.split(',').map(endpoint => [
24
+ endpoint,
25
+ {
26
+ target: import.meta.env.VITE_BACKEND_URL || 'http://localhost:9621',
27
+ changeOrigin: true,
28
+ rewrite: endpoint === '/api' ?
29
+ (path) => path.replace(/^\/api/, '') : undefined
30
+ }
31
+ ])
32
+ ) : {}
33
  }
34
  })
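
The new `server.proxy` block is driven by the three variables declared in `vite-env.d.ts` above. A hypothetical `.env` for local development is shown below; the endpoint list is only an example, and the port matches the fallback already in the config. If these values need to be available while `vite.config.ts` itself is evaluated, Vite's `loadEnv` helper is the usual way to read them.

```
# Example .env values (illustrative, not part of this commit)
VITE_API_PROXY=true
VITE_API_ENDPOINTS=/api,/documents,/graphs,/query,/health
VITE_BACKEND_URL=http://localhost:9621
```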