gzdaniel committed
Commit 0754aaa
2 Parent(s): bf17f9e 7cd4bea

Merge branch 'main' into add-Memgraph-graph-db

env.example CHANGED
@@ -111,25 +111,37 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
111
  ###########################
112
  ### Data storage selection
113
  ###########################
114
  ### PostgreSQL
115
  # LIGHTRAG_KV_STORAGE=PGKVStorage
116
  # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
117
- # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
118
  # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
119
- ### MongoDB
 
120
  # LIGHTRAG_KV_STORAGE=MongoKVStorage
121
  # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
122
- # LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
123
  # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
124
- ### KV Storage
 
125
  # LIGHTRAG_KV_STORAGE=RedisKVStorage
126
  # LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
127
- ### Vector Storage
128
- # LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
129
  # LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
130
- ### Graph Storage
 
131
  # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
132
- # LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
133
 
134
  ### PostgreSQL Configuration
135
  POSTGRES_HOST=localhost
@@ -138,31 +150,19 @@ POSTGRES_USER=your_username
138
  POSTGRES_PASSWORD='your_password'
139
  POSTGRES_DATABASE=your_database
140
  POSTGRES_MAX_CONNECTIONS=12
141
- ### separating all data from difference Lightrag instances
142
- # POSTGRES_WORKSPACE=default
143
 
144
  ### Neo4j Configuration
145
  NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
146
  NEO4J_USERNAME=neo4j
147
  NEO4J_PASSWORD='your_password'
148
-
149
- ### Independent AGM Configuration(not for AMG embedded in PostreSQL)
150
- # AGE_POSTGRES_DB=
151
- # AGE_POSTGRES_USER=
152
- # AGE_POSTGRES_PASSWORD=
153
- # AGE_POSTGRES_HOST=
154
- # AGE_POSTGRES_PORT=8529
155
-
156
- # AGE Graph Name(apply to PostgreSQL and independent AGM)
157
- ### AGE_GRAPH_NAME is deprecated
158
- # AGE_GRAPH_NAME=lightrag
159
 
160
  ### MongoDB Configuration
161
  MONGO_URI=mongodb://root:root@localhost:27017/
 
162
  MONGO_DATABASE=LightRAG
163
- ### separating all data from difference Lightrag instances(deprecating)
164
- ### separating all data from difference Lightrag instances
165
- # MONGODB_WORKSPACE=default
166
 
167
  ### Milvus Configuration
168
  MILVUS_URI=http://localhost:19530
@@ -170,10 +170,13 @@ MILVUS_DB_NAME=lightrag
170
  # MILVUS_USER=root
171
  # MILVUS_PASSWORD=your_password
172
  # MILVUS_TOKEN=your_token
 
173
 
174
  ### Qdrant
175
- QDRANT_URL=http://localhost:16333
176
  # QDRANT_API_KEY=your-api-key
 
177
 
178
  ### Redis
179
  REDIS_URI=redis://localhost:6379
 
 
111
  ###########################
112
  ### Data storage selection
113
  ###########################
114
+ ### In-memory database with data persistence to local files
115
+ # LIGHTRAG_KV_STORAGE=JsonKVStorage
116
+ # LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
117
+ # LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
118
+ # LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
119
+ # LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
120
  ### PostgreSQL
121
  # LIGHTRAG_KV_STORAGE=PGKVStorage
122
  # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
 
123
  # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
124
+ # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
125
+ ### MongoDB (recommended for production deploy)
126
  # LIGHTRAG_KV_STORAGE=MongoKVStorage
127
  # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
 
128
  # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
129
+ # LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
130
+ ### Redis Storage (recommended for production deploy)
131
  # LIGHTRAG_KV_STORAGE=RedisKVStorage
132
  # LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
133
+ ### Vector Storage (recommended for production deploy)
 
134
  # LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
135
+ # LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage
136
+ ### Graph Storage (recommended for production deploy)
137
  # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
138
+
139
+ ####################################################################
140
+ ### Default workspace for all storage types
141
+ ### For the purpose of isolation of data for each LightRAG instance
142
+ ### Valid characters: a-z, A-Z, 0-9, and _
143
+ ####################################################################
144
+ # WORKSPACE=doc—
145
 
146
  ### PostgreSQL Configuration
147
  POSTGRES_HOST=localhost
 
150
  POSTGRES_PASSWORD='your_password'
151
  POSTGRES_DATABASE=your_database
152
  POSTGRES_MAX_CONNECTIONS=12
153
+ # POSTGRES_WORKSPACE=forced_workspace_name
 
154
 
155
  ### Neo4j Configuration
156
  NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
157
  NEO4J_USERNAME=neo4j
158
  NEO4J_PASSWORD='your_password'
159
+ # NEO4J_WORKSPACE=forced_workspace_name
160
 
161
  ### MongoDB Configuration
162
  MONGO_URI=mongodb://root:root@localhost:27017/
163
+ #MONGO_URI=mongodb+srv://root:[email protected]/?retryWrites=true&w=majority&appName=Cluster0
164
  MONGO_DATABASE=LightRAG
165
+ # MONGODB_WORKSPACE=forced_workspace_name
 
 
166
 
167
  ### Milvus Configuration
168
  MILVUS_URI=http://localhost:19530
 
170
  # MILVUS_USER=root
171
  # MILVUS_PASSWORD=your_password
172
  # MILVUS_TOKEN=your_token
173
+ # MILVUS_WORKSPACE=forced_workspace_name
174
 
175
  ### Qdrant
176
+ QDRANT_URL=http://localhost:6333
177
  # QDRANT_API_KEY=your-api-key
178
+ # QDRANT_WORKSPACE=forced_workspace_name
179
 
180
  ### Redis
181
  REDIS_URI=redis://localhost:6379
182
+ # REDIS_WORKSPACE=forced_workspace_name
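
Note on the workspace variables added above: WORKSPACE sets a default workspace for every storage backend, while the backend-specific variables (POSTGRES_WORKSPACE, NEO4J_WORKSPACE, MONGODB_WORKSPACE, MILVUS_WORKSPACE, QDRANT_WORKSPACE, REDIS_WORKSPACE) force a workspace for that backend and override the shared value. A minimal sketch of that precedence; resolve_workspace is an illustrative helper name, not part of the codebase:

import os

def resolve_workspace(backend_env_var: str, passed_workspace: str = "") -> str:
    # Backend-specific variable wins; otherwise fall back to the shared value.
    # An empty result means "no workspace": storages keep their legacy layout.
    forced = os.environ.get(backend_env_var, "").strip()
    return forced if forced else passed_workspace

# With WORKSPACE=team_a and NEO4J_WORKSPACE unset this prints "team_a";
# setting NEO4J_WORKSPACE=shared_graph would override it for Neo4j only.
print(resolve_workspace("NEO4J_WORKSPACE", os.environ.get("WORKSPACE", "")))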
lightrag/api/__init__.py CHANGED
@@ -1 +1 @@
1
- __api_version__ = "0178"
 
1
+ __api_version__ = "0179"
lightrag/api/config.py CHANGED
@@ -184,10 +184,10 @@ def parse_args() -> argparse.Namespace:
184
 
185
  # Namespace
186
  parser.add_argument(
187
- "--namespace-prefix",
188
  type=str,
189
- default=get_env_value("NAMESPACE_PREFIX", ""),
190
- help="Prefix of the namespace",
191
  )
192
 
193
  parser.add_argument(
 
184
 
185
  # Namespace
186
  parser.add_argument(
187
+ "--workspace",
188
  type=str,
189
+ default=get_env_value("WORKSPACE", ""),
190
+ help="Default workspace for all storage",
191
  )
192
 
193
  parser.add_argument(
lightrag/api/lightrag_server.py CHANGED
@@ -112,8 +112,8 @@ def create_app(args):
112
  # Check if API key is provided either through env var or args
113
  api_key = os.getenv("LIGHTRAG_API_KEY") or args.key
114
 
115
- # Initialize document manager
116
- doc_manager = DocumentManager(args.input_dir)
117
 
118
  @asynccontextmanager
119
  async def lifespan(app: FastAPI):
@@ -295,6 +295,7 @@ def create_app(args):
295
  if args.llm_binding in ["lollms", "ollama", "openai"]:
296
  rag = LightRAG(
297
  working_dir=args.working_dir,
 
298
  llm_model_func=lollms_model_complete
299
  if args.llm_binding == "lollms"
300
  else ollama_model_complete
@@ -330,6 +331,7 @@ def create_app(args):
330
  else: # azure_openai
331
  rag = LightRAG(
332
  working_dir=args.working_dir,
 
333
  llm_model_func=azure_openai_model_complete,
334
  chunk_token_size=int(args.chunk_size),
335
  chunk_overlap_token_size=int(args.chunk_overlap_size),
@@ -472,6 +474,8 @@ def create_app(args):
472
  "vector_storage": args.vector_storage,
473
  "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
474
  "enable_llm_cache": args.enable_llm_cache,
 
 
475
  },
476
  "auth_mode": auth_mode,
477
  "pipeline_busy": pipeline_status.get("busy", False),
 
112
  # Check if API key is provided either through env var or args
113
  api_key = os.getenv("LIGHTRAG_API_KEY") or args.key
114
 
115
+ # Initialize document manager with workspace support for data isolation
116
+ doc_manager = DocumentManager(args.input_dir, workspace=args.workspace)
117
 
118
  @asynccontextmanager
119
  async def lifespan(app: FastAPI):
 
295
  if args.llm_binding in ["lollms", "ollama", "openai"]:
296
  rag = LightRAG(
297
  working_dir=args.working_dir,
298
+ workspace=args.workspace,
299
  llm_model_func=lollms_model_complete
300
  if args.llm_binding == "lollms"
301
  else ollama_model_complete
 
331
  else: # azure_openai
332
  rag = LightRAG(
333
  working_dir=args.working_dir,
334
+ workspace=args.workspace,
335
  llm_model_func=azure_openai_model_complete,
336
  chunk_token_size=int(args.chunk_size),
337
  chunk_overlap_token_size=int(args.chunk_overlap_size),
 
474
  "vector_storage": args.vector_storage,
475
  "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
476
  "enable_llm_cache": args.enable_llm_cache,
477
+ "workspace": args.workspace,
478
+ "max_graph_nodes": os.getenv("MAX_GRAPH_NODES"),
479
  },
480
  "auth_mode": auth_mode,
481
  "pipeline_busy": pipeline_status.get("busy", False),
lightrag/api/routers/document_routes.py CHANGED
@@ -475,6 +475,7 @@ class DocumentManager:
475
  def __init__(
476
  self,
477
  input_dir: str,
 
478
  supported_extensions: tuple = (
479
  ".txt",
480
  ".md",
@@ -515,10 +516,19 @@ class DocumentManager:
515
  ".less", # LESS CSS
516
  ),
517
  ):
518
- self.input_dir = Path(input_dir)
 
 
519
  self.supported_extensions = supported_extensions
520
  self.indexed_files = set()
521
 
 
 
 
 
 
 
 
522
  # Create input directory if it doesn't exist
523
  self.input_dir.mkdir(parents=True, exist_ok=True)
524
 
@@ -714,6 +724,12 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
714
 
715
  # Insert into the RAG queue
716
  if content:
 
 
 
 
 
 
717
  await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
718
  logger.info(f"Successfully fetched and enqueued file: {file_path.name}")
719
  return True
 
475
  def __init__(
476
  self,
477
  input_dir: str,
478
+ workspace: str = "", # New parameter for workspace isolation
479
  supported_extensions: tuple = (
480
  ".txt",
481
  ".md",
 
516
  ".less", # LESS CSS
517
  ),
518
  ):
519
+ # Store the base input directory and workspace
520
+ self.base_input_dir = Path(input_dir)
521
+ self.workspace = workspace
522
  self.supported_extensions = supported_extensions
523
  self.indexed_files = set()
524
 
525
+ # Create workspace-specific input directory
526
+ # If workspace is provided, create a subdirectory for data isolation
527
+ if workspace:
528
+ self.input_dir = self.base_input_dir / workspace
529
+ else:
530
+ self.input_dir = self.base_input_dir
531
+
532
  # Create input directory if it doesn't exist
533
  self.input_dir.mkdir(parents=True, exist_ok=True)
534
 
 
724
 
725
  # Insert into the RAG queue
726
  if content:
727
+ # Check if content contains only whitespace characters
728
+ if not content.strip():
729
+ logger.warning(
730
+ f"File contains only whitespace characters. file_paths={file_path.name}"
731
+ )
732
+
733
  await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
734
  logger.info(f"Successfully fetched and enqueued file: {file_path.name}")
735
  return True
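
With the new workspace parameter, DocumentManager keeps the configured directory as base_input_dir and resolves input_dir to a per-workspace subdirectory, which it creates on demand; an empty workspace keeps the old behaviour. A small usage sketch, assuming DocumentManager is imported from lightrag.api.routers.document_routes and an ./inputs directory exists or may be created:

from lightrag.api.routers.document_routes import DocumentManager

dm = DocumentManager("./inputs", workspace="team_a")
print(dm.input_dir)        # ./inputs/team_a  (subdirectory created for isolation)

dm_legacy = DocumentManager("./inputs")
print(dm_legacy.input_dir) # ./inputs  (unchanged default behaviour)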
lightrag/api/utils_api.py CHANGED
@@ -284,8 +284,10 @@ def display_splash_screen(args: argparse.Namespace) -> None:
284
  ASCIIColors.yellow(f"{args.vector_storage}")
285
  ASCIIColors.white(" ├─ Graph Storage: ", end="")
286
  ASCIIColors.yellow(f"{args.graph_storage}")
287
- ASCIIColors.white(" └─ Document Status Storage: ", end="")
288
  ASCIIColors.yellow(f"{args.doc_status_storage}")
 
 
289
 
290
  # Server Status
291
  ASCIIColors.green("\n✨ Server starting up...\n")
 
284
  ASCIIColors.yellow(f"{args.vector_storage}")
285
  ASCIIColors.white(" ├─ Graph Storage: ", end="")
286
  ASCIIColors.yellow(f"{args.graph_storage}")
287
+ ASCIIColors.white(" ├─ Document Status Storage: ", end="")
288
  ASCIIColors.yellow(f"{args.doc_status_storage}")
289
+ ASCIIColors.white(" └─ Workspace: ", end="")
290
+ ASCIIColors.yellow(f"{args.workspace if args.workspace else '-'}")
291
 
292
  # Server Status
293
  ASCIIColors.green("\n✨ Server starting up...\n")
lightrag/api/webui/assets/{index-CRWiopRc.js → index-BlAjHenV.js} RENAMED
Binary files a/lightrag/api/webui/assets/index-CRWiopRc.js and b/lightrag/api/webui/assets/index-BlAjHenV.js differ
 
lightrag/api/webui/index.html CHANGED
Binary files a/lightrag/api/webui/index.html and b/lightrag/api/webui/index.html differ
 
lightrag/base.py CHANGED
@@ -103,6 +103,7 @@ class QueryParam:
103
  @dataclass
104
  class StorageNameSpace(ABC):
105
  namespace: str
 
106
  global_config: dict[str, Any]
107
 
108
  async def initialize(self):
 
103
  @dataclass
104
  class StorageNameSpace(ABC):
105
  namespace: str
106
+ workspace: str
107
  global_config: dict[str, Any]
108
 
109
  async def initialize(self):
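
Since StorageNameSpace now declares workspace between namespace and global_config, every storage dataclass built on it carries the workspace, and custom storage implementations must accept the extra field. A self-contained sketch of the updated base shape (redeclared here for illustration, not imported from lightrag):

from abc import ABC
from dataclasses import dataclass
from typing import Any

@dataclass
class StorageNameSpace(ABC):
    namespace: str
    workspace: str                 # new field: "" means no isolation
    global_config: dict[str, Any]

ns = StorageNameSpace(namespace="full_docs", workspace="team_a", global_config={})
print(ns.workspace)  # team_a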
lightrag/kg/faiss_impl.py CHANGED
@@ -38,9 +38,19 @@ class FaissVectorDBStorage(BaseVectorStorage):
38
  self.cosine_better_than_threshold = cosine_threshold
39
 
40
  # Where to save index file if you want persistent storage
41
- self._faiss_index_file = os.path.join(
42
- self.global_config["working_dir"], f"faiss_index_{self.namespace}.index"
43
- )
44
  self._meta_file = self._faiss_index_file + ".meta.json"
45
 
46
  self._max_batch_size = self.global_config["embedding_batch_num"]
 
38
  self.cosine_better_than_threshold = cosine_threshold
39
 
40
  # Where to save index file if you want persistent storage
41
+ working_dir = self.global_config["working_dir"]
42
+ if self.workspace:
43
+ # Include workspace in the file path for data isolation
44
+ workspace_dir = os.path.join(working_dir, self.workspace)
45
+ os.makedirs(workspace_dir, exist_ok=True)
46
+ self._faiss_index_file = os.path.join(
47
+ workspace_dir, f"faiss_index_{self.namespace}.index"
48
+ )
49
+ else:
50
+ # Default behavior when workspace is empty
51
+ self._faiss_index_file = os.path.join(
52
+ working_dir, f"faiss_index_{self.namespace}.index"
53
+ )
54
  self._meta_file = self._faiss_index_file + ".meta.json"
55
 
56
  self._max_batch_size = self.global_config["embedding_batch_num"]
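
The only behavioural change here is the on-disk layout: with a workspace set, the Faiss index file (and, in the two JSON-based storages that follow, the kv_store_*.json files) moves into a per-workspace subdirectory of working_dir. A short sketch of the resulting paths, assuming working_dir='./rag_storage' and namespace='chunks':

import os

working_dir, namespace = "./rag_storage", "chunks"

def faiss_index_path(workspace: str) -> str:
    # Mirrors the branch above: add a subdirectory only when a workspace is set.
    base = os.path.join(working_dir, workspace) if workspace else working_dir
    return os.path.join(base, f"faiss_index_{namespace}.index")

print(faiss_index_path(""))        # ./rag_storage/faiss_index_chunks.index
print(faiss_index_path("team_a"))  # ./rag_storage/team_a/faiss_index_chunks.index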
lightrag/kg/json_doc_status_impl.py CHANGED
@@ -30,7 +30,18 @@ class JsonDocStatusStorage(DocStatusStorage):
30
 
31
  def __post_init__(self):
32
  working_dir = self.global_config["working_dir"]
33
- self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
34
  self._data = None
35
  self._storage_lock = None
36
  self.storage_updated = None
 
30
 
31
  def __post_init__(self):
32
  working_dir = self.global_config["working_dir"]
33
+ if self.workspace:
34
+ # Include workspace in the file path for data isolation
35
+ workspace_dir = os.path.join(working_dir, self.workspace)
36
+ os.makedirs(workspace_dir, exist_ok=True)
37
+ self._file_name = os.path.join(
38
+ workspace_dir, f"kv_store_{self.namespace}.json"
39
+ )
40
+ else:
41
+ # Default behavior when workspace is empty
42
+ self._file_name = os.path.join(
43
+ working_dir, f"kv_store_{self.namespace}.json"
44
+ )
45
  self._data = None
46
  self._storage_lock = None
47
  self.storage_updated = None
lightrag/kg/json_kv_impl.py CHANGED
@@ -26,7 +26,18 @@ from .shared_storage import (
26
  class JsonKVStorage(BaseKVStorage):
27
  def __post_init__(self):
28
  working_dir = self.global_config["working_dir"]
29
- self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
30
  self._data = None
31
  self._storage_lock = None
32
  self.storage_updated = None
 
26
  class JsonKVStorage(BaseKVStorage):
27
  def __post_init__(self):
28
  working_dir = self.global_config["working_dir"]
29
+ if self.workspace:
30
+ # Include workspace in the file path for data isolation
31
+ workspace_dir = os.path.join(working_dir, self.workspace)
32
+ os.makedirs(workspace_dir, exist_ok=True)
33
+ self._file_name = os.path.join(
34
+ workspace_dir, f"kv_store_{self.namespace}.json"
35
+ )
36
+ else:
37
+ # Default behavior when workspace is empty
38
+ self._file_name = os.path.join(
39
+ working_dir, f"kv_store_{self.namespace}.json"
40
+ )
41
  self._data = None
42
  self._storage_lock = None
43
  self.storage_updated = None
lightrag/kg/milvus_impl.py CHANGED
@@ -7,10 +7,6 @@ from lightrag.utils import logger, compute_mdhash_id
7
  from ..base import BaseVectorStorage
8
  import pipmaster as pm
9
 
10
-
11
- if not pm.is_installed("configparser"):
12
- pm.install("configparser")
13
-
14
  if not pm.is_installed("pymilvus"):
15
  pm.install("pymilvus")
16
 
@@ -660,6 +656,29 @@ class MilvusVectorDBStorage(BaseVectorStorage):
660
  raise
661
 
662
  def __post_init__(self):
663
  kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
664
  cosine_threshold = kwargs.get("cosine_better_than_threshold")
665
  if cosine_threshold is None:
 
7
  from ..base import BaseVectorStorage
8
  import pipmaster as pm
9
 
 
 
 
 
10
  if not pm.is_installed("pymilvus"):
11
  pm.install("pymilvus")
12
 
 
656
  raise
657
 
658
  def __post_init__(self):
659
+ # Check for MILVUS_WORKSPACE environment variable first (higher priority)
660
+ # This allows administrators to force a specific workspace for all Milvus storage instances
661
+ milvus_workspace = os.environ.get("MILVUS_WORKSPACE")
662
+ if milvus_workspace and milvus_workspace.strip():
663
+ # Use environment variable value, overriding the passed workspace parameter
664
+ effective_workspace = milvus_workspace.strip()
665
+ logger.info(
666
+ f"Using MILVUS_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
667
+ )
668
+ else:
669
+ # Use the workspace parameter passed during initialization
670
+ effective_workspace = self.workspace
671
+ if effective_workspace:
672
+ logger.debug(
673
+ f"Using passed workspace parameter: '{effective_workspace}'"
674
+ )
675
+
676
+ # Build namespace with workspace prefix for data isolation
677
+ if effective_workspace:
678
+ self.namespace = f"{effective_workspace}_{self.namespace}"
679
+ logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
680
+ # When workspace is empty, keep the original namespace unchanged
681
+
682
  kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
683
  cosine_threshold = kwargs.get("cosine_better_than_threshold")
684
  if cosine_threshold is None:
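
For Milvus (and for MongoDB just below) isolation works by prefixing the storage namespace, which becomes the collection name, rather than by a separate directory; MILVUS_WORKSPACE takes precedence over the workspace passed in at construction time. A short sketch of the resulting names, assuming a namespace of 'entities':

def effective_collection_name(namespace: str, workspace: str) -> str:
    # Mirrors the prefixing above: the name is unchanged when no workspace applies.
    return f"{workspace}_{namespace}" if workspace else namespace

print(effective_collection_name("entities", ""))        # entities
print(effective_collection_name("entities", "team_a"))  # team_a_entities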
lightrag/kg/mongo_impl.py CHANGED
@@ -25,6 +25,7 @@ if not pm.is_installed("pymongo"):
25
  pm.install("pymongo")
26
 
27
  from pymongo import AsyncMongoClient # type: ignore
 
28
  from pymongo.asynchronous.database import AsyncDatabase # type: ignore
29
  from pymongo.asynchronous.collection import AsyncCollection # type: ignore
30
  from pymongo.operations import SearchIndexModel # type: ignore
@@ -81,7 +82,39 @@ class MongoKVStorage(BaseKVStorage):
81
  db: AsyncDatabase = field(default=None)
82
  _data: AsyncCollection = field(default=None)
83
 
84
  def __post_init__(self):
85
  self._collection_name = self.namespace
86
 
87
  async def initialize(self):
@@ -142,7 +175,6 @@ class MongoKVStorage(BaseKVStorage):
142
 
143
  # Unified handling for all namespaces with flattened keys
144
  # Use bulk_write for better performance
145
- from pymongo import UpdateOne
146
 
147
  operations = []
148
  current_time = int(time.time()) # Get current Unix timestamp
@@ -252,7 +284,39 @@ class MongoDocStatusStorage(DocStatusStorage):
252
  db: AsyncDatabase = field(default=None)
253
  _data: AsyncCollection = field(default=None)
254
 
255
  def __post_init__(self):
256
  self._collection_name = self.namespace
257
 
258
  async def initialize(self):
@@ -367,12 +431,36 @@ class MongoGraphStorage(BaseGraphStorage):
367
  # edge collection storing source_node_id, target_node_id, and edge_properties
368
  edgeCollection: AsyncCollection = field(default=None)
369
 
370
- def __init__(self, namespace, global_config, embedding_func):
371
  super().__init__(
372
  namespace=namespace,
 
373
  global_config=global_config,
374
  embedding_func=embedding_func,
375
  )
376
  self._collection_name = self.namespace
377
  self._edge_collection_name = f"{self._collection_name}_edges"
378
 
@@ -1230,8 +1318,52 @@ class MongoGraphStorage(BaseGraphStorage):
1230
  class MongoVectorDBStorage(BaseVectorStorage):
1231
  db: AsyncDatabase | None = field(default=None)
1232
  _data: AsyncCollection | None = field(default=None)
1233
 
1234
  def __post_init__(self):
1235
  kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
1236
  cosine_threshold = kwargs.get("cosine_better_than_threshold")
1237
  if cosine_threshold is None:
@@ -1261,13 +1393,11 @@ class MongoVectorDBStorage(BaseVectorStorage):
1261
  async def create_vector_index_if_not_exists(self):
1262
  """Creates an Atlas Vector Search index."""
1263
  try:
1264
- index_name = "vector_knn_index"
1265
-
1266
  indexes_cursor = await self._data.list_search_indexes()
1267
  indexes = await indexes_cursor.to_list(length=None)
1268
  for index in indexes:
1269
- if index["name"] == index_name:
1270
- logger.debug("vector index already exist")
1271
  return
1272
 
1273
  search_index_model = SearchIndexModel(
@@ -1281,15 +1411,15 @@ class MongoVectorDBStorage(BaseVectorStorage):
1281
  }
1282
  ]
1283
  },
1284
- name=index_name,
1285
  type="vectorSearch",
1286
  )
1287
 
1288
  await self._data.create_search_index(search_index_model)
1289
- logger.info("Vector index created successfully.")
1290
 
1291
- except PyMongoError as _:
1292
- logger.debug("vector index already exist")
1293
 
1294
  async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
1295
  logger.debug(f"Inserting {len(data)} to {self.namespace}")
@@ -1344,7 +1474,7 @@ class MongoVectorDBStorage(BaseVectorStorage):
1344
  pipeline = [
1345
  {
1346
  "$vectorSearch": {
1347
- "index": "vector_knn_index", # Ensure this matches the created index name
1348
  "path": "vector",
1349
  "queryVector": query_vector,
1350
  "numCandidates": 100, # Adjust for performance
 
25
  pm.install("pymongo")
26
 
27
  from pymongo import AsyncMongoClient # type: ignore
28
+ from pymongo import UpdateOne # type: ignore
29
  from pymongo.asynchronous.database import AsyncDatabase # type: ignore
30
  from pymongo.asynchronous.collection import AsyncCollection # type: ignore
31
  from pymongo.operations import SearchIndexModel # type: ignore
 
82
  db: AsyncDatabase = field(default=None)
83
  _data: AsyncCollection = field(default=None)
84
 
85
+ def __init__(self, namespace, global_config, embedding_func, workspace=None):
86
+ super().__init__(
87
+ namespace=namespace,
88
+ workspace=workspace or "",
89
+ global_config=global_config,
90
+ embedding_func=embedding_func,
91
+ )
92
+ self.__post_init__()
93
+
94
  def __post_init__(self):
95
+ # Check for MONGODB_WORKSPACE environment variable first (higher priority)
96
+ # This allows administrators to force a specific workspace for all MongoDB storage instances
97
+ mongodb_workspace = os.environ.get("MONGODB_WORKSPACE")
98
+ if mongodb_workspace and mongodb_workspace.strip():
99
+ # Use environment variable value, overriding the passed workspace parameter
100
+ effective_workspace = mongodb_workspace.strip()
101
+ logger.info(
102
+ f"Using MONGODB_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
103
+ )
104
+ else:
105
+ # Use the workspace parameter passed during initialization
106
+ effective_workspace = self.workspace
107
+ if effective_workspace:
108
+ logger.debug(
109
+ f"Using passed workspace parameter: '{effective_workspace}'"
110
+ )
111
+
112
+ # Build namespace with workspace prefix for data isolation
113
+ if effective_workspace:
114
+ self.namespace = f"{effective_workspace}_{self.namespace}"
115
+ logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
116
+ # When workspace is empty, keep the original namespace unchanged
117
+
118
  self._collection_name = self.namespace
119
 
120
  async def initialize(self):
 
175
 
176
  # Unified handling for all namespaces with flattened keys
177
  # Use bulk_write for better performance
 
178
 
179
  operations = []
180
  current_time = int(time.time()) # Get current Unix timestamp
 
284
  db: AsyncDatabase = field(default=None)
285
  _data: AsyncCollection = field(default=None)
286
 
287
+ def __init__(self, namespace, global_config, embedding_func, workspace=None):
288
+ super().__init__(
289
+ namespace=namespace,
290
+ workspace=workspace or "",
291
+ global_config=global_config,
292
+ embedding_func=embedding_func,
293
+ )
294
+ self.__post_init__()
295
+
296
  def __post_init__(self):
297
+ # Check for MONGODB_WORKSPACE environment variable first (higher priority)
298
+ # This allows administrators to force a specific workspace for all MongoDB storage instances
299
+ mongodb_workspace = os.environ.get("MONGODB_WORKSPACE")
300
+ if mongodb_workspace and mongodb_workspace.strip():
301
+ # Use environment variable value, overriding the passed workspace parameter
302
+ effective_workspace = mongodb_workspace.strip()
303
+ logger.info(
304
+ f"Using MONGODB_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
305
+ )
306
+ else:
307
+ # Use the workspace parameter passed during initialization
308
+ effective_workspace = self.workspace
309
+ if effective_workspace:
310
+ logger.debug(
311
+ f"Using passed workspace parameter: '{effective_workspace}'"
312
+ )
313
+
314
+ # Build namespace with workspace prefix for data isolation
315
+ if effective_workspace:
316
+ self.namespace = f"{effective_workspace}_{self.namespace}"
317
+ logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
318
+ # When workspace is empty, keep the original namespace unchanged
319
+
320
  self._collection_name = self.namespace
321
 
322
  async def initialize(self):
 
431
  # edge collection storing source_node_id, target_node_id, and edge_properties
432
  edgeCollection: AsyncCollection = field(default=None)
433
 
434
+ def __init__(self, namespace, global_config, embedding_func, workspace=None):
435
  super().__init__(
436
  namespace=namespace,
437
+ workspace=workspace or "",
438
  global_config=global_config,
439
  embedding_func=embedding_func,
440
  )
441
+ # Check for MONGODB_WORKSPACE environment variable first (higher priority)
442
+ # This allows administrators to force a specific workspace for all MongoDB storage instances
443
+ mongodb_workspace = os.environ.get("MONGODB_WORKSPACE")
444
+ if mongodb_workspace and mongodb_workspace.strip():
445
+ # Use environment variable value, overriding the passed workspace parameter
446
+ effective_workspace = mongodb_workspace.strip()
447
+ logger.info(
448
+ f"Using MONGODB_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
449
+ )
450
+ else:
451
+ # Use the workspace parameter passed during initialization
452
+ effective_workspace = self.workspace
453
+ if effective_workspace:
454
+ logger.debug(
455
+ f"Using passed workspace parameter: '{effective_workspace}'"
456
+ )
457
+
458
+ # Build namespace with workspace prefix for data isolation
459
+ if effective_workspace:
460
+ self.namespace = f"{effective_workspace}_{self.namespace}"
461
+ logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
462
+ # When workspace is empty, keep the original namespace unchanged
463
+
464
  self._collection_name = self.namespace
465
  self._edge_collection_name = f"{self._collection_name}_edges"
466
 
 
1318
  class MongoVectorDBStorage(BaseVectorStorage):
1319
  db: AsyncDatabase | None = field(default=None)
1320
  _data: AsyncCollection | None = field(default=None)
1321
+ _index_name: str = field(default="", init=False)
1322
+
1323
+ def __init__(
1324
+ self, namespace, global_config, embedding_func, workspace=None, meta_fields=None
1325
+ ):
1326
+ super().__init__(
1327
+ namespace=namespace,
1328
+ workspace=workspace or "",
1329
+ global_config=global_config,
1330
+ embedding_func=embedding_func,
1331
+ meta_fields=meta_fields or set(),
1332
+ )
1333
+ self.__post_init__()
1334
 
1335
  def __post_init__(self):
1336
+ # Check for MONGODB_WORKSPACE environment variable first (higher priority)
1337
+ # This allows administrators to force a specific workspace for all MongoDB storage instances
1338
+ mongodb_workspace = os.environ.get("MONGODB_WORKSPACE")
1339
+ if mongodb_workspace and mongodb_workspace.strip():
1340
+ # Use environment variable value, overriding the passed workspace parameter
1341
+ effective_workspace = mongodb_workspace.strip()
1342
+ logger.info(
1343
+ f"Using MONGODB_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
1344
+ )
1345
+ else:
1346
+ # Use the workspace parameter passed during initialization
1347
+ effective_workspace = self.workspace
1348
+ if effective_workspace:
1349
+ logger.debug(
1350
+ f"Using passed workspace parameter: '{effective_workspace}'"
1351
+ )
1352
+
1353
+ # Build namespace with workspace prefix for data isolation
1354
+ if effective_workspace:
1355
+ self.namespace = f"{effective_workspace}_{self.namespace}"
1356
+ logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
1357
+ # When workspace is empty, keep the original namespace unchanged
1358
+
1359
+ # Set index name based on workspace for backward compatibility
1360
+ if effective_workspace:
1361
+ # Use collection-specific index name for workspaced collections to avoid conflicts
1362
+ self._index_name = f"vector_knn_index_{self.namespace}"
1363
+ else:
1364
+ # Keep original index name for backward compatibility with existing deployments
1365
+ self._index_name = "vector_knn_index"
1366
+
1367
  kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
1368
  cosine_threshold = kwargs.get("cosine_better_than_threshold")
1369
  if cosine_threshold is None:
 
1393
  async def create_vector_index_if_not_exists(self):
1394
  """Creates an Atlas Vector Search index."""
1395
  try:
 
 
1396
  indexes_cursor = await self._data.list_search_indexes()
1397
  indexes = await indexes_cursor.to_list(length=None)
1398
  for index in indexes:
1399
+ if index["name"] == self._index_name:
1400
+ logger.info(f"vector index {self._index_name} already exist")
1401
  return
1402
 
1403
  search_index_model = SearchIndexModel(
 
1411
  }
1412
  ]
1413
  },
1414
+ name=self._index_name,
1415
  type="vectorSearch",
1416
  )
1417
 
1418
  await self._data.create_search_index(search_index_model)
1419
+ logger.info(f"Vector index {self._index_name} created successfully.")
1420
 
1421
+ except PyMongoError as e:
1422
+ logger.error(f"Error creating vector index {self._index_name}: {e}")
1423
 
1424
  async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
1425
  logger.debug(f"Inserting {len(data)} to {self.namespace}")
 
1474
  pipeline = [
1475
  {
1476
  "$vectorSearch": {
1477
+ "index": self._index_name, # Use stored index name for consistency
1478
  "path": "vector",
1479
  "queryVector": query_vector,
1480
  "numCandidates": 100, # Adjust for performance
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -41,9 +41,19 @@ class NanoVectorDBStorage(BaseVectorStorage):
41
  )
42
  self.cosine_better_than_threshold = cosine_threshold
43
 
44
- self._client_file_name = os.path.join(
45
- self.global_config["working_dir"], f"vdb_{self.namespace}.json"
46
- )
 
 
 
 
 
 
 
 
 
 
47
  self._max_batch_size = self.global_config["embedding_batch_num"]
48
 
49
  self._client = NanoVectorDB(
 
41
  )
42
  self.cosine_better_than_threshold = cosine_threshold
43
 
44
+ working_dir = self.global_config["working_dir"]
45
+ if self.workspace:
46
+ # Include workspace in the file path for data isolation
47
+ workspace_dir = os.path.join(working_dir, self.workspace)
48
+ os.makedirs(workspace_dir, exist_ok=True)
49
+ self._client_file_name = os.path.join(
50
+ workspace_dir, f"vdb_{self.namespace}.json"
51
+ )
52
+ else:
53
+ # Default behavior when workspace is empty
54
+ self._client_file_name = os.path.join(
55
+ working_dir, f"vdb_{self.namespace}.json"
56
+ )
57
  self._max_batch_size = self.global_config["embedding_batch_num"]
58
 
59
  self._client = NanoVectorDB(
lightrag/kg/neo4j_impl.py CHANGED
@@ -50,14 +50,25 @@ logging.getLogger("neo4j").setLevel(logging.ERROR)
50
  @final
51
  @dataclass
52
  class Neo4JStorage(BaseGraphStorage):
53
- def __init__(self, namespace, global_config, embedding_func):
54
  super().__init__(
55
  namespace=namespace,
 
56
  global_config=global_config,
57
  embedding_func=embedding_func,
58
  )
59
  self._driver = None
60
 
61
  async def initialize(self):
62
  URI = os.environ.get("NEO4J_URI", config.get("neo4j", "uri", fallback=None))
63
  USERNAME = os.environ.get(
@@ -153,13 +164,14 @@ class Neo4JStorage(BaseGraphStorage):
153
  raise e
154
 
155
  if connected:
156
- # Create index for base nodes on entity_id if it doesn't exist
 
157
  try:
158
  async with self._driver.session(database=database) as session:
159
  # Check if index exists first
160
- check_query = """
161
  CALL db.indexes() YIELD name, labelsOrTypes, properties
162
- WHERE labelsOrTypes = ['base'] AND properties = ['entity_id']
163
  RETURN count(*) > 0 AS exists
164
  """
165
  try:
@@ -172,16 +184,16 @@ class Neo4JStorage(BaseGraphStorage):
172
  if not index_exists:
173
  # Create index only if it doesn't exist
174
  result = await session.run(
175
- "CREATE INDEX FOR (n:base) ON (n.entity_id)"
176
  )
177
  await result.consume()
178
  logger.info(
179
- f"Created index for base nodes on entity_id in {database}"
180
  )
181
  except Exception:
182
  # Fallback if db.indexes() is not supported in this Neo4j version
183
  result = await session.run(
184
- "CREATE INDEX IF NOT EXISTS FOR (n:base) ON (n.entity_id)"
185
  )
186
  await result.consume()
187
  except Exception as e:
@@ -216,11 +228,12 @@ class Neo4JStorage(BaseGraphStorage):
216
  ValueError: If node_id is invalid
217
  Exception: If there is an error executing the query
218
  """
 
219
  async with self._driver.session(
220
  database=self._DATABASE, default_access_mode="READ"
221
  ) as session:
222
  try:
223
- query = "MATCH (n:base {entity_id: $entity_id}) RETURN count(n) > 0 AS node_exists"
224
  result = await session.run(query, entity_id=node_id)
225
  single_result = await result.single()
226
  await result.consume() # Ensure result is fully consumed
@@ -245,12 +258,13 @@ class Neo4JStorage(BaseGraphStorage):
245
  ValueError: If either node_id is invalid
246
  Exception: If there is an error executing the query
247
  """
 
248
  async with self._driver.session(
249
  database=self._DATABASE, default_access_mode="READ"
250
  ) as session:
251
  try:
252
  query = (
253
- "MATCH (a:base {entity_id: $source_entity_id})-[r]-(b:base {entity_id: $target_entity_id}) "
254
  "RETURN COUNT(r) > 0 AS edgeExists"
255
  )
256
  result = await session.run(
@@ -282,11 +296,14 @@ class Neo4JStorage(BaseGraphStorage):
282
  ValueError: If node_id is invalid
283
  Exception: If there is an error executing the query
284
  """
 
285
  async with self._driver.session(
286
  database=self._DATABASE, default_access_mode="READ"
287
  ) as session:
288
  try:
289
- query = "MATCH (n:base {entity_id: $entity_id}) RETURN n"
 
 
290
  result = await session.run(query, entity_id=node_id)
291
  try:
292
  records = await result.fetch(
@@ -300,12 +317,12 @@ class Neo4JStorage(BaseGraphStorage):
300
  if records:
301
  node = records[0]["n"]
302
  node_dict = dict(node)
303
- # Remove base label from labels list if it exists
304
  if "labels" in node_dict:
305
  node_dict["labels"] = [
306
  label
307
  for label in node_dict["labels"]
308
- if label != "base"
309
  ]
310
  # logger.debug(f"Neo4j query node {query} return: {node_dict}")
311
  return node_dict
@@ -326,12 +343,13 @@ class Neo4JStorage(BaseGraphStorage):
326
  Returns:
327
  A dictionary mapping each node_id to its node data (or None if not found).
328
  """
 
329
  async with self._driver.session(
330
  database=self._DATABASE, default_access_mode="READ"
331
  ) as session:
332
- query = """
333
  UNWIND $node_ids AS id
334
- MATCH (n:base {entity_id: id})
335
  RETURN n.entity_id AS entity_id, n
336
  """
337
  result = await session.run(query, node_ids=node_ids)
@@ -340,10 +358,12 @@ class Neo4JStorage(BaseGraphStorage):
340
  entity_id = record["entity_id"]
341
  node = record["n"]
342
  node_dict = dict(node)
343
- # Remove the 'base' label if present in a 'labels' property
344
  if "labels" in node_dict:
345
  node_dict["labels"] = [
346
- label for label in node_dict["labels"] if label != "base"
 
 
347
  ]
348
  nodes[entity_id] = node_dict
349
  await result.consume() # Make sure to consume the result fully
@@ -364,12 +384,13 @@ class Neo4JStorage(BaseGraphStorage):
364
  ValueError: If node_id is invalid
365
  Exception: If there is an error executing the query
366
  """
 
367
  async with self._driver.session(
368
  database=self._DATABASE, default_access_mode="READ"
369
  ) as session:
370
  try:
371
- query = """
372
- MATCH (n:base {entity_id: $entity_id})
373
  OPTIONAL MATCH (n)-[r]-()
374
  RETURN COUNT(r) AS degree
375
  """
@@ -403,13 +424,14 @@ class Neo4JStorage(BaseGraphStorage):
403
  A dictionary mapping each node_id to its degree (number of relationships).
404
  If a node is not found, its degree will be set to 0.
405
  """
 
406
  async with self._driver.session(
407
  database=self._DATABASE, default_access_mode="READ"
408
  ) as session:
409
- query = """
410
  UNWIND $node_ids AS id
411
- MATCH (n:base {entity_id: id})
412
- RETURN n.entity_id AS entity_id, count { (n)--() } AS degree;
413
  """
414
  result = await session.run(query, node_ids=node_ids)
415
  degrees = {}
@@ -489,12 +511,13 @@ class Neo4JStorage(BaseGraphStorage):
489
  ValueError: If either node_id is invalid
490
  Exception: If there is an error executing the query
491
  """
 
492
  try:
493
  async with self._driver.session(
494
  database=self._DATABASE, default_access_mode="READ"
495
  ) as session:
496
- query = """
497
- MATCH (start:base {entity_id: $source_entity_id})-[r]-(end:base {entity_id: $target_entity_id})
498
  RETURN properties(r) as edge_properties
499
  """
500
  result = await session.run(
@@ -571,12 +594,13 @@ class Neo4JStorage(BaseGraphStorage):
571
  Returns:
572
  A dictionary mapping (src, tgt) tuples to their edge properties.
573
  """
 
574
  async with self._driver.session(
575
  database=self._DATABASE, default_access_mode="READ"
576
  ) as session:
577
- query = """
578
  UNWIND $pairs AS pair
579
- MATCH (start:base {entity_id: pair.src})-[r:DIRECTED]-(end:base {entity_id: pair.tgt})
580
  RETURN pair.src AS src_id, pair.tgt AS tgt_id, collect(properties(r)) AS edges
581
  """
582
  result = await session.run(query, pairs=pairs)
@@ -627,8 +651,9 @@ class Neo4JStorage(BaseGraphStorage):
627
  database=self._DATABASE, default_access_mode="READ"
628
  ) as session:
629
  try:
630
- query = """MATCH (n:base {entity_id: $entity_id})
631
- OPTIONAL MATCH (n)-[r]-(connected:base)
 
632
  WHERE connected.entity_id IS NOT NULL
633
  RETURN n, r, connected"""
634
  results = await session.run(query, entity_id=source_node_id)
@@ -689,10 +714,11 @@ class Neo4JStorage(BaseGraphStorage):
689
  database=self._DATABASE, default_access_mode="READ"
690
  ) as session:
691
  # Query to get both outgoing and incoming edges
692
- query = """
 
693
  UNWIND $node_ids AS id
694
- MATCH (n:base {entity_id: id})
695
- OPTIONAL MATCH (n)-[r]-(connected:base)
696
  RETURN id AS queried_id, n.entity_id AS node_entity_id,
697
  connected.entity_id AS connected_entity_id,
698
  startNode(r).entity_id AS start_entity_id
@@ -727,12 +753,13 @@ class Neo4JStorage(BaseGraphStorage):
727
  return edges_dict
728
 
729
  async def get_nodes_by_chunk_ids(self, chunk_ids: list[str]) -> list[dict]:
 
730
  async with self._driver.session(
731
  database=self._DATABASE, default_access_mode="READ"
732
  ) as session:
733
- query = """
734
  UNWIND $chunk_ids AS chunk_id
735
- MATCH (n:base)
736
  WHERE n.source_id IS NOT NULL AND chunk_id IN split(n.source_id, $sep)
737
  RETURN DISTINCT n
738
  """
@@ -748,12 +775,13 @@ class Neo4JStorage(BaseGraphStorage):
748
  return nodes
749
 
750
  async def get_edges_by_chunk_ids(self, chunk_ids: list[str]) -> list[dict]:
 
751
  async with self._driver.session(
752
  database=self._DATABASE, default_access_mode="READ"
753
  ) as session:
754
- query = """
755
  UNWIND $chunk_ids AS chunk_id
756
- MATCH (a:base)-[r]-(b:base)
757
  WHERE r.source_id IS NOT NULL AND chunk_id IN split(r.source_id, $sep)
758
  RETURN DISTINCT a.entity_id AS source, b.entity_id AS target, properties(r) AS properties
759
  """
@@ -787,6 +815,7 @@ class Neo4JStorage(BaseGraphStorage):
787
  node_id: The unique identifier for the node (used as label)
788
  node_data: Dictionary of node properties
789
  """
 
790
  properties = node_data
791
  entity_type = properties["entity_type"]
792
  if "entity_id" not in properties:
@@ -796,14 +825,11 @@ class Neo4JStorage(BaseGraphStorage):
796
  async with self._driver.session(database=self._DATABASE) as session:
797
 
798
  async def execute_upsert(tx: AsyncManagedTransaction):
799
- query = (
800
- """
801
- MERGE (n:base {entity_id: $entity_id})
802
  SET n += $properties
803
- SET n:`%s`
804
  """
805
- % entity_type
806
- )
807
  result = await tx.run(
808
  query, entity_id=node_id, properties=properties
809
  )
@@ -847,10 +873,11 @@ class Neo4JStorage(BaseGraphStorage):
847
  async with self._driver.session(database=self._DATABASE) as session:
848
 
849
  async def execute_upsert(tx: AsyncManagedTransaction):
850
- query = """
851
- MATCH (source:base {entity_id: $source_entity_id})
 
852
  WITH source
853
- MATCH (target:base {entity_id: $target_entity_id})
854
  MERGE (source)-[r:DIRECTED]-(target)
855
  SET r += $properties
856
  RETURN r, source, target
@@ -889,6 +916,7 @@ class Neo4JStorage(BaseGraphStorage):
889
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
890
  indicating whether the graph was truncated due to max_nodes limit
891
  """
 
892
  result = KnowledgeGraph()
893
  seen_nodes = set()
894
  seen_edges = set()
@@ -899,7 +927,9 @@ class Neo4JStorage(BaseGraphStorage):
899
  try:
900
  if node_label == "*":
901
  # First check total node count to determine if graph is truncated
902
- count_query = "MATCH (n) RETURN count(n) as total"
 
 
903
  count_result = None
904
  try:
905
  count_result = await session.run(count_query)
@@ -915,13 +945,13 @@ class Neo4JStorage(BaseGraphStorage):
915
  await count_result.consume()
916
 
917
  # Run main query to get nodes with highest degree
918
- main_query = """
919
- MATCH (n)
920
  OPTIONAL MATCH (n)-[r]-()
921
  WITH n, COALESCE(count(r), 0) AS degree
922
  ORDER BY degree DESC
923
  LIMIT $max_nodes
924
- WITH collect({node: n}) AS filtered_nodes
925
  UNWIND filtered_nodes AS node_info
926
  WITH collect(node_info.node) AS kept_nodes, filtered_nodes
927
  OPTIONAL MATCH (a)-[r]-(b)
@@ -943,20 +973,21 @@ class Neo4JStorage(BaseGraphStorage):
943
  else:
944
  # return await self._robust_fallback(node_label, max_depth, max_nodes)
945
  # First try without limit to check if we need to truncate
946
- full_query = """
947
- MATCH (start)
948
  WHERE start.entity_id = $entity_id
949
  WITH start
950
- CALL apoc.path.subgraphAll(start, {
951
  relationshipFilter: '',
 
952
  minLevel: 0,
953
  maxLevel: $max_depth,
954
  bfs: true
955
- })
956
  YIELD nodes, relationships
957
  WITH nodes, relationships, size(nodes) AS total_nodes
958
  UNWIND nodes AS node
959
- WITH collect({node: node}) AS node_info, relationships, total_nodes
960
  RETURN node_info, relationships, total_nodes
961
  """
962
 
@@ -994,20 +1025,21 @@ class Neo4JStorage(BaseGraphStorage):
994
  )
995
 
996
  # Run limited query
997
- limited_query = """
998
- MATCH (start)
999
  WHERE start.entity_id = $entity_id
1000
  WITH start
1001
- CALL apoc.path.subgraphAll(start, {
1002
  relationshipFilter: '',
 
1003
  minLevel: 0,
1004
  maxLevel: $max_depth,
1005
  limit: $max_nodes,
1006
  bfs: true
1007
- })
1008
  YIELD nodes, relationships
1009
  UNWIND nodes AS node
1010
- WITH collect({node: node}) AS node_info, relationships
1011
  RETURN node_info, relationships
1012
  """
1013
  result_set = None
@@ -1094,11 +1126,12 @@ class Neo4JStorage(BaseGraphStorage):
1094
  visited_edge_pairs = set()
1095
 
1096
  # Get the starting node's data
 
1097
  async with self._driver.session(
1098
  database=self._DATABASE, default_access_mode="READ"
1099
  ) as session:
1100
- query = """
1101
- MATCH (n:base {entity_id: $entity_id})
1102
  RETURN id(n) as node_id, n
1103
  """
1104
  node_result = await session.run(query, entity_id=node_label)
@@ -1156,8 +1189,9 @@ class Neo4JStorage(BaseGraphStorage):
1156
  async with self._driver.session(
1157
  database=self._DATABASE, default_access_mode="READ"
1158
  ) as session:
1159
- query = """
1160
- MATCH (a:base {entity_id: $entity_id})-[r]-(b)
 
1161
  WITH r, b, id(r) as edge_id, id(b) as target_id
1162
  RETURN r, b, edge_id, target_id
1163
  """
@@ -1241,6 +1275,7 @@ class Neo4JStorage(BaseGraphStorage):
1241
  Returns:
1242
  ["Person", "Company", ...] # Alphabetically sorted label list
1243
  """
 
1244
  async with self._driver.session(
1245
  database=self._DATABASE, default_access_mode="READ"
1246
  ) as session:
@@ -1248,8 +1283,8 @@ class Neo4JStorage(BaseGraphStorage):
1248
  # query = "CALL db.labels() YIELD label RETURN label"
1249
 
1250
  # Method 2: Query compatible with older versions
1251
- query = """
1252
- MATCH (n:base)
1253
  WHERE n.entity_id IS NOT NULL
1254
  RETURN DISTINCT n.entity_id AS label
1255
  ORDER BY label
@@ -1285,8 +1320,9 @@ class Neo4JStorage(BaseGraphStorage):
1285
  """
1286
 
1287
  async def _do_delete(tx: AsyncManagedTransaction):
1288
- query = """
1289
- MATCH (n:base {entity_id: $entity_id})
 
1290
  DETACH DELETE n
1291
  """
1292
  result = await tx.run(query, entity_id=node_id)
@@ -1342,8 +1378,9 @@ class Neo4JStorage(BaseGraphStorage):
1342
  for source, target in edges:
1343
 
1344
  async def _do_delete_edge(tx: AsyncManagedTransaction):
1345
- query = """
1346
- MATCH (source:base {entity_id: $source_entity_id})-[r]-(target:base {entity_id: $target_entity_id})
 
1347
  DELETE r
1348
  """
1349
  result = await tx.run(
@@ -1360,26 +1397,32 @@ class Neo4JStorage(BaseGraphStorage):
1360
  raise
1361
 
1362
  async def drop(self) -> dict[str, str]:
1363
- """Drop all data from storage and clean up resources
1364
 
1365
- This method will delete all nodes and relationships in the Neo4j database.
1366
 
1367
  Returns:
1368
  dict[str, str]: Operation status and message
1369
- - On success: {"status": "success", "message": "data dropped"}
1370
  - On failure: {"status": "error", "message": "<error details>"}
1371
  """
 
1372
  try:
1373
  async with self._driver.session(database=self._DATABASE) as session:
1374
- # Delete all nodes and relationships
1375
- query = "MATCH (n) DETACH DELETE n"
1376
  result = await session.run(query)
1377
  await result.consume() # Ensure result is fully consumed
1378
 
1379
  logger.info(
1380
- f"Process {os.getpid()} drop Neo4j database {self._DATABASE}"
1381
  )
1382
- return {"status": "success", "message": "data dropped"}
1383
  except Exception as e:
1384
- logger.error(f"Error dropping Neo4j database {self._DATABASE}: {e}")
 
 
1385
  return {"status": "error", "message": str(e)}
 
50
  @final
51
  @dataclass
52
  class Neo4JStorage(BaseGraphStorage):
53
+ def __init__(self, namespace, global_config, embedding_func, workspace=None):
54
+ # Check NEO4J_WORKSPACE environment variable and override workspace if set
55
+ neo4j_workspace = os.environ.get("NEO4J_WORKSPACE")
56
+ if neo4j_workspace and neo4j_workspace.strip():
57
+ workspace = neo4j_workspace
58
+
59
  super().__init__(
60
  namespace=namespace,
61
+ workspace=workspace or "",
62
  global_config=global_config,
63
  embedding_func=embedding_func,
64
  )
65
  self._driver = None
66
 
67
+ def _get_workspace_label(self) -> str:
68
+ """Get workspace label, return 'base' for compatibility when workspace is empty"""
69
+ workspace = getattr(self, "workspace", None)
70
+ return workspace if workspace else "base"
71
+
72
  async def initialize(self):
73
  URI = os.environ.get("NEO4J_URI", config.get("neo4j", "uri", fallback=None))
74
  USERNAME = os.environ.get(
 
164
  raise e
165
 
166
  if connected:
167
+ # Create index for workspace nodes on entity_id if it doesn't exist
168
+ workspace_label = self._get_workspace_label()
169
  try:
170
  async with self._driver.session(database=database) as session:
171
  # Check if index exists first
172
+ check_query = f"""
173
  CALL db.indexes() YIELD name, labelsOrTypes, properties
174
+ WHERE labelsOrTypes = ['{workspace_label}'] AND properties = ['entity_id']
175
  RETURN count(*) > 0 AS exists
176
  """
177
  try:
 
184
  if not index_exists:
185
  # Create index only if it doesn't exist
186
  result = await session.run(
187
+ f"CREATE INDEX FOR (n:`{workspace_label}`) ON (n.entity_id)"
188
  )
189
  await result.consume()
190
  logger.info(
191
+ f"Created index for {workspace_label} nodes on entity_id in {database}"
192
  )
193
  except Exception:
194
  # Fallback if db.indexes() is not supported in this Neo4j version
195
  result = await session.run(
196
+ f"CREATE INDEX IF NOT EXISTS FOR (n:`{workspace_label}`) ON (n.entity_id)"
197
  )
198
  await result.consume()
199
  except Exception as e:
 
228
  ValueError: If node_id is invalid
229
  Exception: If there is an error executing the query
230
  """
231
+ workspace_label = self._get_workspace_label()
232
  async with self._driver.session(
233
  database=self._DATABASE, default_access_mode="READ"
234
  ) as session:
235
  try:
236
+ query = f"MATCH (n:`{workspace_label}` {{entity_id: $entity_id}}) RETURN count(n) > 0 AS node_exists"
237
  result = await session.run(query, entity_id=node_id)
238
  single_result = await result.single()
239
  await result.consume() # Ensure result is fully consumed
 
258
  ValueError: If either node_id is invalid
259
  Exception: If there is an error executing the query
260
  """
261
+ workspace_label = self._get_workspace_label()
262
  async with self._driver.session(
263
  database=self._DATABASE, default_access_mode="READ"
264
  ) as session:
265
  try:
266
  query = (
267
+ f"MATCH (a:`{workspace_label}` {{entity_id: $source_entity_id}})-[r]-(b:`{workspace_label}` {{entity_id: $target_entity_id}}) "
268
  "RETURN COUNT(r) > 0 AS edgeExists"
269
  )
270
  result = await session.run(
 
296
  ValueError: If node_id is invalid
297
  Exception: If there is an error executing the query
298
  """
299
+ workspace_label = self._get_workspace_label()
300
  async with self._driver.session(
301
  database=self._DATABASE, default_access_mode="READ"
302
  ) as session:
303
  try:
304
+ query = (
305
+ f"MATCH (n:`{workspace_label}` {{entity_id: $entity_id}}) RETURN n"
306
+ )
307
  result = await session.run(query, entity_id=node_id)
308
  try:
309
  records = await result.fetch(
 
317
  if records:
318
  node = records[0]["n"]
319
  node_dict = dict(node)
320
+ # Remove workspace label from labels list if it exists
321
  if "labels" in node_dict:
322
  node_dict["labels"] = [
323
  label
324
  for label in node_dict["labels"]
325
+ if label != workspace_label
326
  ]
327
  # logger.debug(f"Neo4j query node {query} return: {node_dict}")
328
  return node_dict
 
343
  Returns:
344
  A dictionary mapping each node_id to its node data (or None if not found).
345
  """
346
+ workspace_label = self._get_workspace_label()
347
  async with self._driver.session(
348
  database=self._DATABASE, default_access_mode="READ"
349
  ) as session:
350
+ query = f"""
351
  UNWIND $node_ids AS id
352
+ MATCH (n:`{workspace_label}` {{entity_id: id}})
353
  RETURN n.entity_id AS entity_id, n
354
  """
355
  result = await session.run(query, node_ids=node_ids)
 
358
  entity_id = record["entity_id"]
359
  node = record["n"]
360
  node_dict = dict(node)
361
+ # Remove the workspace label if present in a 'labels' property
362
  if "labels" in node_dict:
363
  node_dict["labels"] = [
364
+ label
365
+ for label in node_dict["labels"]
366
+ if label != workspace_label
367
  ]
368
  nodes[entity_id] = node_dict
369
  await result.consume() # Make sure to consume the result fully
 
384
  ValueError: If node_id is invalid
385
  Exception: If there is an error executing the query
386
  """
387
+ workspace_label = self._get_workspace_label()
388
  async with self._driver.session(
389
  database=self._DATABASE, default_access_mode="READ"
390
  ) as session:
391
  try:
392
+ query = f"""
393
+ MATCH (n:`{workspace_label}` {{entity_id: $entity_id}})
394
  OPTIONAL MATCH (n)-[r]-()
395
  RETURN COUNT(r) AS degree
396
  """
 
424
  A dictionary mapping each node_id to its degree (number of relationships).
425
  If a node is not found, its degree will be set to 0.
426
  """
427
+ workspace_label = self._get_workspace_label()
428
  async with self._driver.session(
429
  database=self._DATABASE, default_access_mode="READ"
430
  ) as session:
431
+ query = f"""
432
  UNWIND $node_ids AS id
433
+ MATCH (n:`{workspace_label}` {{entity_id: id}})
434
+ RETURN n.entity_id AS entity_id, count {{ (n)--() }} AS degree;
435
  """
436
  result = await session.run(query, node_ids=node_ids)
437
  degrees = {}
 
511
  ValueError: If either node_id is invalid
512
  Exception: If there is an error executing the query
513
  """
514
+ workspace_label = self._get_workspace_label()
515
  try:
516
  async with self._driver.session(
517
  database=self._DATABASE, default_access_mode="READ"
518
  ) as session:
519
+ query = f"""
520
+ MATCH (start:`{workspace_label}` {{entity_id: $source_entity_id}})-[r]-(end:`{workspace_label}` {{entity_id: $target_entity_id}})
521
  RETURN properties(r) as edge_properties
522
  """
523
  result = await session.run(
 
594
  Returns:
595
  A dictionary mapping (src, tgt) tuples to their edge properties.
596
  """
597
+ workspace_label = self._get_workspace_label()
598
  async with self._driver.session(
599
  database=self._DATABASE, default_access_mode="READ"
600
  ) as session:
601
+ query = f"""
602
  UNWIND $pairs AS pair
603
+ MATCH (start:`{workspace_label}` {{entity_id: pair.src}})-[r:DIRECTED]-(end:`{workspace_label}` {{entity_id: pair.tgt}})
604
  RETURN pair.src AS src_id, pair.tgt AS tgt_id, collect(properties(r)) AS edges
605
  """
606
  result = await session.run(query, pairs=pairs)
 
651
  database=self._DATABASE, default_access_mode="READ"
652
  ) as session:
653
  try:
654
+ workspace_label = self._get_workspace_label()
655
+ query = f"""MATCH (n:`{workspace_label}` {{entity_id: $entity_id}})
656
+ OPTIONAL MATCH (n)-[r]-(connected:`{workspace_label}`)
657
  WHERE connected.entity_id IS NOT NULL
658
  RETURN n, r, connected"""
659
  results = await session.run(query, entity_id=source_node_id)
 
714
  database=self._DATABASE, default_access_mode="READ"
715
  ) as session:
716
  # Query to get both outgoing and incoming edges
717
+ workspace_label = self._get_workspace_label()
718
+ query = f"""
719
  UNWIND $node_ids AS id
720
+ MATCH (n:`{workspace_label}` {{entity_id: id}})
721
+ OPTIONAL MATCH (n)-[r]-(connected:`{workspace_label}`)
722
  RETURN id AS queried_id, n.entity_id AS node_entity_id,
723
  connected.entity_id AS connected_entity_id,
724
  startNode(r).entity_id AS start_entity_id
 
753
  return edges_dict
754
 
755
  async def get_nodes_by_chunk_ids(self, chunk_ids: list[str]) -> list[dict]:
756
+ workspace_label = self._get_workspace_label()
757
  async with self._driver.session(
758
  database=self._DATABASE, default_access_mode="READ"
759
  ) as session:
760
+ query = f"""
761
  UNWIND $chunk_ids AS chunk_id
762
+ MATCH (n:`{workspace_label}`)
763
  WHERE n.source_id IS NOT NULL AND chunk_id IN split(n.source_id, $sep)
764
  RETURN DISTINCT n
765
  """
 
775
  return nodes
776
 
777
  async def get_edges_by_chunk_ids(self, chunk_ids: list[str]) -> list[dict]:
778
+ workspace_label = self._get_workspace_label()
779
  async with self._driver.session(
780
  database=self._DATABASE, default_access_mode="READ"
781
  ) as session:
782
+ query = f"""
783
  UNWIND $chunk_ids AS chunk_id
784
+ MATCH (a:`{workspace_label}`)-[r]-(b:`{workspace_label}`)
785
  WHERE r.source_id IS NOT NULL AND chunk_id IN split(r.source_id, $sep)
786
  RETURN DISTINCT a.entity_id AS source, b.entity_id AS target, properties(r) AS properties
787
  """
 
815
  node_id: The unique identifier for the node (used as label)
816
  node_data: Dictionary of node properties
817
  """
818
+ workspace_label = self._get_workspace_label()
819
  properties = node_data
820
  entity_type = properties["entity_type"]
821
  if "entity_id" not in properties:
 
825
  async with self._driver.session(database=self._DATABASE) as session:
826
 
827
  async def execute_upsert(tx: AsyncManagedTransaction):
828
+ query = f"""
829
+ MERGE (n:`{workspace_label}` {{entity_id: $entity_id}})
 
830
  SET n += $properties
831
+ SET n:`{entity_type}`
832
  """
 
 
833
  result = await tx.run(
834
  query, entity_id=node_id, properties=properties
835
  )
 
873
  async with self._driver.session(database=self._DATABASE) as session:
874
 
875
  async def execute_upsert(tx: AsyncManagedTransaction):
876
+ workspace_label = self._get_workspace_label()
877
+ query = f"""
878
+ MATCH (source:`{workspace_label}` {{entity_id: $source_entity_id}})
879
  WITH source
880
+ MATCH (target:`{workspace_label}` {{entity_id: $target_entity_id}})
881
  MERGE (source)-[r:DIRECTED]-(target)
882
  SET r += $properties
883
  RETURN r, source, target
 
916
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
917
  indicating whether the graph was truncated due to max_nodes limit
918
  """
919
+ workspace_label = self._get_workspace_label()
920
  result = KnowledgeGraph()
921
  seen_nodes = set()
922
  seen_edges = set()
 
927
  try:
928
  if node_label == "*":
929
  # First check total node count to determine if graph is truncated
930
+ count_query = (
931
+ f"MATCH (n:`{workspace_label}`) RETURN count(n) as total"
932
+ )
933
  count_result = None
934
  try:
935
  count_result = await session.run(count_query)
 
945
  await count_result.consume()
946
 
947
  # Run main query to get nodes with highest degree
948
+ main_query = f"""
949
+ MATCH (n:`{workspace_label}`)
950
  OPTIONAL MATCH (n)-[r]-()
951
  WITH n, COALESCE(count(r), 0) AS degree
952
  ORDER BY degree DESC
953
  LIMIT $max_nodes
954
+ WITH collect({{node: n}}) AS filtered_nodes
955
  UNWIND filtered_nodes AS node_info
956
  WITH collect(node_info.node) AS kept_nodes, filtered_nodes
957
  OPTIONAL MATCH (a)-[r]-(b)
 
973
  else:
974
  # return await self._robust_fallback(node_label, max_depth, max_nodes)
975
  # First try without limit to check if we need to truncate
976
+ full_query = f"""
977
+ MATCH (start:`{workspace_label}`)
978
  WHERE start.entity_id = $entity_id
979
  WITH start
980
+ CALL apoc.path.subgraphAll(start, {{
981
  relationshipFilter: '',
982
+ labelFilter: '{workspace_label}',
983
  minLevel: 0,
984
  maxLevel: $max_depth,
985
  bfs: true
986
+ }})
987
  YIELD nodes, relationships
988
  WITH nodes, relationships, size(nodes) AS total_nodes
989
  UNWIND nodes AS node
990
+ WITH collect({{node: node}}) AS node_info, relationships, total_nodes
991
  RETURN node_info, relationships, total_nodes
992
  """
993
 
 
1025
  )
1026
 
1027
  # Run limited query
1028
+ limited_query = f"""
1029
+ MATCH (start:`{workspace_label}`)
1030
  WHERE start.entity_id = $entity_id
1031
  WITH start
1032
+ CALL apoc.path.subgraphAll(start, {{
1033
  relationshipFilter: '',
1034
+ labelFilter: '{workspace_label}',
1035
  minLevel: 0,
1036
  maxLevel: $max_depth,
1037
  limit: $max_nodes,
1038
  bfs: true
1039
+ }})
1040
  YIELD nodes, relationships
1041
  UNWIND nodes AS node
1042
+ WITH collect({{node: node}}) AS node_info, relationships
1043
  RETURN node_info, relationships
1044
  """
1045
  result_set = None
 
1126
  visited_edge_pairs = set()
1127
 
1128
  # Get the starting node's data
1129
+ workspace_label = self._get_workspace_label()
1130
  async with self._driver.session(
1131
  database=self._DATABASE, default_access_mode="READ"
1132
  ) as session:
1133
+ query = f"""
1134
+ MATCH (n:`{workspace_label}` {{entity_id: $entity_id}})
1135
  RETURN id(n) as node_id, n
1136
  """
1137
  node_result = await session.run(query, entity_id=node_label)
 
1189
  async with self._driver.session(
1190
  database=self._DATABASE, default_access_mode="READ"
1191
  ) as session:
1192
+ workspace_label = self._get_workspace_label()
1193
+ query = f"""
1194
+ MATCH (a:`{workspace_label}` {{entity_id: $entity_id}})-[r]-(b)
1195
  WITH r, b, id(r) as edge_id, id(b) as target_id
1196
  RETURN r, b, edge_id, target_id
1197
  """
 
1275
  Returns:
1276
  ["Person", "Company", ...] # Alphabetically sorted label list
1277
  """
1278
+ workspace_label = self._get_workspace_label()
1279
  async with self._driver.session(
1280
  database=self._DATABASE, default_access_mode="READ"
1281
  ) as session:
 
1283
  # query = "CALL db.labels() YIELD label RETURN label"
1284
 
1285
  # Method 2: Query compatible with older versions
1286
+ query = f"""
1287
+ MATCH (n:`{workspace_label}`)
1288
  WHERE n.entity_id IS NOT NULL
1289
  RETURN DISTINCT n.entity_id AS label
1290
  ORDER BY label
 
1320
  """
1321
 
1322
  async def _do_delete(tx: AsyncManagedTransaction):
1323
+ workspace_label = self._get_workspace_label()
1324
+ query = f"""
1325
+ MATCH (n:`{workspace_label}` {{entity_id: $entity_id}})
1326
  DETACH DELETE n
1327
  """
1328
  result = await tx.run(query, entity_id=node_id)
 
1378
  for source, target in edges:
1379
 
1380
  async def _do_delete_edge(tx: AsyncManagedTransaction):
1381
+ workspace_label = self._get_workspace_label()
1382
+ query = f"""
1383
+ MATCH (source:`{workspace_label}` {{entity_id: $source_entity_id}})-[r]-(target:`{workspace_label}` {{entity_id: $target_entity_id}})
1384
  DELETE r
1385
  """
1386
  result = await tx.run(
 
1397
  raise
1398
 
1399
  async def drop(self) -> dict[str, str]:
1400
+ """Drop all data from current workspace storage and clean up resources
1401
 
1402
+ This method will delete all nodes and relationships in the current workspace only.
1403
 
1404
  Returns:
1405
  dict[str, str]: Operation status and message
1406
+ - On success: {"status": "success", "message": "workspace data dropped"}
1407
  - On failure: {"status": "error", "message": "<error details>"}
1408
  """
1409
+ workspace_label = self._get_workspace_label()
1410
  try:
1411
  async with self._driver.session(database=self._DATABASE) as session:
1412
+ # Delete all nodes and relationships in current workspace only
1413
+ query = f"MATCH (n:`{workspace_label}`) DETACH DELETE n"
1414
  result = await session.run(query)
1415
  await result.consume() # Ensure result is fully consumed
1416
 
1417
  logger.info(
1418
+ f"Process {os.getpid()} drop Neo4j workspace '{workspace_label}' in database {self._DATABASE}"
1419
  )
1420
+ return {
1421
+ "status": "success",
1422
+ "message": f"workspace '{workspace_label}' data dropped",
1423
+ }
1424
  except Exception as e:
1425
+ logger.error(
1426
+ f"Error dropping Neo4j workspace '{workspace_label}' in database {self._DATABASE}: {e}"
1427
+ )
1428
  return {"status": "error", "message": str(e)}
lightrag/kg/networkx_impl.py CHANGED
@@ -46,9 +46,19 @@ class NetworkXStorage(BaseGraphStorage):
46
  nx.write_graphml(graph, file_name)
47
 
48
  def __post_init__(self):
49
- self._graphml_xml_file = os.path.join(
50
- self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
51
- )
52
  self._storage_lock = None
53
  self.storage_updated = None
54
  self._graph = None
 
46
  nx.write_graphml(graph, file_name)
47
 
48
  def __post_init__(self):
49
+ working_dir = self.global_config["working_dir"]
50
+ if self.workspace:
51
+ # Include workspace in the file path for data isolation
52
+ workspace_dir = os.path.join(working_dir, self.workspace)
53
+ os.makedirs(workspace_dir, exist_ok=True)
54
+ self._graphml_xml_file = os.path.join(
55
+ workspace_dir, f"graph_{self.namespace}.graphml"
56
+ )
57
+ else:
58
+ # Default behavior when workspace is empty
59
+ self._graphml_xml_file = os.path.join(
60
+ working_dir, f"graph_{self.namespace}.graphml"
61
+ )
62
  self._storage_lock = None
63
  self.storage_updated = None
64
  self._graph = None
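With this change the GraphML file moves into a per-workspace subdirectory, and the original path is kept when no workspace is set. A short illustration with example values:

import os

working_dir, namespace = "./rag_storage", "chunk_entity_relation"
for workspace in ("", "tenant_a"):
    base = os.path.join(working_dir, workspace) if workspace else working_dir
    print(os.path.join(base, f"graph_{namespace}.graphml"))
# ./rag_storage/graph_chunk_entity_relation.graphml
# ./rag_storage/tenant_a/graph_chunk_entity_relation.graphml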
lightrag/kg/postgres_impl.py CHANGED
@@ -1,6 +1,7 @@
1
  import asyncio
2
  import json
3
  import os
 
4
  import datetime
5
  from datetime import timezone
6
  from dataclasses import dataclass, field
@@ -319,7 +320,7 @@ class PostgreSQLDB:
319
  # Get all old format data
320
  old_data_sql = """
321
  SELECT id, mode, original_prompt, return_value, chunk_id,
322
- create_time, update_time
323
  FROM LIGHTRAG_LLM_CACHE
324
  WHERE id NOT LIKE '%:%'
325
  """
@@ -364,7 +365,9 @@ class PostgreSQLDB:
364
  await self.execute(
365
  insert_sql,
366
  {
367
- "workspace": self.workspace,
 
 
368
  "id": new_key,
369
  "mode": record["mode"],
370
  "original_prompt": record["original_prompt"],
@@ -384,7 +387,9 @@ class PostgreSQLDB:
384
  await self.execute(
385
  delete_sql,
386
  {
387
- "workspace": self.workspace,
 
 
388
  "mode": record["mode"],
389
  "id": record["id"], # Old id
390
  },
@@ -505,6 +510,29 @@ class PostgreSQLDB:
505
  f"PostgreSQL, Failed to create index on table {k}, Got: {e}"
506
  )
507
 
508
  # After all tables are created, attempt to migrate timestamp fields
509
  try:
510
  await self._migrate_timestamp_columns()
@@ -670,7 +698,7 @@ class ClientManager:
670
  ),
671
  "workspace": os.environ.get(
672
  "POSTGRES_WORKSPACE",
673
- config.get("postgres", "workspace", fallback="default"),
674
  ),
675
  "max_connections": os.environ.get(
676
  "POSTGRES_MAX_CONNECTIONS",
@@ -716,6 +744,18 @@ class PGKVStorage(BaseKVStorage):
716
  async def initialize(self):
717
  if self.db is None:
718
  self.db = await ClientManager.get_client()
719
 
720
  async def finalize(self):
721
  if self.db is not None:
@@ -1047,6 +1087,18 @@ class PGVectorStorage(BaseVectorStorage):
1047
  async def initialize(self):
1048
  if self.db is None:
1049
  self.db = await ClientManager.get_client()
1050
 
1051
  async def finalize(self):
1052
  if self.db is not None:
@@ -1328,6 +1380,18 @@ class PGDocStatusStorage(DocStatusStorage):
1328
  async def initialize(self):
1329
  if self.db is None:
1330
  self.db = await ClientManager.get_client()
1331
 
1332
  async def finalize(self):
1333
  if self.db is not None:
@@ -1606,9 +1670,34 @@ class PGGraphQueryException(Exception):
1606
  @dataclass
1607
  class PGGraphStorage(BaseGraphStorage):
1608
  def __post_init__(self):
1609
- self.graph_name = self.namespace or os.environ.get("AGE_GRAPH_NAME", "lightrag")
1610
  self.db: PostgreSQLDB | None = None
1611
 
1612
  @staticmethod
1613
  def _normalize_node_id(node_id: str) -> str:
1614
  """
@@ -1629,6 +1718,27 @@ class PGGraphStorage(BaseGraphStorage):
1629
  async def initialize(self):
1630
  if self.db is None:
1631
  self.db = await ClientManager.get_client()
1632
 
1633
  # Execute each statement separately and ignore errors
1634
  queries = [
@@ -2833,7 +2943,10 @@ class PGGraphStorage(BaseGraphStorage):
2833
  $$) AS (result agtype)"""
2834
 
2835
  await self._query(drop_query, readonly=False)
2836
- return {"status": "success", "message": "graph data dropped"}
 
 
 
2837
  except Exception as e:
2838
  logger.error(f"Error dropping graph: {e}")
2839
  return {"status": "error", "message": str(e)}
 
1
  import asyncio
2
  import json
3
  import os
4
+ import re
5
  import datetime
6
  from datetime import timezone
7
  from dataclasses import dataclass, field
 
320
  # Get all old format data
321
  old_data_sql = """
322
  SELECT id, mode, original_prompt, return_value, chunk_id,
323
+ workspace, create_time, update_time
324
  FROM LIGHTRAG_LLM_CACHE
325
  WHERE id NOT LIKE '%:%'
326
  """
 
365
  await self.execute(
366
  insert_sql,
367
  {
368
+ "workspace": record[
369
+ "workspace"
370
+ ], # Use original record's workspace
371
  "id": new_key,
372
  "mode": record["mode"],
373
  "original_prompt": record["original_prompt"],
 
387
  await self.execute(
388
  delete_sql,
389
  {
390
+ "workspace": record[
391
+ "workspace"
392
+ ], # Use original record's workspace
393
  "mode": record["mode"],
394
  "id": record["id"], # Old id
395
  },
 
510
  f"PostgreSQL, Failed to create index on table {k}, Got: {e}"
511
  )
512
 
513
+ # Create composite index for (workspace, id) columns in each table
514
+ try:
515
+ composite_index_name = f"idx_{k.lower()}_workspace_id"
516
+ check_composite_index_sql = f"""
517
+ SELECT 1 FROM pg_indexes
518
+ WHERE indexname = '{composite_index_name}'
519
+ AND tablename = '{k.lower()}'
520
+ """
521
+ composite_index_exists = await self.query(check_composite_index_sql)
522
+
523
+ if not composite_index_exists:
524
+ create_composite_index_sql = (
525
+ f"CREATE INDEX {composite_index_name} ON {k}(workspace, id)"
526
+ )
527
+ logger.info(
528
+ f"PostgreSQL, Creating composite index {composite_index_name} on table {k}"
529
+ )
530
+ await self.execute(create_composite_index_sql)
531
+ except Exception as e:
532
+ logger.error(
533
+ f"PostgreSQL, Failed to create composite index on table {k}, Got: {e}"
534
+ )
535
+
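For a given table name, the f-strings in the loop above produce statements like the following (values shown purely for illustration):

k = "LIGHTRAG_LLM_CACHE"
composite_index_name = f"idx_{k.lower()}_workspace_id"
print(composite_index_name)
# idx_lightrag_llm_cache_workspace_id
print(f"CREATE INDEX {composite_index_name} ON {k}(workspace, id)")
# CREATE INDEX idx_lightrag_llm_cache_workspace_id ON LIGHTRAG_LLM_CACHE(workspace, id)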
536
  # After all tables are created, attempt to migrate timestamp fields
537
  try:
538
  await self._migrate_timestamp_columns()
 
698
  ),
699
  "workspace": os.environ.get(
700
  "POSTGRES_WORKSPACE",
701
+ config.get("postgres", "workspace", fallback=None),
702
  ),
703
  "max_connections": os.environ.get(
704
  "POSTGRES_MAX_CONNECTIONS",
 
744
  async def initialize(self):
745
  if self.db is None:
746
  self.db = await ClientManager.get_client()
747
+ # Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
748
+ if self.db.workspace:
749
+ # Use PostgreSQLDB's workspace (highest priority)
750
+ final_workspace = self.db.workspace
751
+ elif hasattr(self, "workspace") and self.workspace:
752
+ # Use storage class's workspace (medium priority)
753
+ final_workspace = self.workspace
754
+ self.db.workspace = final_workspace
755
+ else:
756
+ # Use "default" for compatibility (lowest priority)
757
+ final_workspace = "default"
758
+ self.db.workspace = final_workspace
759
 
760
  async def finalize(self):
761
  if self.db is not None:
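The same three-way priority (PostgreSQLDB.workspace from POSTGRES_WORKSPACE or the config file, then the storage instance's own workspace, then "default") is repeated verbatim in the PGVectorStorage and PGDocStatusStorage initializers below. A hedged standalone sketch of the rule; the helper name is illustrative only:

def resolve_pg_workspace(db_workspace: str | None, storage_workspace: str | None) -> str:
    if db_workspace:            # POSTGRES_WORKSPACE or config file wins
        return db_workspace
    if storage_workspace:       # workspace passed to the storage class
        return storage_workspace
    return "default"            # backward-compatible fallback

assert resolve_pg_workspace("ops", "tenant_a") == "ops"
assert resolve_pg_workspace(None, "tenant_a") == "tenant_a"
assert resolve_pg_workspace(None, None) == "default"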
 
1087
  async def initialize(self):
1088
  if self.db is None:
1089
  self.db = await ClientManager.get_client()
1090
+ # Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
1091
+ if self.db.workspace:
1092
+ # Use PostgreSQLDB's workspace (highest priority)
1093
+ final_workspace = self.db.workspace
1094
+ elif hasattr(self, "workspace") and self.workspace:
1095
+ # Use storage class's workspace (medium priority)
1096
+ final_workspace = self.workspace
1097
+ self.db.workspace = final_workspace
1098
+ else:
1099
+ # Use "default" for compatibility (lowest priority)
1100
+ final_workspace = "default"
1101
+ self.db.workspace = final_workspace
1102
 
1103
  async def finalize(self):
1104
  if self.db is not None:
 
1380
  async def initialize(self):
1381
  if self.db is None:
1382
  self.db = await ClientManager.get_client()
1383
+ # Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
1384
+ if self.db.workspace:
1385
+ # Use PostgreSQLDB's workspace (highest priority)
1386
+ final_workspace = self.db.workspace
1387
+ elif hasattr(self, "workspace") and self.workspace:
1388
+ # Use storage class's workspace (medium priority)
1389
+ final_workspace = self.workspace
1390
+ self.db.workspace = final_workspace
1391
+ else:
1392
+ # Use "default" for compatibility (lowest priority)
1393
+ final_workspace = "default"
1394
+ self.db.workspace = final_workspace
1395
 
1396
  async def finalize(self):
1397
  if self.db is not None:
 
1670
  @dataclass
1671
  class PGGraphStorage(BaseGraphStorage):
1672
  def __post_init__(self):
1673
+ # Graph name will be dynamically generated in initialize() based on workspace
1674
  self.db: PostgreSQLDB | None = None
1675
 
1676
+ def _get_workspace_graph_name(self) -> str:
1677
+ """
1678
+ Generate graph name based on workspace and namespace for data isolation.
1679
+ Rules:
1680
+ - If workspace is empty: graph_name = namespace
1681
+ - If workspace has value: graph_name = workspace_namespace
1682
+
1683
+ Args:
1684
+ None
1685
+
1686
+ Returns:
1687
+ str: The graph name for the current workspace
1688
+ """
1689
+ workspace = getattr(self, "workspace", None)
1690
+ namespace = self.namespace or os.environ.get("AGE_GRAPH_NAME", "lightrag")
1691
+
1692
+ if workspace and workspace.strip():
1693
+ # Ensure names comply with PostgreSQL identifier specifications
1694
+ safe_workspace = re.sub(r"[^a-zA-Z0-9_]", "_", workspace.strip())
1695
+ safe_namespace = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
1696
+ return f"{safe_workspace}_{safe_namespace}"
1697
+ else:
1698
+ # When workspace is empty, use namespace directly
1699
+ return re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
1700
+
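Applying these rules, an empty workspace keeps the namespace as the AGE graph name, while a non-empty workspace is sanitized and prefixed. Example values (illustrative only):

import re

def _sanitize(s: str) -> str:
    return re.sub(r"[^a-zA-Z0-9_]", "_", s)

print(_sanitize("chunk_entity_relation"))
# chunk_entity_relation
print(f"{_sanitize('tenant-a')}_{_sanitize('chunk_entity_relation')}")
# tenant_a_chunk_entity_relation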
1701
  @staticmethod
1702
  def _normalize_node_id(node_id: str) -> str:
1703
  """
 
1718
  async def initialize(self):
1719
  if self.db is None:
1720
  self.db = await ClientManager.get_client()
1721
+ # Implement workspace priority: PostgreSQLDB.workspace > self.workspace > None
1722
+ if self.db.workspace:
1723
+ # Use PostgreSQLDB's workspace (highest priority)
1724
+ final_workspace = self.db.workspace
1725
+ elif hasattr(self, "workspace") and self.workspace:
1726
+ # Use storage class's workspace (medium priority)
1727
+ final_workspace = self.workspace
1728
+ self.db.workspace = final_workspace
1729
+ else:
1730
+ # Use None for compatibility (lowest priority)
1731
+ final_workspace = None
1732
+ self.db.workspace = final_workspace
1733
+
1734
+ # Dynamically generate graph name based on workspace
1735
+ self.workspace = self.db.workspace
1736
+ self.graph_name = self._get_workspace_graph_name()
1737
+
1738
+ # Log the graph initialization for debugging
1739
+ logger.info(
1740
+ f"PostgreSQL Graph initialized: workspace='{self.workspace}', graph_name='{self.graph_name}'"
1741
+ )
1742
 
1743
  # Execute each statement separately and ignore errors
1744
  queries = [
 
2943
  $$) AS (result agtype)"""
2944
 
2945
  await self._query(drop_query, readonly=False)
2946
+ return {
2947
+ "status": "success",
2948
+ "message": f"workspace '{self.workspace}' graph data dropped",
2949
+ }
2950
  except Exception as e:
2951
  logger.error(f"Error dropping graph: {e}")
2952
  return {"status": "error", "message": str(e)}
lightrag/kg/qdrant_impl.py CHANGED
@@ -50,6 +50,18 @@ def compute_mdhash_id_for_qdrant(
50
  @final
51
  @dataclass
52
  class QdrantVectorDBStorage(BaseVectorStorage):
53
  @staticmethod
54
  def create_collection_if_not_exist(
55
  client: QdrantClient, collection_name: str, **kwargs
@@ -59,6 +71,29 @@ class QdrantVectorDBStorage(BaseVectorStorage):
59
  client.create_collection(collection_name, **kwargs)
60
 
61
  def __post_init__(self):
62
  kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
63
  cosine_threshold = kwargs.get("cosine_better_than_threshold")
64
  if cosine_threshold is None:
 
50
  @final
51
  @dataclass
52
  class QdrantVectorDBStorage(BaseVectorStorage):
53
+ def __init__(
54
+ self, namespace, global_config, embedding_func, workspace=None, meta_fields=None
55
+ ):
56
+ super().__init__(
57
+ namespace=namespace,
58
+ workspace=workspace or "",
59
+ global_config=global_config,
60
+ embedding_func=embedding_func,
61
+ meta_fields=meta_fields or set(),
62
+ )
63
+ self.__post_init__()
64
+
65
  @staticmethod
66
  def create_collection_if_not_exist(
67
  client: QdrantClient, collection_name: str, **kwargs
 
71
  client.create_collection(collection_name, **kwargs)
72
 
73
  def __post_init__(self):
74
+ # Check for QDRANT_WORKSPACE environment variable first (higher priority)
75
+ # This allows administrators to force a specific workspace for all Qdrant storage instances
76
+ qdrant_workspace = os.environ.get("QDRANT_WORKSPACE")
77
+ if qdrant_workspace and qdrant_workspace.strip():
78
+ # Use environment variable value, overriding the passed workspace parameter
79
+ effective_workspace = qdrant_workspace.strip()
80
+ logger.info(
81
+ f"Using QDRANT_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
82
+ )
83
+ else:
84
+ # Use the workspace parameter passed during initialization
85
+ effective_workspace = self.workspace
86
+ if effective_workspace:
87
+ logger.debug(
88
+ f"Using passed workspace parameter: '{effective_workspace}'"
89
+ )
90
+
91
+ # Build namespace with workspace prefix for data isolation
92
+ if effective_workspace:
93
+ self.namespace = f"{effective_workspace}_{self.namespace}"
94
+ logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
95
+ # When workspace is empty, keep the original namespace unchanged
96
+
97
  kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
98
  cosine_threshold = kwargs.get("cosine_better_than_threshold")
99
  if cosine_threshold is None:
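The effective collection namespace is therefore derived in two steps: QDRANT_WORKSPACE, if set, overrides the workspace passed to the constructor, and a non-empty workspace is prefixed onto the namespace. The Redis storages below apply the same rule via REDIS_WORKSPACE. A minimal sketch with illustrative values:

import os

def effective_namespace(namespace: str, workspace: str = "") -> str:
    env_ws = (os.environ.get("QDRANT_WORKSPACE") or "").strip()
    ws = env_ws or workspace
    return f"{ws}_{namespace}" if ws else namespace

print(effective_namespace("chunks", "tenant_a"))   # tenant_a_chunks (when the env var is unset)
print(effective_namespace("chunks"))               # chunks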
lightrag/kg/redis_impl.py CHANGED
@@ -71,6 +71,29 @@ class RedisConnectionManager:
71
  @dataclass
72
  class RedisKVStorage(BaseKVStorage):
73
  def __post_init__(self):
74
  redis_url = os.environ.get(
75
  "REDIS_URI", config.get("redis", "uri", fallback="redis://localhost:6379")
76
  )
@@ -461,6 +484,29 @@ class RedisDocStatusStorage(DocStatusStorage):
461
  """Redis implementation of document status storage"""
462
 
463
  def __post_init__(self):
464
  redis_url = os.environ.get(
465
  "REDIS_URI", config.get("redis", "uri", fallback="redis://localhost:6379")
466
  )
 
71
  @dataclass
72
  class RedisKVStorage(BaseKVStorage):
73
  def __post_init__(self):
74
+ # Check for REDIS_WORKSPACE environment variable first (higher priority)
75
+ # This allows administrators to force a specific workspace for all Redis storage instances
76
+ redis_workspace = os.environ.get("REDIS_WORKSPACE")
77
+ if redis_workspace and redis_workspace.strip():
78
+ # Use environment variable value, overriding the passed workspace parameter
79
+ effective_workspace = redis_workspace.strip()
80
+ logger.info(
81
+ f"Using REDIS_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
82
+ )
83
+ else:
84
+ # Use the workspace parameter passed during initialization
85
+ effective_workspace = self.workspace
86
+ if effective_workspace:
87
+ logger.debug(
88
+ f"Using passed workspace parameter: '{effective_workspace}'"
89
+ )
90
+
91
+ # Build namespace with workspace prefix for data isolation
92
+ if effective_workspace:
93
+ self.namespace = f"{effective_workspace}_{self.namespace}"
94
+ logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
95
+ # When workspace is empty, keep the original namespace unchanged
96
+
97
  redis_url = os.environ.get(
98
  "REDIS_URI", config.get("redis", "uri", fallback="redis://localhost:6379")
99
  )
 
484
  """Redis implementation of document status storage"""
485
 
486
  def __post_init__(self):
487
+ # Check for REDIS_WORKSPACE environment variable first (higher priority)
488
+ # This allows administrators to force a specific workspace for all Redis storage instances
489
+ redis_workspace = os.environ.get("REDIS_WORKSPACE")
490
+ if redis_workspace and redis_workspace.strip():
491
+ # Use environment variable value, overriding the passed workspace parameter
492
+ effective_workspace = redis_workspace.strip()
493
+ logger.info(
494
+ f"Using REDIS_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
495
+ )
496
+ else:
497
+ # Use the workspace parameter passed during initialization
498
+ effective_workspace = self.workspace
499
+ if effective_workspace:
500
+ logger.debug(
501
+ f"Using passed workspace parameter: '{effective_workspace}'"
502
+ )
503
+
504
+ # Build namespace with workspace prefix for data isolation
505
+ if effective_workspace:
506
+ self.namespace = f"{effective_workspace}_{self.namespace}"
507
+ logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
508
+ # When workspace is empty, keep the original namespace unchanged
509
+
510
  redis_url = os.environ.get(
511
  "REDIS_URI", config.get("redis", "uri", fallback="redis://localhost:6379")
512
  )
lightrag/lightrag.py CHANGED
@@ -51,7 +51,7 @@ from .base import (
51
  StoragesStatus,
52
  DeletionResult,
53
  )
54
- from .namespace import NameSpace, make_namespace
55
  from .operate import (
56
  chunking_by_token_size,
57
  extract_entities,
@@ -97,9 +97,7 @@ class LightRAG:
97
  # Directory
98
  # ---
99
 
100
- working_dir: str = field(
101
- default=f"./lightrag_cache_{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
102
- )
103
  """Directory where cache and temporary files are stored."""
104
 
105
  # Storage
@@ -117,6 +115,12 @@ class LightRAG:
117
  doc_status_storage: str = field(default="JsonDocStatusStorage")
118
  """Storage type for tracking document processing statuses."""
119
 
120
  # Logging (Deprecated, use setup_logger in utils.py instead)
121
  # ---
122
  log_level: int | None = field(default=None)
@@ -242,10 +246,6 @@ class LightRAG:
242
  vector_db_storage_cls_kwargs: dict[str, Any] = field(default_factory=dict)
243
  """Additional parameters for vector database storage."""
244
 
245
- # TODO:deprecated, remove in the future, use WORKSPACE instead
246
- namespace_prefix: str = field(default="")
247
- """Prefix for namespacing stored data across different environments."""
248
-
249
  enable_llm_cache: bool = field(default=True)
250
  """Enables caching for LLM responses to avoid redundant computations."""
251
 
@@ -382,61 +382,53 @@ class LightRAG:
382
  self.doc_status_storage_cls = self._get_storage_class(self.doc_status_storage)
383
 
384
  self.llm_response_cache: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore
385
- namespace=make_namespace(
386
- self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
387
- ),
388
- global_config=asdict(
389
- self
390
- ), # Add global_config to ensure cache works properly
391
  embedding_func=self.embedding_func,
392
  )
393
 
394
  self.full_docs: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore
395
- namespace=make_namespace(
396
- self.namespace_prefix, NameSpace.KV_STORE_FULL_DOCS
397
- ),
398
  embedding_func=self.embedding_func,
399
  )
400
 
401
  self.text_chunks: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore
402
- namespace=make_namespace(
403
- self.namespace_prefix, NameSpace.KV_STORE_TEXT_CHUNKS
404
- ),
405
  embedding_func=self.embedding_func,
406
  )
407
 
408
  self.chunk_entity_relation_graph: BaseGraphStorage = self.graph_storage_cls( # type: ignore
409
- namespace=make_namespace(
410
- self.namespace_prefix, NameSpace.GRAPH_STORE_CHUNK_ENTITY_RELATION
411
- ),
412
  embedding_func=self.embedding_func,
413
  )
414
 
415
  self.entities_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore
416
- namespace=make_namespace(
417
- self.namespace_prefix, NameSpace.VECTOR_STORE_ENTITIES
418
- ),
419
  embedding_func=self.embedding_func,
420
  meta_fields={"entity_name", "source_id", "content", "file_path"},
421
  )
422
  self.relationships_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore
423
- namespace=make_namespace(
424
- self.namespace_prefix, NameSpace.VECTOR_STORE_RELATIONSHIPS
425
- ),
426
  embedding_func=self.embedding_func,
427
  meta_fields={"src_id", "tgt_id", "source_id", "content", "file_path"},
428
  )
429
  self.chunks_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore
430
- namespace=make_namespace(
431
- self.namespace_prefix, NameSpace.VECTOR_STORE_CHUNKS
432
- ),
433
  embedding_func=self.embedding_func,
434
  meta_fields={"full_doc_id", "content", "file_path"},
435
  )
436
 
437
  # Initialize document status storage
438
  self.doc_status: DocStatusStorage = self.doc_status_storage_cls(
439
- namespace=make_namespace(self.namespace_prefix, NameSpace.DOC_STATUS),
 
440
  global_config=global_config,
441
  embedding_func=None,
442
  )
@@ -965,6 +957,9 @@ class LightRAG:
965
  )
966
  }
967
 
 
 
 
968
  # Process document in two stages
969
  # Stage 1: Process text chunks and docs (parallel execution)
970
  doc_status_task = asyncio.create_task(
 
51
  StoragesStatus,
52
  DeletionResult,
53
  )
54
+ from .namespace import NameSpace
55
  from .operate import (
56
  chunking_by_token_size,
57
  extract_entities,
 
97
  # Directory
98
  # ---
99
 
100
+ working_dir: str = field(default="./rag_storage")
 
 
101
  """Directory where cache and temporary files are stored."""
102
 
103
  # Storage
 
115
  doc_status_storage: str = field(default="JsonDocStatusStorage")
116
  """Storage type for tracking document processing statuses."""
117
 
118
+ # Workspace
119
+ # ---
120
+
121
+ workspace: str = field(default_factory=lambda: os.getenv("WORKSPACE", ""))
122
+ """Workspace for data isolation. Defaults to empty string if WORKSPACE environment variable is not set."""
123
+
124
  # Logging (Deprecated, use setup_logger in utils.py instead)
125
  # ---
126
  log_level: int | None = field(default=None)
 
246
  vector_db_storage_cls_kwargs: dict[str, Any] = field(default_factory=dict)
247
  """Additional parameters for vector database storage."""
248
 
 
 
 
 
249
  enable_llm_cache: bool = field(default=True)
250
  """Enables caching for LLM responses to avoid redundant computations."""
251
 
 
382
  self.doc_status_storage_cls = self._get_storage_class(self.doc_status_storage)
383
 
384
  self.llm_response_cache: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore
385
+ namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
386
+ workspace=self.workspace,
387
+ global_config=global_config,
 
 
 
388
  embedding_func=self.embedding_func,
389
  )
390
 
391
  self.full_docs: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore
392
+ namespace=NameSpace.KV_STORE_FULL_DOCS,
393
+ workspace=self.workspace,
 
394
  embedding_func=self.embedding_func,
395
  )
396
 
397
  self.text_chunks: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore
398
+ namespace=NameSpace.KV_STORE_TEXT_CHUNKS,
399
+ workspace=self.workspace,
 
400
  embedding_func=self.embedding_func,
401
  )
402
 
403
  self.chunk_entity_relation_graph: BaseGraphStorage = self.graph_storage_cls( # type: ignore
404
+ namespace=NameSpace.GRAPH_STORE_CHUNK_ENTITY_RELATION,
405
+ workspace=self.workspace,
 
406
  embedding_func=self.embedding_func,
407
  )
408
 
409
  self.entities_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore
410
+ namespace=NameSpace.VECTOR_STORE_ENTITIES,
411
+ workspace=self.workspace,
 
412
  embedding_func=self.embedding_func,
413
  meta_fields={"entity_name", "source_id", "content", "file_path"},
414
  )
415
  self.relationships_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore
416
+ namespace=NameSpace.VECTOR_STORE_RELATIONSHIPS,
417
+ workspace=self.workspace,
 
418
  embedding_func=self.embedding_func,
419
  meta_fields={"src_id", "tgt_id", "source_id", "content", "file_path"},
420
  )
421
  self.chunks_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore
422
+ namespace=NameSpace.VECTOR_STORE_CHUNKS,
423
+ workspace=self.workspace,
 
424
  embedding_func=self.embedding_func,
425
  meta_fields={"full_doc_id", "content", "file_path"},
426
  )
427
 
428
  # Initialize document status storage
429
  self.doc_status: DocStatusStorage = self.doc_status_storage_cls(
430
+ namespace=NameSpace.DOC_STATUS,
431
+ workspace=self.workspace,
432
  global_config=global_config,
433
  embedding_func=None,
434
  )
 
957
  )
958
  }
959
 
960
+ if not chunks:
961
+ logger.warning("No document chunks to process")
962
+
963
  # Process document in two stages
964
  # Stage 1: Process text chunks and docs (parallel execution)
965
  doc_status_task = asyncio.create_task(
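With namespace_prefix and make_namespace gone, data isolation is now selected through the workspace field (or the WORKSPACE environment variable) and forwarded to every storage backend. A hedged usage sketch; the LLM and embedding functions normally required by LightRAG are omitted here for brevity:

from lightrag import LightRAG

rag = LightRAG(
    working_dir="./rag_storage",   # new default shown above
    workspace="tenant_a",          # or export WORKSPACE=tenant_a
)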
lightrag/namespace.py CHANGED
@@ -17,10 +17,6 @@ class NameSpace:
17
  DOC_STATUS = "doc_status"
18
 
19
 
20
- def make_namespace(prefix: str, base_namespace: str):
21
- return prefix + base_namespace
22
-
23
-
24
  def is_namespace(namespace: str, base_namespace: str | Iterable[str]):
25
  if isinstance(base_namespace, str):
26
  return namespace.endswith(base_namespace)
 
17
  DOC_STATUS = "doc_status"
18
 
19
 
 
 
 
 
20
  def is_namespace(namespace: str, base_namespace: str | Iterable[str]):
21
  if isinstance(base_namespace, str):
22
  return namespace.endswith(base_namespace)
lightrag_webui/src/api/lightrag.ts CHANGED
@@ -40,6 +40,8 @@ export type LightragStatus = {
40
  doc_status_storage: string
41
  graph_storage: string
42
  vector_storage: string
 
 
43
  }
44
  update_status?: Record<string, any>
45
  core_version?: string
 
40
  doc_status_storage: string
41
  graph_storage: string
42
  vector_storage: string
43
+ workspace?: string
44
+ max_graph_nodes?: string
45
  }
46
  update_status?: Record<string, any>
47
  core_version?: string
lightrag_webui/src/components/status/StatusCard.tsx CHANGED
@@ -56,6 +56,10 @@ const StatusCard = ({ status }: { status: LightragStatus | null }) => {
56
  <span>{status.configuration.graph_storage}</span>
57
  <span>{t('graphPanel.statusCard.vectorStorage')}:</span>
58
  <span>{status.configuration.vector_storage}</span>
 
 
 
 
59
  </div>
60
  </div>
61
  </div>
 
56
  <span>{status.configuration.graph_storage}</span>
57
  <span>{t('graphPanel.statusCard.vectorStorage')}:</span>
58
  <span>{status.configuration.vector_storage}</span>
59
+ <span>{t('graphPanel.statusCard.workspace')}:</span>
60
+ <span>{status.configuration.workspace || '-'}</span>
61
+ <span>{t('graphPanel.statusCard.maxGraphNodes')}:</span>
62
+ <span>{status.configuration.max_graph_nodes || '-'}</span>
63
  </div>
64
  </div>
65
  </div>
lightrag_webui/src/locales/ar.json CHANGED
@@ -263,7 +263,9 @@
263
  "kvStorage": "تخزين المفتاح-القيمة",
264
  "docStatusStorage": "تخزين حالة المستند",
265
  "graphStorage": "تخزين الرسم البياني",
266
- "vectorStorage": "تخزين المتجهات"
 
 
267
  },
268
  "propertiesView": {
269
  "editProperty": "تعديل {{property}}",
 
263
  "kvStorage": "تخزين المفتاح-القيمة",
264
  "docStatusStorage": "تخزين حالة المستند",
265
  "graphStorage": "تخزين الرسم البياني",
266
+ "vectorStorage": "تخزين المتجهات",
267
+ "workspace": "مساحة العمل",
268
+ "maxGraphNodes": "الحد الأقصى لعقد الرسم البياني"
269
  },
270
  "propertiesView": {
271
  "editProperty": "تعديل {{property}}",
lightrag_webui/src/locales/en.json CHANGED
@@ -263,7 +263,9 @@
263
  "kvStorage": "KV Storage",
264
  "docStatusStorage": "Doc Status Storage",
265
  "graphStorage": "Graph Storage",
266
- "vectorStorage": "Vector Storage"
 
 
267
  },
268
  "propertiesView": {
269
  "editProperty": "Edit {{property}}",
 
263
  "kvStorage": "KV Storage",
264
  "docStatusStorage": "Doc Status Storage",
265
  "graphStorage": "Graph Storage",
266
+ "vectorStorage": "Vector Storage",
267
+ "workspace": "Workspace",
268
+ "maxGraphNodes": "Max Graph Nodes"
269
  },
270
  "propertiesView": {
271
  "editProperty": "Edit {{property}}",
lightrag_webui/src/locales/fr.json CHANGED
@@ -263,7 +263,9 @@
263
  "kvStorage": "Stockage clé-valeur",
264
  "docStatusStorage": "Stockage de l'état des documents",
265
  "graphStorage": "Stockage du graphe",
266
- "vectorStorage": "Stockage vectoriel"
 
 
267
  },
268
  "propertiesView": {
269
  "editProperty": "Modifier {{property}}",
 
263
  "kvStorage": "Stockage clé-valeur",
264
  "docStatusStorage": "Stockage de l'état des documents",
265
  "graphStorage": "Stockage du graphe",
266
+ "vectorStorage": "Stockage vectoriel",
267
+ "workspace": "Espace de travail",
268
+ "maxGraphNodes": "Nombre maximum de nœuds du graphe"
269
  },
270
  "propertiesView": {
271
  "editProperty": "Modifier {{property}}",
lightrag_webui/src/locales/zh.json CHANGED
@@ -263,7 +263,9 @@
263
  "kvStorage": "KV存储",
264
  "docStatusStorage": "文档状态存储",
265
  "graphStorage": "图存储",
266
- "vectorStorage": "向量存储"
 
 
267
  },
268
  "propertiesView": {
269
  "editProperty": "编辑{{property}}",
 
263
  "kvStorage": "KV存储",
264
  "docStatusStorage": "文档状态存储",
265
  "graphStorage": "图存储",
266
+ "vectorStorage": "向量存储",
267
+ "workspace": "工作空间",
268
+ "maxGraphNodes": "最大图节点数"
269
  },
270
  "propertiesView": {
271
  "editProperty": "编辑{{property}}",
lightrag_webui/src/locales/zh_TW.json CHANGED
@@ -263,7 +263,9 @@
263
  "kvStorage": "KV 儲存",
264
  "docStatusStorage": "文件狀態儲存",
265
  "graphStorage": "圖形儲存",
266
- "vectorStorage": "向量儲存"
 
 
267
  },
268
  "propertiesView": {
269
  "editProperty": "編輯{{property}}",
 
263
  "kvStorage": "KV 儲存",
264
  "docStatusStorage": "文件狀態儲存",
265
  "graphStorage": "圖形儲存",
266
+ "vectorStorage": "向量儲存",
267
+ "workspace": "工作空間",
268
+ "maxGraphNodes": "最大圖形節點數"
269
  },
270
  "propertiesView": {
271
  "editProperty": "編輯{{property}}",
pyproject.toml CHANGED
@@ -11,7 +11,7 @@ authors = [
11
  description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
12
  readme = "README.md"
13
  license = {text = "MIT"}
14
- requires-python = ">=3.9"
15
  classifiers = [
16
  "Development Status :: 4 - Beta",
17
  "Programming Language :: Python :: 3",
@@ -91,3 +91,6 @@ version = {attr = "lightrag.__version__"}
91
 
92
  [tool.setuptools.package-data]
93
  lightrag = ["api/webui/**/*"]
 
 
 
 
11
  description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
12
  readme = "README.md"
13
  license = {text = "MIT"}
14
+ requires-python = ">=3.10"
15
  classifiers = [
16
  "Development Status :: 4 - Beta",
17
  "Programming Language :: Python :: 3",
 
91
 
92
  [tool.setuptools.package-data]
93
  lightrag = ["api/webui/**/*"]
94
+
95
+ [tool.ruff]
96
+ target-version = "py310"
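Raising requires-python to 3.10 (and pointing ruff at py310) matches the syntax now used in the reproduce scripts below: parenthesized groups of context managers in a single with statement are only officially supported from Python 3.10 onward, for example:

with (
    open("result.json", "a", encoding="utf-8") as result_file,
    open("errors.txt", "a", encoding="utf-8") as err_file,
):
    result_file.write("[\n")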
reproduce/Step_3.py CHANGED
@@ -28,9 +28,10 @@ def run_queries_and_save_to_json(
28
  ):
29
  loop = always_get_an_event_loop()
30
 
31
- with open(output_file, "a", encoding="utf-8") as result_file, open(
32
- error_file, "a", encoding="utf-8"
33
- ) as err_file:
 
34
  result_file.write("[\n")
35
  first_entry = True
36
 
 
28
  ):
29
  loop = always_get_an_event_loop()
30
 
31
+ with (
32
+ open(output_file, "a", encoding="utf-8") as result_file,
33
+ open(error_file, "a", encoding="utf-8") as err_file,
34
+ ):
35
  result_file.write("[\n")
36
  first_entry = True
37
 
reproduce/Step_3_openai_compatible.py CHANGED
@@ -59,9 +59,10 @@ def run_queries_and_save_to_json(
59
  ):
60
  loop = always_get_an_event_loop()
61
 
62
- with open(output_file, "a", encoding="utf-8") as result_file, open(
63
- error_file, "a", encoding="utf-8"
64
- ) as err_file:
 
65
  result_file.write("[\n")
66
  first_entry = True
67
 
 
59
  ):
60
  loop = always_get_an_event_loop()
61
 
62
+ with (
63
+ open(output_file, "a", encoding="utf-8") as result_file,
64
+ open(error_file, "a", encoding="utf-8") as err_file,
65
+ ):
66
  result_file.write("[\n")
67
  first_entry = True
68