zrguo commited on
Commit
1e09d54
·
unverified ·
2 Parent(s): b6d6660 63fcfbd

Merge pull request #729 from ArnoChenFx/add-namespace-prefix

Browse files
lightrag/api/lightrag_server.py CHANGED
@@ -40,7 +40,7 @@ from .ollama_api import (
40
  from .ollama_api import ollama_server_infos
41
 
42
  # Load environment variables
43
- load_dotenv()
44
 
45
 
46
  class RAGStorageConfig:
@@ -532,6 +532,14 @@ def parse_args() -> argparse.Namespace:
532
  help="Number of conversation history turns to include (default: from env or 3)",
533
  )
534
 
 
 
 
 
 
 
 
 
535
  args = parser.parse_args()
536
 
537
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
@@ -861,6 +869,8 @@ def create_app(args):
861
  "similarity_threshold": 0.95,
862
  "use_llm_check": False,
863
  },
 
 
864
  )
865
  else:
866
  rag = LightRAG(
@@ -890,6 +900,8 @@ def create_app(args):
890
  "similarity_threshold": 0.95,
891
  "use_llm_check": False,
892
  },
 
 
893
  )
894
 
895
  async def index_file(file_path: Union[str, Path]) -> None:
 
40
  from .ollama_api import ollama_server_infos
41
 
42
  # Load environment variables
43
+ load_dotenv(override=True)
44
 
45
 
46
  class RAGStorageConfig:
 
532
  help="Number of conversation history turns to include (default: from env or 3)",
533
  )
534
 
535
+ # Namespace
536
+ parser.add_argument(
537
+ "--namespace-prefix",
538
+ type=str,
539
+ default=get_env_value("NAMESPACE_PREFIX", ""),
540
+ help="Prefix of the namespace",
541
+ )
542
+
543
  args = parser.parse_args()
544
 
545
  ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
 
869
  "similarity_threshold": 0.95,
870
  "use_llm_check": False,
871
  },
872
+ log_level=args.log_level,
873
+ namespace_prefix=args.namespace_prefix,
874
  )
875
  else:
876
  rag = LightRAG(
 
900
  "similarity_threshold": 0.95,
901
  "use_llm_check": False,
902
  },
903
+ log_level=args.log_level,
904
+ namespace_prefix=args.namespace_prefix,
905
  )
906
 
907
  async def index_file(file_path: Union[str, Path]) -> None:
lightrag/api/ollama_api.py CHANGED
@@ -15,7 +15,7 @@ from dotenv import load_dotenv
15
 
16
 
17
  # Load environment variables
18
- load_dotenv()
19
 
20
 
21
  class OllamaServerInfos:
 
15
 
16
 
17
  # Load environment variables
18
+ load_dotenv(override=True)
19
 
20
 
21
  class OllamaServerInfos:
lightrag/kg/mongo_impl.py CHANGED
@@ -52,7 +52,7 @@ class MongoKVStorage(BaseKVStorage):
52
  return set([s for s in data if s not in existing_ids])
53
 
54
  async def upsert(self, data: dict[str, dict]):
55
- if self.namespace == "llm_response_cache":
56
  for mode, items in data.items():
57
  for k, v in tqdm_async(items.items(), desc="Upserting"):
58
  key = f"{mode}_{k}"
@@ -69,7 +69,7 @@ class MongoKVStorage(BaseKVStorage):
69
  return data
70
 
71
  async def get_by_mode_and_id(self, mode: str, id: str) -> Union[dict, None]:
72
- if "llm_response_cache" == self.namespace:
73
  res = {}
74
  v = self._data.find_one({"_id": mode + "_" + id})
75
  if v:
 
52
  return set([s for s in data if s not in existing_ids])
53
 
54
  async def upsert(self, data: dict[str, dict]):
55
+ if self.namespace.endswith("llm_response_cache"):
56
  for mode, items in data.items():
57
  for k, v in tqdm_async(items.items(), desc="Upserting"):
58
  key = f"{mode}_{k}"
 
69
  return data
70
 
71
  async def get_by_mode_and_id(self, mode: str, id: str) -> Union[dict, None]:
72
+ if self.namespace.endswith("llm_response_cache"):
73
  res = {}
74
  v = self._data.find_one({"_id": mode + "_" + id})
75
  if v:
lightrag/kg/oracle_impl.py CHANGED
@@ -185,7 +185,7 @@ class OracleKVStorage(BaseKVStorage):
185
  SQL = SQL_TEMPLATES["get_by_id_" + self.namespace]
186
  params = {"workspace": self.db.workspace, "id": id}
187
  # print("get_by_id:"+SQL)
188
- if "llm_response_cache" == self.namespace:
189
  array_res = await self.db.query(SQL, params, multirows=True)
190
  res = {}
191
  for row in array_res:
@@ -201,7 +201,7 @@ class OracleKVStorage(BaseKVStorage):
201
  """Specifically for llm_response_cache."""
202
  SQL = SQL_TEMPLATES["get_by_mode_id_" + self.namespace]
203
  params = {"workspace": self.db.workspace, "cache_mode": mode, "id": id}
204
- if "llm_response_cache" == self.namespace:
205
  array_res = await self.db.query(SQL, params, multirows=True)
206
  res = {}
207
  for row in array_res:
@@ -218,7 +218,7 @@ class OracleKVStorage(BaseKVStorage):
218
  params = {"workspace": self.db.workspace}
219
  # print("get_by_ids:"+SQL)
220
  res = await self.db.query(SQL, params, multirows=True)
221
- if "llm_response_cache" == self.namespace:
222
  modes = set()
223
  dict_res: dict[str, dict] = {}
224
  for row in res:
@@ -269,7 +269,7 @@ class OracleKVStorage(BaseKVStorage):
269
 
270
  ################ INSERT METHODS ################
271
  async def upsert(self, data: dict[str, dict]):
272
- if self.namespace == "text_chunks":
273
  list_data = [
274
  {
275
  "id": k,
@@ -302,7 +302,7 @@ class OracleKVStorage(BaseKVStorage):
302
  "status": item["status"],
303
  }
304
  await self.db.execute(merge_sql, _data)
305
- if self.namespace == "full_docs":
306
  for k, v in data.items():
307
  # values.clear()
308
  merge_sql = SQL_TEMPLATES["merge_doc_full"]
@@ -313,7 +313,7 @@ class OracleKVStorage(BaseKVStorage):
313
  }
314
  await self.db.execute(merge_sql, _data)
315
 
316
- if self.namespace == "llm_response_cache":
317
  for mode, items in data.items():
318
  for k, v in items.items():
319
  upsert_sql = SQL_TEMPLATES["upsert_llm_response_cache"]
@@ -334,8 +334,10 @@ class OracleKVStorage(BaseKVStorage):
334
  await self.db.execute(SQL, params)
335
 
336
  async def index_done_callback(self):
337
- if self.namespace in ["full_docs", "text_chunks"]:
338
- logger.info("full doc and chunk data had been saved into oracle db!")
 
 
339
 
340
 
341
  @dataclass
 
185
  SQL = SQL_TEMPLATES["get_by_id_" + self.namespace]
186
  params = {"workspace": self.db.workspace, "id": id}
187
  # print("get_by_id:"+SQL)
188
+ if self.namespace.endswith("llm_response_cache"):
189
  array_res = await self.db.query(SQL, params, multirows=True)
190
  res = {}
191
  for row in array_res:
 
201
  """Specifically for llm_response_cache."""
202
  SQL = SQL_TEMPLATES["get_by_mode_id_" + self.namespace]
203
  params = {"workspace": self.db.workspace, "cache_mode": mode, "id": id}
204
+ if self.namespace.endswith("llm_response_cache"):
205
  array_res = await self.db.query(SQL, params, multirows=True)
206
  res = {}
207
  for row in array_res:
 
218
  params = {"workspace": self.db.workspace}
219
  # print("get_by_ids:"+SQL)
220
  res = await self.db.query(SQL, params, multirows=True)
221
+ if self.namespace.endswith("llm_response_cache"):
222
  modes = set()
223
  dict_res: dict[str, dict] = {}
224
  for row in res:
 
269
 
270
  ################ INSERT METHODS ################
271
  async def upsert(self, data: dict[str, dict]):
272
+ if self.namespace.endswith("text_chunks"):
273
  list_data = [
274
  {
275
  "id": k,
 
302
  "status": item["status"],
303
  }
304
  await self.db.execute(merge_sql, _data)
305
+ if self.namespace.endswith("full_docs"):
306
  for k, v in data.items():
307
  # values.clear()
308
  merge_sql = SQL_TEMPLATES["merge_doc_full"]
 
313
  }
314
  await self.db.execute(merge_sql, _data)
315
 
316
+ if self.namespace.endswith("llm_response_cache"):
317
  for mode, items in data.items():
318
  for k, v in items.items():
319
  upsert_sql = SQL_TEMPLATES["upsert_llm_response_cache"]
 
334
  await self.db.execute(SQL, params)
335
 
336
  async def index_done_callback(self):
337
+ for n in ("full_docs", "text_chunks"):
338
+ if self.namespace.endswith(n):
339
+ logger.info("full doc and chunk data had been saved into oracle db!")
340
+ break
341
 
342
 
343
  @dataclass
lightrag/kg/postgres_impl.py CHANGED
@@ -187,7 +187,7 @@ class PGKVStorage(BaseKVStorage):
187
  """Get doc_full data by id."""
188
  sql = SQL_TEMPLATES["get_by_id_" + self.namespace]
189
  params = {"workspace": self.db.workspace, "id": id}
190
- if "llm_response_cache" == self.namespace:
191
  array_res = await self.db.query(sql, params, multirows=True)
192
  res = {}
193
  for row in array_res:
@@ -203,7 +203,7 @@ class PGKVStorage(BaseKVStorage):
203
  """Specifically for llm_response_cache."""
204
  sql = SQL_TEMPLATES["get_by_mode_id_" + self.namespace]
205
  params = {"workspace": self.db.workspace, "mode": mode, "id": id}
206
- if "llm_response_cache" == self.namespace:
207
  array_res = await self.db.query(sql, params, multirows=True)
208
  res = {}
209
  for row in array_res:
@@ -219,7 +219,7 @@ class PGKVStorage(BaseKVStorage):
219
  ids=",".join([f"'{id}'" for id in ids])
220
  )
221
  params = {"workspace": self.db.workspace}
222
- if "llm_response_cache" == self.namespace:
223
  array_res = await self.db.query(sql, params, multirows=True)
224
  modes = set()
225
  dict_res: dict[str, dict] = {}
@@ -239,7 +239,7 @@ class PGKVStorage(BaseKVStorage):
239
  return None
240
 
241
  async def all_keys(self) -> list[dict]:
242
- if "llm_response_cache" == self.namespace:
243
  sql = "select workspace,mode,id from lightrag_llm_cache"
244
  res = await self.db.query(sql, multirows=True)
245
  return res
@@ -270,9 +270,9 @@ class PGKVStorage(BaseKVStorage):
270
 
271
  ################ INSERT METHODS ################
272
  async def upsert(self, data: Dict[str, dict]):
273
- if self.namespace == "text_chunks":
274
  pass
275
- elif self.namespace == "full_docs":
276
  for k, v in data.items():
277
  upsert_sql = SQL_TEMPLATES["upsert_doc_full"]
278
  _data = {
@@ -281,7 +281,7 @@ class PGKVStorage(BaseKVStorage):
281
  "workspace": self.db.workspace,
282
  }
283
  await self.db.execute(upsert_sql, _data)
284
- elif self.namespace == "llm_response_cache":
285
  for mode, items in data.items():
286
  for k, v in items.items():
287
  upsert_sql = SQL_TEMPLATES["upsert_llm_response_cache"]
@@ -296,8 +296,12 @@ class PGKVStorage(BaseKVStorage):
296
  await self.db.execute(upsert_sql, _data)
297
 
298
  async def index_done_callback(self):
299
- if self.namespace in ["full_docs", "text_chunks"]:
300
- logger.info("full doc and chunk data had been saved into postgresql db!")
 
 
 
 
301
 
302
 
303
  @dataclass
@@ -389,11 +393,11 @@ class PGVectorStorage(BaseVectorStorage):
389
  for i, d in enumerate(list_data):
390
  d["__vector__"] = embeddings[i]
391
  for item in list_data:
392
- if self.namespace == "chunks":
393
  upsert_sql, data = self._upsert_chunks(item)
394
- elif self.namespace == "entities":
395
  upsert_sql, data = self._upsert_entities(item)
396
- elif self.namespace == "relationships":
397
  upsert_sql, data = self._upsert_relationships(item)
398
  else:
399
  raise ValueError(f"{self.namespace} is not supported")
 
187
  """Get doc_full data by id."""
188
  sql = SQL_TEMPLATES["get_by_id_" + self.namespace]
189
  params = {"workspace": self.db.workspace, "id": id}
190
+ if self.namespace.endswith("llm_response_cache"):
191
  array_res = await self.db.query(sql, params, multirows=True)
192
  res = {}
193
  for row in array_res:
 
203
  """Specifically for llm_response_cache."""
204
  sql = SQL_TEMPLATES["get_by_mode_id_" + self.namespace]
205
+ params = {"workspace": self.db.workspace, "mode": mode, "id": id}
206
+ if self.namespace.endswith("llm_response_cache"):
207
  array_res = await self.db.query(sql, params, multirows=True)
208
  res = {}
209
  for row in array_res:
 
219
  ids=",".join([f"'{id}'" for id in ids])
220
  )
221
  params = {"workspace": self.db.workspace}
222
+ if self.namespace.endswith("llm_response_cache"):
223
  array_res = await self.db.query(sql, params, multirows=True)
224
  modes = set()
225
  dict_res: dict[str, dict] = {}
 
239
  return None
240
 
241
  async def all_keys(self) -> list[dict]:
242
+ if self.namespace.endswith("llm_response_cache"):
243
  sql = "select workspace,mode,id from lightrag_llm_cache"
244
  res = await self.db.query(sql, multirows=True)
245
  return res
 
270
 
271
  ################ INSERT METHODS ################
272
  async def upsert(self, data: Dict[str, dict]):
273
+ if self.namespace.endswith("text_chunks"):
274
  pass
275
+ elif self.namespace.endswith("full_docs"):
276
  for k, v in data.items():
277
  upsert_sql = SQL_TEMPLATES["upsert_doc_full"]
278
  _data = {
 
281
  "workspace": self.db.workspace,
282
  }
283
  await self.db.execute(upsert_sql, _data)
284
+ elif self.namespace.endswith("llm_response_cache"):
285
  for mode, items in data.items():
286
  for k, v in items.items():
287
  upsert_sql = SQL_TEMPLATES["upsert_llm_response_cache"]
 
296
  await self.db.execute(upsert_sql, _data)
297
 
298
  async def index_done_callback(self):
299
+ for n in ("full_docs", "text_chunks"):
300
+ if self.namespace.endswith(n):
301
+ logger.info(
302
+ "full doc and chunk data had been saved into postgresql db!"
303
+ )
304
+ break
305
 
306
 
307
  @dataclass
 
393
  for i, d in enumerate(list_data):
394
  d["__vector__"] = embeddings[i]
395
  for item in list_data:
396
+ if self.namespace.endswith("chunks"):
397
  upsert_sql, data = self._upsert_chunks(item)
398
+ elif self.namespace.endswith("entities"):
399
  upsert_sql, data = self._upsert_entities(item)
400
+ elif self.namespace.endswith("relationships"):
401
  upsert_sql, data = self._upsert_relationships(item)
402
  else:
403
  raise ValueError(f"{self.namespace} is not supported")
lightrag/kg/tidb_impl.py CHANGED
@@ -160,7 +160,7 @@ class TiDBKVStorage(BaseKVStorage):
160
  async def upsert(self, data: dict[str, dict]):
161
  left_data = {k: v for k, v in data.items() if k not in self._data}
162
  self._data.update(left_data)
163
- if self.namespace == "text_chunks":
164
  list_data = [
165
  {
166
  "__id__": k,
@@ -190,13 +190,13 @@ class TiDBKVStorage(BaseKVStorage):
190
  "tokens": item["tokens"],
191
  "chunk_order_index": item["chunk_order_index"],
192
  "full_doc_id": item["full_doc_id"],
193
- "content_vector": f"{item["__vector__"].tolist()}",
194
  "workspace": self.db.workspace,
195
  }
196
  )
197
  await self.db.execute(merge_sql, data)
198
 
199
- if self.namespace == "full_docs":
200
  merge_sql = SQL_TEMPLATES["upsert_doc_full"]
201
  data = []
202
  for k, v in self._data.items():
@@ -211,8 +211,10 @@ class TiDBKVStorage(BaseKVStorage):
211
  return left_data
212
 
213
  async def index_done_callback(self):
214
- if self.namespace in ["full_docs", "text_chunks"]:
215
- logger.info("full doc and chunk data had been saved into TiDB db!")
 
 
216
 
217
 
218
  @dataclass
@@ -258,7 +260,7 @@ class TiDBVectorDBStorage(BaseVectorStorage):
258
  if not len(data):
259
  logger.warning("You insert an empty data to vector DB")
260
  return []
261
- if self.namespace == "chunks":
262
  return []
263
  logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
264
 
@@ -288,14 +290,14 @@ class TiDBVectorDBStorage(BaseVectorStorage):
288
  for i, d in enumerate(list_data):
289
  d["content_vector"] = embeddings[i]
290
 
291
- if self.namespace == "entities":
292
  data = []
293
  for item in list_data:
294
  param = {
295
  "id": item["id"],
296
  "name": item["entity_name"],
297
  "content": item["content"],
298
- "content_vector": f"{item["content_vector"].tolist()}",
299
  "workspace": self.db.workspace,
300
  }
301
  # update entity_id if node inserted by graph_storage_instance before
@@ -309,7 +311,7 @@ class TiDBVectorDBStorage(BaseVectorStorage):
309
  merge_sql = SQL_TEMPLATES["insert_entity"]
310
  await self.db.execute(merge_sql, data)
311
 
312
- elif self.namespace == "relationships":
313
  data = []
314
  for item in list_data:
315
  param = {
@@ -317,7 +319,7 @@ class TiDBVectorDBStorage(BaseVectorStorage):
317
  "source_name": item["src_id"],
318
  "target_name": item["tgt_id"],
319
  "content": item["content"],
320
- "content_vector": f"{item["content_vector"].tolist()}",
321
  "workspace": self.db.workspace,
322
  }
323
  # update relation_id if node inserted by graph_storage_instance before
 
160
  async def upsert(self, data: dict[str, dict]):
161
  left_data = {k: v for k, v in data.items() if k not in self._data}
162
  self._data.update(left_data)
163
+ if self.namespace.endswith("text_chunks"):
164
  list_data = [
165
  {
166
  "__id__": k,
 
190
  "tokens": item["tokens"],
191
  "chunk_order_index": item["chunk_order_index"],
192
  "full_doc_id": item["full_doc_id"],
193
+ "content_vector": f"{item['__vector__'].tolist()}",
194
  "workspace": self.db.workspace,
195
  }
196
  )
197
  await self.db.execute(merge_sql, data)
198
 
199
+ if self.namespace.endswith("full_docs"):
200
  merge_sql = SQL_TEMPLATES["upsert_doc_full"]
201
  data = []
202
  for k, v in self._data.items():
 
211
  return left_data
212
 
213
  async def index_done_callback(self):
214
+ for n in ("full_docs", "text_chunks"):
215
+ if self.namespace.endswith(n):
216
+ logger.info("full doc and chunk data had been saved into TiDB db!")
217
+ break
218
 
219
 
220
  @dataclass
 
260
  if not len(data):
261
  logger.warning("You insert an empty data to vector DB")
262
  return []
263
+ if self.namespace.endswith("chunks"):
264
  return []
265
  logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
266
 
 
290
  for i, d in enumerate(list_data):
291
  d["content_vector"] = embeddings[i]
292
 
293
+ if self.namespace.endswith("entities"):
294
  data = []
295
  for item in list_data:
296
  param = {
297
  "id": item["id"],
298
  "name": item["entity_name"],
299
  "content": item["content"],
300
+ "content_vector": f"{item['content_vector'].tolist()}",
301
  "workspace": self.db.workspace,
302
  }
303
  # update entity_id if node inserted by graph_storage_instance before
 
311
  merge_sql = SQL_TEMPLATES["insert_entity"]
312
  await self.db.execute(merge_sql, data)
313
 
314
+ elif self.namespace.endswith("relationships"):
315
  data = []
316
  for item in list_data:
317
  param = {
 
319
  "source_name": item["src_id"],
320
  "target_name": item["tgt_id"],
321
  "content": item["content"],
322
+ "content_vector": f"{item['content_vector'].tolist()}",
323
  "workspace": self.db.workspace,
324
  }
325
  # update relation_id if node inserted by graph_storage_instance before
lightrag/lightrag.py CHANGED
@@ -167,6 +167,7 @@ class LightRAG:
167
 
168
  # storage
169
  vector_db_storage_cls_kwargs: dict = field(default_factory=dict)
 
170
 
171
  enable_llm_cache: bool = True
172
  # Sometimes there are some reason the LLM failed at Extracting Entities, and we want to continue without LLM cost, we can use this flag
@@ -227,13 +228,8 @@ class LightRAG:
227
  self.graph_storage_cls, global_config=global_config
228
  )
229
 
230
- self.json_doc_status_storage = self.key_string_value_json_storage_cls(
231
- namespace="json_doc_status_storage",
232
- embedding_func=None,
233
- )
234
-
235
  self.llm_response_cache = self.key_string_value_json_storage_cls(
236
- namespace="llm_response_cache",
237
  embedding_func=self.embedding_func,
238
  )
239
 
@@ -241,33 +237,34 @@ class LightRAG:
241
  # add embedding func by walter
242
  ####
243
  self.full_docs = self.key_string_value_json_storage_cls(
244
- namespace="full_docs",
245
  embedding_func=self.embedding_func,
246
  )
247
  self.text_chunks = self.key_string_value_json_storage_cls(
248
- namespace="text_chunks",
249
  embedding_func=self.embedding_func,
250
  )
251
  self.chunk_entity_relation_graph = self.graph_storage_cls(
252
- namespace="chunk_entity_relation",
253
  embedding_func=self.embedding_func,
254
  )
 
255
  ####
256
  # add embedding func by walter over
257
  ####
258
 
259
  self.entities_vdb = self.vector_db_storage_cls(
260
- namespace="entities",
261
  embedding_func=self.embedding_func,
262
  meta_fields={"entity_name"},
263
  )
264
  self.relationships_vdb = self.vector_db_storage_cls(
265
- namespace="relationships",
266
  embedding_func=self.embedding_func,
267
  meta_fields={"src_id", "tgt_id"},
268
  )
269
  self.chunks_vdb = self.vector_db_storage_cls(
270
- namespace="chunks",
271
  embedding_func=self.embedding_func,
272
  )
273
 
@@ -277,7 +274,7 @@ class LightRAG:
277
  hashing_kv = self.llm_response_cache
278
  else:
279
  hashing_kv = self.key_string_value_json_storage_cls(
280
- namespace="llm_response_cache",
281
  embedding_func=self.embedding_func,
282
  )
283
 
@@ -292,7 +289,7 @@ class LightRAG:
292
  # Initialize document status storage
293
  self.doc_status_storage_cls = self._get_storage_class(self.doc_status_storage)
294
  self.doc_status = self.doc_status_storage_cls(
295
- namespace="doc_status",
296
  global_config=global_config,
297
  embedding_func=None,
298
  )
@@ -928,7 +925,7 @@ class LightRAG:
928
  if self.llm_response_cache
929
  and hasattr(self.llm_response_cache, "global_config")
930
  else self.key_string_value_json_storage_cls(
931
- namespace="llm_response_cache",
932
  global_config=asdict(self),
933
  embedding_func=self.embedding_func,
934
  ),
@@ -945,7 +942,7 @@ class LightRAG:
945
  if self.llm_response_cache
946
  and hasattr(self.llm_response_cache, "global_config")
947
  else self.key_string_value_json_storage_cls(
948
- namespace="llm_response_cache",
949
  global_config=asdict(self),
950
  embedding_func=self.embedding_func,
951
  ),
@@ -964,7 +961,7 @@ class LightRAG:
964
  if self.llm_response_cache
965
  and hasattr(self.llm_response_cache, "global_config")
966
  else self.key_string_value_json_storage_cls(
967
- namespace="llm_response_cache",
968
  global_config=asdict(self),
969
  embedding_func=self.embedding_func,
970
  ),
@@ -1005,7 +1002,7 @@ class LightRAG:
1005
  global_config=asdict(self),
1006
  hashing_kv=self.llm_response_cache
1007
  or self.key_string_value_json_storage_cls(
1008
- namespace="llm_response_cache",
1009
  global_config=asdict(self),
1010
  embedding_func=self.embedding_func,
1011
  ),
@@ -1036,7 +1033,7 @@ class LightRAG:
1036
  if self.llm_response_cache
1037
  and hasattr(self.llm_response_cache, "global_config")
1038
  else self.key_string_value_json_storage_cls(
1039
- namespace="llm_response_cache",
1040
  global_config=asdict(self),
1041
  embedding_func=self.embedding_func,
1042
  ),
@@ -1052,7 +1049,7 @@ class LightRAG:
1052
  if self.llm_response_cache
1053
  and hasattr(self.llm_response_cache, "global_config")
1054
  else self.key_string_value_json_storage_cls(
1055
- namespace="llm_response_cache",
1056
  global_config=asdict(self),
1057
  embedding_func=self.embedding_func,
1058
  ),
@@ -1071,7 +1068,7 @@ class LightRAG:
1071
  if self.llm_response_cache
1072
  and hasattr(self.llm_response_cache, "global_config")
1073
  else self.key_string_value_json_storage_cls(
1074
- namespace="llm_response_cache",
1075
  global_config=asdict(self),
1076
  embedding_func=self.embedding_func,
1077
  ),
 
167
 
168
  # storage
169
  vector_db_storage_cls_kwargs: dict = field(default_factory=dict)
170
+ namespace_prefix: str = field(default="")
171
 
172
  enable_llm_cache: bool = True
173
  # Sometimes there are some reason the LLM failed at Extracting Entities, and we want to continue without LLM cost, we can use this flag
 
228
  self.graph_storage_cls, global_config=global_config
229
  )
230
 
 
 
 
 
 
231
  self.llm_response_cache = self.key_string_value_json_storage_cls(
232
+ namespace=self.namespace_prefix + "llm_response_cache",
233
  embedding_func=self.embedding_func,
234
  )
235
 
 
237
  # add embedding func by walter
238
  ####
239
  self.full_docs = self.key_string_value_json_storage_cls(
240
+ namespace=self.namespace_prefix + "full_docs",
241
  embedding_func=self.embedding_func,
242
  )
243
  self.text_chunks = self.key_string_value_json_storage_cls(
244
+ namespace=self.namespace_prefix + "text_chunks",
245
  embedding_func=self.embedding_func,
246
  )
247
  self.chunk_entity_relation_graph = self.graph_storage_cls(
248
+ namespace=self.namespace_prefix + "chunk_entity_relation",
249
  embedding_func=self.embedding_func,
250
  )
251
+
252
  ####
253
  # add embedding func by walter over
254
  ####
255
 
256
  self.entities_vdb = self.vector_db_storage_cls(
257
+ namespace=self.namespace_prefix + "entities",
258
  embedding_func=self.embedding_func,
259
  meta_fields={"entity_name"},
260
  )
261
  self.relationships_vdb = self.vector_db_storage_cls(
262
+ namespace=self.namespace_prefix + "relationships",
263
  embedding_func=self.embedding_func,
264
  meta_fields={"src_id", "tgt_id"},
265
  )
266
  self.chunks_vdb = self.vector_db_storage_cls(
267
+ namespace=self.namespace_prefix + "chunks",
268
  embedding_func=self.embedding_func,
269
  )
270
 
 
274
  hashing_kv = self.llm_response_cache
275
  else:
276
  hashing_kv = self.key_string_value_json_storage_cls(
277
+ namespace=self.namespace_prefix + "llm_response_cache",
278
  embedding_func=self.embedding_func,
279
  )
280
 
 
289
  # Initialize document status storage
290
  self.doc_status_storage_cls = self._get_storage_class(self.doc_status_storage)
291
  self.doc_status = self.doc_status_storage_cls(
292
+ namespace=self.namespace_prefix + "doc_status",
293
  global_config=global_config,
294
  embedding_func=None,
295
  )
 
925
  if self.llm_response_cache
926
  and hasattr(self.llm_response_cache, "global_config")
927
  else self.key_string_value_json_storage_cls(
928
+ namespace=self.namespace_prefix + "llm_response_cache",
929
  global_config=asdict(self),
930
  embedding_func=self.embedding_func,
931
  ),
 
942
  if self.llm_response_cache
943
  and hasattr(self.llm_response_cache, "global_config")
944
  else self.key_string_value_json_storage_cls(
945
+ namespace=self.namespace_prefix + "llm_response_cache",
946
  global_config=asdict(self),
947
  embedding_func=self.embedding_func,
948
  ),
 
961
  if self.llm_response_cache
962
  and hasattr(self.llm_response_cache, "global_config")
963
  else self.key_string_value_json_storage_cls(
964
+ namespace=self.namespace_prefix + "llm_response_cache",
965
  global_config=asdict(self),
966
  embedding_func=self.embedding_func,
967
  ),
 
1002
  global_config=asdict(self),
1003
  hashing_kv=self.llm_response_cache
1004
  or self.key_string_value_json_storage_cls(
1005
+ namespace=self.namespace_prefix + "llm_response_cache",
1006
  global_config=asdict(self),
1007
  embedding_func=self.embedding_func,
1008
  ),
 
1033
  if self.llm_response_cache
1034
  and hasattr(self.llm_response_cache, "global_config")
1035
  else self.key_string_value_json_storage_cls(
1036
+ namespace=self.namespace_prefix + "llm_response_cache",
1037
  global_config=asdict(self),
1038
  embedding_func=self.embedding_func,
1039
  ),
 
1049
  if self.llm_response_cache
1050
  and hasattr(self.llm_response_cache, "global_config")
1051
  else self.key_string_value_json_storage_cls(
1052
+ namespace=self.namespace_prefix + "llm_response_cache",
1053
  global_config=asdict(self),
1054
  embedding_func=self.embedding_func,
1055
  ),
 
1068
  if self.llm_response_cache
1069
  and hasattr(self.llm_response_cache, "global_config")
1070
  else self.key_string_value_json_storage_cls(
1071
+ namespace=self.namespace_prefix + "llm_response_cache",
1072
  global_config=asdict(self),
1073
  embedding_func=self.embedding_func,
1074
  ),