yangdx committed on
Commit
57fbde3
·
1 Parent(s): 32436dc

Remove namespace_prefix from PostgreSQL to maintain consistency with other storage implementations

Browse files
env.example CHANGED
@@ -5,7 +5,6 @@
5
  # PORT=9621
6
  # WORKERS=2
7
  ### separating data from difference Lightrag instances
8
- # NAMESPACE_PREFIX=lightrag
9
  ### Max nodes return from grap retrieval
10
  # MAX_GRAPH_NODES=1000
11
  # CORS_ORIGINS=http://localhost:3000,http://localhost:8080
 
5
  # PORT=9621
6
  # WORKERS=2
7
  ### separating data from difference Lightrag instances
 
8
  ### Max nodes return from grap retrieval
9
  # MAX_GRAPH_NODES=1000
10
  # CORS_ORIGINS=http://localhost:3000,http://localhost:8080
lightrag/api/lightrag_server.py CHANGED
@@ -315,7 +315,7 @@ def create_app(args):
315
  "similarity_threshold": 0.95,
316
  "use_llm_check": False,
317
  },
318
- namespace_prefix=args.namespace_prefix,
319
  auto_manage_storages_states=False,
320
  max_parallel_insert=args.max_parallel_insert,
321
  )
@@ -345,7 +345,7 @@ def create_app(args):
345
  "similarity_threshold": 0.95,
346
  "use_llm_check": False,
347
  },
348
- namespace_prefix=args.namespace_prefix,
349
  auto_manage_storages_states=False,
350
  max_parallel_insert=args.max_parallel_insert,
351
  )
 
315
  "similarity_threshold": 0.95,
316
  "use_llm_check": False,
317
  },
318
+ # namespace_prefix=args.namespace_prefix,
319
  auto_manage_storages_states=False,
320
  max_parallel_insert=args.max_parallel_insert,
321
  )
 
345
  "similarity_threshold": 0.95,
346
  "use_llm_check": False,
347
  },
348
+ # namespace_prefix=args.namespace_prefix,
349
  auto_manage_storages_states=False,
350
  max_parallel_insert=args.max_parallel_insert,
351
  )
lightrag/kg/postgres_impl.py CHANGED
@@ -254,8 +254,6 @@ class PGKVStorage(BaseKVStorage):
254
  db: PostgreSQLDB = field(default=None)
255
 
256
  def __post_init__(self):
257
- namespace_prefix = self.global_config.get("namespace_prefix")
258
- self.base_namespace = self.namespace.replace(namespace_prefix, "")
259
  self._max_batch_size = self.global_config["embedding_batch_num"]
260
 
261
  async def initialize(self):
@@ -271,7 +269,7 @@ class PGKVStorage(BaseKVStorage):
271
 
272
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
273
  """Get doc_full data by id."""
274
- sql = SQL_TEMPLATES["get_by_id_" + self.base_namespace]
275
  params = {"workspace": self.db.workspace, "id": id}
276
  if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
277
  array_res = await self.db.query(sql, params, multirows=True)
@@ -285,7 +283,7 @@ class PGKVStorage(BaseKVStorage):
285
 
286
  async def get_by_mode_and_id(self, mode: str, id: str) -> Union[dict, None]:
287
  """Specifically for llm_response_cache."""
288
- sql = SQL_TEMPLATES["get_by_mode_id_" + self.base_namespace]
289
  params = {"workspace": self.db.workspace, mode: mode, "id": id}
290
  if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
291
  array_res = await self.db.query(sql, params, multirows=True)
@@ -299,7 +297,7 @@ class PGKVStorage(BaseKVStorage):
299
  # Query by id
300
  async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
301
  """Get doc_chunks data by id"""
302
- sql = SQL_TEMPLATES["get_by_ids_" + self.base_namespace].format(
303
  ids=",".join([f"'{id}'" for id in ids])
304
  )
305
  params = {"workspace": self.db.workspace}
@@ -320,7 +318,7 @@ class PGKVStorage(BaseKVStorage):
320
 
321
  async def get_by_status(self, status: str) -> Union[list[dict[str, Any]], None]:
322
  """Specifically for llm_response_cache."""
323
- SQL = SQL_TEMPLATES["get_by_status_" + self.base_namespace]
324
  params = {"workspace": self.db.workspace, "status": status}
325
  return await self.db.query(SQL, params, multirows=True)
326
 
@@ -403,8 +401,6 @@ class PGVectorStorage(BaseVectorStorage):
403
 
404
  def __post_init__(self):
405
  self._max_batch_size = self.global_config["embedding_batch_num"]
406
- namespace_prefix = self.global_config.get("namespace_prefix")
407
- self.base_namespace = self.namespace.replace(namespace_prefix, "")
408
  config = self.global_config.get("vector_db_storage_cls_kwargs", {})
409
  cosine_threshold = config.get("cosine_better_than_threshold")
410
  if cosine_threshold is None:
@@ -533,7 +529,7 @@ class PGVectorStorage(BaseVectorStorage):
533
  else:
534
  formatted_ids = "NULL"
535
 
536
- sql = SQL_TEMPLATES[self.base_namespace].format(
537
  embedding_string=embedding_string, doc_ids=formatted_ids
538
  )
539
  params = {
 
254
  db: PostgreSQLDB = field(default=None)
255
 
256
  def __post_init__(self):
 
 
257
  self._max_batch_size = self.global_config["embedding_batch_num"]
258
 
259
  async def initialize(self):
 
269
 
270
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
271
  """Get doc_full data by id."""
272
+ sql = SQL_TEMPLATES["get_by_id_" + self.namespace]
273
  params = {"workspace": self.db.workspace, "id": id}
274
  if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
275
  array_res = await self.db.query(sql, params, multirows=True)
 
283
 
284
  async def get_by_mode_and_id(self, mode: str, id: str) -> Union[dict, None]:
285
  """Specifically for llm_response_cache."""
286
+ sql = SQL_TEMPLATES["get_by_mode_id_" + self.namespace]
287
  params = {"workspace": self.db.workspace, mode: mode, "id": id}
288
  if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
289
  array_res = await self.db.query(sql, params, multirows=True)
 
297
  # Query by id
298
  async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
299
  """Get doc_chunks data by id"""
300
+ sql = SQL_TEMPLATES["get_by_ids_" + self.namespace].format(
301
  ids=",".join([f"'{id}'" for id in ids])
302
  )
303
  params = {"workspace": self.db.workspace}
 
318
 
319
  async def get_by_status(self, status: str) -> Union[list[dict[str, Any]], None]:
320
  """Specifically for llm_response_cache."""
321
+ SQL = SQL_TEMPLATES["get_by_status_" + self.namespace]
322
  params = {"workspace": self.db.workspace, "status": status}
323
  return await self.db.query(SQL, params, multirows=True)
324
 
 
401
 
402
  def __post_init__(self):
403
  self._max_batch_size = self.global_config["embedding_batch_num"]
 
 
404
  config = self.global_config.get("vector_db_storage_cls_kwargs", {})
405
  cosine_threshold = config.get("cosine_better_than_threshold")
406
  if cosine_threshold is None:
 
529
  else:
530
  formatted_ids = "NULL"
531
 
532
+ sql = SQL_TEMPLATES[self.namespace].format(
533
  embedding_string=embedding_string, doc_ids=formatted_ids
534
  )
535
  params = {
lightrag/lightrag.py CHANGED
@@ -229,6 +229,7 @@ class LightRAG:
229
  vector_db_storage_cls_kwargs: dict[str, Any] = field(default_factory=dict)
230
  """Additional parameters for vector database storage."""
231
 
 
232
  namespace_prefix: str = field(default="")
233
  """Prefix for namespacing stored data across different environments."""
234
 
 
229
  vector_db_storage_cls_kwargs: dict[str, Any] = field(default_factory=dict)
230
  """Additional parameters for vector database storage."""
231
 
232
+ # TODO:deprecated, remove in the future, use WORKSPACE instead
233
  namespace_prefix: str = field(default="")
234
  """Prefix for namespacing stored data across different environments."""
235