samuel-z-chen commited on
Commit
d6fd606
·
1 Parent(s): f353888

Enrich README.md for postgres usage, make some change to cater python version<12

Browse files
README.md CHANGED
@@ -360,6 +360,7 @@ see test_neo4j.py for a working example.
360
  ### Using PostgreSQL for Storage
361
  For production level scenarios you will most likely want to leverage an enterprise solution. PostgreSQL can provide a one-stop solution for you as KV store, VectorDB (pgvector) and GraphDB (apache AGE).
362
  * PostgreSQL is lightweight,the whole binary distribution including all necessary plugins can be zipped to 40MB: Ref to [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0) as it is easy to install for Linux/Mac.
 
363
  * How to start? Ref to: [examples/lightrag_zhipu_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_zhipu_postgres_demo.py)
364
  * Create index for AGE example: (Change below `dickens` to your graph name if necessary)
365
  ```
 
360
  ### Using PostgreSQL for Storage
361
  For production level scenarios you will most likely want to leverage an enterprise solution. PostgreSQL can provide a one-stop solution for you as KV store, VectorDB (pgvector) and GraphDB (apache AGE).
362
  * PostgreSQL is lightweight,the whole binary distribution including all necessary plugins can be zipped to 40MB: Ref to [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0) as it is easy to install for Linux/Mac.
363
+ * If you prefer docker, please start with this image if you are a beginner to avoid hiccups (DO read the overview): https://hub.docker.com/r/shangor/postgres-for-rag
364
  * How to start? Ref to: [examples/lightrag_zhipu_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_zhipu_postgres_demo.py)
365
  * Create index for AGE example: (Change below `dickens` to your graph name if necessary)
366
  ```
examples/copy_postgres_llm_cache_to_json.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ import os
4
+ from dotenv import load_dotenv
5
+
6
+ from lightrag.kg.postgres_impl import PostgreSQLDB, PGKVStorage
7
+ from lightrag.storage import JsonKVStorage
8
+
9
+ load_dotenv()
10
+ ROOT_DIR = os.environ.get("ROOT_DIR")
11
+ WORKING_DIR = f"{ROOT_DIR}/dickens-pg"
12
+
13
+ logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
14
+
15
+ if not os.path.exists(WORKING_DIR):
16
+ os.mkdir(WORKING_DIR)
17
+
18
+ # AGE
19
+ os.environ["AGE_GRAPH_NAME"] = "chinese"
20
+
21
+ postgres_db = PostgreSQLDB(
22
+ config={
23
+ "host": "localhost",
24
+ "port": 15432,
25
+ "user": "rag",
26
+ "password": "rag",
27
+ "database": "r1",
28
+ }
29
+ )
30
+
31
+
32
+ async def main():
33
+ await postgres_db.initdb()
34
+
35
+ from_llm_response_cache = PGKVStorage(
36
+ namespace="llm_response_cache",
37
+ global_config={"embedding_batch_num": 6},
38
+ embedding_func=None,
39
+ db=postgres_db,
40
+ )
41
+
42
+ to_llm_response_cache = JsonKVStorage(
43
+ namespace="llm_response_cache",
44
+ global_config={"working_dir": WORKING_DIR},
45
+ embedding_func=None,
46
+ )
47
+
48
+ kv = {}
49
+ for c_id in await from_llm_response_cache.all_keys():
50
+ print(f"Copying {c_id}")
51
+ workspace = c_id["workspace"]
52
+ mode = c_id["mode"]
53
+ _id = c_id["id"]
54
+ postgres_db.workspace = workspace
55
+ obj = await from_llm_response_cache.get_by_mode_and_id(mode, _id)
56
+ if mode not in kv:
57
+ kv[mode] = {}
58
+ kv[mode][_id] = obj[_id]
59
+ print(f"Object {obj}")
60
+ await to_llm_response_cache.upsert(kv)
61
+ await to_llm_response_cache.index_done_callback()
62
+ print("Mission accomplished!")
63
+
64
+
65
+ if __name__ == "__main__":
66
+ asyncio.run(main())
lightrag/kg/postgres_impl.py CHANGED
@@ -231,6 +231,16 @@ class PGKVStorage(BaseKVStorage):
231
  else:
232
  return None
233
 
 
 
 
 
 
 
 
 
 
 
234
  async def filter_keys(self, keys: List[str]) -> Set[str]:
235
  """Filter out duplicated content"""
236
  sql = SQL_TEMPLATES["filter_keys"].format(
@@ -412,7 +422,10 @@ class PGDocStatusStorage(DocStatusStorage):
412
 
413
  async def filter_keys(self, data: list[str]) -> set[str]:
414
  """Return keys that don't exist in storage"""
415
- sql = f"SELECT id FROM LIGHTRAG_DOC_STATUS WHERE workspace=$1 AND id IN ({",".join([f"'{_id}'" for _id in data])})"
 
 
 
416
  result = await self.db.query(sql, {"workspace": self.db.workspace}, True)
417
  # The result is like [{'id': 'id1'}, {'id': 'id2'}, ...].
418
  if result is None:
 
231
  else:
232
  return None
233
 
234
+ async def all_keys(self) -> list[dict]:
235
+ if "llm_response_cache" == self.namespace:
236
+ sql = "select workspace,mode,id from lightrag_llm_cache"
237
+ res = await self.db.query(sql, multirows=True)
238
+ return res
239
+ else:
240
+ logger.error(
241
+ f"all_keys is only implemented for llm_response_cache, not for {self.namespace}"
242
+ )
243
+
244
  async def filter_keys(self, keys: List[str]) -> Set[str]:
245
  """Filter out duplicated content"""
246
  sql = SQL_TEMPLATES["filter_keys"].format(
 
422
 
423
  async def filter_keys(self, data: list[str]) -> set[str]:
424
  """Return keys that don't exist in storage"""
425
+ keys = ",".join([f"'{_id}'" for _id in data])
426
+ sql = (
427
+ f"SELECT id FROM LIGHTRAG_DOC_STATUS WHERE workspace=$1 AND id IN ({keys})"
428
+ )
429
  result = await self.db.query(sql, {"workspace": self.db.workspace}, True)
430
  # The result is like [{'id': 'id1'}, {'id': 'id2'}, ...].
431
  if result is None: