Commit
·
d6fd606
1
Parent(s):
f353888
Enrich README.md for postgres usage, make some change to cater python version<12
Browse files- README.md +1 -0
- examples/copy_postgres_llm_cache_to_json.py +66 -0
- lightrag/kg/postgres_impl.py +14 -1
README.md
CHANGED
@@ -360,6 +360,7 @@ see test_neo4j.py for a working example.
|
|
360 |
### Using PostgreSQL for Storage
|
361 |
For production level scenarios you will most likely want to leverage an enterprise solution. PostgreSQL can provide a one-stop solution for you as KV store, VectorDB (pgvector) and GraphDB (apache AGE).
|
362 |
* PostgreSQL is lightweight,the whole binary distribution including all necessary plugins can be zipped to 40MB: Ref to [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0) as it is easy to install for Linux/Mac.
|
|
|
363 |
* How to start? Ref to: [examples/lightrag_zhipu_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_zhipu_postgres_demo.py)
|
364 |
* Create index for AGE example: (Change below `dickens` to your graph name if necessary)
|
365 |
```
|
|
|
360 |
### Using PostgreSQL for Storage
|
361 |
For production level scenarios you will most likely want to leverage an enterprise solution. PostgreSQL can provide a one-stop solution for you as KV store, VectorDB (pgvector) and GraphDB (apache AGE).
|
362 |
* PostgreSQL is lightweight,the whole binary distribution including all necessary plugins can be zipped to 40MB: Ref to [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0) as it is easy to install for Linux/Mac.
|
363 |
+
* If you prefer docker, please start with this image if you are a beginner to avoid hiccups (DO read the overview): https://hub.docker.com/r/shangor/postgres-for-rag
|
364 |
* How to start? Ref to: [examples/lightrag_zhipu_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_zhipu_postgres_demo.py)
|
365 |
* Create index for AGE example: (Change below `dickens` to your graph name if necessary)
|
366 |
```
|
examples/copy_postgres_llm_cache_to_json.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
from lightrag.kg.postgres_impl import PostgreSQLDB, PGKVStorage
|
7 |
+
from lightrag.storage import JsonKVStorage
|
8 |
+
|
9 |
+
load_dotenv()
|
10 |
+
ROOT_DIR = os.environ.get("ROOT_DIR")
|
11 |
+
WORKING_DIR = f"{ROOT_DIR}/dickens-pg"
|
12 |
+
|
13 |
+
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
|
14 |
+
|
15 |
+
if not os.path.exists(WORKING_DIR):
|
16 |
+
os.mkdir(WORKING_DIR)
|
17 |
+
|
18 |
+
# AGE
|
19 |
+
os.environ["AGE_GRAPH_NAME"] = "chinese"
|
20 |
+
|
21 |
+
postgres_db = PostgreSQLDB(
|
22 |
+
config={
|
23 |
+
"host": "localhost",
|
24 |
+
"port": 15432,
|
25 |
+
"user": "rag",
|
26 |
+
"password": "rag",
|
27 |
+
"database": "r1",
|
28 |
+
}
|
29 |
+
)
|
30 |
+
|
31 |
+
|
32 |
+
async def main():
|
33 |
+
await postgres_db.initdb()
|
34 |
+
|
35 |
+
from_llm_response_cache = PGKVStorage(
|
36 |
+
namespace="llm_response_cache",
|
37 |
+
global_config={"embedding_batch_num": 6},
|
38 |
+
embedding_func=None,
|
39 |
+
db=postgres_db,
|
40 |
+
)
|
41 |
+
|
42 |
+
to_llm_response_cache = JsonKVStorage(
|
43 |
+
namespace="llm_response_cache",
|
44 |
+
global_config={"working_dir": WORKING_DIR},
|
45 |
+
embedding_func=None,
|
46 |
+
)
|
47 |
+
|
48 |
+
kv = {}
|
49 |
+
for c_id in await from_llm_response_cache.all_keys():
|
50 |
+
print(f"Copying {c_id}")
|
51 |
+
workspace = c_id["workspace"]
|
52 |
+
mode = c_id["mode"]
|
53 |
+
_id = c_id["id"]
|
54 |
+
postgres_db.workspace = workspace
|
55 |
+
obj = await from_llm_response_cache.get_by_mode_and_id(mode, _id)
|
56 |
+
if mode not in kv:
|
57 |
+
kv[mode] = {}
|
58 |
+
kv[mode][_id] = obj[_id]
|
59 |
+
print(f"Object {obj}")
|
60 |
+
await to_llm_response_cache.upsert(kv)
|
61 |
+
await to_llm_response_cache.index_done_callback()
|
62 |
+
print("Mission accomplished!")
|
63 |
+
|
64 |
+
|
65 |
+
if __name__ == "__main__":
|
66 |
+
asyncio.run(main())
|
lightrag/kg/postgres_impl.py
CHANGED
@@ -231,6 +231,16 @@ class PGKVStorage(BaseKVStorage):
|
|
231 |
else:
|
232 |
return None
|
233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
async def filter_keys(self, keys: List[str]) -> Set[str]:
|
235 |
"""Filter out duplicated content"""
|
236 |
sql = SQL_TEMPLATES["filter_keys"].format(
|
@@ -412,7 +422,10 @@ class PGDocStatusStorage(DocStatusStorage):
|
|
412 |
|
413 |
async def filter_keys(self, data: list[str]) -> set[str]:
|
414 |
"""Return keys that don't exist in storage"""
|
415 |
-
|
|
|
|
|
|
|
416 |
result = await self.db.query(sql, {"workspace": self.db.workspace}, True)
|
417 |
# The result is like [{'id': 'id1'}, {'id': 'id2'}, ...].
|
418 |
if result is None:
|
|
|
231 |
else:
|
232 |
return None
|
233 |
|
234 |
+
async def all_keys(self) -> list[dict]:
|
235 |
+
if "llm_response_cache" == self.namespace:
|
236 |
+
sql = "select workspace,mode,id from lightrag_llm_cache"
|
237 |
+
res = await self.db.query(sql, multirows=True)
|
238 |
+
return res
|
239 |
+
else:
|
240 |
+
logger.error(
|
241 |
+
f"all_keys is only implemented for llm_response_cache, not for {self.namespace}"
|
242 |
+
)
|
243 |
+
|
244 |
async def filter_keys(self, keys: List[str]) -> Set[str]:
|
245 |
"""Filter out duplicated content"""
|
246 |
sql = SQL_TEMPLATES["filter_keys"].format(
|
|
|
422 |
|
423 |
async def filter_keys(self, data: list[str]) -> set[str]:
|
424 |
"""Return keys that don't exist in storage"""
|
425 |
+
keys = ",".join([f"'{_id}'" for _id in data])
|
426 |
+
sql = (
|
427 |
+
f"SELECT id FROM LIGHTRAG_DOC_STATUS WHERE workspace=$1 AND id IN ({keys})"
|
428 |
+
)
|
429 |
result = await self.db.query(sql, {"workspace": self.db.workspace}, True)
|
430 |
# The result is like [{'id': 'id1'}, {'id': 'id2'}, ...].
|
431 |
if result is None:
|