zrguo committed on
Commit
70b7cad
·
2 Parent(s): 5038e19 2ababad

Merge pull request #399 from partoneplay/main

Browse files
examples/{lightrag_ollama_neo4j_milvus_demo.py → lightrag_ollama_neo4j_milvus_mongo_demo.py} RENAMED
@@ -10,6 +10,10 @@ if not os.path.exists(WORKING_DIR):
10
  os.mkdir(WORKING_DIR)
11
  print(f"WorkingDir: {WORKING_DIR}")
12
 
 
 
 
 
13
  # neo4j
14
  BATCH_SIZE_NODES = 500
15
  BATCH_SIZE_EDGES = 100
@@ -38,6 +42,7 @@ rag = LightRAG(
38
  texts=texts, embed_model="bge-m3:latest", host="http://127.0.0.1:11434"
39
  ),
40
  ),
 
41
  graph_storage="Neo4JStorage",
42
  vector_storage="MilvusVectorDBStorge",
43
  )
 
10
  os.mkdir(WORKING_DIR)
11
  print(f"WorkingDir: {WORKING_DIR}")
12
 
13
+ # mongo
14
+ os.environ["MONGO_URI"] = "mongodb://root:root@localhost:27017/"
15
+ os.environ["MONGO_DATABASE"] = "LightRAG"
16
+
17
  # neo4j
18
  BATCH_SIZE_NODES = 500
19
  BATCH_SIZE_EDGES = 100
 
42
  texts=texts, embed_model="bge-m3:latest", host="http://127.0.0.1:11434"
43
  ),
44
  ),
45
+ kv_storage="MongoKVStorage",
46
  graph_storage="Neo4JStorage",
47
  vector_storage="MilvusVectorDBStorge",
48
  )
lightrag/kg/mongo_impl.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from tqdm.asyncio import tqdm as tqdm_async
3
+ from dataclasses import dataclass
4
+ from pymongo import MongoClient
5
+
6
+ from lightrag.utils import logger
7
+
8
+ from lightrag.base import BaseKVStorage
9
+
10
+
11
@dataclass
class MongoKVStorage(BaseKVStorage):
    """Key-value storage backed by a MongoDB collection.

    The collection is named after ``self.namespace`` and every record is
    stored as one document whose ``_id`` is the record key.  The connection
    is configured through the ``MONGO_URI`` and ``MONGO_DATABASE``
    environment variables.

    NOTE: the methods are declared ``async`` but call the synchronous
    ``pymongo`` client directly, so the event loop is blocked for the
    duration of each query.
    """

    def __post_init__(self):
        """Connect to MongoDB and bind ``self._data`` to the namespace collection."""
        client = MongoClient(
            os.environ.get("MONGO_URI", "mongodb://root:root@localhost:27017/")
        )
        database = client.get_database(os.environ.get("MONGO_DATABASE", "LightRAG"))
        self._data = database.get_collection(self.namespace)
        logger.info(f"Use MongoDB as KV {self.namespace}")

    async def all_keys(self) -> list[str]:
        """Return the ``_id`` of every document in the collection."""
        return [x["_id"] for x in self._data.find({}, {"_id": 1})]

    async def get_by_id(self, id):
        """Return the document whose ``_id`` equals ``id``, or ``None`` if absent."""
        return self._data.find_one({"_id": id})

    async def get_by_ids(self, ids, fields=None):
        """Fetch several documents at once, aligned with ``ids``.

        Args:
            ids: Keys to look up.
            fields: Optional iterable of field names to project; ``None``
                returns whole documents.  ``_id`` is always included by
                MongoDB unless explicitly excluded.

        Returns:
            A list with exactly one entry per element of ``ids``, in the same
            order: the matching document, or ``None`` when the id is not
            stored.
        """
        ids = list(ids)
        projection = None if fields is None else {field: 1 for field in fields}
        # A `$in` query returns matches in no guaranteed order and simply omits
        # missing ids, so index the matches by `_id` and rebuild the result in
        # input order.  NOTE(review): this assumes callers pair results with
        # `ids` positionally - confirm against the BaseKVStorage contract.
        found = {
            doc["_id"]: doc
            for doc in self._data.find({"_id": {"$in": ids}}, projection)
        }
        return [found.get(key) for key in ids]

    async def filter_keys(self, data: list[str]) -> set[str]:
        """Return the subset of ``data`` that is not yet stored in the collection."""
        # Collect existing ids into a set so each membership test below is O(1).
        existing_ids = {
            str(x["_id"])
            for x in self._data.find({"_id": {"$in": list(data)}}, {"_id": 1})
        }
        return {s for s in data if s not in existing_ids}

    async def upsert(self, data: dict[str, dict]):
        """Insert or update one document per entry of ``data``.

        Each key becomes the document ``_id`` and the associated dict is
        applied with ``$set``, so fields not mentioned are left untouched on
        existing documents.

        Returns:
            The same ``data`` mapping; every value dict is mutated in place to
            carry its key under ``"_id"``.
        """
        for k, v in tqdm_async(data.items(), desc="Upserting"):
            self._data.update_one({"_id": k}, {"$set": v}, upsert=True)
            data[k]["_id"] = k
        return data

    async def drop(self):
        """Do nothing: this is currently a no-op and leaves the collection untouched."""
        pass
lightrag/lightrag.py CHANGED
@@ -46,6 +46,8 @@ from .kg.oracle_impl import OracleKVStorage, OracleGraphStorage, OracleVectorDBS
46
 
47
  from .kg.milvus_impl import MilvusVectorDBStorge
48
 
 
 
49
  # future KG integrations
50
 
51
  # from .kg.ArangoDB_impl import (
@@ -227,6 +229,7 @@ class LightRAG:
227
  # kv storage
228
  "JsonKVStorage": JsonKVStorage,
229
  "OracleKVStorage": OracleKVStorage,
 
230
  # vector storage
231
  "NanoVectorDBStorage": NanoVectorDBStorage,
232
  "OracleVectorDBStorage": OracleVectorDBStorage,
 
46
 
47
  from .kg.milvus_impl import MilvusVectorDBStorge
48
 
49
+ from .kg.mongo_impl import MongoKVStorage
50
+
51
  # future KG integrations
52
 
53
  # from .kg.ArangoDB_impl import (
 
229
  # kv storage
230
  "JsonKVStorage": JsonKVStorage,
231
  "OracleKVStorage": OracleKVStorage,
232
+ "MongoKVStorage": MongoKVStorage,
233
  # vector storage
234
  "NanoVectorDBStorage": NanoVectorDBStorage,
235
  "OracleVectorDBStorage": OracleVectorDBStorage,
requirements.txt CHANGED
@@ -12,6 +12,7 @@ ollama
12
  openai
13
  oracledb
14
  pymilvus
 
15
  pyvis
16
  tenacity
17
  # lmdeploy[all]
 
12
  openai
13
  oracledb
14
  pymilvus
15
+ pymongo
16
  pyvis
17
  tenacity
18
  # lmdeploy[all]