Update README
Browse files- README-zh.md +21 -9
- README.md +21 -9
README-zh.md
CHANGED
@@ -824,7 +824,7 @@ rag = LightRAG(
|
|
824 |
create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
|
825 |
CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
|
826 |
ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
|
827 |
-
|
828 |
-- 如有必要可以删除
|
829 |
drop INDEX entity_p_idx;
|
830 |
drop INDEX vertex_p_idx;
|
@@ -849,6 +849,18 @@ rag = LightRAG(
|
|
849 |
|
850 |
</details>
|
851 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
852 |
## 编辑实体和关系
|
853 |
|
854 |
LightRAG现在支持全面的知识图谱管理功能,允许您在知识图谱中创建、编辑和删除实体和关系。
|
@@ -1170,17 +1182,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
|
|
1170 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
1171 |
from lightrag.utils import EmbeddingFunc
|
1172 |
import os
|
1173 |
-
|
1174 |
async def load_existing_lightrag():
|
1175 |
# 首先,创建或加载现有的 LightRAG 实例
|
1176 |
lightrag_working_dir = "./existing_lightrag_storage"
|
1177 |
-
|
1178 |
# 检查是否存在之前的 LightRAG 实例
|
1179 |
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
|
1180 |
print("✅ Found existing LightRAG instance, loading...")
|
1181 |
else:
|
1182 |
print("❌ No existing LightRAG instance found, will create new one")
|
1183 |
-
|
1184 |
# 使用您的配置创建/加载 LightRAG 实例
|
1185 |
lightrag_instance = LightRAG(
|
1186 |
working_dir=lightrag_working_dir,
|
@@ -1203,10 +1215,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
|
|
1203 |
),
|
1204 |
)
|
1205 |
)
|
1206 |
-
|
1207 |
# 初始化存储(如果有现有数据,这将加载现有数据)
|
1208 |
await lightrag_instance.initialize_storages()
|
1209 |
-
|
1210 |
# 现在使用现有的 LightRAG 实例初始化 RAGAnything
|
1211 |
rag = RAGAnything(
|
1212 |
lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
|
@@ -1235,20 +1247,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
|
|
1235 |
)
|
1236 |
# 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
|
1237 |
)
|
1238 |
-
|
1239 |
# 查询现有的知识库
|
1240 |
result = await rag.query_with_multimodal(
|
1241 |
"What data has been processed in this LightRAG instance?",
|
1242 |
mode="hybrid"
|
1243 |
)
|
1244 |
print("Query result:", result)
|
1245 |
-
|
1246 |
# 向现有的 LightRAG 实例添加新的多模态文档
|
1247 |
await rag.process_document_complete(
|
1248 |
file_path="path/to/new/multimodal_document.pdf",
|
1249 |
output_dir="./output"
|
1250 |
)
|
1251 |
-
|
1252 |
if __name__ == "__main__":
|
1253 |
asyncio.run(load_existing_lightrag())
|
1254 |
```
|
|
|
824 |
create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
|
825 |
CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
|
826 |
ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
|
827 |
+
|
828 |
-- 如有必要可以删除
|
829 |
drop INDEX entity_p_idx;
|
830 |
drop INDEX vertex_p_idx;
|
|
|
849 |
|
850 |
</details>
|
851 |
|
852 |
+
### LightRAG实例间的数据隔离
|
853 |
+
|
854 |
+
通过 workspace 参数可以实现不同LightRAG实例之间的存储数据隔离。LightRAG在初始化后workspace就已经确定,之后修改workspace是无效的。下面是不同类型的存储实现工作空间的方式:
|
855 |
+
|
856 |
+
- **对于本地基于文件的数据库,数据隔离通过工作空间子目录实现:** JsonKVStorage, JsonDocStatusStorage, NetworkXStorage, NanoVectorDBStorage, FaissVectorDBStorage。
|
857 |
+
- **对于将数据存储在集合(collection)中的数据库,通过在集合名称前添加工作空间前缀来实现:** RedisKVStorage, RedisDocStatusStorage, MilvusVectorDBStorage, QdrantVectorDBStorage, MongoKVStorage, MongoDocStatusStorage, MongoVectorDBStorage, MongoGraphStorage, PGGraphStorage。
|
858 |
+
- **对于关系型数据库,数据隔离通过向表中添加 `workspace` 字段进行数据的逻辑隔离:** PGKVStorage, PGVectorStorage, PGDocStatusStorage。
|
859 |
+
|
860 |
+
- **对于Neo4j图数据库,通过label来实现数据的逻辑隔离:** Neo4JStorage。
|
861 |
+
|
862 |
+
为了保持对遗留数据的兼容,在未配置工作空间时PostgreSQL的默认工作空间为`default`,Neo4j的默认工作空间为`base`。对于所有的外部存储,系统都提供了专用的工作空间环境变量,用于覆盖公共的 `WORKSPACE`环境变量配置。这些适用于指定存储类型的工作空间环境变量为:`REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`。
|
863 |
+
|
864 |
## 编辑实体和关系
|
865 |
|
866 |
LightRAG现在支持全面的知识图谱管理功能,允许您在知识图谱中创建、编辑和删除实体和关系。
|
|
|
1182 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
1183 |
from lightrag.utils import EmbeddingFunc
|
1184 |
import os
|
1185 |
+
|
1186 |
async def load_existing_lightrag():
|
1187 |
# 首先,创建或加载现有的 LightRAG 实例
|
1188 |
lightrag_working_dir = "./existing_lightrag_storage"
|
1189 |
+
|
1190 |
# 检查是否存在之前的 LightRAG 实例
|
1191 |
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
|
1192 |
print("✅ Found existing LightRAG instance, loading...")
|
1193 |
else:
|
1194 |
print("❌ No existing LightRAG instance found, will create new one")
|
1195 |
+
|
1196 |
# 使用您的配置创建/加载 LightRAG 实例
|
1197 |
lightrag_instance = LightRAG(
|
1198 |
working_dir=lightrag_working_dir,
|
|
|
1215 |
),
|
1216 |
)
|
1217 |
)
|
1218 |
+
|
1219 |
# 初始化存储(如果有现有数据,这将加载现有数据)
|
1220 |
await lightrag_instance.initialize_storages()
|
1221 |
+
|
1222 |
# 现在使用现有的 LightRAG 实例初始化 RAGAnything
|
1223 |
rag = RAGAnything(
|
1224 |
lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
|
|
|
1247 |
)
|
1248 |
# 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
|
1249 |
)
|
1250 |
+
|
1251 |
# 查询现有的知识库
|
1252 |
result = await rag.query_with_multimodal(
|
1253 |
"What data has been processed in this LightRAG instance?",
|
1254 |
mode="hybrid"
|
1255 |
)
|
1256 |
print("Query result:", result)
|
1257 |
+
|
1258 |
# 向现有的 LightRAG 实例添加新的多模态文档
|
1259 |
await rag.process_document_complete(
|
1260 |
file_path="path/to/new/multimodal_document.pdf",
|
1261 |
output_dir="./output"
|
1262 |
)
|
1263 |
+
|
1264 |
if __name__ == "__main__":
|
1265 |
asyncio.run(load_existing_lightrag())
|
1266 |
```
|
README.md
CHANGED
@@ -239,6 +239,7 @@ A full list of LightRAG init parameters:
|
|
239 |
| **Parameter** | **Type** | **Explanation** | **Default** |
|
240 |
|--------------|----------|-----------------|-------------|
|
241 |
| **working_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` |
|
|
|
242 |
| **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage` | `JsonKVStorage` |
|
243 |
| **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage` | `NanoVectorDBStorage` |
|
244 |
| **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage` | `NetworkXStorage` |
|
@@ -796,7 +797,7 @@ For production level scenarios you will most likely want to leverage an enterpri
|
|
796 |
create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
|
797 |
CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
|
798 |
ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
|
799 |
-
|
800 |
-- drop if necessary
|
801 |
drop INDEX entity_p_idx;
|
802 |
drop INDEX vertex_p_idx;
|
@@ -895,6 +896,17 @@ async def initialize_rag():
|
|
895 |
|
896 |
</details>
|
897 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
898 |
## Edit Entities and Relations
|
899 |
|
900 |
LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
|
@@ -1219,17 +1231,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
|
|
1219 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
1220 |
from lightrag.utils import EmbeddingFunc
|
1221 |
import os
|
1222 |
-
|
1223 |
async def load_existing_lightrag():
|
1224 |
# First, create or load an existing LightRAG instance
|
1225 |
lightrag_working_dir = "./existing_lightrag_storage"
|
1226 |
-
|
1227 |
# Check if previous LightRAG instance exists
|
1228 |
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
|
1229 |
print("✅ Found existing LightRAG instance, loading...")
|
1230 |
else:
|
1231 |
print("❌ No existing LightRAG instance found, will create new one")
|
1232 |
-
|
1233 |
# Create/Load LightRAG instance with your configurations
|
1234 |
lightrag_instance = LightRAG(
|
1235 |
working_dir=lightrag_working_dir,
|
@@ -1252,10 +1264,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
|
|
1252 |
),
|
1253 |
)
|
1254 |
)
|
1255 |
-
|
1256 |
# Initialize storage (this will load existing data if available)
|
1257 |
await lightrag_instance.initialize_storages()
|
1258 |
-
|
1259 |
# Now initialize RAGAnything with the existing LightRAG instance
|
1260 |
rag = RAGAnything(
|
1261 |
lightrag=lightrag_instance, # Pass the existing LightRAG instance
|
@@ -1284,20 +1296,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
|
|
1284 |
)
|
1285 |
# Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
|
1286 |
)
|
1287 |
-
|
1288 |
# Query the existing knowledge base
|
1289 |
result = await rag.query_with_multimodal(
|
1290 |
"What data has been processed in this LightRAG instance?",
|
1291 |
mode="hybrid"
|
1292 |
)
|
1293 |
print("Query result:", result)
|
1294 |
-
|
1295 |
# Add new multimodal documents to the existing LightRAG instance
|
1296 |
await rag.process_document_complete(
|
1297 |
file_path="path/to/new/multimodal_document.pdf",
|
1298 |
output_dir="./output"
|
1299 |
)
|
1300 |
-
|
1301 |
if __name__ == "__main__":
|
1302 |
asyncio.run(load_existing_lightrag())
|
1303 |
```
|
|
|
239 |
| **Parameter** | **Type** | **Explanation** | **Default** |
|
240 |
|--------------|----------|-----------------|-------------|
|
241 |
| **working_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` |
|
242 |
+
| **workspace** | `str` | Workspace name for data isolation between different LightRAG instances | |
|
243 |
| **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage` | `JsonKVStorage` |
|
244 |
| **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage` | `NanoVectorDBStorage` |
|
245 |
| **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage` | `NetworkXStorage` |
|
|
|
797 |
create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
|
798 |
CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
|
799 |
ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
|
800 |
+
|
801 |
-- drop if necessary
|
802 |
drop INDEX entity_p_idx;
|
803 |
drop INDEX vertex_p_idx;
|
|
|
896 |
|
897 |
</details>
|
898 |
|
899 |
+
### Data Isolation Between LightRAG Instances
|
900 |
+
|
901 |
+
The `workspace` parameter ensures data isolation between different LightRAG instances. Once initialized, the `workspace` is immutable and cannot be changed. Here is how workspaces are implemented for different types of storage:
|
902 |
+
|
903 |
+
- **For local file-based databases, data isolation is achieved through workspace subdirectories:** `JsonKVStorage`, `JsonDocStatusStorage`, `NetworkXStorage`, `NanoVectorDBStorage`, `FaissVectorDBStorage`.
|
904 |
+
- **For databases that store data in collections, it's done by adding a workspace prefix to the collection name:** `RedisKVStorage`, `RedisDocStatusStorage`, `MilvusVectorDBStorage`, `QdrantVectorDBStorage`, `MongoKVStorage`, `MongoDocStatusStorage`, `MongoVectorDBStorage`, `MongoGraphStorage`, `PGGraphStorage`.
|
905 |
+
- **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`.
|
906 |
+
- **For the Neo4j graph database, logical data isolation is achieved through labels:** `Neo4JStorage`.
|
907 |
+
|
908 |
+
To maintain compatibility with legacy data, the default workspace for PostgreSQL is `default` and for Neo4j is `base` when no workspace is configured. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`.
|
909 |
+
|
910 |
## Edit Entities and Relations
|
911 |
|
912 |
LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
|
|
|
1231 |
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
1232 |
from lightrag.utils import EmbeddingFunc
|
1233 |
import os
|
1234 |
+
|
1235 |
async def load_existing_lightrag():
|
1236 |
# First, create or load an existing LightRAG instance
|
1237 |
lightrag_working_dir = "./existing_lightrag_storage"
|
1238 |
+
|
1239 |
# Check if previous LightRAG instance exists
|
1240 |
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
|
1241 |
print("✅ Found existing LightRAG instance, loading...")
|
1242 |
else:
|
1243 |
print("❌ No existing LightRAG instance found, will create new one")
|
1244 |
+
|
1245 |
# Create/Load LightRAG instance with your configurations
|
1246 |
lightrag_instance = LightRAG(
|
1247 |
working_dir=lightrag_working_dir,
|
|
|
1264 |
),
|
1265 |
)
|
1266 |
)
|
1267 |
+
|
1268 |
# Initialize storage (this will load existing data if available)
|
1269 |
await lightrag_instance.initialize_storages()
|
1270 |
+
|
1271 |
# Now initialize RAGAnything with the existing LightRAG instance
|
1272 |
rag = RAGAnything(
|
1273 |
lightrag=lightrag_instance, # Pass the existing LightRAG instance
|
|
|
1296 |
)
|
1297 |
# Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
|
1298 |
)
|
1299 |
+
|
1300 |
# Query the existing knowledge base
|
1301 |
result = await rag.query_with_multimodal(
|
1302 |
"What data has been processed in this LightRAG instance?",
|
1303 |
mode="hybrid"
|
1304 |
)
|
1305 |
print("Query result:", result)
|
1306 |
+
|
1307 |
# Add new multimodal documents to the existing LightRAG instance
|
1308 |
await rag.process_document_complete(
|
1309 |
file_path="path/to/new/multimodal_document.pdf",
|
1310 |
output_dir="./output"
|
1311 |
)
|
1312 |
+
|
1313 |
if __name__ == "__main__":
|
1314 |
asyncio.run(load_existing_lightrag())
|
1315 |
```
|