gzdaniel committed on
Commit
f07f7a4
·
1 Parent(s): f1a2f89

Update README

Browse files
Files changed (2) hide show
  1. README-zh.md +21 -9
  2. README.md +21 -9
README-zh.md CHANGED
@@ -824,7 +824,7 @@ rag = LightRAG(
824
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
825
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
826
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
827
-
828
  -- 如有必要可以删除
829
  drop INDEX entity_p_idx;
830
  drop INDEX vertex_p_idx;
@@ -849,6 +849,18 @@ rag = LightRAG(
849
 
850
  </details>
851
 
 
 
 
 
 
 
 
 
 
 
 
 
852
  ## 编辑实体和关系
853
 
854
  LightRAG现在支持全面的知识图谱管理功能,允许您在知识图谱中创建、编辑和删除实体和关系。
@@ -1170,17 +1182,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
1170
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1171
  from lightrag.utils import EmbeddingFunc
1172
  import os
1173
-
1174
  async def load_existing_lightrag():
1175
  # 首先,创建或加载现有的 LightRAG 实例
1176
  lightrag_working_dir = "./existing_lightrag_storage"
1177
-
1178
  # 检查是否存在之前的 LightRAG 实例
1179
  if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1180
  print("✅ Found existing LightRAG instance, loading...")
1181
  else:
1182
  print("❌ No existing LightRAG instance found, will create new one")
1183
-
1184
  # 使用您的配置创建/加载 LightRAG 实例
1185
  lightrag_instance = LightRAG(
1186
  working_dir=lightrag_working_dir,
@@ -1203,10 +1215,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
1203
  ),
1204
  )
1205
  )
1206
-
1207
  # 初始化存储(如果有现有数据,这将加载现有数据)
1208
  await lightrag_instance.initialize_storages()
1209
-
1210
  # 现在使用现有的 LightRAG 实例初始化 RAGAnything
1211
  rag = RAGAnything(
1212
  lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
@@ -1235,20 +1247,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
1235
  )
1236
  # 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
1237
  )
1238
-
1239
  # 查询现有的知识库
1240
  result = await rag.query_with_multimodal(
1241
  "What data has been processed in this LightRAG instance?",
1242
  mode="hybrid"
1243
  )
1244
  print("Query result:", result)
1245
-
1246
  # 向现有的 LightRAG 实例添加新的多模态文档
1247
  await rag.process_document_complete(
1248
  file_path="path/to/new/multimodal_document.pdf",
1249
  output_dir="./output"
1250
  )
1251
-
1252
  if __name__ == "__main__":
1253
  asyncio.run(load_existing_lightrag())
1254
  ```
 
824
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
825
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
826
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
827
+
828
  -- 如有必要可以删除
829
  drop INDEX entity_p_idx;
830
  drop INDEX vertex_p_idx;
 
849
 
850
  </details>
851
 
852
+ ### LightRAG实例间的数据隔离
853
+
854
+ 通过 workspace 参数可以实现不同LightRAG实例之间的存储数据隔离。LightRAG在初始化后workspace就已经确定,之后修改workspace是无效的。下面是不同类型的存储实现工作空间的方式:
855
+
856
+ - **对于本地基于文件的数据库,数据隔离通过工作空间子目录实现:** JsonKVStorage, JsonDocStatusStorage, NetworkXStorage, NanoVectorDBStorage, FaissVectorDBStorage。
857
+ - **对于将数据存储在集合(collection)中的数据库,通过在集合名称前添加工作空间前缀来实现:** RedisKVStorage, RedisDocStatusStorage, MilvusVectorDBStorage, QdrantVectorDBStorage, MongoKVStorage, MongoDocStatusStorage, MongoVectorDBStorage, MongoGraphStorage, PGGraphStorage。
858
+ - **对于关系型数据库,数据隔离通过向表中添加 `workspace` 字段进行数据的逻辑隔离:** PGKVStorage, PGVectorStorage, PGDocStatusStorage。
859
+
860
+ - **对于Neo4j图数据库,通过label来实现数据的逻辑隔离:** Neo4JStorage。
861
+
862
+ 为了保持对遗留数据的兼容,在未配置工作空间时PostgreSQL的默认工作空间为`default`,Neo4j的默认工作空间为`base`。对于所有的外部存储,系统都提供了专用的工作空间环境变量,用于覆盖公共的 `WORKSPACE`环境变量配置。这些适用于指定存储类型的工作空间环境变量为:`REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`。
863
+
864
  ## 编辑实体和关系
865
 
866
  LightRAG现在支持全面的知识图谱管理功能,允许您在知识图谱中创建、编辑和删除实体和关系。
 
1182
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1183
  from lightrag.utils import EmbeddingFunc
1184
  import os
1185
+
1186
  async def load_existing_lightrag():
1187
  # 首先,创建或加载现有的 LightRAG 实例
1188
  lightrag_working_dir = "./existing_lightrag_storage"
1189
+
1190
  # 检查是否存在之前的 LightRAG 实例
1191
  if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1192
  print("✅ Found existing LightRAG instance, loading...")
1193
  else:
1194
  print("❌ No existing LightRAG instance found, will create new one")
1195
+
1196
  # 使用您的配置创建/加载 LightRAG 实例
1197
  lightrag_instance = LightRAG(
1198
  working_dir=lightrag_working_dir,
 
1215
  ),
1216
  )
1217
  )
1218
+
1219
  # 初始化存储(如果有现有数据,这将加载现有数据)
1220
  await lightrag_instance.initialize_storages()
1221
+
1222
  # 现在使用现有的 LightRAG 实例初始化 RAGAnything
1223
  rag = RAGAnything(
1224
  lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
 
1247
  )
1248
  # 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
1249
  )
1250
+
1251
  # 查询现有的知识库
1252
  result = await rag.query_with_multimodal(
1253
  "What data has been processed in this LightRAG instance?",
1254
  mode="hybrid"
1255
  )
1256
  print("Query result:", result)
1257
+
1258
  # 向现有的 LightRAG 实例添加新的多模态文档
1259
  await rag.process_document_complete(
1260
  file_path="path/to/new/multimodal_document.pdf",
1261
  output_dir="./output"
1262
  )
1263
+
1264
  if __name__ == "__main__":
1265
  asyncio.run(load_existing_lightrag())
1266
  ```
README.md CHANGED
@@ -239,6 +239,7 @@ A full list of LightRAG init parameters:
239
  | **Parameter** | **Type** | **Explanation** | **Default** |
240
  |--------------|----------|-----------------|-------------|
241
  | **working_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` |
 
242
  | **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage` | `JsonKVStorage` |
243
  | **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage` | `NanoVectorDBStorage` |
244
  | **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage` | `NetworkXStorage` |
@@ -796,7 +797,7 @@ For production level scenarios you will most likely want to leverage an enterpri
796
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
797
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
798
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
799
-
800
  -- drop if necessary
801
  drop INDEX entity_p_idx;
802
  drop INDEX vertex_p_idx;
@@ -895,6 +896,17 @@ async def initialize_rag():
895
 
896
  </details>
897
 
 
 
 
 
 
 
 
 
 
 
 
898
  ## Edit Entities and Relations
899
 
900
  LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
@@ -1219,17 +1231,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
1219
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1220
  from lightrag.utils import EmbeddingFunc
1221
  import os
1222
-
1223
  async def load_existing_lightrag():
1224
  # First, create or load an existing LightRAG instance
1225
  lightrag_working_dir = "./existing_lightrag_storage"
1226
-
1227
  # Check if previous LightRAG instance exists
1228
  if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1229
  print("✅ Found existing LightRAG instance, loading...")
1230
  else:
1231
  print("❌ No existing LightRAG instance found, will create new one")
1232
-
1233
  # Create/Load LightRAG instance with your configurations
1234
  lightrag_instance = LightRAG(
1235
  working_dir=lightrag_working_dir,
@@ -1252,10 +1264,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
1252
  ),
1253
  )
1254
  )
1255
-
1256
  # Initialize storage (this will load existing data if available)
1257
  await lightrag_instance.initialize_storages()
1258
-
1259
  # Now initialize RAGAnything with the existing LightRAG instance
1260
  rag = RAGAnything(
1261
  lightrag=lightrag_instance, # Pass the existing LightRAG instance
@@ -1284,20 +1296,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
1284
  )
1285
  # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
1286
  )
1287
-
1288
  # Query the existing knowledge base
1289
  result = await rag.query_with_multimodal(
1290
  "What data has been processed in this LightRAG instance?",
1291
  mode="hybrid"
1292
  )
1293
  print("Query result:", result)
1294
-
1295
  # Add new multimodal documents to the existing LightRAG instance
1296
  await rag.process_document_complete(
1297
  file_path="path/to/new/multimodal_document.pdf",
1298
  output_dir="./output"
1299
  )
1300
-
1301
  if __name__ == "__main__":
1302
  asyncio.run(load_existing_lightrag())
1303
  ```
 
239
  | **Parameter** | **Type** | **Explanation** | **Default** |
240
  |--------------|----------|-----------------|-------------|
241
  | **working_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` |
242
+ | **workspace** | `str` | Workspace name for data isolation between different LightRAG instances | |
243
  | **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage` | `JsonKVStorage` |
244
  | **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage` | `NanoVectorDBStorage` |
245
  | **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage` | `NetworkXStorage` |
 
797
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
798
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
799
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
800
+
801
  -- drop if necessary
802
  drop INDEX entity_p_idx;
803
  drop INDEX vertex_p_idx;
 
896
 
897
  </details>
898
 
899
+ ### Data Isolation Between LightRAG Instances
900
+
901
+ The `workspace` parameter ensures data isolation between different LightRAG instances. Once initialized, the `workspace` is immutable and cannot be changed. Here is how workspaces are implemented for different types of storage:
902
+
903
+ - **For local file-based databases, data isolation is achieved through workspace subdirectories:** `JsonKVStorage`, `JsonDocStatusStorage`, `NetworkXStorage`, `NanoVectorDBStorage`, `FaissVectorDBStorage`.
904
+ - **For databases that store data in collections, it's done by adding a workspace prefix to the collection name:** `RedisKVStorage`, `RedisDocStatusStorage`, `MilvusVectorDBStorage`, `QdrantVectorDBStorage`, `MongoKVStorage`, `MongoDocStatusStorage`, `MongoVectorDBStorage`, `MongoGraphStorage`, `PGGraphStorage`.
905
+ - **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`.
906
+ - **For the Neo4j graph database, logical data isolation is achieved through labels:** `Neo4JStorage`.
907
+
908
+ To maintain compatibility with legacy data, the default workspace for PostgreSQL is `default` and for Neo4j is `base` when no workspace is configured. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`.
909
+
910
  ## Edit Entities and Relations
911
 
912
  LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph.
 
1231
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1232
  from lightrag.utils import EmbeddingFunc
1233
  import os
1234
+
1235
  async def load_existing_lightrag():
1236
  # First, create or load an existing LightRAG instance
1237
  lightrag_working_dir = "./existing_lightrag_storage"
1238
+
1239
  # Check if previous LightRAG instance exists
1240
  if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1241
  print("✅ Found existing LightRAG instance, loading...")
1242
  else:
1243
  print("❌ No existing LightRAG instance found, will create new one")
1244
+
1245
  # Create/Load LightRAG instance with your configurations
1246
  lightrag_instance = LightRAG(
1247
  working_dir=lightrag_working_dir,
 
1264
  ),
1265
  )
1266
  )
1267
+
1268
  # Initialize storage (this will load existing data if available)
1269
  await lightrag_instance.initialize_storages()
1270
+
1271
  # Now initialize RAGAnything with the existing LightRAG instance
1272
  rag = RAGAnything(
1273
  lightrag=lightrag_instance, # Pass the existing LightRAG instance
 
1296
  )
1297
  # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
1298
  )
1299
+
1300
  # Query the existing knowledge base
1301
  result = await rag.query_with_multimodal(
1302
  "What data has been processed in this LightRAG instance?",
1303
  mode="hybrid"
1304
  )
1305
  print("Query result:", result)
1306
+
1307
  # Add new multimodal documents to the existing LightRAG instance
1308
  await rag.process_document_complete(
1309
  file_path="path/to/new/multimodal_document.pdf",
1310
  output_dir="./output"
1311
  )
1312
+
1313
  if __name__ == "__main__":
1314
  asyncio.run(load_existing_lightrag())
1315
  ```