Merge branch 'main' into add-Memgraph-graph-db
- env.example +28 -25
- lightrag/api/__init__.py +1 -1
- lightrag/api/config.py +3 -3
- lightrag/api/lightrag_server.py +6 -2
- lightrag/api/routers/document_routes.py +17 -1
- lightrag/api/utils_api.py +3 -1
- lightrag/api/webui/assets/{index-CRWiopRc.js → index-BlAjHenV.js} +0 -0
- lightrag/api/webui/index.html +0 -0
- lightrag/base.py +1 -0
- lightrag/kg/faiss_impl.py +13 -3
- lightrag/kg/json_doc_status_impl.py +12 -1
- lightrag/kg/json_kv_impl.py +12 -1
- lightrag/kg/milvus_impl.py +23 -4
- lightrag/kg/mongo_impl.py +141 -11
- lightrag/kg/nano_vector_db_impl.py +13 -3
- lightrag/kg/neo4j_impl.py +118 -75
- lightrag/kg/networkx_impl.py +13 -3
- lightrag/kg/postgres_impl.py +119 -6
- lightrag/kg/qdrant_impl.py +35 -0
- lightrag/kg/redis_impl.py +46 -0
- lightrag/lightrag.py +28 -33
- lightrag/namespace.py +0 -4
- lightrag_webui/src/api/lightrag.ts +2 -0
- lightrag_webui/src/components/status/StatusCard.tsx +4 -0
- lightrag_webui/src/locales/ar.json +3 -1
- lightrag_webui/src/locales/en.json +3 -1
- lightrag_webui/src/locales/fr.json +3 -1
- lightrag_webui/src/locales/zh.json +3 -1
- lightrag_webui/src/locales/zh_TW.json +3 -1
- pyproject.toml +4 -1
- reproduce/Step_3.py +4 -3
- reproduce/Step_3_openai_compatible.py +4 -3
env.example
CHANGED
@@ -111,25 +111,37 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
 ###########################
 ### Data storage selection
 ###########################
+### In-memory database with data persistence to local files
+# LIGHTRAG_KV_STORAGE=JsonKVStorage
+# LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
+# LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
+# LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
+# LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
 ### PostgreSQL
 # LIGHTRAG_KV_STORAGE=PGKVStorage
 # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
-# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
 # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
+# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
+### MongoDB (recommended for production deploy)
 # LIGHTRAG_KV_STORAGE=MongoKVStorage
 # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
-# LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
 # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
+# LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
+### Redis Storage (recommended for production deploy)
 # LIGHTRAG_KV_STORAGE=RedisKVStorage
 # LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
-### Vector Storage
-# LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
+### Vector Storage (recommended for production deploy)
 # LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
+# LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage
+### Graph Storage (recommended for production deploy)
 # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
+
+####################################################################
+### Default workspace for all storage types
+### For the purpose of isolation of data for each LightRAG instance
+### Valid characters: a-z, A-Z, 0-9, and _
+####################################################################
+# WORKSPACE=doc

 ### PostgreSQL Configuration
 POSTGRES_HOST=localhost
@@ -138,31 +150,19 @@ POSTGRES_USER=your_username
 POSTGRES_PASSWORD='your_password'
 POSTGRES_DATABASE=your_database
 POSTGRES_MAX_CONNECTIONS=12
-
-# POSTGRES_WORKSPACE=default
+# POSTGRES_WORKSPACE=forced_workspace_name

 ### Neo4j Configuration
 NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
 NEO4J_USERNAME=neo4j
 NEO4J_PASSWORD='your_password'
-
-### Independent AGM Configuration(not for AMG embedded in PostreSQL)
-# AGE_POSTGRES_DB=
-# AGE_POSTGRES_USER=
-# AGE_POSTGRES_PASSWORD=
-# AGE_POSTGRES_HOST=
-# AGE_POSTGRES_PORT=8529
-
-# AGE Graph Name(apply to PostgreSQL and independent AGM)
-### AGE_GRAPH_NAME is deprecated
-# AGE_GRAPH_NAME=lightrag
+# NEO4J_WORKSPACE=forced_workspace_name

 ### MongoDB Configuration
 MONGO_URI=mongodb://root:root@localhost:27017/
+#MONGO_URI=mongodb+srv://root:[email protected]/?retryWrites=true&w=majority&appName=Cluster0
 MONGO_DATABASE=LightRAG
-
-### separating all data from difference Lightrag instances
-# MONGODB_WORKSPACE=default
+# MONGODB_WORKSPACE=forced_workspace_name

 ### Milvus Configuration
 MILVUS_URI=http://localhost:19530
@@ -170,10 +170,13 @@ MILVUS_DB_NAME=lightrag
 # MILVUS_USER=root
 # MILVUS_PASSWORD=your_password
 # MILVUS_TOKEN=your_token
+# MILVUS_WORKSPACE=forced_workspace_name

 ### Qdrant
-QDRANT_URL=http://localhost:
+QDRANT_URL=http://localhost:6333
 # QDRANT_API_KEY=your-api-key
+# QDRANT_WORKSPACE=forced_workspace_name

 ### Redis
 REDIS_URI=redis://localhost:6379
+# REDIS_WORKSPACE=forced_workspace_name
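The new storage-selection block introduces a global WORKSPACE variable plus per-backend overrides (POSTGRES_WORKSPACE, NEO4J_WORKSPACE, MONGODB_WORKSPACE, MILVUS_WORKSPACE, QDRANT_WORKSPACE, REDIS_WORKSPACE). A minimal Python sketch of the precedence this implies is shown below; the helper name resolve_workspace is illustrative and not part of the diff, but the rule mirrors the storage implementations further down: a backend-specific variable, when set and non-blank, wins over the value derived from WORKSPACE.

import os

def resolve_workspace(passed_workspace: str, env_var: str) -> str:
    """Illustrative helper: a backend-specific *_WORKSPACE variable, if set and
    non-blank, overrides the workspace passed down from the global WORKSPACE setting."""
    forced = os.environ.get(env_var, "")
    if forced and forced.strip():
        return forced.strip()
    return passed_workspace or ""

# Example: WORKSPACE=team_a, but MILVUS_WORKSPACE=shared_vectors forces Milvus
# collections into the "shared_vectors" prefix while other backends keep "team_a".
print(resolve_workspace("team_a", "MILVUS_WORKSPACE"))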
lightrag/api/__init__.py
CHANGED
@@ -1 +1 @@
-__api_version__ = "
+__api_version__ = "0179"
lightrag/api/config.py
CHANGED
@@ -184,10 +184,10 @@ def parse_args() -> argparse.Namespace:

     # Namespace
     parser.add_argument(
-        "--
+        "--workspace",
         type=str,
-        default=get_env_value("
-        help="
+        default=get_env_value("WORKSPACE", ""),
+        help="Default workspace for all storage",
     )

     parser.add_argument(
lightrag/api/lightrag_server.py
CHANGED
@@ -112,8 +112,8 @@ def create_app(args):
     # Check if API key is provided either through env var or args
     api_key = os.getenv("LIGHTRAG_API_KEY") or args.key

-    # Initialize document manager
-    doc_manager = DocumentManager(args.input_dir)
+    # Initialize document manager with workspace support for data isolation
+    doc_manager = DocumentManager(args.input_dir, workspace=args.workspace)

     @asynccontextmanager
     async def lifespan(app: FastAPI):
@@ -295,6 +295,7 @@ def create_app(args):
     if args.llm_binding in ["lollms", "ollama", "openai"]:
         rag = LightRAG(
             working_dir=args.working_dir,
+            workspace=args.workspace,
             llm_model_func=lollms_model_complete
             if args.llm_binding == "lollms"
             else ollama_model_complete
@@ -330,6 +331,7 @@ def create_app(args):
     else:  # azure_openai
         rag = LightRAG(
             working_dir=args.working_dir,
+            workspace=args.workspace,
             llm_model_func=azure_openai_model_complete,
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
@@ -472,6 +474,8 @@ def create_app(args):
             "vector_storage": args.vector_storage,
             "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
             "enable_llm_cache": args.enable_llm_cache,
+            "workspace": args.workspace,
+            "max_graph_nodes": os.getenv("MAX_GRAPH_NODES"),
         },
         "auth_mode": auth_mode,
         "pipeline_busy": pipeline_status.get("busy", False),
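The server now threads the --workspace argument into both the DocumentManager and the LightRAG constructor. A small sketch of that plumbing is shown below under the assumption that the constructor accepts the new workspace keyword exactly as the diff adds it; build_rag_kwargs is an illustrative helper, not code from this merge, and the remaining required LightRAG arguments are omitted.

import argparse

def build_rag_kwargs(args: argparse.Namespace) -> dict:
    """Sketch of the plumbing added in this merge: the --workspace value is
    forwarded to the LightRAG constructor alongside working_dir."""
    return {
        "working_dir": args.working_dir,
        "workspace": args.workspace,  # empty string means "no isolation"
    }

ns = argparse.Namespace(working_dir="./rag_storage", workspace="tenant_a")
print(build_rag_kwargs(ns))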
lightrag/api/routers/document_routes.py
CHANGED
@@ -475,6 +475,7 @@ class DocumentManager:
     def __init__(
         self,
         input_dir: str,
+        workspace: str = "",  # New parameter for workspace isolation
         supported_extensions: tuple = (
             ".txt",
             ".md",
@@ -515,10 +516,19 @@ class DocumentManager:
             ".less",  # LESS CSS
         ),
     ):
+        # Store the base input directory and workspace
+        self.base_input_dir = Path(input_dir)
+        self.workspace = workspace
         self.supported_extensions = supported_extensions
         self.indexed_files = set()

+        # Create workspace-specific input directory
+        # If workspace is provided, create a subdirectory for data isolation
+        if workspace:
+            self.input_dir = self.base_input_dir / workspace
+        else:
+            self.input_dir = self.base_input_dir
+
         # Create input directory if it doesn't exist
         self.input_dir.mkdir(parents=True, exist_ok=True)
@@ -714,6 +724,12 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:

     # Insert into the RAG queue
     if content:
+        # Check if content contains only whitespace characters
+        if not content.strip():
+            logger.warning(
+                f"File contains only whitespace characters. file_paths={file_path.name}"
+            )
+
         await rag.apipeline_enqueue_documents(content, file_paths=file_path.name)
         logger.info(f"Successfully fetched and enqueued file: {file_path.name}")
         return True
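The DocumentManager change above means uploaded documents for a workspace land in a subdirectory of the input folder. A standalone sketch of the same path rule follows; workspace_input_dir is an illustrative helper mirroring the diff, not an API exposed by the repo.

from pathlib import Path

def workspace_input_dir(base_input_dir: str, workspace: str = "") -> Path:
    """Mirror of the DocumentManager logic in this diff: documents for a
    workspace land in <input_dir>/<workspace>, otherwise in <input_dir> itself."""
    base = Path(base_input_dir)
    target = base / workspace if workspace else base
    target.mkdir(parents=True, exist_ok=True)
    return target

print(workspace_input_dir("./inputs", "tenant_a"))  # ./inputs/tenant_a
print(workspace_input_dir("./inputs"))              # ./inputs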
lightrag/api/utils_api.py
CHANGED
@@ -284,8 +284,10 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.vector_storage}")
     ASCIIColors.white(" ├─ Graph Storage: ", end="")
     ASCIIColors.yellow(f"{args.graph_storage}")
-    ASCIIColors.white("
+    ASCIIColors.white(" ├─ Document Status Storage: ", end="")
     ASCIIColors.yellow(f"{args.doc_status_storage}")
+    ASCIIColors.white(" └─ Workspace: ", end="")
+    ASCIIColors.yellow(f"{args.workspace if args.workspace else '-'}")

     # Server Status
     ASCIIColors.green("\n✨ Server starting up...\n")
lightrag/api/webui/assets/{index-CRWiopRc.js → index-BlAjHenV.js}
RENAMED
Binary files a/lightrag/api/webui/assets/index-CRWiopRc.js and b/lightrag/api/webui/assets/index-BlAjHenV.js differ

lightrag/api/webui/index.html
CHANGED
Binary files a/lightrag/api/webui/index.html and b/lightrag/api/webui/index.html differ
lightrag/base.py
CHANGED
@@ -103,6 +103,7 @@ class QueryParam:
 @dataclass
 class StorageNameSpace(ABC):
     namespace: str
+    workspace: str
     global_config: dict[str, Any]

     async def initialize(self):
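Adding workspace to StorageNameSpace means every storage backend is now constructed with both a namespace and a workspace. A minimal dataclass sketch of that contract is shown below; DummyStorage and the default for global_config are illustrative additions, only the field names come from the diff.

from abc import ABC
from dataclasses import dataclass, field
from typing import Any

@dataclass
class StorageNameSpace(ABC):
    # Field order mirrors the diff: namespace, then the new workspace, then global_config
    namespace: str
    workspace: str
    global_config: dict[str, Any] = field(default_factory=dict)

@dataclass
class DummyStorage(StorageNameSpace):
    """Illustrative subclass showing how a backend receives the workspace."""
    pass

s = DummyStorage(namespace="chunks", workspace="tenant_a")
print(s.workspace)  # "tenant_a"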
lightrag/kg/faiss_impl.py
CHANGED
@@ -38,9 +38,19 @@ class FaissVectorDBStorage(BaseVectorStorage):
         self.cosine_better_than_threshold = cosine_threshold

         # Where to save index file if you want persistent storage
+        working_dir = self.global_config["working_dir"]
+        if self.workspace:
+            # Include workspace in the file path for data isolation
+            workspace_dir = os.path.join(working_dir, self.workspace)
+            os.makedirs(workspace_dir, exist_ok=True)
+            self._faiss_index_file = os.path.join(
+                workspace_dir, f"faiss_index_{self.namespace}.index"
+            )
+        else:
+            # Default behavior when workspace is empty
+            self._faiss_index_file = os.path.join(
+                working_dir, f"faiss_index_{self.namespace}.index"
+            )
         self._meta_file = self._faiss_index_file + ".meta.json"

         self._max_batch_size = self.global_config["embedding_batch_num"]
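The file-based backends (Faiss here, and the JSON KV, JSON doc-status, and NanoVectorDB storages below) all isolate data the same way: the persistence file is redirected into a <working_dir>/<workspace>/ subdirectory when a workspace is set. A condensed sketch of that shared pattern follows; storage_file_path is an illustrative helper, not a function in the repo.

import os

def storage_file_path(working_dir: str, workspace: str, file_name: str) -> str:
    """Shared pattern from this merge: when a workspace is set, persist under a
    workspace subdirectory; otherwise fall back to the original flat layout."""
    if workspace:
        workspace_dir = os.path.join(working_dir, workspace)
        os.makedirs(workspace_dir, exist_ok=True)
        return os.path.join(workspace_dir, file_name)
    return os.path.join(working_dir, file_name)

print(storage_file_path("./rag_storage", "tenant_a", "faiss_index_chunks.index"))
print(storage_file_path("./rag_storage", "", "faiss_index_chunks.index"))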
lightrag/kg/json_doc_status_impl.py
CHANGED
@@ -30,7 +30,18 @@ class JsonDocStatusStorage(DocStatusStorage):

     def __post_init__(self):
         working_dir = self.global_config["working_dir"]
+        if self.workspace:
+            # Include workspace in the file path for data isolation
+            workspace_dir = os.path.join(working_dir, self.workspace)
+            os.makedirs(workspace_dir, exist_ok=True)
+            self._file_name = os.path.join(
+                workspace_dir, f"kv_store_{self.namespace}.json"
+            )
+        else:
+            # Default behavior when workspace is empty
+            self._file_name = os.path.join(
+                working_dir, f"kv_store_{self.namespace}.json"
+            )
         self._data = None
         self._storage_lock = None
         self.storage_updated = None
lightrag/kg/json_kv_impl.py
CHANGED
@@ -26,7 +26,18 @@ from .shared_storage import (
 class JsonKVStorage(BaseKVStorage):
     def __post_init__(self):
         working_dir = self.global_config["working_dir"]
+        if self.workspace:
+            # Include workspace in the file path for data isolation
+            workspace_dir = os.path.join(working_dir, self.workspace)
+            os.makedirs(workspace_dir, exist_ok=True)
+            self._file_name = os.path.join(
+                workspace_dir, f"kv_store_{self.namespace}.json"
+            )
+        else:
+            # Default behavior when workspace is empty
+            self._file_name = os.path.join(
+                working_dir, f"kv_store_{self.namespace}.json"
+            )
         self._data = None
         self._storage_lock = None
         self.storage_updated = None
lightrag/kg/milvus_impl.py
CHANGED
@@ -7,10 +7,6 @@ from lightrag.utils import logger, compute_mdhash_id
 from ..base import BaseVectorStorage
 import pipmaster as pm

-
-if not pm.is_installed("configparser"):
-    pm.install("configparser")
-
 if not pm.is_installed("pymilvus"):
     pm.install("pymilvus")

@@ -660,6 +656,29 @@ class MilvusVectorDBStorage(BaseVectorStorage):
         raise

     def __post_init__(self):
+        # Check for MILVUS_WORKSPACE environment variable first (higher priority)
+        # This allows administrators to force a specific workspace for all Milvus storage instances
+        milvus_workspace = os.environ.get("MILVUS_WORKSPACE")
+        if milvus_workspace and milvus_workspace.strip():
+            # Use environment variable value, overriding the passed workspace parameter
+            effective_workspace = milvus_workspace.strip()
+            logger.info(
+                f"Using MILVUS_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
+            )
+        else:
+            # Use the workspace parameter passed during initialization
+            effective_workspace = self.workspace
+            if effective_workspace:
+                logger.debug(
+                    f"Using passed workspace parameter: '{effective_workspace}'"
+                )
+
+        # Build namespace with workspace prefix for data isolation
+        if effective_workspace:
+            self.namespace = f"{effective_workspace}_{self.namespace}"
+            logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
+        # When workspace is empty, keep the original namespace unchanged
+
         kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = kwargs.get("cosine_better_than_threshold")
         if cosine_threshold is None:
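Milvus (and the MongoDB classes in the next file) isolate data by prefixing the storage namespace rather than by file path. A sketch of the effective-workspace resolution and prefixing follows; prefixed_namespace is an illustrative helper that condenses the logic shown in the diff.

import os

def prefixed_namespace(namespace: str, workspace: str, env_var: str = "MILVUS_WORKSPACE") -> str:
    """Sketch of the naming rule used in this merge: a backend env override
    (e.g. MILVUS_WORKSPACE) beats the passed workspace, and a non-empty
    effective workspace is prepended to the collection namespace."""
    forced = os.environ.get(env_var, "").strip()
    effective = forced or (workspace or "")
    return f"{effective}_{namespace}" if effective else namespace

print(prefixed_namespace("chunks", "tenant_a"))  # tenant_a_chunks
print(prefixed_namespace("chunks", ""))          # chunks (unchanged)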
lightrag/kg/mongo_impl.py
CHANGED
@@ -25,6 +25,7 @@ if not pm.is_installed("pymongo"):
     pm.install("pymongo")

 from pymongo import AsyncMongoClient  # type: ignore
+from pymongo import UpdateOne  # type: ignore
 from pymongo.asynchronous.database import AsyncDatabase  # type: ignore
 from pymongo.asynchronous.collection import AsyncCollection  # type: ignore
 from pymongo.operations import SearchIndexModel  # type: ignore
@@ -81,7 +82,39 @@ class MongoKVStorage(BaseKVStorage):
     db: AsyncDatabase = field(default=None)
     _data: AsyncCollection = field(default=None)

+    def __init__(self, namespace, global_config, embedding_func, workspace=None):
+        super().__init__(
+            namespace=namespace,
+            workspace=workspace or "",
+            global_config=global_config,
+            embedding_func=embedding_func,
+        )
+        self.__post_init__()
+
     def __post_init__(self):
+        # Check for MONGODB_WORKSPACE environment variable first (higher priority)
+        # This allows administrators to force a specific workspace for all MongoDB storage instances
+        mongodb_workspace = os.environ.get("MONGODB_WORKSPACE")
+        if mongodb_workspace and mongodb_workspace.strip():
+            # Use environment variable value, overriding the passed workspace parameter
+            effective_workspace = mongodb_workspace.strip()
+            logger.info(
+                f"Using MONGODB_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
+            )
+        else:
+            # Use the workspace parameter passed during initialization
+            effective_workspace = self.workspace
+            if effective_workspace:
+                logger.debug(
+                    f"Using passed workspace parameter: '{effective_workspace}'"
+                )
+
+        # Build namespace with workspace prefix for data isolation
+        if effective_workspace:
+            self.namespace = f"{effective_workspace}_{self.namespace}"
+            logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
+        # When workspace is empty, keep the original namespace unchanged
+
         self._collection_name = self.namespace

     async def initialize(self):
@@ -142,7 +175,6 @@ class MongoKVStorage(BaseKVStorage):

     # Unified handling for all namespaces with flattened keys
     # Use bulk_write for better performance
-        from pymongo import UpdateOne

         operations = []
         current_time = int(time.time())  # Get current Unix timestamp
@@ -252,7 +284,39 @@ class MongoDocStatusStorage(DocStatusStorage):
     db: AsyncDatabase = field(default=None)
     _data: AsyncCollection = field(default=None)

+    def __init__(self, namespace, global_config, embedding_func, workspace=None):
+        super().__init__(
+            namespace=namespace,
+            workspace=workspace or "",
+            global_config=global_config,
+            embedding_func=embedding_func,
+        )
+        self.__post_init__()
+
     def __post_init__(self):
+        # ... same MONGODB_WORKSPACE override and workspace-prefixed namespace
+        #     handling as in MongoKVStorage.__post_init__ above ...
+
         self._collection_name = self.namespace

     async def initialize(self):
@@ -367,12 +431,36 @@ class MongoGraphStorage(BaseGraphStorage):
     # edge collection storing source_node_id, target_node_id, and edge_properties
     edgeCollection: AsyncCollection = field(default=None)

-    def __init__(self, namespace, global_config, embedding_func):
+    def __init__(self, namespace, global_config, embedding_func, workspace=None):
         super().__init__(
             namespace=namespace,
+            workspace=workspace or "",
             global_config=global_config,
             embedding_func=embedding_func,
         )
+        # ... same MONGODB_WORKSPACE override and workspace-prefixed namespace
+        #     handling as in MongoKVStorage.__post_init__ above ...
+
         self._collection_name = self.namespace
         self._edge_collection_name = f"{self._collection_name}_edges"

@@ -1230,8 +1318,52 @@ class MongoGraphStorage(BaseGraphStorage):
 class MongoVectorDBStorage(BaseVectorStorage):
     db: AsyncDatabase | None = field(default=None)
     _data: AsyncCollection | None = field(default=None)
+    _index_name: str = field(default="", init=False)
+
+    def __init__(
+        self, namespace, global_config, embedding_func, workspace=None, meta_fields=None
+    ):
+        super().__init__(
+            namespace=namespace,
+            workspace=workspace or "",
+            global_config=global_config,
+            embedding_func=embedding_func,
+            meta_fields=meta_fields or set(),
+        )
+        self.__post_init__()

     def __post_init__(self):
+        # ... same MONGODB_WORKSPACE override and workspace-prefixed namespace
+        #     handling as in MongoKVStorage.__post_init__ above ...
+
+        # Set index name based on workspace for backward compatibility
+        if effective_workspace:
+            # Use collection-specific index name for workspaced collections to avoid conflicts
+            self._index_name = f"vector_knn_index_{self.namespace}"
+        else:
+            # Keep original index name for backward compatibility with existing deployments
+            self._index_name = "vector_knn_index"
+
         kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
         cosine_threshold = kwargs.get("cosine_better_than_threshold")
         if cosine_threshold is None:
@@ -1261,13 +1393,11 @@ class MongoVectorDBStorage(BaseVectorStorage):
     async def create_vector_index_if_not_exists(self):
         """Creates an Atlas Vector Search index."""
         try:
-            index_name = "vector_knn_index"
-
             indexes_cursor = await self._data.list_search_indexes()
             indexes = await indexes_cursor.to_list(length=None)
             for index in indexes:
-                if index["name"] ==
-                    logger.
+                if index["name"] == self._index_name:
+                    logger.info(f"vector index {self._index_name} already exist")
                     return

             search_index_model = SearchIndexModel(
@@ -1281,15 +1411,15 @@ class MongoVectorDBStorage(BaseVectorStorage):
                         }
                     ]
                 },
-                name=
+                name=self._index_name,
                 type="vectorSearch",
             )

             await self._data.create_search_index(search_index_model)
-            logger.info("Vector index created successfully.")
+            logger.info(f"Vector index {self._index_name} created successfully.")

-        except PyMongoError as
-            logger.
+        except PyMongoError as e:
+            logger.error(f"Error creating vector index {self._index_name}: {e}")

     async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
         logger.debug(f"Inserting {len(data)} to {self.namespace}")
@@ -1344,7 +1474,7 @@ class MongoVectorDBStorage(BaseVectorStorage):
         pipeline = [
             {
                 "$vectorSearch": {
-                    "index":
+                    "index": self._index_name,  # Use stored index name for consistency
                     "path": "vector",
                     "queryVector": query_vector,
                     "numCandidates": 100,  # Adjust for performance
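MongoVectorDBStorage now derives its Atlas Search index name from the (possibly workspace-prefixed) namespace, keeping the legacy name when no workspace applies. A small sketch of that rule follows; atlas_index_name is an illustrative helper summarizing the diff, not repo code.

def atlas_index_name(namespace: str, workspace_applied: bool) -> str:
    """Sketch of the index-naming rule from MongoVectorDBStorage in this diff:
    keep the legacy name when no workspace is in play (backward compatible),
    otherwise derive a per-collection name to avoid clashes."""
    return f"vector_knn_index_{namespace}" if workspace_applied else "vector_knn_index"

print(atlas_index_name("tenant_a_chunks", True))  # vector_knn_index_tenant_a_chunks
print(atlas_index_name("chunks", False))          # vector_knn_index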
lightrag/kg/nano_vector_db_impl.py
CHANGED
@@ -41,9 +41,19 @@ class NanoVectorDBStorage(BaseVectorStorage):
         )
         self.cosine_better_than_threshold = cosine_threshold

+        working_dir = self.global_config["working_dir"]
+        if self.workspace:
+            # Include workspace in the file path for data isolation
+            workspace_dir = os.path.join(working_dir, self.workspace)
+            os.makedirs(workspace_dir, exist_ok=True)
+            self._client_file_name = os.path.join(
+                workspace_dir, f"vdb_{self.namespace}.json"
+            )
+        else:
+            # Default behavior when workspace is empty
+            self._client_file_name = os.path.join(
+                working_dir, f"vdb_{self.namespace}.json"
+            )
         self._max_batch_size = self.global_config["embedding_batch_num"]

         self._client = NanoVectorDB(
lightrag/kg/neo4j_impl.py
CHANGED
@@ -50,14 +50,25 @@ logging.getLogger("neo4j").setLevel(logging.ERROR)
 @final
 @dataclass
 class Neo4JStorage(BaseGraphStorage):
-    def __init__(self, namespace, global_config, embedding_func):
+    def __init__(self, namespace, global_config, embedding_func, workspace=None):
+        # Check NEO4J_WORKSPACE environment variable and override workspace if set
+        neo4j_workspace = os.environ.get("NEO4J_WORKSPACE")
+        if neo4j_workspace and neo4j_workspace.strip():
+            workspace = neo4j_workspace
+
         super().__init__(
             namespace=namespace,
+            workspace=workspace or "",
             global_config=global_config,
             embedding_func=embedding_func,
         )
         self._driver = None

+    def _get_workspace_label(self) -> str:
+        """Get workspace label, return 'base' for compatibility when workspace is empty"""
+        workspace = getattr(self, "workspace", None)
+        return workspace if workspace else "base"
+
     async def initialize(self):
         URI = os.environ.get("NEO4J_URI", config.get("neo4j", "uri", fallback=None))
         USERNAME = os.environ.get(
@@ -153,13 +164,14 @@ class Neo4JStorage(BaseGraphStorage):
             raise e

         if connected:
-            # Create index for
+            # Create index for workspace nodes on entity_id if it doesn't exist
+            workspace_label = self._get_workspace_label()
             try:
                 async with self._driver.session(database=database) as session:
                     # Check if index exists first
-                    check_query = """
+                    check_query = f"""
                     CALL db.indexes() YIELD name, labelsOrTypes, properties
-                    WHERE labelsOrTypes = ['
+                    WHERE labelsOrTypes = ['{workspace_label}'] AND properties = ['entity_id']
                     RETURN count(*) > 0 AS exists
                     """
                     try:
@@ -172,16 +184,16 @@ class Neo4JStorage(BaseGraphStorage):
                     if not index_exists:
                         # Create index only if it doesn't exist
                         result = await session.run(
-                            "CREATE INDEX FOR (n
+                            f"CREATE INDEX FOR (n:`{workspace_label}`) ON (n.entity_id)"
                         )
                         await result.consume()
                         logger.info(
-                            f"Created index for
+                            f"Created index for {workspace_label} nodes on entity_id in {database}"
                         )
                 except Exception:
                     # Fallback if db.indexes() is not supported in this Neo4j version
                     result = await session.run(
-                        "CREATE INDEX IF NOT EXISTS FOR (n
+                        f"CREATE INDEX IF NOT EXISTS FOR (n:`{workspace_label}`) ON (n.entity_id)"
                     )
                     await result.consume()
             except Exception as e:
@@ -216,11 +228,12 @@ class Neo4JStorage(BaseGraphStorage):
             ValueError: If node_id is invalid
             Exception: If there is an error executing the query
         """
+        workspace_label = self._get_workspace_label()
         async with self._driver.session(
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
             try:
-                query = "MATCH (n
+                query = f"MATCH (n:`{workspace_label}` {{entity_id: $entity_id}}) RETURN count(n) > 0 AS node_exists"
                 result = await session.run(query, entity_id=node_id)
                 single_result = await result.single()
                 await result.consume()  # Ensure result is fully consumed
@@ -245,12 +258,13 @@ class Neo4JStorage(BaseGraphStorage):
             ValueError: If either node_id is invalid
             Exception: If there is an error executing the query
         """
+        workspace_label = self._get_workspace_label()
         async with self._driver.session(
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
             try:
                 query = (
-                    "MATCH (a
+                    f"MATCH (a:`{workspace_label}` {{entity_id: $source_entity_id}})-[r]-(b:`{workspace_label}` {{entity_id: $target_entity_id}}) "
                     "RETURN COUNT(r) > 0 AS edgeExists"
                 )
                 result = await session.run(
@@ -282,11 +296,14 @@ class Neo4JStorage(BaseGraphStorage):
             ValueError: If node_id is invalid
             Exception: If there is an error executing the query
         """
+        workspace_label = self._get_workspace_label()
         async with self._driver.session(
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
             try:
-                query =
+                query = (
+                    f"MATCH (n:`{workspace_label}` {{entity_id: $entity_id}}) RETURN n"
+                )
                 result = await session.run(query, entity_id=node_id)
                 try:
                     records = await result.fetch(
@@ -300,12 +317,12 @@ class Neo4JStorage(BaseGraphStorage):
                 if records:
                     node = records[0]["n"]
                     node_dict = dict(node)
-                    # Remove
+                    # Remove workspace label from labels list if it exists
                     if "labels" in node_dict:
                         node_dict["labels"] = [
                             label
                             for label in node_dict["labels"]
-                            if label !=
+                            if label != workspace_label
                         ]
                     # logger.debug(f"Neo4j query node {query} return: {node_dict}")
                     return node_dict
@@ -326,12 +343,13 @@ class Neo4JStorage(BaseGraphStorage):
         Returns:
             A dictionary mapping each node_id to its node data (or None if not found).
         """
+        workspace_label = self._get_workspace_label()
         async with self._driver.session(
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
-            query = """
+            query = f"""
                 UNWIND $node_ids AS id
-                MATCH (n
+                MATCH (n:`{workspace_label}` {{entity_id: id}})
                 RETURN n.entity_id AS entity_id, n
             """
             result = await session.run(query, node_ids=node_ids)
@@ -340,10 +358,12 @@ class Neo4JStorage(BaseGraphStorage):
                 entity_id = record["entity_id"]
                 node = record["n"]
                 node_dict = dict(node)
-                # Remove the
+                # Remove the workspace label if present in a 'labels' property
                 if "labels" in node_dict:
                     node_dict["labels"] = [
-                        label
+                        label
+                        for label in node_dict["labels"]
+                        if label != workspace_label
                     ]
                 nodes[entity_id] = node_dict
             await result.consume()  # Make sure to consume the result fully
@@ -364,12 +384,13 @@ class Neo4JStorage(BaseGraphStorage):
             ValueError: If node_id is invalid
             Exception: If there is an error executing the query
         """
+        workspace_label = self._get_workspace_label()
         async with self._driver.session(
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
             try:
-                query = """
-                    MATCH (n
+                query = f"""
+                    MATCH (n:`{workspace_label}` {{entity_id: $entity_id}})
                     OPTIONAL MATCH (n)-[r]-()
                     RETURN COUNT(r) AS degree
                 """
@@ -403,13 +424,14 @@ class Neo4JStorage(BaseGraphStorage):
             A dictionary mapping each node_id to its degree (number of relationships).
             If a node is not found, its degree will be set to 0.
         """
+        workspace_label = self._get_workspace_label()
         async with self._driver.session(
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
-            query = """
+            query = f"""
                 UNWIND $node_ids AS id
-                MATCH (n
-                RETURN n.entity_id AS entity_id, count { (n)--() } AS degree;
+                MATCH (n:`{workspace_label}` {{entity_id: id}})
+                RETURN n.entity_id AS entity_id, count {{ (n)--() }} AS degree;
             """
             result = await session.run(query, node_ids=node_ids)
             degrees = {}
@@ -489,12 +511,13 @@ class Neo4JStorage(BaseGraphStorage):
             ValueError: If either node_id is invalid
             Exception: If there is an error executing the query
         """
+        workspace_label = self._get_workspace_label()
         try:
             async with self._driver.session(
                 database=self._DATABASE, default_access_mode="READ"
             ) as session:
-                query = """
-                    MATCH (start
+                query = f"""
+                    MATCH (start:`{workspace_label}` {{entity_id: $source_entity_id}})-[r]-(end:`{workspace_label}` {{entity_id: $target_entity_id}})
                     RETURN properties(r) as edge_properties
                 """
                 result = await session.run(
@@ -571,12 +594,13 @@ class Neo4JStorage(BaseGraphStorage):
         Returns:
             A dictionary mapping (src, tgt) tuples to their edge properties.
         """
+        workspace_label = self._get_workspace_label()
         async with self._driver.session(
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
-            query = """
+            query = f"""
                 UNWIND $pairs AS pair
-                MATCH (start
+                MATCH (start:`{workspace_label}` {{entity_id: pair.src}})-[r:DIRECTED]-(end:`{workspace_label}` {{entity_id: pair.tgt}})
                 RETURN pair.src AS src_id, pair.tgt AS tgt_id, collect(properties(r)) AS edges
             """
             result = await session.run(query, pairs=pairs)
@@ -627,8 +651,9 @@ class Neo4JStorage(BaseGraphStorage):
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
             try:
+                workspace_label = self._get_workspace_label()
+                query = f"""MATCH (n:`{workspace_label}` {{entity_id: $entity_id}})
+                OPTIONAL MATCH (n)-[r]-(connected:`{workspace_label}`)
                 WHERE connected.entity_id IS NOT NULL
                 RETURN n, r, connected"""
                 results = await session.run(query, entity_id=source_node_id)
@@ -689,10 +714,11 @@ class Neo4JStorage(BaseGraphStorage):
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
             # Query to get both outgoing and incoming edges
+            workspace_label = self._get_workspace_label()
+            query = f"""
                 UNWIND $node_ids AS id
-                MATCH (n
-                OPTIONAL MATCH (n)-[r]-(connected
+                MATCH (n:`{workspace_label}` {{entity_id: id}})
+                OPTIONAL MATCH (n)-[r]-(connected:`{workspace_label}`)
                 RETURN id AS queried_id, n.entity_id AS node_entity_id,
                 connected.entity_id AS connected_entity_id,
                 startNode(r).entity_id AS start_entity_id
@@ -727,12 +753,13 @@ class Neo4JStorage(BaseGraphStorage):
         return edges_dict

     async def get_nodes_by_chunk_ids(self, chunk_ids: list[str]) -> list[dict]:
+        workspace_label = self._get_workspace_label()
         async with self._driver.session(
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
-            query = """
+            query = f"""
                 UNWIND $chunk_ids AS chunk_id
-                MATCH (n
+                MATCH (n:`{workspace_label}`)
                 WHERE n.source_id IS NOT NULL AND chunk_id IN split(n.source_id, $sep)
                 RETURN DISTINCT n
             """
@@ -748,12 +775,13 @@ class Neo4JStorage(BaseGraphStorage):
         return nodes

     async def get_edges_by_chunk_ids(self, chunk_ids: list[str]) -> list[dict]:
+        workspace_label = self._get_workspace_label()
         async with self._driver.session(
             database=self._DATABASE, default_access_mode="READ"
         ) as session:
-            query = """
+            query = f"""
                 UNWIND $chunk_ids AS chunk_id
-                MATCH (a
+                MATCH (a:`{workspace_label}`)-[r]-(b:`{workspace_label}`)
                 WHERE r.source_id IS NOT NULL AND chunk_id IN split(r.source_id, $sep)
                 RETURN DISTINCT a.entity_id AS source, b.entity_id AS target, properties(r) AS properties
             """
@@ -787,6 +815,7 @@ class Neo4JStorage(BaseGraphStorage):
             node_id: The unique identifier for the node (used as label)
             node_data: Dictionary of node properties
         """
+        workspace_label = self._get_workspace_label()
         properties = node_data
         entity_type = properties["entity_type"]
         if "entity_id" not in properties:
@@ -796,14 +825,11 @@ class Neo4JStorage(BaseGraphStorage):
             async def execute_upsert(tx: AsyncManagedTransaction):
-                query =
-                MERGE (n:base {entity_id: $entity_id})
                 SET n += $properties
-                SET n
                 """
-                % entity_type
-                )
                 result = await tx.run(
                     query, entity_id=node_id, properties=properties
                 )
@@ -847,10 +873,11 @@ class Neo4JStorage(BaseGraphStorage):
             async def execute_upsert(tx: AsyncManagedTransaction):
                 WITH source
-                MATCH (target
                 MERGE (source)-[r:DIRECTED]-(target)
                 SET r += $properties
                 RETURN r, source, target
@@ -889,6 +916,7 @@ class Neo4JStorage(BaseGraphStorage):
         result = KnowledgeGraph()
         seen_nodes = set()
         seen_edges = set()
@@ -899,7 +927,9 @@ class Neo4JStorage(BaseGraphStorage):
                 if node_label == "*":
                     # First check total node count to determine if graph is truncated
-                    count_query =
                     count_result = None
@@ -915,13 +945,13 @@ class Neo4JStorage(BaseGraphStorage):
                     # Run main query to get nodes with highest degree
-                    main_query = """
-                    MATCH (n)
                     OPTIONAL MATCH (n)-[r]-()
                     WITH n, COALESCE(count(r), 0) AS degree
                     ORDER BY degree DESC
                     LIMIT $max_nodes
-                    WITH collect({node: n}) AS filtered_nodes
                     UNWIND filtered_nodes AS node_info
                     WITH collect(node_info.node) AS kept_nodes, filtered_nodes
                     OPTIONAL MATCH (a)-[r]-(b)
@@ -943,20 +973,21 @@ class Neo4JStorage(BaseGraphStorage):
                     # First try without limit to check if we need to truncate
-                    full_query = """
-                    MATCH (start)
                     WHERE start.entity_id = $entity_id
                     WITH start
-                    CALL apoc.path.subgraphAll(start, {
                     minLevel: 0,
                     maxLevel: $max_depth,
                     bfs: true
-                    })
                     YIELD nodes, relationships
                     WITH nodes, relationships, size(nodes) AS total_nodes
                     UNWIND nodes AS node
-                    WITH collect({node: node}) AS node_info, relationships, total_nodes
                     RETURN node_info, relationships, total_nodes
                     """
@@ -994,20 +1025,21 @@ class Neo4JStorage(BaseGraphStorage):
                     # Run limited query
-                    limited_query = """
-                    MATCH (start)
                     WHERE start.entity_id = $entity_id
                     WITH start
-                    CALL apoc.path.subgraphAll(start, {
                     minLevel: 0,
                     maxLevel: $max_depth,
                     limit: $max_nodes,
                     bfs: true
-                    })
                     YIELD nodes, relationships
                     UNWIND nodes AS node
-                    WITH collect({node: node}) AS node_info, relationships
                     RETURN node_info, relationships
                     """
@@ -1094,11 +1126,12 @@ class Neo4JStorage(BaseGraphStorage):
         # Get the starting node's data
-            query = """
-            MATCH (n
             RETURN id(n) as node_id, n
             """
             node_result = await session.run(query, entity_id=node_label)
@@ -1156,8 +1189,9 @@ class Neo4JStorage(BaseGraphStorage):
             WITH r, b, id(r) as edge_id, id(b) as target_id
             RETURN r, b, edge_id, target_id
             """
@@ -1241,6 +1275,7 @@ class Neo4JStorage(BaseGraphStorage):
         Returns:
             ["Person", "Company", ...]  # Alphabetically sorted label list
         """
@@ -1248,8 +1283,8 @@ class Neo4JStorage(BaseGraphStorage):
             # Method 2: Query compatible with older versions
-            query = """
-            MATCH (n
             WHERE n.entity_id IS NOT NULL
             RETURN DISTINCT n.entity_id AS label
             ORDER BY label
@@ -1285,8 +1320,9 @@ class Neo4JStorage(BaseGraphStorage):
         async def _do_delete(tx: AsyncManagedTransaction):
             DETACH DELETE n
             """
             result = await tx.run(query, entity_id=node_id)
@@ -1342,8 +1378,9 @@ class Neo4JStorage(BaseGraphStorage):
            async def _do_delete_edge(tx: AsyncManagedTransaction):
                 DELETE r
                 """
                 result = await tx.run(
@@ -1360,26 +1397,32 @@ class Neo4JStorage(BaseGraphStorage):
     async def drop(self) -> dict[str, str]:
-        """Drop all data from storage and clean up resources
-        This method will delete all nodes and relationships in the
         Returns:
             dict[str, str]: Operation status and message
-            - On success: {"status": "success", "message": "data dropped"}
             - On failure: {"status": "error", "message": "<error details>"}
         """
         try:
             async with self._driver.session(database=self._DATABASE) as session:
-                # Delete all nodes and relationships
-                query = "MATCH (n) DETACH DELETE n"
                 result = await session.run(query)
                 await result.consume()  # Ensure result is fully consumed
                 logger.info(
-                    f"Process {os.getpid()} drop Neo4j database {self._DATABASE}"
                 )
-                return {
         except Exception as e:
-            logger.error(
             return {"status": "error", "message": str(e)}
|
820 |
entity_type = properties["entity_type"]
|
821 |
if "entity_id" not in properties:
|
|
|
825 |
async with self._driver.session(database=self._DATABASE) as session:
|
826 |
|
827 |
async def execute_upsert(tx: AsyncManagedTransaction):
|
828 |
+
query = f"""
|
829 |
+
MERGE (n:`{workspace_label}` {{entity_id: $entity_id}})
|
|
|
830 |
SET n += $properties
|
831 |
+
SET n:`{entity_type}`
|
832 |
"""
|
|
|
|
|
833 |
result = await tx.run(
|
834 |
query, entity_id=node_id, properties=properties
|
835 |
)
|
|
|
873 |
async with self._driver.session(database=self._DATABASE) as session:
|
874 |
|
875 |
async def execute_upsert(tx: AsyncManagedTransaction):
|
876 |
+
workspace_label = self._get_workspace_label()
|
877 |
+
query = f"""
|
878 |
+
MATCH (source:`{workspace_label}` {{entity_id: $source_entity_id}})
|
879 |
WITH source
|
880 |
+
MATCH (target:`{workspace_label}` {{entity_id: $target_entity_id}})
|
881 |
MERGE (source)-[r:DIRECTED]-(target)
|
882 |
SET r += $properties
|
883 |
RETURN r, source, target
|
|
|
916 |
KnowledgeGraph object containing nodes and edges, with an is_truncated flag
|
917 |
indicating whether the graph was truncated due to max_nodes limit
|
918 |
"""
|
919 |
+
workspace_label = self._get_workspace_label()
|
920 |
result = KnowledgeGraph()
|
921 |
seen_nodes = set()
|
922 |
seen_edges = set()
|
|
|
927 |
try:
|
928 |
if node_label == "*":
|
929 |
# First check total node count to determine if graph is truncated
|
930 |
+
count_query = (
|
931 |
+
f"MATCH (n:`{workspace_label}`) RETURN count(n) as total"
|
932 |
+
)
|
933 |
count_result = None
|
934 |
try:
|
935 |
count_result = await session.run(count_query)
|
|
|
945 |
await count_result.consume()
|
946 |
|
947 |
# Run main query to get nodes with highest degree
|
948 |
+
main_query = f"""
|
949 |
+
MATCH (n:`{workspace_label}`)
|
950 |
OPTIONAL MATCH (n)-[r]-()
|
951 |
WITH n, COALESCE(count(r), 0) AS degree
|
952 |
ORDER BY degree DESC
|
953 |
LIMIT $max_nodes
|
954 |
+
WITH collect({{node: n}}) AS filtered_nodes
|
955 |
UNWIND filtered_nodes AS node_info
|
956 |
WITH collect(node_info.node) AS kept_nodes, filtered_nodes
|
957 |
OPTIONAL MATCH (a)-[r]-(b)
|
|
|
973 |
else:
|
974 |
# return await self._robust_fallback(node_label, max_depth, max_nodes)
|
975 |
# First try without limit to check if we need to truncate
|
976 |
+
full_query = f"""
|
977 |
+
MATCH (start:`{workspace_label}`)
|
978 |
WHERE start.entity_id = $entity_id
|
979 |
WITH start
|
980 |
+
CALL apoc.path.subgraphAll(start, {{
|
981 |
relationshipFilter: '',
|
982 |
+
labelFilter: '{workspace_label}',
|
983 |
minLevel: 0,
|
984 |
maxLevel: $max_depth,
|
985 |
bfs: true
|
986 |
+
}})
|
987 |
YIELD nodes, relationships
|
988 |
WITH nodes, relationships, size(nodes) AS total_nodes
|
989 |
UNWIND nodes AS node
|
990 |
+
WITH collect({{node: node}}) AS node_info, relationships, total_nodes
|
991 |
RETURN node_info, relationships, total_nodes
|
992 |
"""
|
993 |
|
|
|
1025 |
)
|
1026 |
|
1027 |
# Run limited query
|
1028 |
+
limited_query = f"""
|
1029 |
+
MATCH (start:`{workspace_label}`)
|
1030 |
WHERE start.entity_id = $entity_id
|
1031 |
WITH start
|
1032 |
+
CALL apoc.path.subgraphAll(start, {{
|
1033 |
relationshipFilter: '',
|
1034 |
+
labelFilter: '{workspace_label}',
|
1035 |
minLevel: 0,
|
1036 |
maxLevel: $max_depth,
|
1037 |
limit: $max_nodes,
|
1038 |
bfs: true
|
1039 |
+
}})
|
1040 |
YIELD nodes, relationships
|
1041 |
UNWIND nodes AS node
|
1042 |
+
WITH collect({{node: node}}) AS node_info, relationships
|
1043 |
RETURN node_info, relationships
|
1044 |
"""
|
1045 |
result_set = None
|
|
|
1126 |
visited_edge_pairs = set()
|
1127 |
|
1128 |
# Get the starting node's data
|
1129 |
+
workspace_label = self._get_workspace_label()
|
1130 |
async with self._driver.session(
|
1131 |
database=self._DATABASE, default_access_mode="READ"
|
1132 |
) as session:
|
1133 |
+
query = f"""
|
1134 |
+
MATCH (n:`{workspace_label}` {{entity_id: $entity_id}})
|
1135 |
RETURN id(n) as node_id, n
|
1136 |
"""
|
1137 |
node_result = await session.run(query, entity_id=node_label)
|
|
|
1189 |
async with self._driver.session(
|
1190 |
database=self._DATABASE, default_access_mode="READ"
|
1191 |
) as session:
|
1192 |
+
workspace_label = self._get_workspace_label()
|
1193 |
+
query = f"""
|
1194 |
+
MATCH (a:`{workspace_label}` {{entity_id: $entity_id}})-[r]-(b)
|
1195 |
WITH r, b, id(r) as edge_id, id(b) as target_id
|
1196 |
RETURN r, b, edge_id, target_id
|
1197 |
"""
|
|
|
1275 |
Returns:
|
1276 |
["Person", "Company", ...] # Alphabetically sorted label list
|
1277 |
"""
|
1278 |
+
workspace_label = self._get_workspace_label()
|
1279 |
async with self._driver.session(
|
1280 |
database=self._DATABASE, default_access_mode="READ"
|
1281 |
) as session:
|
|
|
1283 |
# query = "CALL db.labels() YIELD label RETURN label"
|
1284 |
|
1285 |
# Method 2: Query compatible with older versions
|
1286 |
+
query = f"""
|
1287 |
+
MATCH (n:`{workspace_label}`)
|
1288 |
WHERE n.entity_id IS NOT NULL
|
1289 |
RETURN DISTINCT n.entity_id AS label
|
1290 |
ORDER BY label
|
|
|
1320 |
"""
|
1321 |
|
1322 |
async def _do_delete(tx: AsyncManagedTransaction):
|
1323 |
+
workspace_label = self._get_workspace_label()
|
1324 |
+
query = f"""
|
1325 |
+
MATCH (n:`{workspace_label}` {{entity_id: $entity_id}})
|
1326 |
DETACH DELETE n
|
1327 |
"""
|
1328 |
result = await tx.run(query, entity_id=node_id)
|
|
|
1378 |
for source, target in edges:
|
1379 |
|
1380 |
async def _do_delete_edge(tx: AsyncManagedTransaction):
|
1381 |
+
workspace_label = self._get_workspace_label()
|
1382 |
+
query = f"""
|
1383 |
+
MATCH (source:`{workspace_label}` {{entity_id: $source_entity_id}})-[r]-(target:`{workspace_label}` {{entity_id: $target_entity_id}})
|
1384 |
DELETE r
|
1385 |
"""
|
1386 |
result = await tx.run(
|
|
|
1397 |
raise
|
1398 |
|
1399 |
async def drop(self) -> dict[str, str]:
|
1400 |
+
"""Drop all data from current workspace storage and clean up resources
|
1401 |
|
1402 |
+
This method will delete all nodes and relationships in the current workspace only.
|
1403 |
|
1404 |
Returns:
|
1405 |
dict[str, str]: Operation status and message
|
1406 |
+
- On success: {"status": "success", "message": "workspace data dropped"}
|
1407 |
- On failure: {"status": "error", "message": "<error details>"}
|
1408 |
"""
|
1409 |
+
workspace_label = self._get_workspace_label()
|
1410 |
try:
|
1411 |
async with self._driver.session(database=self._DATABASE) as session:
|
1412 |
+
# Delete all nodes and relationships in current workspace only
|
1413 |
+
query = f"MATCH (n:`{workspace_label}`) DETACH DELETE n"
|
1414 |
result = await session.run(query)
|
1415 |
await result.consume() # Ensure result is fully consumed
|
1416 |
|
1417 |
logger.info(
|
1418 |
+
f"Process {os.getpid()} drop Neo4j workspace '{workspace_label}' in database {self._DATABASE}"
|
1419 |
)
|
1420 |
+
return {
|
1421 |
+
"status": "success",
|
1422 |
+
"message": f"workspace '{workspace_label}' data dropped",
|
1423 |
+
}
|
1424 |
except Exception as e:
|
1425 |
+
logger.error(
|
1426 |
+
f"Error dropping Neo4j workspace '{workspace_label}' in database {self._DATABASE}: {e}"
|
1427 |
+
)
|
1428 |
return {"status": "error", "message": str(e)}
|
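All of these Neo4j hunks apply the same pattern: every Cypher statement is scoped to a per-workspace node label returned by _get_workspace_label(), so nodes and relationships from different workspaces can coexist in one database without leaking into each other's queries. A minimal sketch of the idea, not the actual Neo4JStorage code (the helper name, fallback label, and function below are illustrative assumptions):

    # Hypothetical sketch of workspace-scoped Cypher with the neo4j async driver.
    def get_workspace_label(workspace: str | None) -> str:
        # Assumed rule: fall back to a generic label when no workspace is configured.
        return workspace if workspace else "base"

    async def node_exists(session, workspace: str | None, entity_id: str) -> bool:
        label = get_workspace_label(workspace)
        # Backticks let arbitrary workspace names be used as a label safely.
        query = f"MATCH (n:`{label}` {{entity_id: $entity_id}}) RETURN count(n) > 0 AS found"
        result = await session.run(query, entity_id=entity_id)
        record = await result.single()
        await result.consume()
        return bool(record and record["found"])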
lightrag/kg/networkx_impl.py
CHANGED
@@ -46,9 +46,19 @@ class NetworkXStorage(BaseGraphStorage):
        nx.write_graphml(graph, file_name)

    def __post_init__(self):
+       working_dir = self.global_config["working_dir"]
+       if self.workspace:
+           # Include workspace in the file path for data isolation
+           workspace_dir = os.path.join(working_dir, self.workspace)
+           os.makedirs(workspace_dir, exist_ok=True)
+           self._graphml_xml_file = os.path.join(
+               workspace_dir, f"graph_{self.namespace}.graphml"
+           )
+       else:
+           # Default behavior when workspace is empty
+           self._graphml_xml_file = os.path.join(
+               working_dir, f"graph_{self.namespace}.graphml"
+           )
        self._storage_lock = None
        self.storage_updated = None
        self._graph = None
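With this change each workspace gets its own subdirectory under working_dir, so two LightRAG instances sharing a directory no longer overwrite each other's GraphML file. A quick illustration of the resulting paths (the workspace and directory values are made up for the example):

    import os

    working_dir = "./rag_storage"
    namespace = "chunk_entity_relation"

    # workspace set: ./rag_storage/tenant_a/graph_chunk_entity_relation.graphml
    print(os.path.join(working_dir, "tenant_a", f"graph_{namespace}.graphml"))

    # workspace empty: ./rag_storage/graph_chunk_entity_relation.graphml
    print(os.path.join(working_dir, f"graph_{namespace}.graphml"))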
lightrag/kg/postgres_impl.py
CHANGED
@@ -1,6 +1,7 @@
import asyncio
import json
import os
+import re
import datetime
from datetime import timezone
from dataclasses import dataclass, field

@@ -319,7 +320,7 @@ class PostgreSQLDB:
            # Get all old format data
            old_data_sql = """
                SELECT id, mode, original_prompt, return_value, chunk_id,
+                      workspace, create_time, update_time
                FROM LIGHTRAG_LLM_CACHE
                WHERE id NOT LIKE '%:%'
            """

@@ -364,7 +365,9 @@ class PostgreSQLDB:
                await self.execute(
                    insert_sql,
                    {
-                       "workspace":
+                       "workspace": record[
+                           "workspace"
+                       ],  # Use original record's workspace
                        "id": new_key,
                        "mode": record["mode"],
                        "original_prompt": record["original_prompt"],

@@ -384,7 +387,9 @@ class PostgreSQLDB:
                await self.execute(
                    delete_sql,
                    {
-                       "workspace":
+                       "workspace": record[
+                           "workspace"
+                       ],  # Use original record's workspace
                        "mode": record["mode"],
                        "id": record["id"],  # Old id
                    },

@@ -505,6 +510,29 @@ class PostgreSQLDB:
                        f"PostgreSQL, Failed to create index on table {k}, Got: {e}"
                    )

+               # Create composite index for (workspace, id) columns in each table
+               try:
+                   composite_index_name = f"idx_{k.lower()}_workspace_id"
+                   check_composite_index_sql = f"""
+                       SELECT 1 FROM pg_indexes
+                       WHERE indexname = '{composite_index_name}'
+                       AND tablename = '{k.lower()}'
+                   """
+                   composite_index_exists = await self.query(check_composite_index_sql)
+
+                   if not composite_index_exists:
+                       create_composite_index_sql = (
+                           f"CREATE INDEX {composite_index_name} ON {k}(workspace, id)"
+                       )
+                       logger.info(
+                           f"PostgreSQL, Creating composite index {composite_index_name} on table {k}"
+                       )
+                       await self.execute(create_composite_index_sql)
+               except Exception as e:
+                   logger.error(
+                       f"PostgreSQL, Failed to create composite index on table {k}, Got: {e}"
+                   )
+
        # After all tables are created, attempt to migrate timestamp fields
        try:
            await self._migrate_timestamp_columns()

@@ -670,7 +698,7 @@ class ClientManager:
            ),
            "workspace": os.environ.get(
                "POSTGRES_WORKSPACE",
-               config.get("postgres", "workspace", fallback=
+               config.get("postgres", "workspace", fallback=None),
            ),
            "max_connections": os.environ.get(
                "POSTGRES_MAX_CONNECTIONS",

@@ -716,6 +744,18 @@ class PGKVStorage(BaseKVStorage):
    async def initialize(self):
        if self.db is None:
            self.db = await ClientManager.get_client()
+           # Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
+           if self.db.workspace:
+               # Use PostgreSQLDB's workspace (highest priority)
+               final_workspace = self.db.workspace
+           elif hasattr(self, "workspace") and self.workspace:
+               # Use storage class's workspace (medium priority)
+               final_workspace = self.workspace
+               self.db.workspace = final_workspace
+           else:
+               # Use "default" for compatibility (lowest priority)
+               final_workspace = "default"
+               self.db.workspace = final_workspace

    async def finalize(self):
        if self.db is not None:

@@ -1047,6 +1087,18 @@ class PGVectorStorage(BaseVectorStorage):
    async def initialize(self):
        if self.db is None:
            self.db = await ClientManager.get_client()
+           # Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
+           if self.db.workspace:
+               # Use PostgreSQLDB's workspace (highest priority)
+               final_workspace = self.db.workspace
+           elif hasattr(self, "workspace") and self.workspace:
+               # Use storage class's workspace (medium priority)
+               final_workspace = self.workspace
+               self.db.workspace = final_workspace
+           else:
+               # Use "default" for compatibility (lowest priority)
+               final_workspace = "default"
+               self.db.workspace = final_workspace

    async def finalize(self):
        if self.db is not None:

@@ -1328,6 +1380,18 @@ class PGDocStatusStorage(DocStatusStorage):
    async def initialize(self):
        if self.db is None:
            self.db = await ClientManager.get_client()
+           # Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
+           if self.db.workspace:
+               # Use PostgreSQLDB's workspace (highest priority)
+               final_workspace = self.db.workspace
+           elif hasattr(self, "workspace") and self.workspace:
+               # Use storage class's workspace (medium priority)
+               final_workspace = self.workspace
+               self.db.workspace = final_workspace
+           else:
+               # Use "default" for compatibility (lowest priority)
+               final_workspace = "default"
+               self.db.workspace = final_workspace

    async def finalize(self):
        if self.db is not None:

@@ -1606,9 +1670,34 @@ class PGGraphQueryException(Exception):
@dataclass
class PGGraphStorage(BaseGraphStorage):
    def __post_init__(self):
+       # Graph name will be dynamically generated in initialize() based on workspace
        self.db: PostgreSQLDB | None = None

+   def _get_workspace_graph_name(self) -> str:
+       """
+       Generate graph name based on workspace and namespace for data isolation.
+       Rules:
+       - If workspace is empty: graph_name = namespace
+       - If workspace has value: graph_name = workspace_namespace
+
+       Args:
+           None
+
+       Returns:
+           str: The graph name for the current workspace
+       """
+       workspace = getattr(self, "workspace", None)
+       namespace = self.namespace or os.environ.get("AGE_GRAPH_NAME", "lightrag")
+
+       if workspace and workspace.strip():
+           # Ensure names comply with PostgreSQL identifier specifications
+           safe_workspace = re.sub(r"[^a-zA-Z0-9_]", "_", workspace.strip())
+           safe_namespace = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
+           return f"{safe_workspace}_{safe_namespace}"
+       else:
+           # When workspace is empty, use namespace directly
+           return re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
+
    @staticmethod
    def _normalize_node_id(node_id: str) -> str:
        """

@@ -1629,6 +1718,27 @@ class PGGraphStorage(BaseGraphStorage):
    async def initialize(self):
        if self.db is None:
            self.db = await ClientManager.get_client()
+           # Implement workspace priority: PostgreSQLDB.workspace > self.workspace > None
+           if self.db.workspace:
+               # Use PostgreSQLDB's workspace (highest priority)
+               final_workspace = self.db.workspace
+           elif hasattr(self, "workspace") and self.workspace:
+               # Use storage class's workspace (medium priority)
+               final_workspace = self.workspace
+               self.db.workspace = final_workspace
+           else:
+               # Use None for compatibility (lowest priority)
+               final_workspace = None
+               self.db.workspace = final_workspace
+
+           # Dynamically generate graph name based on workspace
+           self.workspace = self.db.workspace
+           self.graph_name = self._get_workspace_graph_name()
+
+           # Log the graph initialization for debugging
+           logger.info(
+               f"PostgreSQL Graph initialized: workspace='{self.workspace}', graph_name='{self.graph_name}'"
+           )

        # Execute each statement separately and ignore errors
        queries = [

@@ -2833,7 +2943,10 @@ class PGGraphStorage(BaseGraphStorage):
            $$) AS (result agtype)"""

            await self._query(drop_query, readonly=False)
-           return {
+           return {
+               "status": "success",
+               "message": f"workspace '{self.workspace}' graph data dropped",
+           }
        except Exception as e:
            logger.error(f"Error dropping graph: {e}")
            return {"status": "error", "message": str(e)}
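The PostgreSQL storages all resolve the effective workspace with the same precedence (the connection-level POSTGRES_WORKSPACE setting first, then the storage instance's own workspace, then a fallback), and PGGraphStorage additionally derives an Apache AGE graph name from the result. A condensed sketch of that resolution logic, written as free functions for illustration rather than as the actual class methods:

    import re

    def resolve_workspace(db_workspace, storage_workspace, fallback="default"):
        # Priority: PostgreSQLDB.workspace > storage.workspace > fallback
        if db_workspace:
            return db_workspace
        if storage_workspace:
            return storage_workspace
        return fallback

    def workspace_graph_name(workspace, namespace):
        # Sanitize both parts so the result is a valid PostgreSQL identifier.
        safe_ns = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
        if workspace and workspace.strip():
            safe_ws = re.sub(r"[^a-zA-Z0-9_]", "_", workspace.strip())
            return f"{safe_ws}_{safe_ns}"
        return safe_ns

    # e.g. workspace_graph_name("tenant-a", "chunk_entity_relation")
    # -> "tenant_a_chunk_entity_relation"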
lightrag/kg/qdrant_impl.py
CHANGED
@@ -50,6 +50,18 @@ def compute_mdhash_id_for_qdrant(
@final
@dataclass
class QdrantVectorDBStorage(BaseVectorStorage):
+   def __init__(
+       self, namespace, global_config, embedding_func, workspace=None, meta_fields=None
+   ):
+       super().__init__(
+           namespace=namespace,
+           workspace=workspace or "",
+           global_config=global_config,
+           embedding_func=embedding_func,
+           meta_fields=meta_fields or set(),
+       )
+       self.__post_init__()
+
    @staticmethod
    def create_collection_if_not_exist(
        client: QdrantClient, collection_name: str, **kwargs

@@ -59,6 +71,29 @@ class QdrantVectorDBStorage(BaseVectorStorage):
        client.create_collection(collection_name, **kwargs)

    def __post_init__(self):
+       # Check for QDRANT_WORKSPACE environment variable first (higher priority)
+       # This allows administrators to force a specific workspace for all Qdrant storage instances
+       qdrant_workspace = os.environ.get("QDRANT_WORKSPACE")
+       if qdrant_workspace and qdrant_workspace.strip():
+           # Use environment variable value, overriding the passed workspace parameter
+           effective_workspace = qdrant_workspace.strip()
+           logger.info(
+               f"Using QDRANT_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
+           )
+       else:
+           # Use the workspace parameter passed during initialization
+           effective_workspace = self.workspace
+           if effective_workspace:
+               logger.debug(
+                   f"Using passed workspace parameter: '{effective_workspace}'"
+               )
+
+       # Build namespace with workspace prefix for data isolation
+       if effective_workspace:
+           self.namespace = f"{effective_workspace}_{self.namespace}"
+           logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
+       # When workspace is empty, keep the original namespace unchanged
+
        kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
        cosine_threshold = kwargs.get("cosine_better_than_threshold")
        if cosine_threshold is None:
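The net effect of the Qdrant change is that the collection namespace is prefixed with the effective workspace, with QDRANT_WORKSPACE taking precedence over the workspace passed to the constructor. A small illustration of the naming rule (values are made up):

    import os

    def effective_namespace(namespace: str, workspace: str = "") -> str:
        # Environment variable wins over the workspace passed to the constructor.
        ws = os.environ.get("QDRANT_WORKSPACE", "").strip() or workspace
        return f"{ws}_{namespace}" if ws else namespace

    print(effective_namespace("entities", "tenant_a"))  # -> "tenant_a_entities"
    print(effective_namespace("entities"))              # -> "entities"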
lightrag/kg/redis_impl.py
CHANGED
@@ -71,6 +71,29 @@ class RedisConnectionManager:
@dataclass
class RedisKVStorage(BaseKVStorage):
    def __post_init__(self):
+       # Check for REDIS_WORKSPACE environment variable first (higher priority)
+       # This allows administrators to force a specific workspace for all Redis storage instances
+       redis_workspace = os.environ.get("REDIS_WORKSPACE")
+       if redis_workspace and redis_workspace.strip():
+           # Use environment variable value, overriding the passed workspace parameter
+           effective_workspace = redis_workspace.strip()
+           logger.info(
+               f"Using REDIS_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
+           )
+       else:
+           # Use the workspace parameter passed during initialization
+           effective_workspace = self.workspace
+           if effective_workspace:
+               logger.debug(
+                   f"Using passed workspace parameter: '{effective_workspace}'"
+               )
+
+       # Build namespace with workspace prefix for data isolation
+       if effective_workspace:
+           self.namespace = f"{effective_workspace}_{self.namespace}"
+           logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
+       # When workspace is empty, keep the original namespace unchanged
+
        redis_url = os.environ.get(
            "REDIS_URI", config.get("redis", "uri", fallback="redis://localhost:6379")
        )

@@ -461,6 +484,29 @@ class RedisDocStatusStorage(DocStatusStorage):
    """Redis implementation of document status storage"""

    def __post_init__(self):
+       # Check for REDIS_WORKSPACE environment variable first (higher priority)
+       # This allows administrators to force a specific workspace for all Redis storage instances
+       redis_workspace = os.environ.get("REDIS_WORKSPACE")
+       if redis_workspace and redis_workspace.strip():
+           # Use environment variable value, overriding the passed workspace parameter
+           effective_workspace = redis_workspace.strip()
+           logger.info(
+               f"Using REDIS_WORKSPACE environment variable: '{effective_workspace}' (overriding passed workspace: '{self.workspace}')"
+           )
+       else:
+           # Use the workspace parameter passed during initialization
+           effective_workspace = self.workspace
+           if effective_workspace:
+               logger.debug(
+                   f"Using passed workspace parameter: '{effective_workspace}'"
+               )
+
+       # Build namespace with workspace prefix for data isolation
+       if effective_workspace:
+           self.namespace = f"{effective_workspace}_{self.namespace}"
+           logger.debug(f"Final namespace with workspace prefix: '{self.namespace}'")
+       # When workspace is empty, keep the original namespace unchanged
+
        redis_url = os.environ.get(
            "REDIS_URI", config.get("redis", "uri", fallback="redis://localhost:6379")
        )
lightrag/lightrag.py
CHANGED
@@ -51,7 +51,7 @@ from .base import (
    StoragesStatus,
    DeletionResult,
)
-from .namespace import NameSpace
+from .namespace import NameSpace
from .operate import (
    chunking_by_token_size,
    extract_entities,

@@ -97,9 +97,7 @@ class LightRAG:
    # Directory
    # ---

-   working_dir: str = field(
-       default=f"./lightrag_cache_{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
-   )
+   working_dir: str = field(default="./rag_storage")
    """Directory where cache and temporary files are stored."""

    # Storage

@@ -117,6 +115,12 @@ class LightRAG:
    doc_status_storage: str = field(default="JsonDocStatusStorage")
    """Storage type for tracking document processing statuses."""

+   # Workspace
+   # ---
+
+   workspace: str = field(default_factory=lambda: os.getenv("WORKSPACE", ""))
+   """Workspace for data isolation. Defaults to empty string if WORKSPACE environment variable is not set."""
+
    # Logging (Deprecated, use setup_logger in utils.py instead)
    # ---
    log_level: int | None = field(default=None)

@@ -242,10 +246,6 @@ class LightRAG:
    vector_db_storage_cls_kwargs: dict[str, Any] = field(default_factory=dict)
    """Additional parameters for vector database storage."""

-   # TODO:deprecated, remove in the future, use WORKSPACE instead
-   namespace_prefix: str = field(default="")
-   """Prefix for namespacing stored data across different environments."""
-
    enable_llm_cache: bool = field(default=True)
    """Enables caching for LLM responses to avoid redundant computations."""

@@ -382,61 +382,53 @@ class LightRAG:
        self.doc_status_storage_cls = self._get_storage_class(self.doc_status_storage)

        self.llm_response_cache: BaseKVStorage = self.key_string_value_json_storage_cls(  # type: ignore
-           namespace=
-           global_config=asdict(
-               self
-           ),  # Add global_config to ensure cache works properly
+           namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
+           workspace=self.workspace,
+           global_config=global_config,
            embedding_func=self.embedding_func,
        )

        self.full_docs: BaseKVStorage = self.key_string_value_json_storage_cls(  # type: ignore
-           namespace=
-           ),
+           namespace=NameSpace.KV_STORE_FULL_DOCS,
+           workspace=self.workspace,
            embedding_func=self.embedding_func,
        )

        self.text_chunks: BaseKVStorage = self.key_string_value_json_storage_cls(  # type: ignore
-           namespace=
-           ),
+           namespace=NameSpace.KV_STORE_TEXT_CHUNKS,
+           workspace=self.workspace,
            embedding_func=self.embedding_func,
        )

        self.chunk_entity_relation_graph: BaseGraphStorage = self.graph_storage_cls(  # type: ignore
-           namespace=
-           ),
+           namespace=NameSpace.GRAPH_STORE_CHUNK_ENTITY_RELATION,
+           workspace=self.workspace,
            embedding_func=self.embedding_func,
        )

        self.entities_vdb: BaseVectorStorage = self.vector_db_storage_cls(  # type: ignore
-           namespace=
-           ),
+           namespace=NameSpace.VECTOR_STORE_ENTITIES,
+           workspace=self.workspace,
            embedding_func=self.embedding_func,
            meta_fields={"entity_name", "source_id", "content", "file_path"},
        )
        self.relationships_vdb: BaseVectorStorage = self.vector_db_storage_cls(  # type: ignore
-           namespace=
-           ),
+           namespace=NameSpace.VECTOR_STORE_RELATIONSHIPS,
+           workspace=self.workspace,
            embedding_func=self.embedding_func,
            meta_fields={"src_id", "tgt_id", "source_id", "content", "file_path"},
        )
        self.chunks_vdb: BaseVectorStorage = self.vector_db_storage_cls(  # type: ignore
-           namespace=
-           ),
+           namespace=NameSpace.VECTOR_STORE_CHUNKS,
+           workspace=self.workspace,
            embedding_func=self.embedding_func,
            meta_fields={"full_doc_id", "content", "file_path"},
        )

        # Initialize document status storage
        self.doc_status: DocStatusStorage = self.doc_status_storage_cls(
-           namespace=
+           namespace=NameSpace.DOC_STATUS,
+           workspace=self.workspace,
            global_config=global_config,
            embedding_func=None,
        )

@@ -965,6 +957,9 @@ class LightRAG:
            )
        }

+       if not chunks:
+           logger.warning("No document chunks to process")
+
        # Process document in two stages
        # Stage 1: Process text chunks and docs (parallel execution)
        doc_status_task = asyncio.create_task(
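At the orchestration level, the new workspace field is read from the WORKSPACE environment variable by default and is handed to every storage instance, so isolating two tenants can be as simple as constructing two LightRAG objects with different workspaces. A minimal sketch under that assumption; the embedding and LLM model functions (and the usual async initialization calls) are omitted here but are required in real use:

    from lightrag import LightRAG

    # Two logically separate knowledge bases that can share the same backends.
    rag_a = LightRAG(working_dir="./rag_storage", workspace="tenant_a")
    rag_b = LightRAG(working_dir="./rag_storage", workspace="tenant_b")

    # Leaving workspace unset falls back to the WORKSPACE environment variable,
    # and finally to "" (no isolation) when that is not defined either.
    rag_default = LightRAG(working_dir="./rag_storage")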
lightrag/namespace.py
CHANGED
@@ -17,10 +17,6 @@ class NameSpace:
    DOC_STATUS = "doc_status"


-def make_namespace(prefix: str, base_namespace: str):
-    return prefix + base_namespace
-
-
def is_namespace(namespace: str, base_namespace: str | Iterable[str]):
    if isinstance(base_namespace, str):
        return namespace.endswith(base_namespace)
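Because isolation is now handled by prefixing names with the workspace rather than by make_namespace, the remaining is_namespace helper only needs a suffix check, which still matches prefixed names. A short usage sketch, assuming the helper stays importable from lightrag.namespace as shown above:

    from lightrag.namespace import NameSpace, is_namespace

    # A workspace-prefixed name still ends with the base namespace, so the
    # suffix check continues to match after the prefixing change.
    print(is_namespace("tenant_a_doc_status", NameSpace.DOC_STATUS))  # True
    print(is_namespace("tenant_a_doc_status", "full_docs"))           # False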
lightrag_webui/src/api/lightrag.ts
CHANGED
@@ -40,6 +40,8 @@ export type LightragStatus = {
    doc_status_storage: string
    graph_storage: string
    vector_storage: string
+   workspace?: string
+   max_graph_nodes?: string
  }
  update_status?: Record<string, any>
  core_version?: string
lightrag_webui/src/components/status/StatusCard.tsx
CHANGED
@@ -56,6 +56,10 @@ const StatusCard = ({ status }: { status: LightragStatus | null }) => {
          <span>{status.configuration.graph_storage}</span>
          <span>{t('graphPanel.statusCard.vectorStorage')}:</span>
          <span>{status.configuration.vector_storage}</span>
+         <span>{t('graphPanel.statusCard.workspace')}:</span>
+         <span>{status.configuration.workspace || '-'}</span>
+         <span>{t('graphPanel.statusCard.maxGraphNodes')}:</span>
+         <span>{status.configuration.max_graph_nodes || '-'}</span>
        </div>
      </div>
    </div>
lightrag_webui/src/locales/ar.json
CHANGED
@@ -263,7 +263,9 @@
    "kvStorage": "تخزين المفتاح-القيمة",
    "docStatusStorage": "تخزين حالة المستند",
    "graphStorage": "تخزين الرسم البياني",
-   "vectorStorage": "تخزين المتجهات"
+   "vectorStorage": "تخزين المتجهات",
+   "workspace": "مساحة العمل",
+   "maxGraphNodes": "الحد الأقصى لعقد الرسم البياني"
  },
  "propertiesView": {
    "editProperty": "تعديل {{property}}",
lightrag_webui/src/locales/en.json
CHANGED
@@ -263,7 +263,9 @@
    "kvStorage": "KV Storage",
    "docStatusStorage": "Doc Status Storage",
    "graphStorage": "Graph Storage",
-   "vectorStorage": "Vector Storage"
+   "vectorStorage": "Vector Storage",
+   "workspace": "Workspace",
+   "maxGraphNodes": "Max Graph Nodes"
  },
  "propertiesView": {
    "editProperty": "Edit {{property}}",
lightrag_webui/src/locales/fr.json
CHANGED
@@ -263,7 +263,9 @@
    "kvStorage": "Stockage clé-valeur",
    "docStatusStorage": "Stockage de l'état des documents",
    "graphStorage": "Stockage du graphe",
-   "vectorStorage": "Stockage vectoriel"
+   "vectorStorage": "Stockage vectoriel",
+   "workspace": "Espace de travail",
+   "maxGraphNodes": "Nombre maximum de nœuds du graphe"
  },
  "propertiesView": {
    "editProperty": "Modifier {{property}}",
lightrag_webui/src/locales/zh.json
CHANGED
@@ -263,7 +263,9 @@
    "kvStorage": "KV存储",
    "docStatusStorage": "文档状态存储",
    "graphStorage": "图存储",
-   "vectorStorage": "向量存储"
+   "vectorStorage": "向量存储",
+   "workspace": "工作空间",
+   "maxGraphNodes": "最大图节点数"
  },
  "propertiesView": {
    "editProperty": "编辑{{property}}",
lightrag_webui/src/locales/zh_TW.json
CHANGED
@@ -263,7 +263,9 @@
    "kvStorage": "KV 儲存",
    "docStatusStorage": "文件狀態儲存",
    "graphStorage": "圖形儲存",
-   "vectorStorage": "向量儲存"
+   "vectorStorage": "向量儲存",
+   "workspace": "工作空間",
+   "maxGraphNodes": "最大圖形節點數"
  },
  "propertiesView": {
    "editProperty": "編輯{{property}}",
pyproject.toml
CHANGED
@@ -11,7 +11,7 @@ authors = [
description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
readme = "README.md"
license = {text = "MIT"}
-requires-python = ">=3.
+requires-python = ">=3.10"
classifiers = [
    "Development Status :: 4 - Beta",
    "Programming Language :: Python :: 3",

@@ -91,3 +91,6 @@ version = {attr = "lightrag.__version__"}

[tool.setuptools.package-data]
lightrag = ["api/webui/**/*"]
+
+[tool.ruff]
+target-version = "py310"
reproduce/Step_3.py
CHANGED
@@ -28,9 +28,10 @@ def run_queries_and_save_to_json(
):
    loop = always_get_an_event_loop()

-   with
+   with (
+       open(output_file, "a", encoding="utf-8") as result_file,
+       open(error_file, "a", encoding="utf-8") as err_file,
+   ):
        result_file.write("[\n")
        first_entry = True
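The rewritten with (...) form groups both file handles in one parenthesized context-manager list; this syntax requires Python 3.10 or newer, which matches the requires-python = ">=3.10" bump above. An equivalent standalone example with illustrative file names:

    # Parenthesized multi-item with-statements are valid from Python 3.10 onward.
    with (
        open("results.json", "a", encoding="utf-8") as result_file,
        open("errors.txt", "a", encoding="utf-8") as err_file,
    ):
        result_file.write("[\n")
        err_file.write("run started\n")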
reproduce/Step_3_openai_compatible.py
CHANGED
@@ -59,9 +59,10 @@ def run_queries_and_save_to_json(
):
    loop = always_get_an_event_loop()

-   with
+   with (
+       open(output_file, "a", encoding="utf-8") as result_file,
+       open(error_file, "a", encoding="utf-8") as err_file,
+   ):
        result_file.write("[\n")
        first_entry = True