Merge branch 'HKUDS:main' into main
Browse files- lightrag/__init__.py +1 -1
- lightrag/api/lightrag_server.py +21 -21
- lightrag/api/routers/document_routes.py +88 -45
- lightrag/api/routers/ollama_api.py +1 -1
- lightrag/api/utils_api.py +14 -1
- lightrag/base.py +24 -0
- lightrag/kg/chroma_impl.py +64 -0
- lightrag/kg/faiss_impl.py +43 -0
- lightrag/kg/json_doc_status_impl.py +34 -17
- lightrag/kg/json_kv_impl.py +58 -16
- lightrag/kg/milvus_impl.py +54 -0
- lightrag/kg/mongo_impl.py +53 -0
- lightrag/kg/nano_vector_db_impl.py +30 -0
- lightrag/kg/neo4j_impl.py +616 -355
- lightrag/kg/oracle_impl.py +74 -0
- lightrag/kg/postgres_impl.py +54 -0
- lightrag/kg/shared_storage.py +189 -36
- lightrag/kg/tidb_impl.py +94 -0
- lightrag/lightrag.py +77 -203
- lightrag/llm/azure_openai.py +2 -0
- lightrag/operate.py +220 -62
- lightrag/prompt.py +1 -1
- lightrag/utils.py +91 -14
- lightrag_webui/bun.lock +10 -0
- lightrag_webui/package.json +2 -0
- lightrag_webui/src/components/ThemeToggle.tsx +4 -2
- lightrag_webui/src/components/documents/ClearDocumentsDialog.tsx +10 -8
- lightrag_webui/src/components/documents/UploadDocumentsDialog.tsx +12 -10
- lightrag_webui/src/components/graph/FullScreenControl.tsx +4 -2
- lightrag_webui/src/components/graph/GraphLabels.tsx +6 -4
- lightrag_webui/src/components/graph/GraphSearch.tsx +4 -2
- lightrag_webui/src/components/graph/LayoutsControl.tsx +6 -3
- lightrag_webui/src/components/graph/PropertiesView.tsx +15 -12
- lightrag_webui/src/components/graph/Settings.tsx +18 -16
- lightrag_webui/src/components/graph/StatusCard.tsx +20 -18
- lightrag_webui/src/components/graph/StatusIndicator.tsx +3 -1
- lightrag_webui/src/components/graph/ZoomControl.tsx +5 -3
- lightrag_webui/src/components/retrieval/ChatMessage.tsx +5 -2
- lightrag_webui/src/components/retrieval/QuerySettings.tsx +43 -41
- lightrag_webui/src/features/DocumentManager.tsx +24 -22
- lightrag_webui/src/features/RetrievalTesting.tsx +8 -6
- lightrag_webui/src/features/SiteHeader.tsx +8 -5
- lightrag_webui/src/i18n.js +21 -0
- lightrag_webui/src/locales/en.json +234 -0
- lightrag_webui/src/locales/zh.json +235 -0
- lightrag_webui/src/main.tsx +2 -0
lightrag/__init__.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
2 |
|
3 |
-
__version__ = "1.2.
|
4 |
__author__ = "Zirui Guo"
|
5 |
__url__ = "https://github.com/HKUDS/LightRAG"
|
|
|
1 |
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
2 |
|
3 |
+
__version__ = "1.2.5"
|
4 |
__author__ = "Zirui Guo"
|
5 |
__url__ = "https://github.com/HKUDS/LightRAG"
|
lightrag/api/lightrag_server.py
CHANGED
@@ -50,9 +50,6 @@ from .auth import auth_handler
|
|
50 |
# This update allows the user to put a different.env file for each lightrag folder
|
51 |
load_dotenv(".env", override=True)
|
52 |
|
53 |
-
# Read entity extraction cache config
|
54 |
-
enable_llm_cache = os.getenv("ENABLE_LLM_CACHE_FOR_EXTRACT", "false").lower() == "true"
|
55 |
-
|
56 |
# Initialize config parser
|
57 |
config = configparser.ConfigParser()
|
58 |
config.read("config.ini")
|
@@ -144,23 +141,25 @@ def create_app(args):
|
|
144 |
try:
|
145 |
# Initialize database connections
|
146 |
await rag.initialize_storages()
|
147 |
-
await initialize_pipeline_status()
|
148 |
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
|
|
|
|
|
|
164 |
|
165 |
ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
|
166 |
|
@@ -326,7 +325,7 @@ def create_app(args):
|
|
326 |
vector_db_storage_cls_kwargs={
|
327 |
"cosine_better_than_threshold": args.cosine_threshold
|
328 |
},
|
329 |
-
enable_llm_cache_for_entity_extract=
|
330 |
embedding_cache_config={
|
331 |
"enabled": True,
|
332 |
"similarity_threshold": 0.95,
|
@@ -355,7 +354,7 @@ def create_app(args):
|
|
355 |
vector_db_storage_cls_kwargs={
|
356 |
"cosine_better_than_threshold": args.cosine_threshold
|
357 |
},
|
358 |
-
enable_llm_cache_for_entity_extract=
|
359 |
embedding_cache_config={
|
360 |
"enabled": True,
|
361 |
"similarity_threshold": 0.95,
|
@@ -419,6 +418,7 @@ def create_app(args):
|
|
419 |
"doc_status_storage": args.doc_status_storage,
|
420 |
"graph_storage": args.graph_storage,
|
421 |
"vector_storage": args.vector_storage,
|
|
|
422 |
},
|
423 |
"update_status": update_status,
|
424 |
}
|
|
|
50 |
# This update allows the user to put a different.env file for each lightrag folder
|
51 |
load_dotenv(".env", override=True)
|
52 |
|
|
|
|
|
|
|
53 |
# Initialize config parser
|
54 |
config = configparser.ConfigParser()
|
55 |
config.read("config.ini")
|
|
|
141 |
try:
|
142 |
# Initialize database connections
|
143 |
await rag.initialize_storages()
|
|
|
144 |
|
145 |
+
await initialize_pipeline_status()
|
146 |
+
pipeline_status = await get_namespace_data("pipeline_status")
|
147 |
+
|
148 |
+
should_start_autoscan = False
|
149 |
+
async with get_pipeline_status_lock():
|
150 |
+
# Auto scan documents if enabled
|
151 |
+
if args.auto_scan_at_startup:
|
152 |
+
if not pipeline_status.get("autoscanned", False):
|
153 |
+
pipeline_status["autoscanned"] = True
|
154 |
+
should_start_autoscan = True
|
155 |
+
|
156 |
+
# Only run auto scan when no other process started it first
|
157 |
+
if should_start_autoscan:
|
158 |
+
# Create background task
|
159 |
+
task = asyncio.create_task(run_scanning_process(rag, doc_manager))
|
160 |
+
app.state.background_tasks.add(task)
|
161 |
+
task.add_done_callback(app.state.background_tasks.discard)
|
162 |
+
logger.info(f"Process {os.getpid()} auto scan task started at startup.")
|
163 |
|
164 |
ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
|
165 |
|
|
|
325 |
vector_db_storage_cls_kwargs={
|
326 |
"cosine_better_than_threshold": args.cosine_threshold
|
327 |
},
|
328 |
+
enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
|
329 |
embedding_cache_config={
|
330 |
"enabled": True,
|
331 |
"similarity_threshold": 0.95,
|
|
|
354 |
vector_db_storage_cls_kwargs={
|
355 |
"cosine_better_than_threshold": args.cosine_threshold
|
356 |
},
|
357 |
+
enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
|
358 |
embedding_cache_config={
|
359 |
"enabled": True,
|
360 |
"similarity_threshold": 0.95,
|
|
|
418 |
"doc_status_storage": args.doc_status_storage,
|
419 |
"graph_storage": args.graph_storage,
|
420 |
"vector_storage": args.vector_storage,
|
421 |
+
"enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
|
422 |
},
|
423 |
"update_status": update_status,
|
424 |
}
|
lightrag/api/routers/document_routes.py
CHANGED
@@ -16,7 +16,11 @@ from pydantic import BaseModel, Field, field_validator
|
|
16 |
|
17 |
from lightrag import LightRAG
|
18 |
from lightrag.base import DocProcessingStatus, DocStatus
|
19 |
-
from
|
|
|
|
|
|
|
|
|
20 |
|
21 |
router = APIRouter(
|
22 |
prefix="/documents",
|
@@ -240,54 +244,93 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|
240 |
)
|
241 |
return False
|
242 |
case ".pdf":
|
243 |
-
if
|
244 |
-
pm.
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
case ".docx":
|
253 |
-
if
|
254 |
-
pm.
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
case ".pptx":
|
262 |
-
if
|
263 |
-
pm.
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
273 |
case ".xlsx":
|
274 |
-
if
|
275 |
-
pm.
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
)
|
288 |
-
|
289 |
-
)
|
290 |
-
content += "\n"
|
291 |
case _:
|
292 |
logger.error(
|
293 |
f"Unsupported file type: {file_path.name} (extension {ext})"
|
|
|
16 |
|
17 |
from lightrag import LightRAG
|
18 |
from lightrag.base import DocProcessingStatus, DocStatus
|
19 |
+
from lightrag.api.utils_api import (
|
20 |
+
get_api_key_dependency,
|
21 |
+
global_args,
|
22 |
+
get_auth_dependency,
|
23 |
+
)
|
24 |
|
25 |
router = APIRouter(
|
26 |
prefix="/documents",
|
|
|
244 |
)
|
245 |
return False
|
246 |
case ".pdf":
|
247 |
+
if global_args["main_args"].document_loading_engine == "DOCLING":
|
248 |
+
if not pm.is_installed("docling"): # type: ignore
|
249 |
+
pm.install("docling")
|
250 |
+
from docling.document_converter import DocumentConverter
|
251 |
+
|
252 |
+
converter = DocumentConverter()
|
253 |
+
result = converter.convert(file_path)
|
254 |
+
content = result.document.export_to_markdown()
|
255 |
+
else:
|
256 |
+
if not pm.is_installed("pypdf2"): # type: ignore
|
257 |
+
pm.install("pypdf2")
|
258 |
+
from PyPDF2 import PdfReader # type: ignore
|
259 |
+
from io import BytesIO
|
260 |
+
|
261 |
+
pdf_file = BytesIO(file)
|
262 |
+
reader = PdfReader(pdf_file)
|
263 |
+
for page in reader.pages:
|
264 |
+
content += page.extract_text() + "\n"
|
265 |
case ".docx":
|
266 |
+
if global_args["main_args"].document_loading_engine == "DOCLING":
|
267 |
+
if not pm.is_installed("docling"): # type: ignore
|
268 |
+
pm.install("docling")
|
269 |
+
from docling.document_converter import DocumentConverter
|
270 |
+
|
271 |
+
converter = DocumentConverter()
|
272 |
+
result = converter.convert(file_path)
|
273 |
+
content = result.document.export_to_markdown()
|
274 |
+
else:
|
275 |
+
if not pm.is_installed("python-docx"): # type: ignore
|
276 |
+
pm.install("docx")
|
277 |
+
from docx import Document # type: ignore
|
278 |
+
from io import BytesIO
|
279 |
+
|
280 |
+
docx_file = BytesIO(file)
|
281 |
+
doc = Document(docx_file)
|
282 |
+
content = "\n".join(
|
283 |
+
[paragraph.text for paragraph in doc.paragraphs]
|
284 |
+
)
|
285 |
case ".pptx":
|
286 |
+
if global_args["main_args"].document_loading_engine == "DOCLING":
|
287 |
+
if not pm.is_installed("docling"): # type: ignore
|
288 |
+
pm.install("docling")
|
289 |
+
from docling.document_converter import DocumentConverter
|
290 |
+
|
291 |
+
converter = DocumentConverter()
|
292 |
+
result = converter.convert(file_path)
|
293 |
+
content = result.document.export_to_markdown()
|
294 |
+
else:
|
295 |
+
if not pm.is_installed("python-pptx"): # type: ignore
|
296 |
+
pm.install("pptx")
|
297 |
+
from pptx import Presentation # type: ignore
|
298 |
+
from io import BytesIO
|
299 |
+
|
300 |
+
pptx_file = BytesIO(file)
|
301 |
+
prs = Presentation(pptx_file)
|
302 |
+
for slide in prs.slides:
|
303 |
+
for shape in slide.shapes:
|
304 |
+
if hasattr(shape, "text"):
|
305 |
+
content += shape.text + "\n"
|
306 |
case ".xlsx":
|
307 |
+
if global_args["main_args"].document_loading_engine == "DOCLING":
|
308 |
+
if not pm.is_installed("docling"): # type: ignore
|
309 |
+
pm.install("docling")
|
310 |
+
from docling.document_converter import DocumentConverter
|
311 |
+
|
312 |
+
converter = DocumentConverter()
|
313 |
+
result = converter.convert(file_path)
|
314 |
+
content = result.document.export_to_markdown()
|
315 |
+
else:
|
316 |
+
if not pm.is_installed("openpyxl"): # type: ignore
|
317 |
+
pm.install("openpyxl")
|
318 |
+
from openpyxl import load_workbook # type: ignore
|
319 |
+
from io import BytesIO
|
320 |
+
|
321 |
+
xlsx_file = BytesIO(file)
|
322 |
+
wb = load_workbook(xlsx_file)
|
323 |
+
for sheet in wb:
|
324 |
+
content += f"Sheet: {sheet.title}\n"
|
325 |
+
for row in sheet.iter_rows(values_only=True):
|
326 |
+
content += (
|
327 |
+
"\t".join(
|
328 |
+
str(cell) if cell is not None else ""
|
329 |
+
for cell in row
|
330 |
+
)
|
331 |
+
+ "\n"
|
332 |
)
|
333 |
+
content += "\n"
|
|
|
|
|
334 |
case _:
|
335 |
logger.error(
|
336 |
f"Unsupported file type: {file_path.name} (extension {ext})"
|
lightrag/api/routers/ollama_api.py
CHANGED
@@ -11,7 +11,7 @@ import asyncio
|
|
11 |
from ascii_colors import trace_exception
|
12 |
from lightrag import LightRAG, QueryParam
|
13 |
from lightrag.utils import encode_string_by_tiktoken
|
14 |
-
from
|
15 |
|
16 |
|
17 |
# query mode according to query prefix (bypass is not LightRAG quer mode)
|
|
|
11 |
from ascii_colors import trace_exception
|
12 |
from lightrag import LightRAG, QueryParam
|
13 |
from lightrag.utils import encode_string_by_tiktoken
|
14 |
+
from lightrag.api.utils_api import ollama_server_infos
|
15 |
|
16 |
|
17 |
# query mode according to query prefix (bypass is not LightRAG quer mode)
|
lightrag/api/utils_api.py
CHANGED
@@ -18,6 +18,8 @@ from .auth import auth_handler
|
|
18 |
# Load environment variables
|
19 |
load_dotenv(override=True)
|
20 |
|
|
|
|
|
21 |
|
22 |
class OllamaServerInfos:
|
23 |
# Constants for emulated Ollama model information
|
@@ -360,8 +362,17 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
|
|
360 |
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
|
361 |
args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
|
362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
363 |
ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
|
364 |
|
|
|
365 |
return args
|
366 |
|
367 |
|
@@ -451,8 +462,10 @@ def display_splash_screen(args: argparse.Namespace) -> None:
|
|
451 |
ASCIIColors.yellow(f"{args.history_turns}")
|
452 |
ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
|
453 |
ASCIIColors.yellow(f"{args.cosine_threshold}")
|
454 |
-
ASCIIColors.white("
|
455 |
ASCIIColors.yellow(f"{args.top_k}")
|
|
|
|
|
456 |
|
457 |
# System Configuration
|
458 |
ASCIIColors.magenta("\n💾 Storage Configuration:")
|
|
|
18 |
# Load environment variables
|
19 |
load_dotenv(override=True)
|
20 |
|
21 |
+
global_args = {"main_args": None}
|
22 |
+
|
23 |
|
24 |
class OllamaServerInfos:
|
25 |
# Constants for emulated Ollama model information
|
|
|
362 |
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
|
363 |
args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
|
364 |
|
365 |
+
# Inject LLM cache configuration
|
366 |
+
args.enable_llm_cache_for_extract = get_env_value(
|
367 |
+
"ENABLE_LLM_CACHE_FOR_EXTRACT", False, bool
|
368 |
+
)
|
369 |
+
|
370 |
+
# Select Document loading tool (DOCLING, DEFAULT)
|
371 |
+
args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
|
372 |
+
|
373 |
ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
|
374 |
|
375 |
+
global_args["main_args"] = args
|
376 |
return args
|
377 |
|
378 |
|
|
|
462 |
ASCIIColors.yellow(f"{args.history_turns}")
|
463 |
ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
|
464 |
ASCIIColors.yellow(f"{args.cosine_threshold}")
|
465 |
+
ASCIIColors.white(" ├─ Top-K: ", end="")
|
466 |
ASCIIColors.yellow(f"{args.top_k}")
|
467 |
+
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
|
468 |
+
ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
|
469 |
|
470 |
# System Configuration
|
471 |
ASCIIColors.magenta("\n💾 Storage Configuration:")
|
lightrag/base.py
CHANGED
@@ -127,6 +127,30 @@ class BaseVectorStorage(StorageNameSpace, ABC):
|
|
127 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
128 |
"""Delete relations for a given entity."""
|
129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
@dataclass
|
132 |
class BaseKVStorage(StorageNameSpace, ABC):
|
|
|
127 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
128 |
"""Delete relations for a given entity."""
|
129 |
|
130 |
+
@abstractmethod
|
131 |
+
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
132 |
+
"""Get vector data by its ID
|
133 |
+
|
134 |
+
Args:
|
135 |
+
id: The unique identifier of the vector
|
136 |
+
|
137 |
+
Returns:
|
138 |
+
The vector data if found, or None if not found
|
139 |
+
"""
|
140 |
+
pass
|
141 |
+
|
142 |
+
@abstractmethod
|
143 |
+
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
144 |
+
"""Get multiple vector data by their IDs
|
145 |
+
|
146 |
+
Args:
|
147 |
+
ids: List of unique identifiers
|
148 |
+
|
149 |
+
Returns:
|
150 |
+
List of vector data objects that were found
|
151 |
+
"""
|
152 |
+
pass
|
153 |
+
|
154 |
|
155 |
@dataclass
|
156 |
class BaseKVStorage(StorageNameSpace, ABC):
|
lightrag/kg/chroma_impl.py
CHANGED
@@ -271,3 +271,67 @@ class ChromaVectorDBStorage(BaseVectorStorage):
|
|
271 |
except Exception as e:
|
272 |
logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
|
273 |
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
except Exception as e:
|
272 |
logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
|
273 |
raise
|
274 |
+
|
275 |
+
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
276 |
+
"""Get vector data by its ID
|
277 |
+
|
278 |
+
Args:
|
279 |
+
id: The unique identifier of the vector
|
280 |
+
|
281 |
+
Returns:
|
282 |
+
The vector data if found, or None if not found
|
283 |
+
"""
|
284 |
+
try:
|
285 |
+
# Query the collection for a single vector by ID
|
286 |
+
result = self._collection.get(
|
287 |
+
ids=[id], include=["metadatas", "embeddings", "documents"]
|
288 |
+
)
|
289 |
+
|
290 |
+
if not result or not result["ids"] or len(result["ids"]) == 0:
|
291 |
+
return None
|
292 |
+
|
293 |
+
# Format the result to match the expected structure
|
294 |
+
return {
|
295 |
+
"id": result["ids"][0],
|
296 |
+
"vector": result["embeddings"][0],
|
297 |
+
"content": result["documents"][0],
|
298 |
+
**result["metadatas"][0],
|
299 |
+
}
|
300 |
+
except Exception as e:
|
301 |
+
logger.error(f"Error retrieving vector data for ID {id}: {e}")
|
302 |
+
return None
|
303 |
+
|
304 |
+
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
305 |
+
"""Get multiple vector data by their IDs
|
306 |
+
|
307 |
+
Args:
|
308 |
+
ids: List of unique identifiers
|
309 |
+
|
310 |
+
Returns:
|
311 |
+
List of vector data objects that were found
|
312 |
+
"""
|
313 |
+
if not ids:
|
314 |
+
return []
|
315 |
+
|
316 |
+
try:
|
317 |
+
# Query the collection for multiple vectors by IDs
|
318 |
+
result = self._collection.get(
|
319 |
+
ids=ids, include=["metadatas", "embeddings", "documents"]
|
320 |
+
)
|
321 |
+
|
322 |
+
if not result or not result["ids"] or len(result["ids"]) == 0:
|
323 |
+
return []
|
324 |
+
|
325 |
+
# Format the results to match the expected structure
|
326 |
+
return [
|
327 |
+
{
|
328 |
+
"id": result["ids"][i],
|
329 |
+
"vector": result["embeddings"][i],
|
330 |
+
"content": result["documents"][i],
|
331 |
+
**result["metadatas"][i],
|
332 |
+
}
|
333 |
+
for i in range(len(result["ids"]))
|
334 |
+
]
|
335 |
+
except Exception as e:
|
336 |
+
logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
|
337 |
+
return []
|
lightrag/kg/faiss_impl.py
CHANGED
@@ -394,3 +394,46 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
394 |
|
395 |
logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
|
396 |
return matching_records
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
|
395 |
logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
|
396 |
return matching_records
|
397 |
+
|
398 |
+
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
399 |
+
"""Get vector data by its ID
|
400 |
+
|
401 |
+
Args:
|
402 |
+
id: The unique identifier of the vector
|
403 |
+
|
404 |
+
Returns:
|
405 |
+
The vector data if found, or None if not found
|
406 |
+
"""
|
407 |
+
# Find the Faiss internal ID for the custom ID
|
408 |
+
fid = self._find_faiss_id_by_custom_id(id)
|
409 |
+
if fid is None:
|
410 |
+
return None
|
411 |
+
|
412 |
+
# Get the metadata for the found ID
|
413 |
+
metadata = self._id_to_meta.get(fid, {})
|
414 |
+
if not metadata:
|
415 |
+
return None
|
416 |
+
|
417 |
+
return {**metadata, "id": metadata.get("__id__")}
|
418 |
+
|
419 |
+
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
420 |
+
"""Get multiple vector data by their IDs
|
421 |
+
|
422 |
+
Args:
|
423 |
+
ids: List of unique identifiers
|
424 |
+
|
425 |
+
Returns:
|
426 |
+
List of vector data objects that were found
|
427 |
+
"""
|
428 |
+
if not ids:
|
429 |
+
return []
|
430 |
+
|
431 |
+
results = []
|
432 |
+
for id in ids:
|
433 |
+
fid = self._find_faiss_id_by_custom_id(id)
|
434 |
+
if fid is not None:
|
435 |
+
metadata = self._id_to_meta.get(fid, {})
|
436 |
+
if metadata:
|
437 |
+
results.append({**metadata, "id": metadata.get("__id__")})
|
438 |
+
|
439 |
+
return results
|
lightrag/kg/json_doc_status_impl.py
CHANGED
@@ -15,6 +15,10 @@ from lightrag.utils import (
|
|
15 |
from .shared_storage import (
|
16 |
get_namespace_data,
|
17 |
get_storage_lock,
|
|
|
|
|
|
|
|
|
18 |
try_initialize_namespace,
|
19 |
)
|
20 |
|
@@ -27,21 +31,25 @@ class JsonDocStatusStorage(DocStatusStorage):
|
|
27 |
def __post_init__(self):
|
28 |
working_dir = self.global_config["working_dir"]
|
29 |
self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
|
30 |
-
self._storage_lock = get_storage_lock()
|
31 |
self._data = None
|
|
|
|
|
32 |
|
33 |
async def initialize(self):
|
34 |
"""Initialize storage data"""
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
45 |
|
46 |
async def filter_keys(self, keys: set[str]) -> set[str]:
|
47 |
"""Return keys that should be processed (not in storage or not successfully processed)"""
|
@@ -87,18 +95,24 @@ class JsonDocStatusStorage(DocStatusStorage):
|
|
87 |
|
88 |
async def index_done_callback(self) -> None:
|
89 |
async with self._storage_lock:
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
96 |
-
logger.info(f"Inserting {len(data)} to {self.namespace}")
|
97 |
if not data:
|
98 |
return
|
99 |
-
|
100 |
async with self._storage_lock:
|
101 |
self._data.update(data)
|
|
|
|
|
102 |
await self.index_done_callback()
|
103 |
|
104 |
async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
|
@@ -109,9 +123,12 @@ class JsonDocStatusStorage(DocStatusStorage):
|
|
109 |
async with self._storage_lock:
|
110 |
for doc_id in doc_ids:
|
111 |
self._data.pop(doc_id, None)
|
|
|
112 |
await self.index_done_callback()
|
113 |
|
114 |
async def drop(self) -> None:
|
115 |
"""Drop the storage"""
|
116 |
async with self._storage_lock:
|
117 |
self._data.clear()
|
|
|
|
|
|
15 |
from .shared_storage import (
|
16 |
get_namespace_data,
|
17 |
get_storage_lock,
|
18 |
+
get_data_init_lock,
|
19 |
+
get_update_flag,
|
20 |
+
set_all_update_flags,
|
21 |
+
clear_all_update_flags,
|
22 |
try_initialize_namespace,
|
23 |
)
|
24 |
|
|
|
31 |
def __post_init__(self):
|
32 |
working_dir = self.global_config["working_dir"]
|
33 |
self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
|
|
|
34 |
self._data = None
|
35 |
+
self._storage_lock = None
|
36 |
+
self.storage_updated = None
|
37 |
|
38 |
async def initialize(self):
|
39 |
"""Initialize storage data"""
|
40 |
+
self._storage_lock = get_storage_lock()
|
41 |
+
self.storage_updated = await get_update_flag(self.namespace)
|
42 |
+
async with get_data_init_lock():
|
43 |
+
# check need_init must before get_namespace_data
|
44 |
+
need_init = await try_initialize_namespace(self.namespace)
|
45 |
+
self._data = await get_namespace_data(self.namespace)
|
46 |
+
if need_init:
|
47 |
+
loaded_data = load_json(self._file_name) or {}
|
48 |
+
async with self._storage_lock:
|
49 |
+
self._data.update(loaded_data)
|
50 |
+
logger.info(
|
51 |
+
f"Process {os.getpid()} doc status load {self.namespace} with {len(loaded_data)} records"
|
52 |
+
)
|
53 |
|
54 |
async def filter_keys(self, keys: set[str]) -> set[str]:
|
55 |
"""Return keys that should be processed (not in storage or not successfully processed)"""
|
|
|
95 |
|
96 |
async def index_done_callback(self) -> None:
|
97 |
async with self._storage_lock:
|
98 |
+
if self.storage_updated.value:
|
99 |
+
data_dict = (
|
100 |
+
dict(self._data) if hasattr(self._data, "_getvalue") else self._data
|
101 |
+
)
|
102 |
+
logger.info(
|
103 |
+
f"Process {os.getpid()} doc status writting {len(data_dict)} records to {self.namespace}"
|
104 |
+
)
|
105 |
+
write_json(data_dict, self._file_name)
|
106 |
+
await clear_all_update_flags(self.namespace)
|
107 |
|
108 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
|
|
109 |
if not data:
|
110 |
return
|
111 |
+
logger.info(f"Inserting {len(data)} records to {self.namespace}")
|
112 |
async with self._storage_lock:
|
113 |
self._data.update(data)
|
114 |
+
await set_all_update_flags(self.namespace)
|
115 |
+
|
116 |
await self.index_done_callback()
|
117 |
|
118 |
async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
|
|
|
123 |
async with self._storage_lock:
|
124 |
for doc_id in doc_ids:
|
125 |
self._data.pop(doc_id, None)
|
126 |
+
await set_all_update_flags(self.namespace)
|
127 |
await self.index_done_callback()
|
128 |
|
129 |
async def drop(self) -> None:
|
130 |
"""Drop the storage"""
|
131 |
async with self._storage_lock:
|
132 |
self._data.clear()
|
133 |
+
await set_all_update_flags(self.namespace)
|
134 |
+
await self.index_done_callback()
|
lightrag/kg/json_kv_impl.py
CHANGED
@@ -13,6 +13,10 @@ from lightrag.utils import (
|
|
13 |
from .shared_storage import (
|
14 |
get_namespace_data,
|
15 |
get_storage_lock,
|
|
|
|
|
|
|
|
|
16 |
try_initialize_namespace,
|
17 |
)
|
18 |
|
@@ -23,26 +27,63 @@ class JsonKVStorage(BaseKVStorage):
|
|
23 |
def __post_init__(self):
|
24 |
working_dir = self.global_config["working_dir"]
|
25 |
self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
|
26 |
-
self._storage_lock = get_storage_lock()
|
27 |
self._data = None
|
|
|
|
|
28 |
|
29 |
async def initialize(self):
|
30 |
"""Initialize storage data"""
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
async def index_done_callback(self) -> None:
|
41 |
async with self._storage_lock:
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
async def get_all(self) -> dict[str, Any]:
|
48 |
"""Get all data from storage
|
@@ -73,15 +114,16 @@ class JsonKVStorage(BaseKVStorage):
|
|
73 |
return set(keys) - set(self._data.keys())
|
74 |
|
75 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
76 |
-
logger.info(f"Inserting {len(data)} to {self.namespace}")
|
77 |
if not data:
|
78 |
return
|
|
|
79 |
async with self._storage_lock:
|
80 |
-
|
81 |
-
self.
|
82 |
|
83 |
async def delete(self, ids: list[str]) -> None:
|
84 |
async with self._storage_lock:
|
85 |
for doc_id in ids:
|
86 |
self._data.pop(doc_id, None)
|
|
|
87 |
await self.index_done_callback()
|
|
|
13 |
from .shared_storage import (
|
14 |
get_namespace_data,
|
15 |
get_storage_lock,
|
16 |
+
get_data_init_lock,
|
17 |
+
get_update_flag,
|
18 |
+
set_all_update_flags,
|
19 |
+
clear_all_update_flags,
|
20 |
try_initialize_namespace,
|
21 |
)
|
22 |
|
|
|
27 |
def __post_init__(self):
|
28 |
working_dir = self.global_config["working_dir"]
|
29 |
self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
|
|
|
30 |
self._data = None
|
31 |
+
self._storage_lock = None
|
32 |
+
self.storage_updated = None
|
33 |
|
34 |
async def initialize(self):
|
35 |
"""Initialize storage data"""
|
36 |
+
self._storage_lock = get_storage_lock()
|
37 |
+
self.storage_updated = await get_update_flag(self.namespace)
|
38 |
+
async with get_data_init_lock():
|
39 |
+
# check need_init must before get_namespace_data
|
40 |
+
need_init = await try_initialize_namespace(self.namespace)
|
41 |
+
self._data = await get_namespace_data(self.namespace)
|
42 |
+
if need_init:
|
43 |
+
loaded_data = load_json(self._file_name) or {}
|
44 |
+
async with self._storage_lock:
|
45 |
+
self._data.update(loaded_data)
|
46 |
+
|
47 |
+
# Calculate data count based on namespace
|
48 |
+
if self.namespace.endswith("cache"):
|
49 |
+
# For cache namespaces, sum the cache entries across all cache types
|
50 |
+
data_count = sum(
|
51 |
+
len(first_level_dict)
|
52 |
+
for first_level_dict in loaded_data.values()
|
53 |
+
if isinstance(first_level_dict, dict)
|
54 |
+
)
|
55 |
+
else:
|
56 |
+
# For non-cache namespaces, use the original count method
|
57 |
+
data_count = len(loaded_data)
|
58 |
+
|
59 |
+
logger.info(
|
60 |
+
f"Process {os.getpid()} KV load {self.namespace} with {data_count} records"
|
61 |
+
)
|
62 |
|
63 |
async def index_done_callback(self) -> None:
|
64 |
async with self._storage_lock:
|
65 |
+
if self.storage_updated.value:
|
66 |
+
data_dict = (
|
67 |
+
dict(self._data) if hasattr(self._data, "_getvalue") else self._data
|
68 |
+
)
|
69 |
+
|
70 |
+
# Calculate data count based on namespace
|
71 |
+
if self.namespace.endswith("cache"):
|
72 |
+
# # For cache namespaces, sum the cache entries across all cache types
|
73 |
+
data_count = sum(
|
74 |
+
len(first_level_dict)
|
75 |
+
for first_level_dict in data_dict.values()
|
76 |
+
if isinstance(first_level_dict, dict)
|
77 |
+
)
|
78 |
+
else:
|
79 |
+
# For non-cache namespaces, use the original count method
|
80 |
+
data_count = len(data_dict)
|
81 |
+
|
82 |
+
logger.info(
|
83 |
+
f"Process {os.getpid()} KV writting {data_count} records to {self.namespace}"
|
84 |
+
)
|
85 |
+
write_json(data_dict, self._file_name)
|
86 |
+
await clear_all_update_flags(self.namespace)
|
87 |
|
88 |
async def get_all(self) -> dict[str, Any]:
|
89 |
"""Get all data from storage
|
|
|
114 |
return set(keys) - set(self._data.keys())
|
115 |
|
116 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
|
|
117 |
if not data:
|
118 |
return
|
119 |
+
logger.info(f"Inserting {len(data)} records to {self.namespace}")
|
120 |
async with self._storage_lock:
|
121 |
+
self._data.update(data)
|
122 |
+
await set_all_update_flags(self.namespace)
|
123 |
|
124 |
async def delete(self, ids: list[str]) -> None:
|
125 |
async with self._storage_lock:
|
126 |
for doc_id in ids:
|
127 |
self._data.pop(doc_id, None)
|
128 |
+
await set_all_update_flags(self.namespace)
|
129 |
await self.index_done_callback()
|
lightrag/kg/milvus_impl.py
CHANGED
@@ -233,3 +233,57 @@ class MilvusVectorDBStorage(BaseVectorStorage):
|
|
233 |
except Exception as e:
|
234 |
logger.error(f"Error searching for records with prefix '{prefix}': {e}")
|
235 |
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
except Exception as e:
|
234 |
logger.error(f"Error searching for records with prefix '{prefix}': {e}")
|
235 |
return []
|
236 |
+
|
237 |
+
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
238 |
+
"""Get vector data by its ID
|
239 |
+
|
240 |
+
Args:
|
241 |
+
id: The unique identifier of the vector
|
242 |
+
|
243 |
+
Returns:
|
244 |
+
The vector data if found, or None if not found
|
245 |
+
"""
|
246 |
+
try:
|
247 |
+
# Query Milvus for a specific ID
|
248 |
+
result = self._client.query(
|
249 |
+
collection_name=self.namespace,
|
250 |
+
filter=f'id == "{id}"',
|
251 |
+
output_fields=list(self.meta_fields) + ["id"],
|
252 |
+
)
|
253 |
+
|
254 |
+
if not result or len(result) == 0:
|
255 |
+
return None
|
256 |
+
|
257 |
+
return result[0]
|
258 |
+
except Exception as e:
|
259 |
+
logger.error(f"Error retrieving vector data for ID {id}: {e}")
|
260 |
+
return None
|
261 |
+
|
262 |
+
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
263 |
+
"""Get multiple vector data by their IDs
|
264 |
+
|
265 |
+
Args:
|
266 |
+
ids: List of unique identifiers
|
267 |
+
|
268 |
+
Returns:
|
269 |
+
List of vector data objects that were found
|
270 |
+
"""
|
271 |
+
if not ids:
|
272 |
+
return []
|
273 |
+
|
274 |
+
try:
|
275 |
+
# Prepare the ID filter expression
|
276 |
+
id_list = '", "'.join(ids)
|
277 |
+
filter_expr = f'id in ["{id_list}"]'
|
278 |
+
|
279 |
+
# Query Milvus with the filter
|
280 |
+
result = self._client.query(
|
281 |
+
collection_name=self.namespace,
|
282 |
+
filter=filter_expr,
|
283 |
+
output_fields=list(self.meta_fields) + ["id"],
|
284 |
+
)
|
285 |
+
|
286 |
+
return result or []
|
287 |
+
except Exception as e:
|
288 |
+
logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
|
289 |
+
return []
|
lightrag/kg/mongo_impl.py
CHANGED
@@ -1073,6 +1073,59 @@ class MongoVectorDBStorage(BaseVectorStorage):
|
|
1073 |
logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
|
1074 |
return []
|
1075 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1076 |
|
1077 |
async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
|
1078 |
collection_names = await db.list_collection_names()
|
|
|
1073 |
logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
|
1074 |
return []
|
1075 |
|
1076 |
+
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
1077 |
+
"""Get vector data by its ID
|
1078 |
+
|
1079 |
+
Args:
|
1080 |
+
id: The unique identifier of the vector
|
1081 |
+
|
1082 |
+
Returns:
|
1083 |
+
The vector data if found, or None if not found
|
1084 |
+
"""
|
1085 |
+
try:
|
1086 |
+
# Search for the specific ID in MongoDB
|
1087 |
+
result = await self._data.find_one({"_id": id})
|
1088 |
+
if result:
|
1089 |
+
# Format the result to include id field expected by API
|
1090 |
+
result_dict = dict(result)
|
1091 |
+
if "_id" in result_dict and "id" not in result_dict:
|
1092 |
+
result_dict["id"] = result_dict["_id"]
|
1093 |
+
return result_dict
|
1094 |
+
return None
|
1095 |
+
except Exception as e:
|
1096 |
+
logger.error(f"Error retrieving vector data for ID {id}: {e}")
|
1097 |
+
return None
|
1098 |
+
|
1099 |
+
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
1100 |
+
"""Get multiple vector data by their IDs
|
1101 |
+
|
1102 |
+
Args:
|
1103 |
+
ids: List of unique identifiers
|
1104 |
+
|
1105 |
+
Returns:
|
1106 |
+
List of vector data objects that were found
|
1107 |
+
"""
|
1108 |
+
if not ids:
|
1109 |
+
return []
|
1110 |
+
|
1111 |
+
try:
|
1112 |
+
# Query MongoDB for multiple IDs
|
1113 |
+
cursor = self._data.find({"_id": {"$in": ids}})
|
1114 |
+
results = await cursor.to_list(length=None)
|
1115 |
+
|
1116 |
+
# Format results to include id field expected by API
|
1117 |
+
formatted_results = []
|
1118 |
+
for result in results:
|
1119 |
+
result_dict = dict(result)
|
1120 |
+
if "_id" in result_dict and "id" not in result_dict:
|
1121 |
+
result_dict["id"] = result_dict["_id"]
|
1122 |
+
formatted_results.append(result_dict)
|
1123 |
+
|
1124 |
+
return formatted_results
|
1125 |
+
except Exception as e:
|
1126 |
+
logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
|
1127 |
+
return []
|
1128 |
+
|
1129 |
|
1130 |
async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
|
1131 |
collection_names = await db.list_collection_names()
|
lightrag/kg/nano_vector_db_impl.py
CHANGED
@@ -258,3 +258,33 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
258 |
|
259 |
logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
|
260 |
return matching_records
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
|
260 |
return matching_records
|
261 |
+
|
262 |
+
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
263 |
+
"""Get vector data by its ID
|
264 |
+
|
265 |
+
Args:
|
266 |
+
id: The unique identifier of the vector
|
267 |
+
|
268 |
+
Returns:
|
269 |
+
The vector data if found, or None if not found
|
270 |
+
"""
|
271 |
+
client = await self._get_client()
|
272 |
+
result = client.get([id])
|
273 |
+
if result:
|
274 |
+
return result[0]
|
275 |
+
return None
|
276 |
+
|
277 |
+
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
278 |
+
"""Get multiple vector data by their IDs
|
279 |
+
|
280 |
+
Args:
|
281 |
+
ids: List of unique identifiers
|
282 |
+
|
283 |
+
Returns:
|
284 |
+
List of vector data objects that were found
|
285 |
+
"""
|
286 |
+
if not ids:
|
287 |
+
return []
|
288 |
+
|
289 |
+
client = await self._get_client()
|
290 |
+
return client.get(ids)
|
lightrag/kg/neo4j_impl.py
CHANGED
@@ -3,7 +3,7 @@ import inspect
|
|
3 |
import os
|
4 |
import re
|
5 |
from dataclasses import dataclass
|
6 |
-
from typing import Any,
|
7 |
import numpy as np
|
8 |
import configparser
|
9 |
|
@@ -15,6 +15,7 @@ from tenacity import (
|
|
15 |
retry_if_exception_type,
|
16 |
)
|
17 |
|
|
|
18 |
from ..utils import logger
|
19 |
from ..base import BaseGraphStorage
|
20 |
from ..types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
|
@@ -37,6 +38,9 @@ config.read("config.ini", "utf-8")
|
|
37 |
# Get maximum number of graph nodes from environment variable, default is 1000
|
38 |
MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
|
39 |
|
|
|
|
|
|
|
40 |
|
41 |
@final
|
42 |
@dataclass
|
@@ -60,19 +64,25 @@ class Neo4JStorage(BaseGraphStorage):
|
|
60 |
MAX_CONNECTION_POOL_SIZE = int(
|
61 |
os.environ.get(
|
62 |
"NEO4J_MAX_CONNECTION_POOL_SIZE",
|
63 |
-
config.get("neo4j", "connection_pool_size", fallback=
|
64 |
)
|
65 |
)
|
66 |
CONNECTION_TIMEOUT = float(
|
67 |
os.environ.get(
|
68 |
"NEO4J_CONNECTION_TIMEOUT",
|
69 |
-
config.get("neo4j", "connection_timeout", fallback=
|
70 |
),
|
71 |
)
|
72 |
CONNECTION_ACQUISITION_TIMEOUT = float(
|
73 |
os.environ.get(
|
74 |
"NEO4J_CONNECTION_ACQUISITION_TIMEOUT",
|
75 |
-
config.get("neo4j", "connection_acquisition_timeout", fallback=
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
),
|
77 |
)
|
78 |
DATABASE = os.environ.get(
|
@@ -85,6 +95,7 @@ class Neo4JStorage(BaseGraphStorage):
|
|
85 |
max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
|
86 |
connection_timeout=CONNECTION_TIMEOUT,
|
87 |
connection_acquisition_timeout=CONNECTION_ACQUISITION_TIMEOUT,
|
|
|
88 |
)
|
89 |
|
90 |
# Try to connect to the database
|
@@ -152,65 +163,84 @@ class Neo4JStorage(BaseGraphStorage):
|
|
152 |
}
|
153 |
|
154 |
async def close(self):
|
|
|
155 |
if self._driver:
|
156 |
await self._driver.close()
|
157 |
self._driver = None
|
158 |
|
159 |
async def __aexit__(self, exc_type, exc, tb):
|
160 |
-
|
161 |
-
|
162 |
|
163 |
async def index_done_callback(self) -> None:
|
164 |
# Noe4J handles persistence automatically
|
165 |
pass
|
166 |
|
167 |
-
async def
|
168 |
-
"""
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
labels = [record["label"] for record in await result.data()]
|
174 |
-
return label in labels
|
175 |
-
except Exception as e:
|
176 |
-
logger.error(f"Error checking label existence: {e}")
|
177 |
-
return False
|
178 |
|
179 |
-
|
180 |
-
|
181 |
-
clean_label = label.strip('"')
|
182 |
-
if not await self._label_exists(clean_label):
|
183 |
-
logger.warning(f"Label '{clean_label}' does not exist in Neo4j")
|
184 |
-
return clean_label
|
185 |
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
|
|
|
|
|
|
|
|
|
|
198 |
|
199 |
async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
|
200 |
-
|
201 |
-
|
202 |
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
async def get_node(self, node_id: str) -> dict[str, str] | None:
|
216 |
"""Get node by its label identifier.
|
@@ -221,161 +251,258 @@ class Neo4JStorage(BaseGraphStorage):
|
|
221 |
Returns:
|
222 |
dict: Node properties if found
|
223 |
None: If node not found
|
|
|
|
|
|
|
|
|
224 |
"""
|
225 |
-
async with self._driver.session(
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
|
239 |
async def node_degree(self, node_id: str) -> int:
|
240 |
-
|
|
|
|
|
241 |
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
|
258 |
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
|
264 |
# Convert None to 0 for addition
|
265 |
src_degree = 0 if src_degree is None else src_degree
|
266 |
trg_degree = 0 if trg_degree is None else trg_degree
|
267 |
|
268 |
degrees = int(src_degree) + int(trg_degree)
|
269 |
-
logger.debug(
|
270 |
-
f"{inspect.currentframe().f_code.co_name}:query:src_Degree+trg_degree:result:{degrees}"
|
271 |
-
)
|
272 |
return degrees
|
273 |
|
274 |
async def get_edge(
|
275 |
self, source_node_id: str, target_node_id: str
|
276 |
) -> dict[str, str] | None:
|
277 |
-
|
278 |
-
entity_name_label_source = source_node_id.strip('"')
|
279 |
-
entity_name_label_target = target_node_id.strip('"')
|
280 |
|
281 |
-
|
282 |
-
|
283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
RETURN properties(r) as edge_properties
|
285 |
-
LIMIT 1
|
286 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
try:
|
292 |
-
result = dict(record["edge_properties"])
|
293 |
-
logger.info(f"Result: {result}")
|
294 |
-
# Ensure required keys exist with defaults
|
295 |
-
required_keys = {
|
296 |
-
"weight": 0.0,
|
297 |
-
"source_id": None,
|
298 |
-
"description": None,
|
299 |
-
"keywords": None,
|
300 |
-
}
|
301 |
-
for key, default_value in required_keys.items():
|
302 |
-
if key not in result:
|
303 |
-
result[key] = default_value
|
304 |
-
logger.warning(
|
305 |
-
f"Edge between {entity_name_label_source} and {entity_name_label_target} "
|
306 |
-
f"missing {key}, using default: {default_value}"
|
307 |
-
)
|
308 |
-
|
309 |
-
logger.debug(
|
310 |
-
f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{result}"
|
311 |
-
)
|
312 |
-
return result
|
313 |
-
except (KeyError, TypeError, ValueError) as e:
|
314 |
-
logger.error(
|
315 |
-
f"Error processing edge properties between {entity_name_label_source} "
|
316 |
-
f"and {entity_name_label_target}: {str(e)}"
|
317 |
)
|
318 |
-
|
319 |
-
|
320 |
-
"
|
321 |
-
"
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
|
337 |
except Exception as e:
|
338 |
logger.error(
|
339 |
f"Error in get_edge between {source_node_id} and {target_node_id}: {str(e)}"
|
340 |
)
|
341 |
-
|
342 |
-
return {
|
343 |
-
"weight": 0.0,
|
344 |
-
"description": None,
|
345 |
-
"keywords": None,
|
346 |
-
"source_id": None,
|
347 |
-
}
|
348 |
|
349 |
async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
|
350 |
-
|
351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
352 |
"""
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
|
375 |
-
|
376 |
-
|
377 |
|
378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
379 |
|
380 |
@retry(
|
381 |
stop=stop_after_attempt(3),
|
@@ -397,26 +524,47 @@ class Neo4JStorage(BaseGraphStorage):
|
|
397 |
node_id: The unique identifier for the node (used as label)
|
398 |
node_data: Dictionary of node properties
|
399 |
"""
|
400 |
-
label = await self._ensure_label(node_id)
|
401 |
properties = node_data
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
SET n += $properties
|
407 |
-
"""
|
408 |
-
await tx.run(query, properties=properties)
|
409 |
-
logger.debug(
|
410 |
-
f"Upserted node with label '{label}' and properties: {properties}"
|
411 |
-
)
|
412 |
|
413 |
try:
|
414 |
async with self._driver.session(database=self._DATABASE) as session:
|
415 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
416 |
except Exception as e:
|
417 |
logger.error(f"Error during upsert: {str(e)}")
|
418 |
raise
|
419 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
420 |
@retry(
|
421 |
stop=stop_after_attempt(3),
|
422 |
wait=wait_exponential(multiplier=1, min=4, max=10),
|
@@ -434,34 +582,47 @@ class Neo4JStorage(BaseGraphStorage):
|
|
434 |
) -> None:
|
435 |
"""
|
436 |
Upsert an edge and its properties between two nodes identified by their labels.
|
|
|
|
|
437 |
|
438 |
Args:
|
439 |
source_node_id (str): Label of the source node (used as identifier)
|
440 |
target_node_id (str): Label of the target node (used as identifier)
|
441 |
edge_data (dict): Dictionary of properties to set on the edge
|
442 |
-
"""
|
443 |
-
source_label = await self._ensure_label(source_node_id)
|
444 |
-
target_label = await self._ensure_label(target_node_id)
|
445 |
-
edge_properties = edge_data
|
446 |
-
|
447 |
-
async def _do_upsert_edge(tx: AsyncManagedTransaction):
|
448 |
-
query = f"""
|
449 |
-
MATCH (source:`{source_label}`)
|
450 |
-
WITH source
|
451 |
-
MATCH (target:`{target_label}`)
|
452 |
-
MERGE (source)-[r:DIRECTED]->(target)
|
453 |
-
SET r += $properties
|
454 |
-
RETURN r
|
455 |
-
"""
|
456 |
-
result = await tx.run(query, properties=edge_properties)
|
457 |
-
record = await result.single()
|
458 |
-
logger.debug(
|
459 |
-
f"Upserted edge from '{source_label}' to '{target_label}' with properties: {edge_properties}, result: {record['r'] if record else None}"
|
460 |
-
)
|
461 |
|
|
|
|
|
|
|
462 |
try:
|
|
|
463 |
async with self._driver.session(database=self._DATABASE) as session:
|
464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
except Exception as e:
|
466 |
logger.error(f"Error during edge upsert: {str(e)}")
|
467 |
raise
|
@@ -470,199 +631,293 @@ class Neo4JStorage(BaseGraphStorage):
|
|
470 |
print("Implemented but never called.")
|
471 |
|
472 |
async def get_knowledge_graph(
|
473 |
-
self,
|
|
|
|
|
|
|
|
|
474 |
) -> KnowledgeGraph:
|
475 |
"""
|
476 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
477 |
Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
|
478 |
When reducing the number of nodes, the prioritization criteria are as follows:
|
479 |
-
1.
|
480 |
-
2.
|
481 |
-
3.
|
|
|
482 |
|
483 |
Args:
|
484 |
-
node_label
|
485 |
-
max_depth
|
|
|
|
|
486 |
Returns:
|
487 |
KnowledgeGraph: Complete connected subgraph for specified node
|
488 |
"""
|
489 |
-
label = node_label.strip('"')
|
490 |
-
# Escape single quotes to prevent injection attacks
|
491 |
-
escaped_label = label.replace("'", "\\'")
|
492 |
result = KnowledgeGraph()
|
493 |
seen_nodes = set()
|
494 |
seen_edges = set()
|
495 |
|
496 |
-
async with self._driver.session(
|
|
|
|
|
497 |
try:
|
498 |
-
if
|
499 |
main_query = """
|
500 |
MATCH (n)
|
501 |
OPTIONAL MATCH (n)-[r]-()
|
502 |
WITH n, count(r) AS degree
|
|
|
503 |
ORDER BY degree DESC
|
504 |
LIMIT $max_nodes
|
505 |
-
WITH collect(n) AS
|
506 |
-
|
507 |
-
|
508 |
-
|
|
|
|
|
|
|
509 |
"""
|
510 |
result_set = await session.run(
|
511 |
-
main_query,
|
|
|
512 |
)
|
513 |
|
514 |
else:
|
515 |
-
validate_query = f"""
|
516 |
-
MATCH (n)
|
517 |
-
WHERE any(label IN labels(n) WHERE label CONTAINS '{escaped_label}')
|
518 |
-
RETURN n LIMIT 1
|
519 |
-
"""
|
520 |
-
validate_result = await session.run(validate_query)
|
521 |
-
if not await validate_result.single():
|
522 |
-
logger.warning(
|
523 |
-
f"No nodes containing '{label}' in their labels found!"
|
524 |
-
)
|
525 |
-
return result
|
526 |
-
|
527 |
# Main query uses partial matching
|
528 |
-
main_query =
|
529 |
MATCH (start)
|
530 |
-
WHERE
|
|
|
|
|
|
|
|
|
531 |
WITH start
|
532 |
-
CALL apoc.path.subgraphAll(start, {
|
533 |
-
relationshipFilter: '
|
534 |
minLevel: 0,
|
535 |
-
maxLevel:
|
536 |
bfs: true
|
537 |
-
}
|
538 |
YIELD nodes, relationships
|
539 |
WITH start, nodes, relationships
|
540 |
UNWIND nodes AS node
|
541 |
OPTIONAL MATCH (node)-[r]-()
|
542 |
-
WITH node, count(r) AS degree, start, nodes, relationships
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
|
|
|
|
549 |
LIMIT $max_nodes
|
550 |
-
WITH collect(node) AS filtered_nodes
|
551 |
-
|
552 |
-
|
|
|
|
|
|
|
|
|
553 |
"""
|
554 |
result_set = await session.run(
|
555 |
-
main_query,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
556 |
)
|
557 |
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
571 |
)
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
)
|
589 |
-
|
590 |
-
seen_edges.add(edge_id)
|
591 |
|
592 |
-
|
593 |
-
|
594 |
-
|
|
|
|
|
595 |
|
596 |
except neo4jExceptions.ClientError as e:
|
597 |
-
logger.
|
598 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
599 |
|
600 |
return result
|
601 |
|
602 |
async def _robust_fallback(
|
603 |
-
self,
|
604 |
-
) ->
|
605 |
-
"""
|
606 |
-
|
|
|
|
|
|
|
|
|
607 |
visited_nodes = set()
|
608 |
visited_edges = set()
|
609 |
|
610 |
-
async def traverse(
|
|
|
|
|
|
|
|
|
|
|
611 |
if current_depth > max_depth:
|
|
|
612 |
return
|
613 |
-
|
614 |
-
|
615 |
-
node = await self.get_node(current_label)
|
616 |
-
if not node:
|
617 |
return
|
618 |
|
619 |
-
|
620 |
-
if
|
621 |
return
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
630 |
-
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
642 |
rel = record["r"]
|
643 |
-
edge_id =
|
644 |
if edge_id not in visited_edges:
|
645 |
-
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
"
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
664 |
|
665 |
-
await traverse(label, 0)
|
666 |
return result
|
667 |
|
668 |
async def get_all_labels(self) -> list[str]:
|
@@ -671,23 +926,28 @@ class Neo4JStorage(BaseGraphStorage):
|
|
671 |
Returns:
|
672 |
["Person", "Company", ...] # Alphabetically sorted label list
|
673 |
"""
|
674 |
-
async with self._driver.session(
|
|
|
|
|
675 |
# Method 1: Direct metadata query (Available for Neo4j 4.3+)
|
676 |
# query = "CALL db.labels() YIELD label RETURN label"
|
677 |
|
678 |
# Method 2: Query compatible with older versions
|
679 |
query = """
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
|
684 |
-
ORDER BY label
|
685 |
"""
|
686 |
-
|
687 |
result = await session.run(query)
|
688 |
labels = []
|
689 |
-
|
690 |
-
|
|
|
|
|
|
|
|
|
|
|
691 |
return labels
|
692 |
|
693 |
@retry(
|
@@ -708,15 +968,15 @@ class Neo4JStorage(BaseGraphStorage):
|
|
708 |
Args:
|
709 |
node_id: The label of the node to delete
|
710 |
"""
|
711 |
-
label = await self._ensure_label(node_id)
|
712 |
|
713 |
async def _do_delete(tx: AsyncManagedTransaction):
|
714 |
-
query =
|
715 |
-
MATCH (n
|
716 |
DETACH DELETE n
|
717 |
"""
|
718 |
-
await tx.run(query)
|
719 |
-
logger.debug(f"Deleted node with label '{
|
|
|
720 |
|
721 |
try:
|
722 |
async with self._driver.session(database=self._DATABASE) as session:
|
@@ -765,16 +1025,17 @@ class Neo4JStorage(BaseGraphStorage):
|
|
765 |
edges: List of edges to be deleted, each edge is a (source, target) tuple
|
766 |
"""
|
767 |
for source, target in edges:
|
768 |
-
source_label = await self._ensure_label(source)
|
769 |
-
target_label = await self._ensure_label(target)
|
770 |
|
771 |
async def _do_delete_edge(tx: AsyncManagedTransaction):
|
772 |
-
query =
|
773 |
-
MATCH (source
|
774 |
DELETE r
|
775 |
"""
|
776 |
-
await tx.run(
|
777 |
-
|
|
|
|
|
|
|
778 |
|
779 |
try:
|
780 |
async with self._driver.session(database=self._DATABASE) as session:
|
|
|
3 |
import os
|
4 |
import re
|
5 |
from dataclasses import dataclass
|
6 |
+
from typing import Any, final, Optional
|
7 |
import numpy as np
|
8 |
import configparser
|
9 |
|
|
|
15 |
retry_if_exception_type,
|
16 |
)
|
17 |
|
18 |
+
import logging
|
19 |
from ..utils import logger
|
20 |
from ..base import BaseGraphStorage
|
21 |
from ..types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
|
|
|
38 |
# Get maximum number of graph nodes from environment variable, default is 1000
|
39 |
MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
|
40 |
|
41 |
+
# Set neo4j logger level to ERROR to suppress warning logs
|
42 |
+
logging.getLogger("neo4j").setLevel(logging.ERROR)
|
43 |
+
|
44 |
|
45 |
@final
|
46 |
@dataclass
|
|
|
64 |
MAX_CONNECTION_POOL_SIZE = int(
|
65 |
os.environ.get(
|
66 |
"NEO4J_MAX_CONNECTION_POOL_SIZE",
|
67 |
+
config.get("neo4j", "connection_pool_size", fallback=50),
|
68 |
)
|
69 |
)
|
70 |
CONNECTION_TIMEOUT = float(
|
71 |
os.environ.get(
|
72 |
"NEO4J_CONNECTION_TIMEOUT",
|
73 |
+
config.get("neo4j", "connection_timeout", fallback=30.0),
|
74 |
),
|
75 |
)
|
76 |
CONNECTION_ACQUISITION_TIMEOUT = float(
|
77 |
os.environ.get(
|
78 |
"NEO4J_CONNECTION_ACQUISITION_TIMEOUT",
|
79 |
+
config.get("neo4j", "connection_acquisition_timeout", fallback=30.0),
|
80 |
+
),
|
81 |
+
)
|
82 |
+
MAX_TRANSACTION_RETRY_TIME = float(
|
83 |
+
os.environ.get(
|
84 |
+
"NEO4J_MAX_TRANSACTION_RETRY_TIME",
|
85 |
+
config.get("neo4j", "max_transaction_retry_time", fallback=30.0),
|
86 |
),
|
87 |
)
|
88 |
DATABASE = os.environ.get(
|
|
|
95 |
max_connection_pool_size=MAX_CONNECTION_POOL_SIZE,
|
96 |
connection_timeout=CONNECTION_TIMEOUT,
|
97 |
connection_acquisition_timeout=CONNECTION_ACQUISITION_TIMEOUT,
|
98 |
+
max_transaction_retry_time=MAX_TRANSACTION_RETRY_TIME,
|
99 |
)
|
100 |
|
101 |
# Try to connect to the database
|
|
|
163 |
}
|
164 |
|
165 |
async def close(self):
|
166 |
+
"""Close the Neo4j driver and release all resources"""
|
167 |
if self._driver:
|
168 |
await self._driver.close()
|
169 |
self._driver = None
|
170 |
|
171 |
async def __aexit__(self, exc_type, exc, tb):
|
172 |
+
"""Ensure driver is closed when context manager exits"""
|
173 |
+
await self.close()
|
174 |
|
175 |
async def index_done_callback(self) -> None:
|
176 |
# Noe4J handles persistence automatically
|
177 |
pass
|
178 |
|
179 |
+
async def has_node(self, node_id: str) -> bool:
|
180 |
+
"""
|
181 |
+
Check if a node with the given label exists in the database
|
182 |
+
|
183 |
+
Args:
|
184 |
+
node_id: Label of the node to check
|
|
|
|
|
|
|
|
|
|
|
185 |
|
186 |
+
Returns:
|
187 |
+
bool: True if node exists, False otherwise
|
|
|
|
|
|
|
|
|
188 |
|
189 |
+
Raises:
|
190 |
+
ValueError: If node_id is invalid
|
191 |
+
Exception: If there is an error executing the query
|
192 |
+
"""
|
193 |
+
async with self._driver.session(
|
194 |
+
database=self._DATABASE, default_access_mode="READ"
|
195 |
+
) as session:
|
196 |
+
try:
|
197 |
+
query = "MATCH (n:base {entity_id: $entity_id}) RETURN count(n) > 0 AS node_exists"
|
198 |
+
result = await session.run(query, entity_id=node_id)
|
199 |
+
single_result = await result.single()
|
200 |
+
await result.consume() # Ensure result is fully consumed
|
201 |
+
return single_result["node_exists"]
|
202 |
+
except Exception as e:
|
203 |
+
logger.error(f"Error checking node existence for {node_id}: {str(e)}")
|
204 |
+
await result.consume() # Ensure results are consumed even on error
|
205 |
+
raise
|
206 |
|
207 |
async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
|
208 |
+
"""
|
209 |
+
Check if an edge exists between two nodes
|
210 |
|
211 |
+
Args:
|
212 |
+
source_node_id: Label of the source node
|
213 |
+
target_node_id: Label of the target node
|
214 |
+
|
215 |
+
Returns:
|
216 |
+
bool: True if edge exists, False otherwise
|
217 |
+
|
218 |
+
Raises:
|
219 |
+
ValueError: If either node_id is invalid
|
220 |
+
Exception: If there is an error executing the query
|
221 |
+
"""
|
222 |
+
async with self._driver.session(
|
223 |
+
database=self._DATABASE, default_access_mode="READ"
|
224 |
+
) as session:
|
225 |
+
try:
|
226 |
+
query = (
|
227 |
+
"MATCH (a:base {entity_id: $source_entity_id})-[r]-(b:base {entity_id: $target_entity_id}) "
|
228 |
+
"RETURN COUNT(r) > 0 AS edgeExists"
|
229 |
+
)
|
230 |
+
result = await session.run(
|
231 |
+
query,
|
232 |
+
source_entity_id=source_node_id,
|
233 |
+
target_entity_id=target_node_id,
|
234 |
+
)
|
235 |
+
single_result = await result.single()
|
236 |
+
await result.consume() # Ensure result is fully consumed
|
237 |
+
return single_result["edgeExists"]
|
238 |
+
except Exception as e:
|
239 |
+
logger.error(
|
240 |
+
f"Error checking edge existence between {source_node_id} and {target_node_id}: {str(e)}"
|
241 |
+
)
|
242 |
+
await result.consume() # Ensure results are consumed even on error
|
243 |
+
raise
|
244 |
|
245 |
async def get_node(self, node_id: str) -> dict[str, str] | None:
|
246 |
"""Get node by its label identifier.
|
|
|
251 |
Returns:
|
252 |
dict: Node properties if found
|
253 |
None: If node not found
|
254 |
+
|
255 |
+
Raises:
|
256 |
+
ValueError: If node_id is invalid
|
257 |
+
Exception: If there is an error executing the query
|
258 |
"""
|
259 |
+
async with self._driver.session(
|
260 |
+
database=self._DATABASE, default_access_mode="READ"
|
261 |
+
) as session:
|
262 |
+
try:
|
263 |
+
query = "MATCH (n:base {entity_id: $entity_id}) RETURN n"
|
264 |
+
result = await session.run(query, entity_id=node_id)
|
265 |
+
try:
|
266 |
+
records = await result.fetch(
|
267 |
+
2
|
268 |
+
) # Get 2 records for duplication check
|
269 |
+
|
270 |
+
if len(records) > 1:
|
271 |
+
logger.warning(
|
272 |
+
f"Multiple nodes found with label '{node_id}'. Using first node."
|
273 |
+
)
|
274 |
+
if records:
|
275 |
+
node = records[0]["n"]
|
276 |
+
node_dict = dict(node)
|
277 |
+
# Remove base label from labels list if it exists
|
278 |
+
if "labels" in node_dict:
|
279 |
+
node_dict["labels"] = [
|
280 |
+
label
|
281 |
+
for label in node_dict["labels"]
|
282 |
+
if label != "base"
|
283 |
+
]
|
284 |
+
logger.debug(f"Neo4j query node {query} return: {node_dict}")
|
285 |
+
return node_dict
|
286 |
+
return None
|
287 |
+
finally:
|
288 |
+
await result.consume() # Ensure result is fully consumed
|
289 |
+
except Exception as e:
|
290 |
+
logger.error(f"Error getting node for {node_id}: {str(e)}")
|
291 |
+
raise
|
292 |
|
293 |
async def node_degree(self, node_id: str) -> int:
|
294 |
+
"""Get the degree (number of relationships) of a node with the given label.
|
295 |
+
If multiple nodes have the same label, returns the degree of the first node.
|
296 |
+
If no node is found, returns 0.
|
297 |
|
298 |
+
Args:
|
299 |
+
node_id: The label of the node
|
300 |
+
|
301 |
+
Returns:
|
302 |
+
int: The number of relationships the node has, or 0 if no node found
|
303 |
+
|
304 |
+
Raises:
|
305 |
+
ValueError: If node_id is invalid
|
306 |
+
Exception: If there is an error executing the query
|
307 |
+
"""
|
308 |
+
async with self._driver.session(
|
309 |
+
database=self._DATABASE, default_access_mode="READ"
|
310 |
+
) as session:
|
311 |
+
try:
|
312 |
+
query = """
|
313 |
+
MATCH (n:base {entity_id: $entity_id})
|
314 |
+
OPTIONAL MATCH (n)-[r]-()
|
315 |
+
RETURN COUNT(r) AS degree
|
316 |
+
"""
|
317 |
+
result = await session.run(query, entity_id=node_id)
|
318 |
+
try:
|
319 |
+
record = await result.single()
|
320 |
+
|
321 |
+
if not record:
|
322 |
+
logger.warning(f"No node found with label '{node_id}'")
|
323 |
+
return 0
|
324 |
+
|
325 |
+
degree = record["degree"]
|
326 |
+
logger.debug(
|
327 |
+
"Neo4j query node degree for {node_id} return: {degree}"
|
328 |
+
)
|
329 |
+
return degree
|
330 |
+
finally:
|
331 |
+
await result.consume() # Ensure result is fully consumed
|
332 |
+
except Exception as e:
|
333 |
+
logger.error(f"Error getting node degree for {node_id}: {str(e)}")
|
334 |
+
raise
|
335 |
|
336 |
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
|
337 |
+
"""Get the total degree (sum of relationships) of two nodes.
|
338 |
+
|
339 |
+
Args:
|
340 |
+
src_id: Label of the source node
|
341 |
+
tgt_id: Label of the target node
|
342 |
+
|
343 |
+
Returns:
|
344 |
+
int: Sum of the degrees of both nodes
|
345 |
+
"""
|
346 |
+
src_degree = await self.node_degree(src_id)
|
347 |
+
trg_degree = await self.node_degree(tgt_id)
|
348 |
|
349 |
# Convert None to 0 for addition
|
350 |
src_degree = 0 if src_degree is None else src_degree
|
351 |
trg_degree = 0 if trg_degree is None else trg_degree
|
352 |
|
353 |
degrees = int(src_degree) + int(trg_degree)
|
|
|
|
|
|
|
354 |
return degrees
|
355 |
|
356 |
async def get_edge(
|
357 |
self, source_node_id: str, target_node_id: str
|
358 |
) -> dict[str, str] | None:
|
359 |
+
"""Get edge properties between two nodes.
|
|
|
|
|
360 |
|
361 |
+
Args:
|
362 |
+
source_node_id: Label of the source node
|
363 |
+
target_node_id: Label of the target node
|
364 |
+
|
365 |
+
Returns:
|
366 |
+
dict: Edge properties if found, default properties if not found or on error
|
367 |
+
|
368 |
+
Raises:
|
369 |
+
ValueError: If either node_id is invalid
|
370 |
+
Exception: If there is an error executing the query
|
371 |
+
"""
|
372 |
+
try:
|
373 |
+
async with self._driver.session(
|
374 |
+
database=self._DATABASE, default_access_mode="READ"
|
375 |
+
) as session:
|
376 |
+
query = """
|
377 |
+
MATCH (start:base {entity_id: $source_entity_id})-[r]-(end:base {entity_id: $target_entity_id})
|
378 |
RETURN properties(r) as edge_properties
|
|
|
379 |
"""
|
380 |
+
result = await session.run(
|
381 |
+
query,
|
382 |
+
source_entity_id=source_node_id,
|
383 |
+
target_entity_id=target_node_id,
|
384 |
+
)
|
385 |
+
try:
|
386 |
+
records = await result.fetch(2)
|
387 |
|
388 |
+
if len(records) > 1:
|
389 |
+
logger.warning(
|
390 |
+
f"Multiple edges found between '{source_node_id}' and '{target_node_id}'. Using first edge."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
391 |
)
|
392 |
+
if records:
|
393 |
+
try:
|
394 |
+
edge_result = dict(records[0]["edge_properties"])
|
395 |
+
logger.debug(f"Result: {edge_result}")
|
396 |
+
# Ensure required keys exist with defaults
|
397 |
+
required_keys = {
|
398 |
+
"weight": 0.0,
|
399 |
+
"source_id": None,
|
400 |
+
"description": None,
|
401 |
+
"keywords": None,
|
402 |
+
}
|
403 |
+
for key, default_value in required_keys.items():
|
404 |
+
if key not in edge_result:
|
405 |
+
edge_result[key] = default_value
|
406 |
+
logger.warning(
|
407 |
+
f"Edge between {source_node_id} and {target_node_id} "
|
408 |
+
f"missing {key}, using default: {default_value}"
|
409 |
+
)
|
410 |
+
|
411 |
+
logger.debug(
|
412 |
+
f"{inspect.currentframe().f_code.co_name}:query:{query}:result:{edge_result}"
|
413 |
+
)
|
414 |
+
return edge_result
|
415 |
+
except (KeyError, TypeError, ValueError) as e:
|
416 |
+
logger.error(
|
417 |
+
f"Error processing edge properties between {source_node_id} "
|
418 |
+
f"and {target_node_id}: {str(e)}"
|
419 |
+
)
|
420 |
+
# Return default edge properties on error
|
421 |
+
return {
|
422 |
+
"weight": 0.0,
|
423 |
+
"source_id": None,
|
424 |
+
"description": None,
|
425 |
+
"keywords": None,
|
426 |
+
}
|
427 |
+
|
428 |
+
logger.debug(
|
429 |
+
f"{inspect.currentframe().f_code.co_name}: No edge found between {source_node_id} and {target_node_id}"
|
430 |
+
)
|
431 |
+
# Return default edge properties when no edge found
|
432 |
+
return {
|
433 |
+
"weight": 0.0,
|
434 |
+
"source_id": None,
|
435 |
+
"description": None,
|
436 |
+
"keywords": None,
|
437 |
+
}
|
438 |
+
finally:
|
439 |
+
await result.consume() # Ensure result is fully consumed
|
440 |
|
441 |
except Exception as e:
|
442 |
logger.error(
|
443 |
f"Error in get_edge between {source_node_id} and {target_node_id}: {str(e)}"
|
444 |
)
|
445 |
+
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
446 |
|
447 |
async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
|
448 |
+
"""Retrieves all edges (relationships) for a particular node identified by its label.
|
449 |
|
450 |
+
Args:
|
451 |
+
source_node_id: Label of the node to get edges for
|
452 |
+
|
453 |
+
Returns:
|
454 |
+
list[tuple[str, str]]: List of (source_label, target_label) tuples representing edges
|
455 |
+
None: If no edges found
|
456 |
+
|
457 |
+
Raises:
|
458 |
+
ValueError: If source_node_id is invalid
|
459 |
+
Exception: If there is an error executing the query
|
460 |
"""
|
461 |
+
try:
|
462 |
+
async with self._driver.session(
|
463 |
+
database=self._DATABASE, default_access_mode="READ"
|
464 |
+
) as session:
|
465 |
+
try:
|
466 |
+
query = """MATCH (n:base {entity_id: $entity_id})
|
467 |
+
OPTIONAL MATCH (n)-[r]-(connected:base)
|
468 |
+
WHERE connected.entity_id IS NOT NULL
|
469 |
+
RETURN n, r, connected"""
|
470 |
+
results = await session.run(query, entity_id=source_node_id)
|
471 |
+
|
472 |
+
edges = []
|
473 |
+
async for record in results:
|
474 |
+
source_node = record["n"]
|
475 |
+
connected_node = record["connected"]
|
476 |
+
|
477 |
+
# Skip if either node is None
|
478 |
+
if not source_node or not connected_node:
|
479 |
+
continue
|
480 |
+
|
481 |
+
source_label = (
|
482 |
+
source_node.get("entity_id")
|
483 |
+
if source_node.get("entity_id")
|
484 |
+
else None
|
485 |
+
)
|
486 |
+
target_label = (
|
487 |
+
connected_node.get("entity_id")
|
488 |
+
if connected_node.get("entity_id")
|
489 |
+
else None
|
490 |
+
)
|
491 |
|
492 |
+
if source_label and target_label:
|
493 |
+
edges.append((source_label, target_label))
|
494 |
|
495 |
+
await results.consume() # Ensure results are consumed
|
496 |
+
return edges
|
497 |
+
except Exception as e:
|
498 |
+
logger.error(
|
499 |
+
f"Error getting edges for node {source_node_id}: {str(e)}"
|
500 |
+
)
|
501 |
+
await results.consume() # Ensure results are consumed even on error
|
502 |
+
raise
|
503 |
+
except Exception as e:
|
504 |
+
logger.error(f"Error in get_node_edges for {source_node_id}: {str(e)}")
|
505 |
+
raise
|
506 |
|
507 |
@retry(
|
508 |
stop=stop_after_attempt(3),
|
|
|
524 |
node_id: The unique identifier for the node (used as label)
|
525 |
node_data: Dictionary of node properties
|
526 |
"""
|
|
|
527 |
properties = node_data
|
528 |
+
entity_type = properties["entity_type"]
|
529 |
+
entity_id = properties["entity_id"]
|
530 |
+
if "entity_id" not in properties:
|
531 |
+
raise ValueError("Neo4j: node properties must contain an 'entity_id' field")
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
|
533 |
try:
|
534 |
async with self._driver.session(database=self._DATABASE) as session:
|
535 |
+
|
536 |
+
async def execute_upsert(tx: AsyncManagedTransaction):
|
537 |
+
query = (
|
538 |
+
"""
|
539 |
+
MERGE (n:base {entity_id: $properties.entity_id})
|
540 |
+
SET n += $properties
|
541 |
+
SET n:`%s`
|
542 |
+
"""
|
543 |
+
% entity_type
|
544 |
+
)
|
545 |
+
result = await tx.run(query, properties=properties)
|
546 |
+
logger.debug(
|
547 |
+
f"Upserted node with entity_id '{entity_id}' and properties: {properties}"
|
548 |
+
)
|
549 |
+
await result.consume() # Ensure result is fully consumed
|
550 |
+
|
551 |
+
await session.execute_write(execute_upsert)
|
552 |
except Exception as e:
|
553 |
logger.error(f"Error during upsert: {str(e)}")
|
554 |
raise
|
555 |
|
556 |
+
@retry(
|
557 |
+
stop=stop_after_attempt(3),
|
558 |
+
wait=wait_exponential(multiplier=1, min=4, max=10),
|
559 |
+
retry=retry_if_exception_type(
|
560 |
+
(
|
561 |
+
neo4jExceptions.ServiceUnavailable,
|
562 |
+
neo4jExceptions.TransientError,
|
563 |
+
neo4jExceptions.WriteServiceUnavailable,
|
564 |
+
neo4jExceptions.ClientError,
|
565 |
+
)
|
566 |
+
),
|
567 |
+
)
|
568 |
@retry(
|
569 |
stop=stop_after_attempt(3),
|
570 |
wait=wait_exponential(multiplier=1, min=4, max=10),
|
|
|
582 |
) -> None:
|
583 |
"""
|
584 |
Upsert an edge and its properties between two nodes identified by their labels.
|
585 |
+
Ensures both source and target nodes exist and are unique before creating the edge.
|
586 |
+
Uses entity_id property to uniquely identify nodes.
|
587 |
|
588 |
Args:
|
589 |
source_node_id (str): Label of the source node (used as identifier)
|
590 |
target_node_id (str): Label of the target node (used as identifier)
|
591 |
edge_data (dict): Dictionary of properties to set on the edge
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
592 |
|
593 |
+
Raises:
|
594 |
+
ValueError: If either source or target node does not exist or is not unique
|
595 |
+
"""
|
596 |
try:
|
597 |
+
edge_properties = edge_data
|
598 |
async with self._driver.session(database=self._DATABASE) as session:
|
599 |
+
|
600 |
+
async def execute_upsert(tx: AsyncManagedTransaction):
|
601 |
+
query = """
|
602 |
+
MATCH (source:base {entity_id: $source_entity_id})
|
603 |
+
WITH source
|
604 |
+
MATCH (target:base {entity_id: $target_entity_id})
|
605 |
+
MERGE (source)-[r:DIRECTED]-(target)
|
606 |
+
SET r += $properties
|
607 |
+
RETURN r, source, target
|
608 |
+
"""
|
609 |
+
result = await tx.run(
|
610 |
+
query,
|
611 |
+
source_entity_id=source_node_id,
|
612 |
+
target_entity_id=target_node_id,
|
613 |
+
properties=edge_properties,
|
614 |
+
)
|
615 |
+
try:
|
616 |
+
records = await result.fetch(2)
|
617 |
+
if records:
|
618 |
+
logger.debug(
|
619 |
+
f"Upserted edge from '{source_node_id}' to '{target_node_id}'"
|
620 |
+
f"with properties: {edge_properties}"
|
621 |
+
)
|
622 |
+
finally:
|
623 |
+
await result.consume() # Ensure result is consumed
|
624 |
+
|
625 |
+
await session.execute_write(execute_upsert)
|
626 |
except Exception as e:
|
627 |
logger.error(f"Error during edge upsert: {str(e)}")
|
628 |
raise
|
|
|
631 |
print("Implemented but never called.")
|
632 |
|
633 |
async def get_knowledge_graph(
|
634 |
+
self,
|
635 |
+
node_label: str,
|
636 |
+
max_depth: int = 3,
|
637 |
+
min_degree: int = 0,
|
638 |
+
inclusive: bool = False,
|
639 |
) -> KnowledgeGraph:
|
640 |
"""
|
641 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
642 |
Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
|
643 |
When reducing the number of nodes, the prioritization criteria are as follows:
|
644 |
+
1. min_degree does not affect nodes directly connected to the matching nodes
|
645 |
+
2. Label matching nodes take precedence
|
646 |
+
3. Followed by nodes directly connected to the matching nodes
|
647 |
+
4. Finally, the degree of the nodes
|
648 |
|
649 |
Args:
|
650 |
+
node_label: Label of the starting node
|
651 |
+
max_depth: Maximum depth of the subgraph
|
652 |
+
min_degree: Minimum degree of nodes to include. Defaults to 0
|
653 |
+
inclusive: Do an inclusive search if true
|
654 |
Returns:
|
655 |
KnowledgeGraph: Complete connected subgraph for specified node
|
656 |
"""
|
|
|
|
|
|
|
657 |
result = KnowledgeGraph()
|
658 |
seen_nodes = set()
|
659 |
seen_edges = set()
|
660 |
|
661 |
+
async with self._driver.session(
|
662 |
+
database=self._DATABASE, default_access_mode="READ"
|
663 |
+
) as session:
|
664 |
try:
|
665 |
+
if node_label == "*":
|
666 |
main_query = """
|
667 |
MATCH (n)
|
668 |
OPTIONAL MATCH (n)-[r]-()
|
669 |
WITH n, count(r) AS degree
|
670 |
+
WHERE degree >= $min_degree
|
671 |
ORDER BY degree DESC
|
672 |
LIMIT $max_nodes
|
673 |
+
WITH collect({node: n}) AS filtered_nodes
|
674 |
+
UNWIND filtered_nodes AS node_info
|
675 |
+
WITH collect(node_info.node) AS kept_nodes, filtered_nodes
|
676 |
+
MATCH (a)-[r]-(b)
|
677 |
+
WHERE a IN kept_nodes AND b IN kept_nodes
|
678 |
+
RETURN filtered_nodes AS node_info,
|
679 |
+
collect(DISTINCT r) AS relationships
|
680 |
"""
|
681 |
result_set = await session.run(
|
682 |
+
main_query,
|
683 |
+
{"max_nodes": MAX_GRAPH_NODES, "min_degree": min_degree},
|
684 |
)
|
685 |
|
686 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
687 |
# Main query uses partial matching
|
688 |
+
main_query = """
|
689 |
MATCH (start)
|
690 |
+
WHERE
|
691 |
+
CASE
|
692 |
+
WHEN $inclusive THEN start.entity_id CONTAINS $entity_id
|
693 |
+
ELSE start.entity_id = $entity_id
|
694 |
+
END
|
695 |
WITH start
|
696 |
+
CALL apoc.path.subgraphAll(start, {
|
697 |
+
relationshipFilter: '',
|
698 |
minLevel: 0,
|
699 |
+
maxLevel: $max_depth,
|
700 |
bfs: true
|
701 |
+
})
|
702 |
YIELD nodes, relationships
|
703 |
WITH start, nodes, relationships
|
704 |
UNWIND nodes AS node
|
705 |
OPTIONAL MATCH (node)-[r]-()
|
706 |
+
WITH node, count(r) AS degree, start, nodes, relationships
|
707 |
+
WHERE node = start OR EXISTS((start)--(node)) OR degree >= $min_degree
|
708 |
+
ORDER BY
|
709 |
+
CASE
|
710 |
+
WHEN node = start THEN 3
|
711 |
+
WHEN EXISTS((start)--(node)) THEN 2
|
712 |
+
ELSE 1
|
713 |
+
END DESC,
|
714 |
+
degree DESC
|
715 |
LIMIT $max_nodes
|
716 |
+
WITH collect({node: node}) AS filtered_nodes
|
717 |
+
UNWIND filtered_nodes AS node_info
|
718 |
+
WITH collect(node_info.node) AS kept_nodes, filtered_nodes
|
719 |
+
MATCH (a)-[r]-(b)
|
720 |
+
WHERE a IN kept_nodes AND b IN kept_nodes
|
721 |
+
RETURN filtered_nodes AS node_info,
|
722 |
+
collect(DISTINCT r) AS relationships
|
723 |
"""
|
724 |
result_set = await session.run(
|
725 |
+
main_query,
|
726 |
+
{
|
727 |
+
"max_nodes": MAX_GRAPH_NODES,
|
728 |
+
"entity_id": node_label,
|
729 |
+
"inclusive": inclusive,
|
730 |
+
"max_depth": max_depth,
|
731 |
+
"min_degree": min_degree,
|
732 |
+
},
|
733 |
)
|
734 |
|
735 |
+
try:
|
736 |
+
record = await result_set.single()
|
737 |
+
|
738 |
+
if record:
|
739 |
+
# Handle nodes (compatible with multi-label cases)
|
740 |
+
for node_info in record["node_info"]:
|
741 |
+
node = node_info["node"]
|
742 |
+
node_id = node.id
|
743 |
+
if node_id not in seen_nodes:
|
744 |
+
result.nodes.append(
|
745 |
+
KnowledgeGraphNode(
|
746 |
+
id=f"{node_id}",
|
747 |
+
labels=[
|
748 |
+
label
|
749 |
+
for label in node.labels
|
750 |
+
if label != "base"
|
751 |
+
],
|
752 |
+
properties=dict(node),
|
753 |
+
)
|
754 |
)
|
755 |
+
seen_nodes.add(node_id)
|
756 |
+
|
757 |
+
# Handle relationships (including direction information)
|
758 |
+
for rel in record["relationships"]:
|
759 |
+
edge_id = rel.id
|
760 |
+
if edge_id not in seen_edges:
|
761 |
+
start = rel.start_node
|
762 |
+
end = rel.end_node
|
763 |
+
result.edges.append(
|
764 |
+
KnowledgeGraphEdge(
|
765 |
+
id=f"{edge_id}",
|
766 |
+
type=rel.type,
|
767 |
+
source=f"{start.id}",
|
768 |
+
target=f"{end.id}",
|
769 |
+
properties=dict(rel),
|
770 |
+
)
|
771 |
)
|
772 |
+
seen_edges.add(edge_id)
|
|
|
773 |
|
774 |
+
logger.info(
|
775 |
+
f"Process {os.getpid()} graph query return: {len(result.nodes)} nodes, {len(result.edges)} edges"
|
776 |
+
)
|
777 |
+
finally:
|
778 |
+
await result_set.consume() # Ensure result set is consumed
|
779 |
|
780 |
except neo4jExceptions.ClientError as e:
|
781 |
+
logger.warning(f"APOC plugin error: {str(e)}")
|
782 |
+
if node_label != "*":
|
783 |
+
logger.warning(
|
784 |
+
"Neo4j: falling back to basic Cypher recursive search..."
|
785 |
+
)
|
786 |
+
if inclusive:
|
787 |
+
logger.warning(
|
788 |
+
"Neo4j: inclusive search mode is not supported in recursive query, using exact matching"
|
789 |
+
)
|
790 |
+
return await self._robust_fallback(
|
791 |
+
node_label, max_depth, min_degree
|
792 |
+
)
|
793 |
|
794 |
return result
|
795 |
|
796 |
async def _robust_fallback(
|
797 |
+
self, node_label: str, max_depth: int, min_degree: int = 0
|
798 |
+
) -> KnowledgeGraph:
|
799 |
+
"""
|
800 |
+
Fallback implementation when APOC plugin is not available or incompatible.
|
801 |
+
This method implements the same functionality as get_knowledge_graph but uses
|
802 |
+
only basic Cypher queries and recursive traversal instead of APOC procedures.
|
803 |
+
"""
|
804 |
+
result = KnowledgeGraph()
|
805 |
visited_nodes = set()
|
806 |
visited_edges = set()
|
807 |
|
808 |
+
async def traverse(
|
809 |
+
node: KnowledgeGraphNode,
|
810 |
+
edge: Optional[KnowledgeGraphEdge],
|
811 |
+
current_depth: int,
|
812 |
+
):
|
813 |
+
# Check traversal limits
|
814 |
if current_depth > max_depth:
|
815 |
+
logger.debug(f"Reached max depth: {max_depth}")
|
816 |
return
|
817 |
+
if len(visited_nodes) >= MAX_GRAPH_NODES:
|
818 |
+
logger.debug(f"Reached max nodes limit: {MAX_GRAPH_NODES}")
|
|
|
|
|
819 |
return
|
820 |
|
821 |
+
# Check if node already visited
|
822 |
+
if node.id in visited_nodes:
|
823 |
return
|
824 |
+
|
825 |
+
# Get all edges and target nodes
|
826 |
+
async with self._driver.session(
|
827 |
+
database=self._DATABASE, default_access_mode="READ"
|
828 |
+
) as session:
|
829 |
+
query = """
|
830 |
+
MATCH (a:base {entity_id: $entity_id})-[r]-(b)
|
831 |
+
WITH r, b, id(r) as edge_id, id(b) as target_id
|
832 |
+
RETURN r, b, edge_id, target_id
|
833 |
+
"""
|
834 |
+
results = await session.run(query, entity_id=node.id)
|
835 |
+
|
836 |
+
# Get all records and release database connection
|
837 |
+
records = await results.fetch(
|
838 |
+
1000
|
839 |
+
) # Max neighbour nodes we can handled
|
840 |
+
await results.consume() # Ensure results are consumed
|
841 |
+
|
842 |
+
# Nodes not connected to start node need to check degree
|
843 |
+
if current_depth > 1 and len(records) < min_degree:
|
844 |
+
return
|
845 |
+
|
846 |
+
# Add current node to result
|
847 |
+
result.nodes.append(node)
|
848 |
+
visited_nodes.add(node.id)
|
849 |
+
|
850 |
+
# Add edge to result if it exists and not already added
|
851 |
+
if edge and edge.id not in visited_edges:
|
852 |
+
result.edges.append(edge)
|
853 |
+
visited_edges.add(edge.id)
|
854 |
+
|
855 |
+
# Prepare nodes and edges for recursive processing
|
856 |
+
nodes_to_process = []
|
857 |
+
for record in records:
|
858 |
rel = record["r"]
|
859 |
+
edge_id = str(record["edge_id"])
|
860 |
if edge_id not in visited_edges:
|
861 |
+
b_node = record["b"]
|
862 |
+
target_id = b_node.get("entity_id")
|
863 |
+
|
864 |
+
if target_id: # Only process if target node has entity_id
|
865 |
+
# Create KnowledgeGraphNode for target
|
866 |
+
target_node = KnowledgeGraphNode(
|
867 |
+
id=f"{target_id}",
|
868 |
+
labels=[
|
869 |
+
label for label in b_node.labels if label != "base"
|
870 |
+
],
|
871 |
+
properties=dict(b_node.properties),
|
872 |
+
)
|
873 |
+
|
874 |
+
# Create KnowledgeGraphEdge
|
875 |
+
target_edge = KnowledgeGraphEdge(
|
876 |
+
id=f"{edge_id}",
|
877 |
+
type=rel.type,
|
878 |
+
source=f"{node.id}",
|
879 |
+
target=f"{target_id}",
|
880 |
+
properties=dict(rel),
|
881 |
+
)
|
882 |
+
|
883 |
+
nodes_to_process.append((target_node, target_edge))
|
884 |
+
else:
|
885 |
+
logger.warning(
|
886 |
+
f"Skipping edge {edge_id} due to missing labels on target node"
|
887 |
+
)
|
888 |
+
|
889 |
+
# Process nodes after releasing database connection
|
890 |
+
for target_node, target_edge in nodes_to_process:
|
891 |
+
await traverse(target_node, target_edge, current_depth + 1)
|
892 |
+
|
893 |
+
# Get the starting node's data
|
894 |
+
async with self._driver.session(
|
895 |
+
database=self._DATABASE, default_access_mode="READ"
|
896 |
+
) as session:
|
897 |
+
query = """
|
898 |
+
MATCH (n:base {entity_id: $entity_id})
|
899 |
+
RETURN id(n) as node_id, n
|
900 |
+
"""
|
901 |
+
node_result = await session.run(query, entity_id=node_label)
|
902 |
+
try:
|
903 |
+
node_record = await node_result.single()
|
904 |
+
if not node_record:
|
905 |
+
return result
|
906 |
+
|
907 |
+
# Create initial KnowledgeGraphNode
|
908 |
+
start_node = KnowledgeGraphNode(
|
909 |
+
id=f"{node_record['n'].get('entity_id')}",
|
910 |
+
labels=[
|
911 |
+
label for label in node_record["n"].labels if label != "base"
|
912 |
+
],
|
913 |
+
properties=dict(node_record["n"].properties),
|
914 |
+
)
|
915 |
+
finally:
|
916 |
+
await node_result.consume() # Ensure results are consumed
|
917 |
+
|
918 |
+
# Start traversal with the initial node
|
919 |
+
await traverse(start_node, None, 0)
|
920 |
|
|
|
921 |
return result
|
922 |
|
923 |
async def get_all_labels(self) -> list[str]:
|
|
|
926 |
Returns:
|
927 |
["Person", "Company", ...] # Alphabetically sorted label list
|
928 |
"""
|
929 |
+
async with self._driver.session(
|
930 |
+
database=self._DATABASE, default_access_mode="READ"
|
931 |
+
) as session:
|
932 |
# Method 1: Direct metadata query (Available for Neo4j 4.3+)
|
933 |
# query = "CALL db.labels() YIELD label RETURN label"
|
934 |
|
935 |
# Method 2: Query compatible with older versions
|
936 |
query = """
|
937 |
+
MATCH (n)
|
938 |
+
WHERE n.entity_id IS NOT NULL
|
939 |
+
RETURN DISTINCT n.entity_id AS label
|
940 |
+
ORDER BY label
|
|
|
941 |
"""
|
|
|
942 |
result = await session.run(query)
|
943 |
labels = []
|
944 |
+
try:
|
945 |
+
async for record in result:
|
946 |
+
labels.append(record["label"])
|
947 |
+
finally:
|
948 |
+
await (
|
949 |
+
result.consume()
|
950 |
+
) # Ensure results are consumed even if processing fails
|
951 |
return labels
|
952 |
|
953 |
@retry(
|
|
|
968 |
Args:
|
969 |
node_id: The label of the node to delete
|
970 |
"""
|
|
|
971 |
|
972 |
async def _do_delete(tx: AsyncManagedTransaction):
|
973 |
+
query = """
|
974 |
+
MATCH (n:base {entity_id: $entity_id})
|
975 |
DETACH DELETE n
|
976 |
"""
|
977 |
+
result = await tx.run(query, entity_id=node_id)
|
978 |
+
logger.debug(f"Deleted node with label '{node_id}'")
|
979 |
+
await result.consume() # Ensure result is fully consumed
|
980 |
|
981 |
try:
|
982 |
async with self._driver.session(database=self._DATABASE) as session:
|
|
|
1025 |
edges: List of edges to be deleted, each edge is a (source, target) tuple
|
1026 |
"""
|
1027 |
for source, target in edges:
|
|
|
|
|
1028 |
|
1029 |
async def _do_delete_edge(tx: AsyncManagedTransaction):
|
1030 |
+
query = """
|
1031 |
+
MATCH (source:base {entity_id: $source_entity_id})-[r]-(target:base {entity_id: $target_entity_id})
|
1032 |
DELETE r
|
1033 |
"""
|
1034 |
+
result = await tx.run(
|
1035 |
+
query, source_entity_id=source, target_entity_id=target
|
1036 |
+
)
|
1037 |
+
logger.debug(f"Deleted edge from '{source}' to '{target}'")
|
1038 |
+
await result.consume() # Ensure result is fully consumed
|
1039 |
|
1040 |
try:
|
1041 |
async with self._driver.session(database=self._DATABASE) as session:
|
lightrag/kg/oracle_impl.py
CHANGED
@@ -531,6 +531,80 @@ class OracleVectorDBStorage(BaseVectorStorage):
|
|
531 |
logger.error(f"Error searching records with prefix '{prefix}': {e}")
|
532 |
return []
|
533 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
534 |
|
535 |
@final
|
536 |
@dataclass
|
|
|
531 |
logger.error(f"Error searching records with prefix '{prefix}': {e}")
|
532 |
return []
|
533 |
|
534 |
+
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
535 |
+
"""Get vector data by its ID
|
536 |
+
|
537 |
+
Args:
|
538 |
+
id: The unique identifier of the vector
|
539 |
+
|
540 |
+
Returns:
|
541 |
+
The vector data if found, or None if not found
|
542 |
+
"""
|
543 |
+
try:
|
544 |
+
# Determine the table name based on namespace
|
545 |
+
table_name = namespace_to_table_name(self.namespace)
|
546 |
+
if not table_name:
|
547 |
+
logger.error(f"Unknown namespace for ID lookup: {self.namespace}")
|
548 |
+
return None
|
549 |
+
|
550 |
+
# Create the appropriate ID field name based on namespace
|
551 |
+
id_field = "entity_id" if "NODES" in table_name else "relation_id"
|
552 |
+
if "CHUNKS" in table_name:
|
553 |
+
id_field = "chunk_id"
|
554 |
+
|
555 |
+
# Prepare and execute the query
|
556 |
+
query = f"""
|
557 |
+
SELECT * FROM {table_name}
|
558 |
+
WHERE {id_field} = :id AND workspace = :workspace
|
559 |
+
"""
|
560 |
+
params = {"id": id, "workspace": self.db.workspace}
|
561 |
+
|
562 |
+
result = await self.db.query(query, params)
|
563 |
+
return result
|
564 |
+
except Exception as e:
|
565 |
+
logger.error(f"Error retrieving vector data for ID {id}: {e}")
|
566 |
+
return None
|
567 |
+
|
568 |
+
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
569 |
+
"""Get multiple vector data by their IDs
|
570 |
+
|
571 |
+
Args:
|
572 |
+
ids: List of unique identifiers
|
573 |
+
|
574 |
+
Returns:
|
575 |
+
List of vector data objects that were found
|
576 |
+
"""
|
577 |
+
if not ids:
|
578 |
+
return []
|
579 |
+
|
580 |
+
try:
|
581 |
+
# Determine the table name based on namespace
|
582 |
+
table_name = namespace_to_table_name(self.namespace)
|
583 |
+
if not table_name:
|
584 |
+
logger.error(f"Unknown namespace for IDs lookup: {self.namespace}")
|
585 |
+
return []
|
586 |
+
|
587 |
+
# Create the appropriate ID field name based on namespace
|
588 |
+
id_field = "entity_id" if "NODES" in table_name else "relation_id"
|
589 |
+
if "CHUNKS" in table_name:
|
590 |
+
id_field = "chunk_id"
|
591 |
+
|
592 |
+
# Format the list of IDs for SQL IN clause
|
593 |
+
ids_list = ", ".join([f"'{id}'" for id in ids])
|
594 |
+
|
595 |
+
# Prepare and execute the query
|
596 |
+
query = f"""
|
597 |
+
SELECT * FROM {table_name}
|
598 |
+
WHERE {id_field} IN ({ids_list}) AND workspace = :workspace
|
599 |
+
"""
|
600 |
+
params = {"workspace": self.db.workspace}
|
601 |
+
|
602 |
+
results = await self.db.query(query, params, multirows=True)
|
603 |
+
return results or []
|
604 |
+
except Exception as e:
|
605 |
+
logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
|
606 |
+
return []
|
607 |
+
|
608 |
|
609 |
@final
|
610 |
@dataclass
|
lightrag/kg/postgres_impl.py
CHANGED
@@ -621,6 +621,60 @@ class PGVectorStorage(BaseVectorStorage):
|
|
621 |
logger.error(f"Error during prefix search for '{prefix}': {e}")
|
622 |
return []
|
623 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
624 |
|
625 |
@final
|
626 |
@dataclass
|
|
|
621 |
logger.error(f"Error during prefix search for '{prefix}': {e}")
|
622 |
return []
|
623 |
|
624 |
+
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
625 |
+
"""Get vector data by its ID
|
626 |
+
|
627 |
+
Args:
|
628 |
+
id: The unique identifier of the vector
|
629 |
+
|
630 |
+
Returns:
|
631 |
+
The vector data if found, or None if not found
|
632 |
+
"""
|
633 |
+
table_name = namespace_to_table_name(self.namespace)
|
634 |
+
if not table_name:
|
635 |
+
logger.error(f"Unknown namespace for ID lookup: {self.namespace}")
|
636 |
+
return None
|
637 |
+
|
638 |
+
query = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id=$2"
|
639 |
+
params = {"workspace": self.db.workspace, "id": id}
|
640 |
+
|
641 |
+
try:
|
642 |
+
result = await self.db.query(query, params)
|
643 |
+
if result:
|
644 |
+
return dict(result)
|
645 |
+
return None
|
646 |
+
except Exception as e:
|
647 |
+
logger.error(f"Error retrieving vector data for ID {id}: {e}")
|
648 |
+
return None
|
649 |
+
|
650 |
+
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
651 |
+
"""Get multiple vector data by their IDs
|
652 |
+
|
653 |
+
Args:
|
654 |
+
ids: List of unique identifiers
|
655 |
+
|
656 |
+
Returns:
|
657 |
+
List of vector data objects that were found
|
658 |
+
"""
|
659 |
+
if not ids:
|
660 |
+
return []
|
661 |
+
|
662 |
+
table_name = namespace_to_table_name(self.namespace)
|
663 |
+
if not table_name:
|
664 |
+
logger.error(f"Unknown namespace for IDs lookup: {self.namespace}")
|
665 |
+
return []
|
666 |
+
|
667 |
+
ids_str = ",".join([f"'{id}'" for id in ids])
|
668 |
+
query = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id IN ({ids_str})"
|
669 |
+
params = {"workspace": self.db.workspace}
|
670 |
+
|
671 |
+
try:
|
672 |
+
results = await self.db.query(query, params, multirows=True)
|
673 |
+
return [dict(record) for record in results]
|
674 |
+
except Exception as e:
|
675 |
+
logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
|
676 |
+
return []
|
677 |
+
|
678 |
|
679 |
@final
|
680 |
@dataclass
|
lightrag/kg/shared_storage.py
CHANGED
@@ -7,12 +7,18 @@ from typing import Any, Dict, Optional, Union, TypeVar, Generic
|
|
7 |
|
8 |
|
9 |
# Define a direct print function for critical logs that must be visible in all processes
|
10 |
-
def direct_log(message, level="INFO"):
|
11 |
"""
|
12 |
Log a message directly to stderr to ensure visibility in all processes,
|
13 |
including the Gunicorn master process.
|
|
|
|
|
|
|
|
|
|
|
14 |
"""
|
15 |
-
|
|
|
16 |
|
17 |
|
18 |
T = TypeVar("T")
|
@@ -32,55 +38,165 @@ _update_flags: Optional[Dict[str, bool]] = None # namespace -> updated
|
|
32 |
_storage_lock: Optional[LockType] = None
|
33 |
_internal_lock: Optional[LockType] = None
|
34 |
_pipeline_status_lock: Optional[LockType] = None
|
|
|
|
|
35 |
|
36 |
|
37 |
class UnifiedLock(Generic[T]):
|
38 |
"""Provide a unified lock interface type for asyncio.Lock and multiprocessing.Lock"""
|
39 |
|
40 |
-
def __init__(
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
self._lock = lock
|
42 |
self._is_async = is_async
|
|
|
|
|
|
|
43 |
|
44 |
async def __aenter__(self) -> "UnifiedLock[T]":
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
def __enter__(self) -> "UnifiedLock[T]":
|
58 |
"""For backward compatibility"""
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
def __exit__(self, exc_type, exc_val, exc_tb):
|
65 |
"""For backward compatibility"""
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
|
71 |
-
def get_internal_lock() -> UnifiedLock:
|
72 |
"""return unified storage lock for data consistency"""
|
73 |
-
return UnifiedLock(
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
|
76 |
-
def get_storage_lock() -> UnifiedLock:
|
77 |
"""return unified storage lock for data consistency"""
|
78 |
-
return UnifiedLock(
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
|
81 |
-
def get_pipeline_status_lock() -> UnifiedLock:
|
82 |
"""return unified storage lock for data consistency"""
|
83 |
-
return UnifiedLock(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
|
86 |
def initialize_share_data(workers: int = 1):
|
@@ -108,6 +224,8 @@ def initialize_share_data(workers: int = 1):
|
|
108 |
_storage_lock, \
|
109 |
_internal_lock, \
|
110 |
_pipeline_status_lock, \
|
|
|
|
|
111 |
_shared_dicts, \
|
112 |
_init_flags, \
|
113 |
_initialized, \
|
@@ -120,14 +238,16 @@ def initialize_share_data(workers: int = 1):
|
|
120 |
)
|
121 |
return
|
122 |
|
123 |
-
_manager = Manager()
|
124 |
_workers = workers
|
125 |
|
126 |
if workers > 1:
|
127 |
is_multiprocess = True
|
|
|
128 |
_internal_lock = _manager.Lock()
|
129 |
_storage_lock = _manager.Lock()
|
130 |
_pipeline_status_lock = _manager.Lock()
|
|
|
|
|
131 |
_shared_dicts = _manager.dict()
|
132 |
_init_flags = _manager.dict()
|
133 |
_update_flags = _manager.dict()
|
@@ -139,6 +259,8 @@ def initialize_share_data(workers: int = 1):
|
|
139 |
_internal_lock = asyncio.Lock()
|
140 |
_storage_lock = asyncio.Lock()
|
141 |
_pipeline_status_lock = asyncio.Lock()
|
|
|
|
|
142 |
_shared_dicts = {}
|
143 |
_init_flags = {}
|
144 |
_update_flags = {}
|
@@ -164,6 +286,7 @@ async def initialize_pipeline_status():
|
|
164 |
history_messages = _manager.list() if is_multiprocess else []
|
165 |
pipeline_namespace.update(
|
166 |
{
|
|
|
167 |
"busy": False, # Control concurrent processes
|
168 |
"job_name": "Default Job", # Current job name (indexing files/indexing texts)
|
169 |
"job_start": None, # Job start time
|
@@ -200,7 +323,12 @@ async def get_update_flag(namespace: str):
|
|
200 |
if is_multiprocess and _manager is not None:
|
201 |
new_update_flag = _manager.Value("b", False)
|
202 |
else:
|
203 |
-
|
|
|
|
|
|
|
|
|
|
|
204 |
|
205 |
_update_flags[namespace].append(new_update_flag)
|
206 |
return new_update_flag
|
@@ -220,7 +348,26 @@ async def set_all_update_flags(namespace: str):
|
|
220 |
if is_multiprocess:
|
221 |
_update_flags[namespace][i].value = True
|
222 |
else:
|
223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
|
225 |
|
226 |
async def get_all_update_flags_status() -> Dict[str, list]:
|
@@ -247,7 +394,7 @@ async def get_all_update_flags_status() -> Dict[str, list]:
|
|
247 |
return result
|
248 |
|
249 |
|
250 |
-
def try_initialize_namespace(namespace: str) -> bool:
|
251 |
"""
|
252 |
Returns True if the current worker(process) gets initialization permission for loading data later.
|
253 |
The worker does not get the permission is prohibited to load data from files.
|
@@ -257,15 +404,17 @@ def try_initialize_namespace(namespace: str) -> bool:
|
|
257 |
if _init_flags is None:
|
258 |
raise ValueError("Try to create nanmespace before Shared-Data is initialized")
|
259 |
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
|
|
|
|
262 |
direct_log(
|
263 |
-
f"Process {os.getpid()}
|
264 |
)
|
265 |
-
|
266 |
-
direct_log(
|
267 |
-
f"Process {os.getpid()} storage namespace already initialized: [{namespace}]"
|
268 |
-
)
|
269 |
return False
|
270 |
|
271 |
|
@@ -304,6 +453,8 @@ def finalize_share_data():
|
|
304 |
_storage_lock, \
|
305 |
_internal_lock, \
|
306 |
_pipeline_status_lock, \
|
|
|
|
|
307 |
_shared_dicts, \
|
308 |
_init_flags, \
|
309 |
_initialized, \
|
@@ -369,6 +520,8 @@ def finalize_share_data():
|
|
369 |
_storage_lock = None
|
370 |
_internal_lock = None
|
371 |
_pipeline_status_lock = None
|
|
|
|
|
372 |
_update_flags = None
|
373 |
|
374 |
direct_log(f"Process {os.getpid()} storage data finalization complete")
|
|
|
7 |
|
8 |
|
9 |
# Define a direct print function for critical logs that must be visible in all processes
|
10 |
+
def direct_log(message, level="INFO", enable_output: bool = True):
|
11 |
"""
|
12 |
Log a message directly to stderr to ensure visibility in all processes,
|
13 |
including the Gunicorn master process.
|
14 |
+
|
15 |
+
Args:
|
16 |
+
message: The message to log
|
17 |
+
level: Log level (default: "INFO")
|
18 |
+
enable_output: Whether to actually output the log (default: True)
|
19 |
"""
|
20 |
+
if enable_output:
|
21 |
+
print(f"{level}: {message}", file=sys.stderr, flush=True)
|
22 |
|
23 |
|
24 |
T = TypeVar("T")
|
|
|
38 |
_storage_lock: Optional[LockType] = None
|
39 |
_internal_lock: Optional[LockType] = None
|
40 |
_pipeline_status_lock: Optional[LockType] = None
|
41 |
+
_graph_db_lock: Optional[LockType] = None
|
42 |
+
_data_init_lock: Optional[LockType] = None
|
43 |
|
44 |
|
45 |
class UnifiedLock(Generic[T]):
|
46 |
"""Provide a unified lock interface type for asyncio.Lock and multiprocessing.Lock"""
|
47 |
|
48 |
+
def __init__(
|
49 |
+
self,
|
50 |
+
lock: Union[ProcessLock, asyncio.Lock],
|
51 |
+
is_async: bool,
|
52 |
+
name: str = "unnamed",
|
53 |
+
enable_logging: bool = True,
|
54 |
+
):
|
55 |
self._lock = lock
|
56 |
self._is_async = is_async
|
57 |
+
self._pid = os.getpid() # for debug only
|
58 |
+
self._name = name # for debug only
|
59 |
+
self._enable_logging = enable_logging # for debug only
|
60 |
|
61 |
async def __aenter__(self) -> "UnifiedLock[T]":
|
62 |
+
try:
|
63 |
+
direct_log(
|
64 |
+
f"== Lock == Process {self._pid}: Acquiring lock '{self._name}' (async={self._is_async})",
|
65 |
+
enable_output=self._enable_logging,
|
66 |
+
)
|
67 |
+
if self._is_async:
|
68 |
+
await self._lock.acquire()
|
69 |
+
else:
|
70 |
+
self._lock.acquire()
|
71 |
+
direct_log(
|
72 |
+
f"== Lock == Process {self._pid}: Lock '{self._name}' acquired (async={self._is_async})",
|
73 |
+
enable_output=self._enable_logging,
|
74 |
+
)
|
75 |
+
return self
|
76 |
+
except Exception as e:
|
77 |
+
direct_log(
|
78 |
+
f"== Lock == Process {self._pid}: Failed to acquire lock '{self._name}': {e}",
|
79 |
+
level="ERROR",
|
80 |
+
enable_output=self._enable_logging,
|
81 |
+
)
|
82 |
+
raise
|
83 |
|
84 |
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
85 |
+
try:
|
86 |
+
direct_log(
|
87 |
+
f"== Lock == Process {self._pid}: Releasing lock '{self._name}' (async={self._is_async})",
|
88 |
+
enable_output=self._enable_logging,
|
89 |
+
)
|
90 |
+
if self._is_async:
|
91 |
+
self._lock.release()
|
92 |
+
else:
|
93 |
+
self._lock.release()
|
94 |
+
direct_log(
|
95 |
+
f"== Lock == Process {self._pid}: Lock '{self._name}' released (async={self._is_async})",
|
96 |
+
enable_output=self._enable_logging,
|
97 |
+
)
|
98 |
+
except Exception as e:
|
99 |
+
direct_log(
|
100 |
+
f"== Lock == Process {self._pid}: Failed to release lock '{self._name}': {e}",
|
101 |
+
level="ERROR",
|
102 |
+
enable_output=self._enable_logging,
|
103 |
+
)
|
104 |
+
raise
|
105 |
|
106 |
def __enter__(self) -> "UnifiedLock[T]":
|
107 |
"""For backward compatibility"""
|
108 |
+
try:
|
109 |
+
if self._is_async:
|
110 |
+
raise RuntimeError("Use 'async with' for shared_storage lock")
|
111 |
+
direct_log(
|
112 |
+
f"== Lock == Process {self._pid}: Acquiring lock '{self._name}' (sync)",
|
113 |
+
enable_output=self._enable_logging,
|
114 |
+
)
|
115 |
+
self._lock.acquire()
|
116 |
+
direct_log(
|
117 |
+
f"== Lock == Process {self._pid}: Lock '{self._name}' acquired (sync)",
|
118 |
+
enable_output=self._enable_logging,
|
119 |
+
)
|
120 |
+
return self
|
121 |
+
except Exception as e:
|
122 |
+
direct_log(
|
123 |
+
f"== Lock == Process {self._pid}: Failed to acquire lock '{self._name}' (sync): {e}",
|
124 |
+
level="ERROR",
|
125 |
+
enable_output=self._enable_logging,
|
126 |
+
)
|
127 |
+
raise
|
128 |
|
129 |
def __exit__(self, exc_type, exc_val, exc_tb):
|
130 |
"""For backward compatibility"""
|
131 |
+
try:
|
132 |
+
if self._is_async:
|
133 |
+
raise RuntimeError("Use 'async with' for shared_storage lock")
|
134 |
+
direct_log(
|
135 |
+
f"== Lock == Process {self._pid}: Releasing lock '{self._name}' (sync)",
|
136 |
+
enable_output=self._enable_logging,
|
137 |
+
)
|
138 |
+
self._lock.release()
|
139 |
+
direct_log(
|
140 |
+
f"== Lock == Process {self._pid}: Lock '{self._name}' released (sync)",
|
141 |
+
enable_output=self._enable_logging,
|
142 |
+
)
|
143 |
+
except Exception as e:
|
144 |
+
direct_log(
|
145 |
+
f"== Lock == Process {self._pid}: Failed to release lock '{self._name}' (sync): {e}",
|
146 |
+
level="ERROR",
|
147 |
+
enable_output=self._enable_logging,
|
148 |
+
)
|
149 |
+
raise
|
150 |
|
151 |
|
152 |
+
def get_internal_lock(enable_logging: bool = False) -> UnifiedLock:
|
153 |
"""return unified storage lock for data consistency"""
|
154 |
+
return UnifiedLock(
|
155 |
+
lock=_internal_lock,
|
156 |
+
is_async=not is_multiprocess,
|
157 |
+
name="internal_lock",
|
158 |
+
enable_logging=enable_logging,
|
159 |
+
)
|
160 |
|
161 |
|
162 |
+
def get_storage_lock(enable_logging: bool = False) -> UnifiedLock:
|
163 |
"""return unified storage lock for data consistency"""
|
164 |
+
return UnifiedLock(
|
165 |
+
lock=_storage_lock,
|
166 |
+
is_async=not is_multiprocess,
|
167 |
+
name="storage_lock",
|
168 |
+
enable_logging=enable_logging,
|
169 |
+
)
|
170 |
|
171 |
|
172 |
+
def get_pipeline_status_lock(enable_logging: bool = False) -> UnifiedLock:
|
173 |
"""return unified storage lock for data consistency"""
|
174 |
+
return UnifiedLock(
|
175 |
+
lock=_pipeline_status_lock,
|
176 |
+
is_async=not is_multiprocess,
|
177 |
+
name="pipeline_status_lock",
|
178 |
+
enable_logging=enable_logging,
|
179 |
+
)
|
180 |
+
|
181 |
+
|
182 |
+
def get_graph_db_lock(enable_logging: bool = False) -> UnifiedLock:
|
183 |
+
"""return unified graph database lock for ensuring atomic operations"""
|
184 |
+
return UnifiedLock(
|
185 |
+
lock=_graph_db_lock,
|
186 |
+
is_async=not is_multiprocess,
|
187 |
+
name="graph_db_lock",
|
188 |
+
enable_logging=enable_logging,
|
189 |
+
)
|
190 |
+
|
191 |
+
|
192 |
+
def get_data_init_lock(enable_logging: bool = False) -> UnifiedLock:
|
193 |
+
"""return unified data initialization lock for ensuring atomic data initialization"""
|
194 |
+
return UnifiedLock(
|
195 |
+
lock=_data_init_lock,
|
196 |
+
is_async=not is_multiprocess,
|
197 |
+
name="data_init_lock",
|
198 |
+
enable_logging=enable_logging,
|
199 |
+
)
|
200 |
|
201 |
|
202 |
def initialize_share_data(workers: int = 1):
|
|
|
224 |
_storage_lock, \
|
225 |
_internal_lock, \
|
226 |
_pipeline_status_lock, \
|
227 |
+
_graph_db_lock, \
|
228 |
+
_data_init_lock, \
|
229 |
_shared_dicts, \
|
230 |
_init_flags, \
|
231 |
_initialized, \
|
|
|
238 |
)
|
239 |
return
|
240 |
|
|
|
241 |
_workers = workers
|
242 |
|
243 |
if workers > 1:
|
244 |
is_multiprocess = True
|
245 |
+
_manager = Manager()
|
246 |
_internal_lock = _manager.Lock()
|
247 |
_storage_lock = _manager.Lock()
|
248 |
_pipeline_status_lock = _manager.Lock()
|
249 |
+
_graph_db_lock = _manager.Lock()
|
250 |
+
_data_init_lock = _manager.Lock()
|
251 |
_shared_dicts = _manager.dict()
|
252 |
_init_flags = _manager.dict()
|
253 |
_update_flags = _manager.dict()
|
|
|
259 |
_internal_lock = asyncio.Lock()
|
260 |
_storage_lock = asyncio.Lock()
|
261 |
_pipeline_status_lock = asyncio.Lock()
|
262 |
+
_graph_db_lock = asyncio.Lock()
|
263 |
+
_data_init_lock = asyncio.Lock()
|
264 |
_shared_dicts = {}
|
265 |
_init_flags = {}
|
266 |
_update_flags = {}
|
|
|
286 |
history_messages = _manager.list() if is_multiprocess else []
|
287 |
pipeline_namespace.update(
|
288 |
{
|
289 |
+
"autoscanned": False, # Auto-scan started
|
290 |
"busy": False, # Control concurrent processes
|
291 |
"job_name": "Default Job", # Current job name (indexing files/indexing texts)
|
292 |
"job_start": None, # Job start time
|
|
|
323 |
if is_multiprocess and _manager is not None:
|
324 |
new_update_flag = _manager.Value("b", False)
|
325 |
else:
|
326 |
+
# Create a simple mutable object to store boolean value for compatibility with mutiprocess
|
327 |
+
class MutableBoolean:
|
328 |
+
def __init__(self, initial_value=False):
|
329 |
+
self.value = initial_value
|
330 |
+
|
331 |
+
new_update_flag = MutableBoolean(False)
|
332 |
|
333 |
_update_flags[namespace].append(new_update_flag)
|
334 |
return new_update_flag
|
|
|
348 |
if is_multiprocess:
|
349 |
_update_flags[namespace][i].value = True
|
350 |
else:
|
351 |
+
# Use .value attribute instead of direct assignment
|
352 |
+
_update_flags[namespace][i].value = True
|
353 |
+
|
354 |
+
|
355 |
+
async def clear_all_update_flags(namespace: str):
|
356 |
+
"""Clear all update flag of namespace indicating all workers need to reload data from files"""
|
357 |
+
global _update_flags
|
358 |
+
if _update_flags is None:
|
359 |
+
raise ValueError("Try to create namespace before Shared-Data is initialized")
|
360 |
+
|
361 |
+
async with get_internal_lock():
|
362 |
+
if namespace not in _update_flags:
|
363 |
+
raise ValueError(f"Namespace {namespace} not found in update flags")
|
364 |
+
# Update flags for both modes
|
365 |
+
for i in range(len(_update_flags[namespace])):
|
366 |
+
if is_multiprocess:
|
367 |
+
_update_flags[namespace][i].value = False
|
368 |
+
else:
|
369 |
+
# Use .value attribute instead of direct assignment
|
370 |
+
_update_flags[namespace][i].value = False
|
371 |
|
372 |
|
373 |
async def get_all_update_flags_status() -> Dict[str, list]:
|
|
|
394 |
return result
|
395 |
|
396 |
|
397 |
+
async def try_initialize_namespace(namespace: str) -> bool:
|
398 |
"""
|
399 |
Returns True if the current worker(process) gets initialization permission for loading data later.
|
400 |
The worker does not get the permission is prohibited to load data from files.
|
|
|
404 |
if _init_flags is None:
|
405 |
raise ValueError("Try to create nanmespace before Shared-Data is initialized")
|
406 |
|
407 |
+
async with get_internal_lock():
|
408 |
+
if namespace not in _init_flags:
|
409 |
+
_init_flags[namespace] = True
|
410 |
+
direct_log(
|
411 |
+
f"Process {os.getpid()} ready to initialize storage namespace: [{namespace}]"
|
412 |
+
)
|
413 |
+
return True
|
414 |
direct_log(
|
415 |
+
f"Process {os.getpid()} storage namespace already initialized: [{namespace}]"
|
416 |
)
|
417 |
+
|
|
|
|
|
|
|
418 |
return False
|
419 |
|
420 |
|
|
|
453 |
_storage_lock, \
|
454 |
_internal_lock, \
|
455 |
_pipeline_status_lock, \
|
456 |
+
_graph_db_lock, \
|
457 |
+
_data_init_lock, \
|
458 |
_shared_dicts, \
|
459 |
_init_flags, \
|
460 |
_initialized, \
|
|
|
520 |
_storage_lock = None
|
521 |
_internal_lock = None
|
522 |
_pipeline_status_lock = None
|
523 |
+
_graph_db_lock = None
|
524 |
+
_data_init_lock = None
|
525 |
_update_flags = None
|
526 |
|
527 |
direct_log(f"Process {os.getpid()} storage data finalization complete")
|
lightrag/kg/tidb_impl.py
CHANGED
@@ -465,6 +465,100 @@ class TiDBVectorDBStorage(BaseVectorStorage):
|
|
465 |
logger.error(f"Error searching records with prefix '{prefix}': {e}")
|
466 |
return []
|
467 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
|
469 |
@final
|
470 |
@dataclass
|
|
|
465 |
logger.error(f"Error searching records with prefix '{prefix}': {e}")
|
466 |
return []
|
467 |
|
468 |
+
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
469 |
+
"""Get vector data by its ID
|
470 |
+
|
471 |
+
Args:
|
472 |
+
id: The unique identifier of the vector
|
473 |
+
|
474 |
+
Returns:
|
475 |
+
The vector data if found, or None if not found
|
476 |
+
"""
|
477 |
+
try:
|
478 |
+
# Determine which table to query based on namespace
|
479 |
+
if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
|
480 |
+
sql_template = """
|
481 |
+
SELECT entity_id as id, name as entity_name, entity_type, description, content
|
482 |
+
FROM LIGHTRAG_GRAPH_NODES
|
483 |
+
WHERE entity_id = :entity_id AND workspace = :workspace
|
484 |
+
"""
|
485 |
+
params = {"entity_id": id, "workspace": self.db.workspace}
|
486 |
+
elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
|
487 |
+
sql_template = """
|
488 |
+
SELECT relation_id as id, source_name as src_id, target_name as tgt_id,
|
489 |
+
keywords, description, content
|
490 |
+
FROM LIGHTRAG_GRAPH_EDGES
|
491 |
+
WHERE relation_id = :relation_id AND workspace = :workspace
|
492 |
+
"""
|
493 |
+
params = {"relation_id": id, "workspace": self.db.workspace}
|
494 |
+
elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
|
495 |
+
sql_template = """
|
496 |
+
SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
|
497 |
+
FROM LIGHTRAG_DOC_CHUNKS
|
498 |
+
WHERE chunk_id = :chunk_id AND workspace = :workspace
|
499 |
+
"""
|
500 |
+
params = {"chunk_id": id, "workspace": self.db.workspace}
|
501 |
+
else:
|
502 |
+
logger.warning(
|
503 |
+
f"Namespace {self.namespace} not supported for get_by_id"
|
504 |
+
)
|
505 |
+
return None
|
506 |
+
|
507 |
+
result = await self.db.query(sql_template, params=params)
|
508 |
+
return result
|
509 |
+
except Exception as e:
|
510 |
+
logger.error(f"Error retrieving vector data for ID {id}: {e}")
|
511 |
+
return None
|
512 |
+
|
513 |
+
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
514 |
+
"""Get multiple vector data by their IDs
|
515 |
+
|
516 |
+
Args:
|
517 |
+
ids: List of unique identifiers
|
518 |
+
|
519 |
+
Returns:
|
520 |
+
List of vector data objects that were found
|
521 |
+
"""
|
522 |
+
if not ids:
|
523 |
+
return []
|
524 |
+
|
525 |
+
try:
|
526 |
+
# Format IDs for SQL IN clause
|
527 |
+
ids_str = ", ".join([f"'{id}'" for id in ids])
|
528 |
+
|
529 |
+
# Determine which table to query based on namespace
|
530 |
+
if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
|
531 |
+
sql_template = f"""
|
532 |
+
SELECT entity_id as id, name as entity_name, entity_type, description, content
|
533 |
+
FROM LIGHTRAG_GRAPH_NODES
|
534 |
+
WHERE entity_id IN ({ids_str}) AND workspace = :workspace
|
535 |
+
"""
|
536 |
+
elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
|
537 |
+
sql_template = f"""
|
538 |
+
SELECT relation_id as id, source_name as src_id, target_name as tgt_id,
|
539 |
+
keywords, description, content
|
540 |
+
FROM LIGHTRAG_GRAPH_EDGES
|
541 |
+
WHERE relation_id IN ({ids_str}) AND workspace = :workspace
|
542 |
+
"""
|
543 |
+
elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
|
544 |
+
sql_template = f"""
|
545 |
+
SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
|
546 |
+
FROM LIGHTRAG_DOC_CHUNKS
|
547 |
+
WHERE chunk_id IN ({ids_str}) AND workspace = :workspace
|
548 |
+
"""
|
549 |
+
else:
|
550 |
+
logger.warning(
|
551 |
+
f"Namespace {self.namespace} not supported for get_by_ids"
|
552 |
+
)
|
553 |
+
return []
|
554 |
+
|
555 |
+
params = {"workspace": self.db.workspace}
|
556 |
+
results = await self.db.query(sql_template, params=params, multirows=True)
|
557 |
+
return results if results else []
|
558 |
+
except Exception as e:
|
559 |
+
logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
|
560 |
+
return []
|
561 |
+
|
562 |
|
563 |
@final
|
564 |
@dataclass
|
lightrag/lightrag.py
CHANGED
@@ -30,11 +30,10 @@ from .namespace import NameSpace, make_namespace
|
|
30 |
from .operate import (
|
31 |
chunking_by_token_size,
|
32 |
extract_entities,
|
33 |
-
extract_keywords_only,
|
34 |
kg_query,
|
35 |
-
kg_query_with_keywords,
|
36 |
mix_kg_vector_query,
|
37 |
naive_query,
|
|
|
38 |
)
|
39 |
from .prompt import GRAPH_FIELD_SEP, PROMPTS
|
40 |
from .utils import (
|
@@ -45,6 +44,9 @@ from .utils import (
|
|
45 |
encode_string_by_tiktoken,
|
46 |
lazy_external_import,
|
47 |
limit_async_func_call,
|
|
|
|
|
|
|
48 |
logger,
|
49 |
)
|
50 |
from .types import KnowledgeGraph
|
@@ -309,7 +311,7 @@ class LightRAG:
|
|
309 |
# Verify storage implementation compatibility
|
310 |
verify_storage_implementation(storage_type, storage_name)
|
311 |
# Check environment variables
|
312 |
-
|
313 |
|
314 |
# Ensure vector_db_storage_cls_kwargs has required fields
|
315 |
self.vector_db_storage_cls_kwargs = {
|
@@ -354,6 +356,9 @@ class LightRAG:
|
|
354 |
namespace=make_namespace(
|
355 |
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
356 |
),
|
|
|
|
|
|
|
357 |
embedding_func=self.embedding_func,
|
358 |
)
|
359 |
|
@@ -404,18 +409,8 @@ class LightRAG:
|
|
404 |
embedding_func=None,
|
405 |
)
|
406 |
|
407 |
-
|
408 |
-
|
409 |
-
):
|
410 |
-
hashing_kv = self.llm_response_cache
|
411 |
-
else:
|
412 |
-
hashing_kv = self.key_string_value_json_storage_cls( # type: ignore
|
413 |
-
namespace=make_namespace(
|
414 |
-
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
415 |
-
),
|
416 |
-
global_config=asdict(self),
|
417 |
-
embedding_func=self.embedding_func,
|
418 |
-
)
|
419 |
|
420 |
self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
|
421 |
partial(
|
@@ -543,11 +538,6 @@ class LightRAG:
|
|
543 |
storage_class = lazy_external_import(import_path, storage_name)
|
544 |
return storage_class
|
545 |
|
546 |
-
@staticmethod
|
547 |
-
def clean_text(text: str) -> str:
|
548 |
-
"""Clean text by removing null bytes (0x00) and whitespace"""
|
549 |
-
return text.strip().replace("\x00", "")
|
550 |
-
|
551 |
def insert(
|
552 |
self,
|
553 |
input: str | list[str],
|
@@ -590,6 +580,7 @@ class LightRAG:
|
|
590 |
split_by_character, split_by_character_only
|
591 |
)
|
592 |
|
|
|
593 |
def insert_custom_chunks(
|
594 |
self,
|
595 |
full_text: str,
|
@@ -601,14 +592,15 @@ class LightRAG:
|
|
601 |
self.ainsert_custom_chunks(full_text, text_chunks, doc_id)
|
602 |
)
|
603 |
|
|
|
604 |
async def ainsert_custom_chunks(
|
605 |
self, full_text: str, text_chunks: list[str], doc_id: str | None = None
|
606 |
) -> None:
|
607 |
update_storage = False
|
608 |
try:
|
609 |
# Clean input texts
|
610 |
-
full_text =
|
611 |
-
text_chunks = [
|
612 |
|
613 |
# Process cleaned texts
|
614 |
if doc_id is None:
|
@@ -687,7 +679,7 @@ class LightRAG:
|
|
687 |
contents = {id_: doc for id_, doc in zip(ids, input)}
|
688 |
else:
|
689 |
# Clean input text and remove duplicates
|
690 |
-
input = list(set(
|
691 |
# Generate contents dict of MD5 hash IDs and documents
|
692 |
contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
|
693 |
|
@@ -703,7 +695,7 @@ class LightRAG:
|
|
703 |
new_docs: dict[str, Any] = {
|
704 |
id_: {
|
705 |
"content": content,
|
706 |
-
"content_summary":
|
707 |
"content_length": len(content),
|
708 |
"status": DocStatus.PENDING,
|
709 |
"created_at": datetime.now().isoformat(),
|
@@ -892,7 +884,9 @@ class LightRAG:
|
|
892 |
self.chunks_vdb.upsert(chunks)
|
893 |
)
|
894 |
entity_relation_task = asyncio.create_task(
|
895 |
-
self._process_entity_relation_graph(
|
|
|
|
|
896 |
)
|
897 |
full_docs_task = asyncio.create_task(
|
898 |
self.full_docs.upsert(
|
@@ -1007,21 +1001,27 @@ class LightRAG:
|
|
1007 |
pipeline_status["latest_message"] = log_message
|
1008 |
pipeline_status["history_messages"].append(log_message)
|
1009 |
|
1010 |
-
async def _process_entity_relation_graph(
|
|
|
|
|
1011 |
try:
|
1012 |
await extract_entities(
|
1013 |
chunk,
|
1014 |
knowledge_graph_inst=self.chunk_entity_relation_graph,
|
1015 |
entity_vdb=self.entities_vdb,
|
1016 |
relationships_vdb=self.relationships_vdb,
|
1017 |
-
llm_response_cache=self.llm_response_cache,
|
1018 |
global_config=asdict(self),
|
|
|
|
|
|
|
1019 |
)
|
1020 |
except Exception as e:
|
1021 |
logger.error("Failed to extract entities and relationships")
|
1022 |
raise e
|
1023 |
|
1024 |
-
async def _insert_done(
|
|
|
|
|
1025 |
tasks = [
|
1026 |
cast(StorageNameSpace, storage_inst).index_done_callback()
|
1027 |
for storage_inst in [ # type: ignore
|
@@ -1040,12 +1040,10 @@ class LightRAG:
|
|
1040 |
log_message = "All Insert done"
|
1041 |
logger.info(log_message)
|
1042 |
|
1043 |
-
|
1044 |
-
|
1045 |
-
|
1046 |
-
|
1047 |
-
pipeline_status["latest_message"] = log_message
|
1048 |
-
pipeline_status["history_messages"].append(log_message)
|
1049 |
|
1050 |
def insert_custom_kg(
|
1051 |
self, custom_kg: dict[str, Any], full_doc_id: str = None
|
@@ -1062,7 +1060,7 @@ class LightRAG:
|
|
1062 |
all_chunks_data: dict[str, dict[str, str]] = {}
|
1063 |
chunk_to_source_map: dict[str, str] = {}
|
1064 |
for chunk_data in custom_kg.get("chunks", []):
|
1065 |
-
chunk_content =
|
1066 |
source_id = chunk_data["source_id"]
|
1067 |
tokens = len(
|
1068 |
encode_string_by_tiktoken(
|
@@ -1260,16 +1258,7 @@ class LightRAG:
|
|
1260 |
self.text_chunks,
|
1261 |
param,
|
1262 |
asdict(self),
|
1263 |
-
hashing_kv=self.llm_response_cache
|
1264 |
-
if self.llm_response_cache
|
1265 |
-
and hasattr(self.llm_response_cache, "global_config")
|
1266 |
-
else self.key_string_value_json_storage_cls(
|
1267 |
-
namespace=make_namespace(
|
1268 |
-
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
1269 |
-
),
|
1270 |
-
global_config=asdict(self),
|
1271 |
-
embedding_func=self.embedding_func,
|
1272 |
-
),
|
1273 |
system_prompt=system_prompt,
|
1274 |
)
|
1275 |
elif param.mode == "naive":
|
@@ -1279,16 +1268,7 @@ class LightRAG:
|
|
1279 |
self.text_chunks,
|
1280 |
param,
|
1281 |
asdict(self),
|
1282 |
-
hashing_kv=self.llm_response_cache
|
1283 |
-
if self.llm_response_cache
|
1284 |
-
and hasattr(self.llm_response_cache, "global_config")
|
1285 |
-
else self.key_string_value_json_storage_cls(
|
1286 |
-
namespace=make_namespace(
|
1287 |
-
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
1288 |
-
),
|
1289 |
-
global_config=asdict(self),
|
1290 |
-
embedding_func=self.embedding_func,
|
1291 |
-
),
|
1292 |
system_prompt=system_prompt,
|
1293 |
)
|
1294 |
elif param.mode == "mix":
|
@@ -1301,16 +1281,7 @@ class LightRAG:
|
|
1301 |
self.text_chunks,
|
1302 |
param,
|
1303 |
asdict(self),
|
1304 |
-
hashing_kv=self.llm_response_cache
|
1305 |
-
if self.llm_response_cache
|
1306 |
-
and hasattr(self.llm_response_cache, "global_config")
|
1307 |
-
else self.key_string_value_json_storage_cls(
|
1308 |
-
namespace=make_namespace(
|
1309 |
-
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
1310 |
-
),
|
1311 |
-
global_config=asdict(self),
|
1312 |
-
embedding_func=self.embedding_func,
|
1313 |
-
),
|
1314 |
system_prompt=system_prompt,
|
1315 |
)
|
1316 |
else:
|
@@ -1322,8 +1293,17 @@ class LightRAG:
|
|
1322 |
self, query: str, prompt: str, param: QueryParam = QueryParam()
|
1323 |
):
|
1324 |
"""
|
1325 |
-
|
1326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1327 |
"""
|
1328 |
loop = always_get_an_event_loop()
|
1329 |
return loop.run_until_complete(
|
@@ -1334,100 +1314,29 @@ class LightRAG:
|
|
1334 |
self, query: str, prompt: str, param: QueryParam = QueryParam()
|
1335 |
) -> str | AsyncIterator[str]:
|
1336 |
"""
|
1337 |
-
|
1338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1339 |
"""
|
1340 |
-
|
1341 |
-
|
1342 |
-
|
1343 |
-
hl_keywords, ll_keywords = await extract_keywords_only(
|
1344 |
-
text=query,
|
1345 |
param=param,
|
|
|
|
|
|
|
|
|
|
|
1346 |
global_config=asdict(self),
|
1347 |
-
hashing_kv=self.llm_response_cache
|
1348 |
-
or self.key_string_value_json_storage_cls(
|
1349 |
-
namespace=make_namespace(
|
1350 |
-
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
1351 |
-
),
|
1352 |
-
global_config=asdict(self),
|
1353 |
-
embedding_func=self.embedding_func,
|
1354 |
-
),
|
1355 |
)
|
1356 |
|
1357 |
-
param.hl_keywords = hl_keywords
|
1358 |
-
param.ll_keywords = ll_keywords
|
1359 |
-
|
1360 |
-
# ---------------------
|
1361 |
-
# STEP 2: Final Query Logic
|
1362 |
-
# ---------------------
|
1363 |
-
|
1364 |
-
# Create a new string with the prompt and the keywords
|
1365 |
-
ll_keywords_str = ", ".join(ll_keywords)
|
1366 |
-
hl_keywords_str = ", ".join(hl_keywords)
|
1367 |
-
formatted_question = f"{prompt}\n\n### Keywords:\nHigh-level: {hl_keywords_str}\nLow-level: {ll_keywords_str}\n\n### Query:\n{query}"
|
1368 |
-
|
1369 |
-
if param.mode in ["local", "global", "hybrid"]:
|
1370 |
-
response = await kg_query_with_keywords(
|
1371 |
-
formatted_question,
|
1372 |
-
self.chunk_entity_relation_graph,
|
1373 |
-
self.entities_vdb,
|
1374 |
-
self.relationships_vdb,
|
1375 |
-
self.text_chunks,
|
1376 |
-
param,
|
1377 |
-
asdict(self),
|
1378 |
-
hashing_kv=self.llm_response_cache
|
1379 |
-
if self.llm_response_cache
|
1380 |
-
and hasattr(self.llm_response_cache, "global_config")
|
1381 |
-
else self.key_string_value_json_storage_cls(
|
1382 |
-
namespace=make_namespace(
|
1383 |
-
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
1384 |
-
),
|
1385 |
-
global_config=asdict(self),
|
1386 |
-
embedding_func=self.embedding_func,
|
1387 |
-
),
|
1388 |
-
)
|
1389 |
-
elif param.mode == "naive":
|
1390 |
-
response = await naive_query(
|
1391 |
-
formatted_question,
|
1392 |
-
self.chunks_vdb,
|
1393 |
-
self.text_chunks,
|
1394 |
-
param,
|
1395 |
-
asdict(self),
|
1396 |
-
hashing_kv=self.llm_response_cache
|
1397 |
-
if self.llm_response_cache
|
1398 |
-
and hasattr(self.llm_response_cache, "global_config")
|
1399 |
-
else self.key_string_value_json_storage_cls(
|
1400 |
-
namespace=make_namespace(
|
1401 |
-
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
1402 |
-
),
|
1403 |
-
global_config=asdict(self),
|
1404 |
-
embedding_func=self.embedding_func,
|
1405 |
-
),
|
1406 |
-
)
|
1407 |
-
elif param.mode == "mix":
|
1408 |
-
response = await mix_kg_vector_query(
|
1409 |
-
formatted_question,
|
1410 |
-
self.chunk_entity_relation_graph,
|
1411 |
-
self.entities_vdb,
|
1412 |
-
self.relationships_vdb,
|
1413 |
-
self.chunks_vdb,
|
1414 |
-
self.text_chunks,
|
1415 |
-
param,
|
1416 |
-
asdict(self),
|
1417 |
-
hashing_kv=self.llm_response_cache
|
1418 |
-
if self.llm_response_cache
|
1419 |
-
and hasattr(self.llm_response_cache, "global_config")
|
1420 |
-
else self.key_string_value_json_storage_cls(
|
1421 |
-
namespace=make_namespace(
|
1422 |
-
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
1423 |
-
),
|
1424 |
-
global_config=asdict(self),
|
1425 |
-
embedding_func=self.embedding_func,
|
1426 |
-
),
|
1427 |
-
)
|
1428 |
-
else:
|
1429 |
-
raise ValueError(f"Unknown mode {param.mode}")
|
1430 |
-
|
1431 |
await self._query_done()
|
1432 |
return response
|
1433 |
|
@@ -1525,21 +1434,6 @@ class LightRAG:
|
|
1525 |
]
|
1526 |
)
|
1527 |
|
1528 |
-
def _get_content_summary(self, content: str, max_length: int = 100) -> str:
|
1529 |
-
"""Get summary of document content
|
1530 |
-
|
1531 |
-
Args:
|
1532 |
-
content: Original document content
|
1533 |
-
max_length: Maximum length of summary
|
1534 |
-
|
1535 |
-
Returns:
|
1536 |
-
Truncated content with ellipsis if needed
|
1537 |
-
"""
|
1538 |
-
content = content.strip()
|
1539 |
-
if len(content) <= max_length:
|
1540 |
-
return content
|
1541 |
-
return content[:max_length] + "..."
|
1542 |
-
|
1543 |
async def get_processing_status(self) -> dict[str, int]:
|
1544 |
"""Get current document processing status counts
|
1545 |
|
@@ -1816,19 +1710,7 @@ class LightRAG:
|
|
1816 |
async def get_entity_info(
|
1817 |
self, entity_name: str, include_vector_data: bool = False
|
1818 |
) -> dict[str, str | None | dict[str, str]]:
|
1819 |
-
"""Get detailed information of an entity
|
1820 |
-
|
1821 |
-
Args:
|
1822 |
-
entity_name: Entity name (no need for quotes)
|
1823 |
-
include_vector_data: Whether to include data from the vector database
|
1824 |
-
|
1825 |
-
Returns:
|
1826 |
-
dict: A dictionary containing entity information, including:
|
1827 |
-
- entity_name: Entity name
|
1828 |
-
- source_id: Source document ID
|
1829 |
-
- graph_data: Complete node data from the graph database
|
1830 |
-
- vector_data: (optional) Data from the vector database
|
1831 |
-
"""
|
1832 |
|
1833 |
# Get information from the graph
|
1834 |
node_data = await self.chunk_entity_relation_graph.get_node(entity_name)
|
@@ -1843,29 +1725,15 @@ class LightRAG:
|
|
1843 |
# Optional: Get vector database information
|
1844 |
if include_vector_data:
|
1845 |
entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
1846 |
-
vector_data = self.entities_vdb.
|
1847 |
-
result["vector_data"] = vector_data
|
1848 |
|
1849 |
return result
|
1850 |
|
1851 |
async def get_relation_info(
|
1852 |
self, src_entity: str, tgt_entity: str, include_vector_data: bool = False
|
1853 |
) -> dict[str, str | None | dict[str, str]]:
|
1854 |
-
"""Get detailed information of a relationship
|
1855 |
-
|
1856 |
-
Args:
|
1857 |
-
src_entity: Source entity name (no need for quotes)
|
1858 |
-
tgt_entity: Target entity name (no need for quotes)
|
1859 |
-
include_vector_data: Whether to include data from the vector database
|
1860 |
-
|
1861 |
-
Returns:
|
1862 |
-
dict: A dictionary containing relationship information, including:
|
1863 |
-
- src_entity: Source entity name
|
1864 |
-
- tgt_entity: Target entity name
|
1865 |
-
- source_id: Source document ID
|
1866 |
-
- graph_data: Complete edge data from the graph database
|
1867 |
-
- vector_data: (optional) Data from the vector database
|
1868 |
-
"""
|
1869 |
|
1870 |
# Get information from the graph
|
1871 |
edge_data = await self.chunk_entity_relation_graph.get_edge(
|
@@ -1883,8 +1751,8 @@ class LightRAG:
|
|
1883 |
# Optional: Get vector database information
|
1884 |
if include_vector_data:
|
1885 |
rel_id = compute_mdhash_id(src_entity + tgt_entity, prefix="rel-")
|
1886 |
-
vector_data = self.relationships_vdb.
|
1887 |
-
result["vector_data"] = vector_data
|
1888 |
|
1889 |
return result
|
1890 |
|
@@ -2682,6 +2550,12 @@ class LightRAG:
|
|
2682 |
|
2683 |
# 9. Delete source entities
|
2684 |
for entity_name in source_entities:
|
|
|
|
|
|
|
|
|
|
|
|
|
2685 |
# Delete entity node from knowledge graph
|
2686 |
await self.chunk_entity_relation_graph.delete_node(entity_name)
|
2687 |
|
|
|
30 |
from .operate import (
|
31 |
chunking_by_token_size,
|
32 |
extract_entities,
|
|
|
33 |
kg_query,
|
|
|
34 |
mix_kg_vector_query,
|
35 |
naive_query,
|
36 |
+
query_with_keywords,
|
37 |
)
|
38 |
from .prompt import GRAPH_FIELD_SEP, PROMPTS
|
39 |
from .utils import (
|
|
|
44 |
encode_string_by_tiktoken,
|
45 |
lazy_external_import,
|
46 |
limit_async_func_call,
|
47 |
+
get_content_summary,
|
48 |
+
clean_text,
|
49 |
+
check_storage_env_vars,
|
50 |
logger,
|
51 |
)
|
52 |
from .types import KnowledgeGraph
|
|
|
311 |
# Verify storage implementation compatibility
|
312 |
verify_storage_implementation(storage_type, storage_name)
|
313 |
# Check environment variables
|
314 |
+
check_storage_env_vars(storage_name)
|
315 |
|
316 |
# Ensure vector_db_storage_cls_kwargs has required fields
|
317 |
self.vector_db_storage_cls_kwargs = {
|
|
|
356 |
namespace=make_namespace(
|
357 |
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
|
358 |
),
|
359 |
+
global_config=asdict(
|
360 |
+
self
|
361 |
+
), # Add global_config to ensure cache works properly
|
362 |
embedding_func=self.embedding_func,
|
363 |
)
|
364 |
|
|
|
409 |
embedding_func=None,
|
410 |
)
|
411 |
|
412 |
+
# Directly use llm_response_cache, don't create a new object
|
413 |
+
hashing_kv = self.llm_response_cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
414 |
|
415 |
self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
|
416 |
partial(
|
|
|
538 |
storage_class = lazy_external_import(import_path, storage_name)
|
539 |
return storage_class
|
540 |
|
|
|
|
|
|
|
|
|
|
|
541 |
def insert(
|
542 |
self,
|
543 |
input: str | list[str],
|
|
|
580 |
split_by_character, split_by_character_only
|
581 |
)
|
582 |
|
583 |
+
# TODO: deprecated, use insert instead
|
584 |
def insert_custom_chunks(
|
585 |
self,
|
586 |
full_text: str,
|
|
|
592 |
self.ainsert_custom_chunks(full_text, text_chunks, doc_id)
|
593 |
)
|
594 |
|
595 |
+
# TODO: deprecated, use ainsert instead
|
596 |
async def ainsert_custom_chunks(
|
597 |
self, full_text: str, text_chunks: list[str], doc_id: str | None = None
|
598 |
) -> None:
|
599 |
update_storage = False
|
600 |
try:
|
601 |
# Clean input texts
|
602 |
+
full_text = clean_text(full_text)
|
603 |
+
text_chunks = [clean_text(chunk) for chunk in text_chunks]
|
604 |
|
605 |
# Process cleaned texts
|
606 |
if doc_id is None:
|
|
|
679 |
contents = {id_: doc for id_, doc in zip(ids, input)}
|
680 |
else:
|
681 |
# Clean input text and remove duplicates
|
682 |
+
input = list(set(clean_text(doc) for doc in input))
|
683 |
# Generate contents dict of MD5 hash IDs and documents
|
684 |
contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
|
685 |
|
|
|
695 |
new_docs: dict[str, Any] = {
|
696 |
id_: {
|
697 |
"content": content,
|
698 |
+
"content_summary": get_content_summary(content),
|
699 |
"content_length": len(content),
|
700 |
"status": DocStatus.PENDING,
|
701 |
"created_at": datetime.now().isoformat(),
|
|
|
884 |
self.chunks_vdb.upsert(chunks)
|
885 |
)
|
886 |
entity_relation_task = asyncio.create_task(
|
887 |
+
self._process_entity_relation_graph(
|
888 |
+
chunks, pipeline_status, pipeline_status_lock
|
889 |
+
)
|
890 |
)
|
891 |
full_docs_task = asyncio.create_task(
|
892 |
self.full_docs.upsert(
|
|
|
1001 |
pipeline_status["latest_message"] = log_message
|
1002 |
pipeline_status["history_messages"].append(log_message)
|
1003 |
|
1004 |
+
async def _process_entity_relation_graph(
|
1005 |
+
self, chunk: dict[str, Any], pipeline_status=None, pipeline_status_lock=None
|
1006 |
+
) -> None:
|
1007 |
try:
|
1008 |
await extract_entities(
|
1009 |
chunk,
|
1010 |
knowledge_graph_inst=self.chunk_entity_relation_graph,
|
1011 |
entity_vdb=self.entities_vdb,
|
1012 |
relationships_vdb=self.relationships_vdb,
|
|
|
1013 |
global_config=asdict(self),
|
1014 |
+
pipeline_status=pipeline_status,
|
1015 |
+
pipeline_status_lock=pipeline_status_lock,
|
1016 |
+
llm_response_cache=self.llm_response_cache,
|
1017 |
)
|
1018 |
except Exception as e:
|
1019 |
logger.error("Failed to extract entities and relationships")
|
1020 |
raise e
|
1021 |
|
1022 |
+
async def _insert_done(
|
1023 |
+
self, pipeline_status=None, pipeline_status_lock=None
|
1024 |
+
) -> None:
|
1025 |
tasks = [
|
1026 |
cast(StorageNameSpace, storage_inst).index_done_callback()
|
1027 |
for storage_inst in [ # type: ignore
|
|
|
1040 |
log_message = "All Insert done"
|
1041 |
logger.info(log_message)
|
1042 |
|
1043 |
+
if pipeline_status is not None and pipeline_status_lock is not None:
|
1044 |
+
async with pipeline_status_lock:
|
1045 |
+
pipeline_status["latest_message"] = log_message
|
1046 |
+
pipeline_status["history_messages"].append(log_message)
|
|
|
|
|
1047 |
|
1048 |
def insert_custom_kg(
|
1049 |
self, custom_kg: dict[str, Any], full_doc_id: str = None
|
|
|
1060 |
all_chunks_data: dict[str, dict[str, str]] = {}
|
1061 |
chunk_to_source_map: dict[str, str] = {}
|
1062 |
for chunk_data in custom_kg.get("chunks", []):
|
1063 |
+
chunk_content = clean_text(chunk_data["content"])
|
1064 |
source_id = chunk_data["source_id"]
|
1065 |
tokens = len(
|
1066 |
encode_string_by_tiktoken(
|
|
|
1258 |
self.text_chunks,
|
1259 |
param,
|
1260 |
asdict(self),
|
1261 |
+
hashing_kv=self.llm_response_cache, # Directly use llm_response_cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1262 |
system_prompt=system_prompt,
|
1263 |
)
|
1264 |
elif param.mode == "naive":
|
|
|
1268 |
self.text_chunks,
|
1269 |
param,
|
1270 |
asdict(self),
|
1271 |
+
hashing_kv=self.llm_response_cache, # Directly use llm_response_cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1272 |
system_prompt=system_prompt,
|
1273 |
)
|
1274 |
elif param.mode == "mix":
|
|
|
1281 |
self.text_chunks,
|
1282 |
param,
|
1283 |
asdict(self),
|
1284 |
+
hashing_kv=self.llm_response_cache, # Directly use llm_response_cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1285 |
system_prompt=system_prompt,
|
1286 |
)
|
1287 |
else:
|
|
|
1293 |
self, query: str, prompt: str, param: QueryParam = QueryParam()
|
1294 |
):
|
1295 |
"""
|
1296 |
+
Query with separate keyword extraction step.
|
1297 |
+
|
1298 |
+
This method extracts keywords from the query first, then uses them for the query.
|
1299 |
+
|
1300 |
+
Args:
|
1301 |
+
query: User query
|
1302 |
+
prompt: Additional prompt for the query
|
1303 |
+
param: Query parameters
|
1304 |
+
|
1305 |
+
Returns:
|
1306 |
+
Query response
|
1307 |
"""
|
1308 |
loop = always_get_an_event_loop()
|
1309 |
return loop.run_until_complete(
|
|
|
1314 |
self, query: str, prompt: str, param: QueryParam = QueryParam()
|
1315 |
) -> str | AsyncIterator[str]:
|
1316 |
"""
|
1317 |
+
Async version of query_with_separate_keyword_extraction.
|
1318 |
+
|
1319 |
+
Args:
|
1320 |
+
query: User query
|
1321 |
+
prompt: Additional prompt for the query
|
1322 |
+
param: Query parameters
|
1323 |
+
|
1324 |
+
Returns:
|
1325 |
+
Query response or async iterator
|
1326 |
"""
|
1327 |
+
response = await query_with_keywords(
|
1328 |
+
query=query,
|
1329 |
+
prompt=prompt,
|
|
|
|
|
1330 |
param=param,
|
1331 |
+
knowledge_graph_inst=self.chunk_entity_relation_graph,
|
1332 |
+
entities_vdb=self.entities_vdb,
|
1333 |
+
relationships_vdb=self.relationships_vdb,
|
1334 |
+
chunks_vdb=self.chunks_vdb,
|
1335 |
+
text_chunks_db=self.text_chunks,
|
1336 |
global_config=asdict(self),
|
1337 |
+
hashing_kv=self.llm_response_cache,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1338 |
)
|
1339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1340 |
await self._query_done()
|
1341 |
return response
|
1342 |
|
|
|
1434 |
]
|
1435 |
)
|
1436 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1437 |
async def get_processing_status(self) -> dict[str, int]:
|
1438 |
"""Get current document processing status counts
|
1439 |
|
|
|
1710 |
async def get_entity_info(
|
1711 |
self, entity_name: str, include_vector_data: bool = False
|
1712 |
) -> dict[str, str | None | dict[str, str]]:
|
1713 |
+
"""Get detailed information of an entity"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1714 |
|
1715 |
# Get information from the graph
|
1716 |
node_data = await self.chunk_entity_relation_graph.get_node(entity_name)
|
|
|
1725 |
# Optional: Get vector database information
|
1726 |
if include_vector_data:
|
1727 |
entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
1728 |
+
vector_data = await self.entities_vdb.get_by_id(entity_id)
|
1729 |
+
result["vector_data"] = vector_data
|
1730 |
|
1731 |
return result
|
1732 |
|
1733 |
async def get_relation_info(
|
1734 |
self, src_entity: str, tgt_entity: str, include_vector_data: bool = False
|
1735 |
) -> dict[str, str | None | dict[str, str]]:
|
1736 |
+
"""Get detailed information of a relationship"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1737 |
|
1738 |
# Get information from the graph
|
1739 |
edge_data = await self.chunk_entity_relation_graph.get_edge(
|
|
|
1751 |
# Optional: Get vector database information
|
1752 |
if include_vector_data:
|
1753 |
rel_id = compute_mdhash_id(src_entity + tgt_entity, prefix="rel-")
|
1754 |
+
vector_data = await self.relationships_vdb.get_by_id(rel_id)
|
1755 |
+
result["vector_data"] = vector_data
|
1756 |
|
1757 |
return result
|
1758 |
|
|
|
2550 |
|
2551 |
# 9. Delete source entities
|
2552 |
for entity_name in source_entities:
|
2553 |
+
if entity_name == target_entity:
|
2554 |
+
logger.info(
|
2555 |
+
f"Skipping deletion of '{entity_name}' as it's also the target entity"
|
2556 |
+
)
|
2557 |
+
continue
|
2558 |
+
|
2559 |
# Delete entity node from knowledge graph
|
2560 |
await self.chunk_entity_relation_graph.delete_node(entity_name)
|
2561 |
|
lightrag/llm/azure_openai.py
CHANGED
@@ -55,6 +55,7 @@ async def azure_openai_complete_if_cache(
|
|
55 |
|
56 |
openai_async_client = AsyncAzureOpenAI(
|
57 |
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
|
|
|
58 |
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
|
59 |
api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
|
60 |
)
|
@@ -136,6 +137,7 @@ async def azure_openai_embed(
|
|
136 |
|
137 |
openai_async_client = AsyncAzureOpenAI(
|
138 |
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
|
|
|
139 |
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
|
140 |
api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
|
141 |
)
|
|
|
55 |
|
56 |
openai_async_client = AsyncAzureOpenAI(
|
57 |
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
|
58 |
+
azure_deployment=model,
|
59 |
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
|
60 |
api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
|
61 |
)
|
|
|
137 |
|
138 |
openai_async_client = AsyncAzureOpenAI(
|
139 |
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
|
140 |
+
azure_deployment=model,
|
141 |
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
|
142 |
api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
|
143 |
)
|
lightrag/operate.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3 |
import asyncio
|
4 |
import json
|
5 |
import re
|
|
|
6 |
from typing import Any, AsyncIterator
|
7 |
from collections import Counter, defaultdict
|
8 |
|
@@ -140,18 +141,36 @@ async def _handle_single_entity_extraction(
|
|
140 |
):
|
141 |
if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
|
142 |
return None
|
143 |
-
|
|
|
144 |
entity_name = clean_str(record_attributes[1]).strip('"')
|
145 |
if not entity_name.strip():
|
|
|
|
|
|
|
146 |
return None
|
|
|
|
|
147 |
entity_type = clean_str(record_attributes[2]).strip('"')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
entity_description = clean_str(record_attributes[3]).strip('"')
|
149 |
-
|
|
|
|
|
|
|
|
|
|
|
150 |
return dict(
|
151 |
entity_name=entity_name,
|
152 |
entity_type=entity_type,
|
153 |
description=entity_description,
|
154 |
-
source_id=
|
155 |
metadata={"created_at": time.time()},
|
156 |
)
|
157 |
|
@@ -220,6 +239,7 @@ async def _merge_nodes_then_upsert(
|
|
220 |
entity_name, description, global_config
|
221 |
)
|
222 |
node_data = dict(
|
|
|
223 |
entity_type=entity_type,
|
224 |
description=description,
|
225 |
source_id=source_id,
|
@@ -301,6 +321,7 @@ async def _merge_edges_then_upsert(
|
|
301 |
await knowledge_graph_inst.upsert_node(
|
302 |
need_insert_id,
|
303 |
node_data={
|
|
|
304 |
"source_id": source_id,
|
305 |
"description": description,
|
306 |
"entity_type": "UNKNOWN",
|
@@ -337,11 +358,10 @@ async def extract_entities(
|
|
337 |
entity_vdb: BaseVectorStorage,
|
338 |
relationships_vdb: BaseVectorStorage,
|
339 |
global_config: dict[str, str],
|
|
|
|
|
340 |
llm_response_cache: BaseKVStorage | None = None,
|
341 |
) -> None:
|
342 |
-
from lightrag.kg.shared_storage import get_namespace_data
|
343 |
-
|
344 |
-
pipeline_status = await get_namespace_data("pipeline_status")
|
345 |
use_llm_func: callable = global_config["llm_model_func"]
|
346 |
entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
|
347 |
enable_llm_cache_for_entity_extract: bool = global_config[
|
@@ -400,6 +420,7 @@ async def extract_entities(
|
|
400 |
else:
|
401 |
_prompt = input_text
|
402 |
|
|
|
403 |
arg_hash = compute_args_hash(_prompt)
|
404 |
cached_return, _1, _2, _3 = await handle_cache(
|
405 |
llm_response_cache,
|
@@ -407,7 +428,6 @@ async def extract_entities(
|
|
407 |
_prompt,
|
408 |
"default",
|
409 |
cache_type="extract",
|
410 |
-
force_llm_cache=True,
|
411 |
)
|
412 |
if cached_return:
|
413 |
logger.debug(f"Found cache for {arg_hash}")
|
@@ -436,30 +456,91 @@ async def extract_entities(
|
|
436 |
else:
|
437 |
return await use_llm_func(input_text)
|
438 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
439 |
async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
|
440 |
-
"""
|
441 |
Args:
|
442 |
chunk_key_dp (tuple[str, TextChunkSchema]):
|
443 |
-
("
|
444 |
"""
|
445 |
nonlocal processed_chunks
|
446 |
chunk_key = chunk_key_dp[0]
|
447 |
chunk_dp = chunk_key_dp[1]
|
448 |
content = chunk_dp["content"]
|
449 |
-
|
|
|
450 |
hint_prompt = entity_extract_prompt.format(
|
451 |
**context_base, input_text="{input_text}"
|
452 |
).format(**context_base, input_text=content)
|
453 |
|
454 |
final_result = await _user_llm_func_with_cache(hint_prompt)
|
455 |
history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
for now_glean_index in range(entity_extract_max_gleaning):
|
457 |
glean_result = await _user_llm_func_with_cache(
|
458 |
continue_prompt, history_messages=history
|
459 |
)
|
460 |
|
461 |
history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
|
462 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
463 |
if now_glean_index == entity_extract_max_gleaning - 1:
|
464 |
break
|
465 |
|
@@ -470,42 +551,15 @@ async def extract_entities(
|
|
470 |
if if_loop_result != "yes":
|
471 |
break
|
472 |
|
473 |
-
records = split_string_by_multi_markers(
|
474 |
-
final_result,
|
475 |
-
[context_base["record_delimiter"], context_base["completion_delimiter"]],
|
476 |
-
)
|
477 |
-
|
478 |
-
maybe_nodes = defaultdict(list)
|
479 |
-
maybe_edges = defaultdict(list)
|
480 |
-
for record in records:
|
481 |
-
record = re.search(r"\((.*)\)", record)
|
482 |
-
if record is None:
|
483 |
-
continue
|
484 |
-
record = record.group(1)
|
485 |
-
record_attributes = split_string_by_multi_markers(
|
486 |
-
record, [context_base["tuple_delimiter"]]
|
487 |
-
)
|
488 |
-
if_entities = await _handle_single_entity_extraction(
|
489 |
-
record_attributes, chunk_key
|
490 |
-
)
|
491 |
-
if if_entities is not None:
|
492 |
-
maybe_nodes[if_entities["entity_name"]].append(if_entities)
|
493 |
-
continue
|
494 |
-
|
495 |
-
if_relation = await _handle_single_relationship_extraction(
|
496 |
-
record_attributes, chunk_key
|
497 |
-
)
|
498 |
-
if if_relation is not None:
|
499 |
-
maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
|
500 |
-
if_relation
|
501 |
-
)
|
502 |
processed_chunks += 1
|
503 |
entities_count = len(maybe_nodes)
|
504 |
relations_count = len(maybe_edges)
|
505 |
log_message = f" Chunk {processed_chunks}/{total_chunks}: extracted {entities_count} entities and {relations_count} relationships (deduplicated)"
|
506 |
logger.info(log_message)
|
507 |
-
pipeline_status
|
508 |
-
|
|
|
|
|
509 |
return dict(maybe_nodes), dict(maybe_edges)
|
510 |
|
511 |
tasks = [_process_single_content(c) for c in ordered_chunks]
|
@@ -519,42 +573,58 @@ async def extract_entities(
|
|
519 |
for k, v in m_edges.items():
|
520 |
maybe_edges[tuple(sorted(k))].extend(v)
|
521 |
|
522 |
-
|
523 |
-
*[
|
524 |
-
_merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
|
525 |
-
for k, v in maybe_nodes.items()
|
526 |
-
]
|
527 |
-
)
|
528 |
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
535 |
|
536 |
if not (all_entities_data or all_relationships_data):
|
537 |
log_message = "Didn't extract any entities and relationships."
|
538 |
logger.info(log_message)
|
539 |
-
pipeline_status
|
540 |
-
|
|
|
|
|
541 |
return
|
542 |
|
543 |
if not all_entities_data:
|
544 |
log_message = "Didn't extract any entities"
|
545 |
logger.info(log_message)
|
546 |
-
pipeline_status
|
547 |
-
|
|
|
|
|
548 |
if not all_relationships_data:
|
549 |
log_message = "Didn't extract any relationships"
|
550 |
logger.info(log_message)
|
551 |
-
pipeline_status
|
552 |
-
|
|
|
|
|
553 |
|
554 |
log_message = f"Extracted {len(all_entities_data)} entities and {len(all_relationships_data)} relationships (deduplicated)"
|
555 |
logger.info(log_message)
|
556 |
-
pipeline_status
|
557 |
-
|
|
|
|
|
558 |
verbose_debug(
|
559 |
f"New entities:{all_entities_data}, relationships:{all_relationships_data}"
|
560 |
)
|
@@ -1020,6 +1090,7 @@ async def _build_query_context(
|
|
1020 |
text_chunks_db: BaseKVStorage,
|
1021 |
query_param: QueryParam,
|
1022 |
):
|
|
|
1023 |
if query_param.mode == "local":
|
1024 |
entities_context, relations_context, text_units_context = await _get_node_data(
|
1025 |
ll_keywords,
|
@@ -1845,3 +1916,90 @@ async def kg_query_with_keywords(
|
|
1845 |
)
|
1846 |
|
1847 |
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import asyncio
|
4 |
import json
|
5 |
import re
|
6 |
+
import os
|
7 |
from typing import Any, AsyncIterator
|
8 |
from collections import Counter, defaultdict
|
9 |
|
|
|
141 |
):
|
142 |
if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
|
143 |
return None
|
144 |
+
|
145 |
+
# Clean and validate entity name
|
146 |
entity_name = clean_str(record_attributes[1]).strip('"')
|
147 |
if not entity_name.strip():
|
148 |
+
logger.warning(
|
149 |
+
f"Entity extraction error: empty entity name in: {record_attributes}"
|
150 |
+
)
|
151 |
return None
|
152 |
+
|
153 |
+
# Clean and validate entity type
|
154 |
entity_type = clean_str(record_attributes[2]).strip('"')
|
155 |
+
if not entity_type.strip() or entity_type.startswith('("'):
|
156 |
+
logger.warning(
|
157 |
+
f"Entity extraction error: invalid entity type in: {record_attributes}"
|
158 |
+
)
|
159 |
+
return None
|
160 |
+
|
161 |
+
# Clean and validate description
|
162 |
entity_description = clean_str(record_attributes[3]).strip('"')
|
163 |
+
if not entity_description.strip():
|
164 |
+
logger.warning(
|
165 |
+
f"Entity extraction error: empty description for entity '{entity_name}' of type '{entity_type}'"
|
166 |
+
)
|
167 |
+
return None
|
168 |
+
|
169 |
return dict(
|
170 |
entity_name=entity_name,
|
171 |
entity_type=entity_type,
|
172 |
description=entity_description,
|
173 |
+
source_id=chunk_key,
|
174 |
metadata={"created_at": time.time()},
|
175 |
)
|
176 |
|
|
|
239 |
entity_name, description, global_config
|
240 |
)
|
241 |
node_data = dict(
|
242 |
+
entity_id=entity_name,
|
243 |
entity_type=entity_type,
|
244 |
description=description,
|
245 |
source_id=source_id,
|
|
|
321 |
await knowledge_graph_inst.upsert_node(
|
322 |
need_insert_id,
|
323 |
node_data={
|
324 |
+
"entity_id": need_insert_id,
|
325 |
"source_id": source_id,
|
326 |
"description": description,
|
327 |
"entity_type": "UNKNOWN",
|
|
|
358 |
entity_vdb: BaseVectorStorage,
|
359 |
relationships_vdb: BaseVectorStorage,
|
360 |
global_config: dict[str, str],
|
361 |
+
pipeline_status: dict = None,
|
362 |
+
pipeline_status_lock=None,
|
363 |
llm_response_cache: BaseKVStorage | None = None,
|
364 |
) -> None:
|
|
|
|
|
|
|
365 |
use_llm_func: callable = global_config["llm_model_func"]
|
366 |
entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
|
367 |
enable_llm_cache_for_entity_extract: bool = global_config[
|
|
|
420 |
else:
|
421 |
_prompt = input_text
|
422 |
|
423 |
+
# TODO: add cache_type="extract"
|
424 |
arg_hash = compute_args_hash(_prompt)
|
425 |
cached_return, _1, _2, _3 = await handle_cache(
|
426 |
llm_response_cache,
|
|
|
428 |
_prompt,
|
429 |
"default",
|
430 |
cache_type="extract",
|
|
|
431 |
)
|
432 |
if cached_return:
|
433 |
logger.debug(f"Found cache for {arg_hash}")
|
|
|
456 |
else:
|
457 |
return await use_llm_func(input_text)
|
458 |
|
459 |
+
async def _process_extraction_result(result: str, chunk_key: str):
|
460 |
+
"""Process a single extraction result (either initial or gleaning)
|
461 |
+
Args:
|
462 |
+
result (str): The extraction result to process
|
463 |
+
chunk_key (str): The chunk key for source tracking
|
464 |
+
Returns:
|
465 |
+
tuple: (nodes_dict, edges_dict) containing the extracted entities and relationships
|
466 |
+
"""
|
467 |
+
maybe_nodes = defaultdict(list)
|
468 |
+
maybe_edges = defaultdict(list)
|
469 |
+
|
470 |
+
records = split_string_by_multi_markers(
|
471 |
+
result,
|
472 |
+
[context_base["record_delimiter"], context_base["completion_delimiter"]],
|
473 |
+
)
|
474 |
+
|
475 |
+
for record in records:
|
476 |
+
record = re.search(r"\((.*)\)", record)
|
477 |
+
if record is None:
|
478 |
+
continue
|
479 |
+
record = record.group(1)
|
480 |
+
record_attributes = split_string_by_multi_markers(
|
481 |
+
record, [context_base["tuple_delimiter"]]
|
482 |
+
)
|
483 |
+
|
484 |
+
if_entities = await _handle_single_entity_extraction(
|
485 |
+
record_attributes, chunk_key
|
486 |
+
)
|
487 |
+
if if_entities is not None:
|
488 |
+
maybe_nodes[if_entities["entity_name"]].append(if_entities)
|
489 |
+
continue
|
490 |
+
|
491 |
+
if_relation = await _handle_single_relationship_extraction(
|
492 |
+
record_attributes, chunk_key
|
493 |
+
)
|
494 |
+
if if_relation is not None:
|
495 |
+
maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
|
496 |
+
if_relation
|
497 |
+
)
|
498 |
+
|
499 |
+
return maybe_nodes, maybe_edges
|
500 |
+
|
501 |
async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
|
502 |
+
"""Process a single chunk
|
503 |
Args:
|
504 |
chunk_key_dp (tuple[str, TextChunkSchema]):
|
505 |
+
("chunk-xxxxxx", {"tokens": int, "content": str, "full_doc_id": str, "chunk_order_index": int})
|
506 |
"""
|
507 |
nonlocal processed_chunks
|
508 |
chunk_key = chunk_key_dp[0]
|
509 |
chunk_dp = chunk_key_dp[1]
|
510 |
content = chunk_dp["content"]
|
511 |
+
|
512 |
+
# Get initial extraction
|
513 |
hint_prompt = entity_extract_prompt.format(
|
514 |
**context_base, input_text="{input_text}"
|
515 |
).format(**context_base, input_text=content)
|
516 |
|
517 |
final_result = await _user_llm_func_with_cache(hint_prompt)
|
518 |
history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
|
519 |
+
|
520 |
+
# Process initial extraction
|
521 |
+
maybe_nodes, maybe_edges = await _process_extraction_result(
|
522 |
+
final_result, chunk_key
|
523 |
+
)
|
524 |
+
|
525 |
+
# Process additional gleaning results
|
526 |
for now_glean_index in range(entity_extract_max_gleaning):
|
527 |
glean_result = await _user_llm_func_with_cache(
|
528 |
continue_prompt, history_messages=history
|
529 |
)
|
530 |
|
531 |
history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
|
532 |
+
|
533 |
+
# Process gleaning result separately
|
534 |
+
glean_nodes, glean_edges = await _process_extraction_result(
|
535 |
+
glean_result, chunk_key
|
536 |
+
)
|
537 |
+
|
538 |
+
# Merge results
|
539 |
+
for entity_name, entities in glean_nodes.items():
|
540 |
+
maybe_nodes[entity_name].extend(entities)
|
541 |
+
for edge_key, edges in glean_edges.items():
|
542 |
+
maybe_edges[edge_key].extend(edges)
|
543 |
+
|
544 |
if now_glean_index == entity_extract_max_gleaning - 1:
|
545 |
break
|
546 |
|
|
|
551 |
if if_loop_result != "yes":
|
552 |
break
|
553 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
554 |
processed_chunks += 1
|
555 |
entities_count = len(maybe_nodes)
|
556 |
relations_count = len(maybe_edges)
|
557 |
log_message = f" Chunk {processed_chunks}/{total_chunks}: extracted {entities_count} entities and {relations_count} relationships (deduplicated)"
|
558 |
logger.info(log_message)
|
559 |
+
if pipeline_status is not None:
|
560 |
+
async with pipeline_status_lock:
|
561 |
+
pipeline_status["latest_message"] = log_message
|
562 |
+
pipeline_status["history_messages"].append(log_message)
|
563 |
return dict(maybe_nodes), dict(maybe_edges)
|
564 |
|
565 |
tasks = [_process_single_content(c) for c in ordered_chunks]
|
|
|
573 |
for k, v in m_edges.items():
|
574 |
maybe_edges[tuple(sorted(k))].extend(v)
|
575 |
|
576 |
+
from .kg.shared_storage import get_graph_db_lock
|
|
|
|
|
|
|
|
|
|
|
577 |
|
578 |
+
graph_db_lock = get_graph_db_lock(enable_logging=False)
|
579 |
+
|
580 |
+
# Ensure that nodes and edges are merged and upserted atomically
|
581 |
+
async with graph_db_lock:
|
582 |
+
all_entities_data = await asyncio.gather(
|
583 |
+
*[
|
584 |
+
_merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
|
585 |
+
for k, v in maybe_nodes.items()
|
586 |
+
]
|
587 |
+
)
|
588 |
+
|
589 |
+
all_relationships_data = await asyncio.gather(
|
590 |
+
*[
|
591 |
+
_merge_edges_then_upsert(
|
592 |
+
k[0], k[1], v, knowledge_graph_inst, global_config
|
593 |
+
)
|
594 |
+
for k, v in maybe_edges.items()
|
595 |
+
]
|
596 |
+
)
|
597 |
|
598 |
if not (all_entities_data or all_relationships_data):
|
599 |
log_message = "Didn't extract any entities and relationships."
|
600 |
logger.info(log_message)
|
601 |
+
if pipeline_status is not None:
|
602 |
+
async with pipeline_status_lock:
|
603 |
+
pipeline_status["latest_message"] = log_message
|
604 |
+
pipeline_status["history_messages"].append(log_message)
|
605 |
return
|
606 |
|
607 |
if not all_entities_data:
|
608 |
log_message = "Didn't extract any entities"
|
609 |
logger.info(log_message)
|
610 |
+
if pipeline_status is not None:
|
611 |
+
async with pipeline_status_lock:
|
612 |
+
pipeline_status["latest_message"] = log_message
|
613 |
+
pipeline_status["history_messages"].append(log_message)
|
614 |
if not all_relationships_data:
|
615 |
log_message = "Didn't extract any relationships"
|
616 |
logger.info(log_message)
|
617 |
+
if pipeline_status is not None:
|
618 |
+
async with pipeline_status_lock:
|
619 |
+
pipeline_status["latest_message"] = log_message
|
620 |
+
pipeline_status["history_messages"].append(log_message)
|
621 |
|
622 |
log_message = f"Extracted {len(all_entities_data)} entities and {len(all_relationships_data)} relationships (deduplicated)"
|
623 |
logger.info(log_message)
|
624 |
+
if pipeline_status is not None:
|
625 |
+
async with pipeline_status_lock:
|
626 |
+
pipeline_status["latest_message"] = log_message
|
627 |
+
pipeline_status["history_messages"].append(log_message)
|
628 |
verbose_debug(
|
629 |
f"New entities:{all_entities_data}, relationships:{all_relationships_data}"
|
630 |
)
|
|
|
1090 |
text_chunks_db: BaseKVStorage,
|
1091 |
query_param: QueryParam,
|
1092 |
):
|
1093 |
+
logger.info(f"Process {os.getpid()} buidling query context...")
|
1094 |
if query_param.mode == "local":
|
1095 |
entities_context, relations_context, text_units_context = await _get_node_data(
|
1096 |
ll_keywords,
|
|
|
1916 |
)
|
1917 |
|
1918 |
return response
|
1919 |
+
|
1920 |
+
|
1921 |
+
async def query_with_keywords(
|
1922 |
+
query: str,
|
1923 |
+
prompt: str,
|
1924 |
+
param: QueryParam,
|
1925 |
+
knowledge_graph_inst: BaseGraphStorage,
|
1926 |
+
entities_vdb: BaseVectorStorage,
|
1927 |
+
relationships_vdb: BaseVectorStorage,
|
1928 |
+
chunks_vdb: BaseVectorStorage,
|
1929 |
+
text_chunks_db: BaseKVStorage,
|
1930 |
+
global_config: dict[str, str],
|
1931 |
+
hashing_kv: BaseKVStorage | None = None,
|
1932 |
+
) -> str | AsyncIterator[str]:
|
1933 |
+
"""
|
1934 |
+
Extract keywords from the query and then use them for retrieving information.
|
1935 |
+
|
1936 |
+
1. Extracts high-level and low-level keywords from the query
|
1937 |
+
2. Formats the query with the extracted keywords and prompt
|
1938 |
+
3. Uses the appropriate query method based on param.mode
|
1939 |
+
|
1940 |
+
Args:
|
1941 |
+
query: The user's query
|
1942 |
+
prompt: Additional prompt to prepend to the query
|
1943 |
+
param: Query parameters
|
1944 |
+
knowledge_graph_inst: Knowledge graph storage
|
1945 |
+
entities_vdb: Entities vector database
|
1946 |
+
relationships_vdb: Relationships vector database
|
1947 |
+
chunks_vdb: Document chunks vector database
|
1948 |
+
text_chunks_db: Text chunks storage
|
1949 |
+
global_config: Global configuration
|
1950 |
+
hashing_kv: Cache storage
|
1951 |
+
|
1952 |
+
Returns:
|
1953 |
+
Query response or async iterator
|
1954 |
+
"""
|
1955 |
+
# Extract keywords
|
1956 |
+
hl_keywords, ll_keywords = await extract_keywords_only(
|
1957 |
+
text=query,
|
1958 |
+
param=param,
|
1959 |
+
global_config=global_config,
|
1960 |
+
hashing_kv=hashing_kv,
|
1961 |
+
)
|
1962 |
+
|
1963 |
+
param.hl_keywords = hl_keywords
|
1964 |
+
param.ll_keywords = ll_keywords
|
1965 |
+
|
1966 |
+
# Create a new string with the prompt and the keywords
|
1967 |
+
ll_keywords_str = ", ".join(ll_keywords)
|
1968 |
+
hl_keywords_str = ", ".join(hl_keywords)
|
1969 |
+
formatted_question = f"{prompt}\n\n### Keywords:\nHigh-level: {hl_keywords_str}\nLow-level: {ll_keywords_str}\n\n### Query:\n{query}"
|
1970 |
+
|
1971 |
+
# Use appropriate query method based on mode
|
1972 |
+
if param.mode in ["local", "global", "hybrid"]:
|
1973 |
+
return await kg_query_with_keywords(
|
1974 |
+
formatted_question,
|
1975 |
+
knowledge_graph_inst,
|
1976 |
+
entities_vdb,
|
1977 |
+
relationships_vdb,
|
1978 |
+
text_chunks_db,
|
1979 |
+
param,
|
1980 |
+
global_config,
|
1981 |
+
hashing_kv=hashing_kv,
|
1982 |
+
)
|
1983 |
+
elif param.mode == "naive":
|
1984 |
+
return await naive_query(
|
1985 |
+
formatted_question,
|
1986 |
+
chunks_vdb,
|
1987 |
+
text_chunks_db,
|
1988 |
+
param,
|
1989 |
+
global_config,
|
1990 |
+
hashing_kv=hashing_kv,
|
1991 |
+
)
|
1992 |
+
elif param.mode == "mix":
|
1993 |
+
return await mix_kg_vector_query(
|
1994 |
+
formatted_question,
|
1995 |
+
knowledge_graph_inst,
|
1996 |
+
entities_vdb,
|
1997 |
+
relationships_vdb,
|
1998 |
+
chunks_vdb,
|
1999 |
+
text_chunks_db,
|
2000 |
+
param,
|
2001 |
+
global_config,
|
2002 |
+
hashing_kv=hashing_kv,
|
2003 |
+
)
|
2004 |
+
else:
|
2005 |
+
raise ValueError(f"Unknown mode {param.mode}")
|
lightrag/prompt.py
CHANGED
@@ -236,7 +236,7 @@ Given the query and conversation history, list both high-level and low-level key
|
|
236 |
---Instructions---
|
237 |
|
238 |
- Consider both the current query and relevant conversation history when extracting keywords
|
239 |
-
- Output the keywords in JSON format
|
240 |
- The JSON should have two keys:
|
241 |
- "high_level_keywords" for overarching concepts or themes
|
242 |
- "low_level_keywords" for specific entities or details
|
|
|
236 |
---Instructions---
|
237 |
|
238 |
- Consider both the current query and relevant conversation history when extracting keywords
|
239 |
+
- Output the keywords in JSON format, it will be parsed by a JSON parser, do not add any extra content in output
|
240 |
- The JSON should have two keys:
|
241 |
- "high_level_keywords" for overarching concepts or themes
|
242 |
- "low_level_keywords" for specific entities or details
|
lightrag/utils.py
CHANGED
@@ -633,15 +633,15 @@ async def handle_cache(
|
|
633 |
prompt,
|
634 |
mode="default",
|
635 |
cache_type=None,
|
636 |
-
force_llm_cache=False,
|
637 |
):
|
638 |
"""Generic cache handling function"""
|
639 |
-
if hashing_kv is None
|
640 |
-
force_llm_cache or hashing_kv.global_config.get("enable_llm_cache")
|
641 |
-
):
|
642 |
return None, None, None, None
|
643 |
|
644 |
-
if mode != "default":
|
|
|
|
|
|
|
645 |
# Get embedding cache configuration
|
646 |
embedding_cache_config = hashing_kv.global_config.get(
|
647 |
"embedding_cache_config",
|
@@ -651,8 +651,7 @@ async def handle_cache(
|
|
651 |
use_llm_check = embedding_cache_config.get("use_llm_check", False)
|
652 |
|
653 |
quantized = min_val = max_val = None
|
654 |
-
if is_embedding_cache_enabled:
|
655 |
-
# Use embedding cache
|
656 |
current_embedding = await hashing_kv.embedding_func([prompt])
|
657 |
llm_model_func = hashing_kv.global_config.get("llm_model_func")
|
658 |
quantized, min_val, max_val = quantize_embedding(current_embedding[0])
|
@@ -667,24 +666,29 @@ async def handle_cache(
|
|
667 |
cache_type=cache_type,
|
668 |
)
|
669 |
if best_cached_response is not None:
|
670 |
-
logger.
|
671 |
return best_cached_response, None, None, None
|
672 |
else:
|
673 |
# if caching keyword embedding is enabled, return the quantized embedding for saving it latter
|
674 |
-
logger.
|
675 |
return None, quantized, min_val, max_val
|
676 |
|
677 |
-
#
|
678 |
-
|
|
|
|
|
|
|
|
|
|
|
679 |
if exists_func(hashing_kv, "get_by_mode_and_id"):
|
680 |
mode_cache = await hashing_kv.get_by_mode_and_id(mode, args_hash) or {}
|
681 |
else:
|
682 |
mode_cache = await hashing_kv.get_by_id(mode) or {}
|
683 |
if args_hash in mode_cache:
|
684 |
-
logger.
|
685 |
return mode_cache[args_hash]["return"], None, None, None
|
686 |
|
687 |
-
logger.
|
688 |
return None, None, None, None
|
689 |
|
690 |
|
@@ -701,9 +705,22 @@ class CacheData:
|
|
701 |
|
702 |
|
703 |
async def save_to_cache(hashing_kv, cache_data: CacheData):
|
704 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
705 |
return
|
706 |
|
|
|
707 |
if exists_func(hashing_kv, "get_by_mode_and_id"):
|
708 |
mode_cache = (
|
709 |
await hashing_kv.get_by_mode_and_id(cache_data.mode, cache_data.args_hash)
|
@@ -712,6 +729,16 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
|
|
712 |
else:
|
713 |
mode_cache = await hashing_kv.get_by_id(cache_data.mode) or {}
|
714 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
715 |
mode_cache[cache_data.args_hash] = {
|
716 |
"return": cache_data.content,
|
717 |
"cache_type": cache_data.cache_type,
|
@@ -726,6 +753,7 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
|
|
726 |
"original_prompt": cache_data.prompt,
|
727 |
}
|
728 |
|
|
|
729 |
await hashing_kv.upsert({cache_data.mode: mode_cache})
|
730 |
|
731 |
|
@@ -862,3 +890,52 @@ def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any
|
|
862 |
return cls(*args, **kwargs)
|
863 |
|
864 |
return import_class
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
633 |
prompt,
|
634 |
mode="default",
|
635 |
cache_type=None,
|
|
|
636 |
):
|
637 |
"""Generic cache handling function"""
|
638 |
+
if hashing_kv is None:
|
|
|
|
|
639 |
return None, None, None, None
|
640 |
|
641 |
+
if mode != "default": # handle cache for all type of query
|
642 |
+
if not hashing_kv.global_config.get("enable_llm_cache"):
|
643 |
+
return None, None, None, None
|
644 |
+
|
645 |
# Get embedding cache configuration
|
646 |
embedding_cache_config = hashing_kv.global_config.get(
|
647 |
"embedding_cache_config",
|
|
|
651 |
use_llm_check = embedding_cache_config.get("use_llm_check", False)
|
652 |
|
653 |
quantized = min_val = max_val = None
|
654 |
+
if is_embedding_cache_enabled: # Use embedding simularity to match cache
|
|
|
655 |
current_embedding = await hashing_kv.embedding_func([prompt])
|
656 |
llm_model_func = hashing_kv.global_config.get("llm_model_func")
|
657 |
quantized, min_val, max_val = quantize_embedding(current_embedding[0])
|
|
|
666 |
cache_type=cache_type,
|
667 |
)
|
668 |
if best_cached_response is not None:
|
669 |
+
logger.debug(f"Embedding cached hit(mode:{mode} type:{cache_type})")
|
670 |
return best_cached_response, None, None, None
|
671 |
else:
|
672 |
# if caching keyword embedding is enabled, return the quantized embedding for saving it latter
|
673 |
+
logger.debug(f"Embedding cached missed(mode:{mode} type:{cache_type})")
|
674 |
return None, quantized, min_val, max_val
|
675 |
|
676 |
+
else: # handle cache for entity extraction
|
677 |
+
if not hashing_kv.global_config.get("enable_llm_cache_for_entity_extract"):
|
678 |
+
return None, None, None, None
|
679 |
+
|
680 |
+
# Here is the conditions of code reaching this point:
|
681 |
+
# 1. All query mode: enable_llm_cache is True and embedding simularity is not enabled
|
682 |
+
# 2. Entity extract: enable_llm_cache_for_entity_extract is True
|
683 |
if exists_func(hashing_kv, "get_by_mode_and_id"):
|
684 |
mode_cache = await hashing_kv.get_by_mode_and_id(mode, args_hash) or {}
|
685 |
else:
|
686 |
mode_cache = await hashing_kv.get_by_id(mode) or {}
|
687 |
if args_hash in mode_cache:
|
688 |
+
logger.debug(f"Non-embedding cached hit(mode:{mode} type:{cache_type})")
|
689 |
return mode_cache[args_hash]["return"], None, None, None
|
690 |
|
691 |
+
logger.debug(f"Non-embedding cached missed(mode:{mode} type:{cache_type})")
|
692 |
return None, None, None, None
|
693 |
|
694 |
|
|
|
705 |
|
706 |
|
707 |
async def save_to_cache(hashing_kv, cache_data: CacheData):
|
708 |
+
"""Save data to cache, with improved handling for streaming responses and duplicate content.
|
709 |
+
|
710 |
+
Args:
|
711 |
+
hashing_kv: The key-value storage for caching
|
712 |
+
cache_data: The cache data to save
|
713 |
+
"""
|
714 |
+
# Skip if storage is None or content is a streaming response
|
715 |
+
if hashing_kv is None or not cache_data.content:
|
716 |
+
return
|
717 |
+
|
718 |
+
# If content is a streaming response, don't cache it
|
719 |
+
if hasattr(cache_data.content, "__aiter__"):
|
720 |
+
logger.debug("Streaming response detected, skipping cache")
|
721 |
return
|
722 |
|
723 |
+
# Get existing cache data
|
724 |
if exists_func(hashing_kv, "get_by_mode_and_id"):
|
725 |
mode_cache = (
|
726 |
await hashing_kv.get_by_mode_and_id(cache_data.mode, cache_data.args_hash)
|
|
|
729 |
else:
|
730 |
mode_cache = await hashing_kv.get_by_id(cache_data.mode) or {}
|
731 |
|
732 |
+
# Check if we already have identical content cached
|
733 |
+
if cache_data.args_hash in mode_cache:
|
734 |
+
existing_content = mode_cache[cache_data.args_hash].get("return")
|
735 |
+
if existing_content == cache_data.content:
|
736 |
+
logger.info(
|
737 |
+
f"Cache content unchanged for {cache_data.args_hash}, skipping update"
|
738 |
+
)
|
739 |
+
return
|
740 |
+
|
741 |
+
# Update cache with new content
|
742 |
mode_cache[cache_data.args_hash] = {
|
743 |
"return": cache_data.content,
|
744 |
"cache_type": cache_data.cache_type,
|
|
|
753 |
"original_prompt": cache_data.prompt,
|
754 |
}
|
755 |
|
756 |
+
# Only upsert if there's actual new content
|
757 |
await hashing_kv.upsert({cache_data.mode: mode_cache})
|
758 |
|
759 |
|
|
|
890 |
return cls(*args, **kwargs)
|
891 |
|
892 |
return import_class
|
893 |
+
|
894 |
+
|
895 |
+
def get_content_summary(content: str, max_length: int = 100) -> str:
|
896 |
+
"""Get summary of document content
|
897 |
+
|
898 |
+
Args:
|
899 |
+
content: Original document content
|
900 |
+
max_length: Maximum length of summary
|
901 |
+
|
902 |
+
Returns:
|
903 |
+
Truncated content with ellipsis if needed
|
904 |
+
"""
|
905 |
+
content = content.strip()
|
906 |
+
if len(content) <= max_length:
|
907 |
+
return content
|
908 |
+
return content[:max_length] + "..."
|
909 |
+
|
910 |
+
|
911 |
+
def clean_text(text: str) -> str:
|
912 |
+
"""Clean text by removing null bytes (0x00) and whitespace
|
913 |
+
|
914 |
+
Args:
|
915 |
+
text: Input text to clean
|
916 |
+
|
917 |
+
Returns:
|
918 |
+
Cleaned text
|
919 |
+
"""
|
920 |
+
return text.strip().replace("\x00", "")
|
921 |
+
|
922 |
+
|
923 |
+
def check_storage_env_vars(storage_name: str) -> None:
|
924 |
+
"""Check if all required environment variables for storage implementation exist
|
925 |
+
|
926 |
+
Args:
|
927 |
+
storage_name: Storage implementation name
|
928 |
+
|
929 |
+
Raises:
|
930 |
+
ValueError: If required environment variables are missing
|
931 |
+
"""
|
932 |
+
from lightrag.kg import STORAGE_ENV_REQUIREMENTS
|
933 |
+
|
934 |
+
required_vars = STORAGE_ENV_REQUIREMENTS.get(storage_name, [])
|
935 |
+
missing_vars = [var for var in required_vars if var not in os.environ]
|
936 |
+
|
937 |
+
if missing_vars:
|
938 |
+
raise ValueError(
|
939 |
+
f"Storage implementation '{storage_name}' requires the following "
|
940 |
+
f"environment variables: {', '.join(missing_vars)}"
|
941 |
+
)
|
lightrag_webui/bun.lock
CHANGED
@@ -34,11 +34,13 @@
|
|
34 |
"cmdk": "^1.0.4",
|
35 |
"graphology": "^0.26.0",
|
36 |
"graphology-generators": "^0.11.2",
|
|
|
37 |
"lucide-react": "^0.475.0",
|
38 |
"minisearch": "^7.1.2",
|
39 |
"react": "^19.0.0",
|
40 |
"react-dom": "^19.0.0",
|
41 |
"react-dropzone": "^14.3.6",
|
|
|
42 |
"react-markdown": "^9.1.0",
|
43 |
"react-number-format": "^5.4.3",
|
44 |
"react-syntax-highlighter": "^15.6.1",
|
@@ -765,8 +767,12 @@
|
|
765 |
|
766 |
"hoist-non-react-statics": ["[email protected]", "", { "dependencies": { "react-is": "^16.7.0" } }, "sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw=="],
|
767 |
|
|
|
|
|
768 |
"html-url-attributes": ["[email protected]", "", {}, "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ=="],
|
769 |
|
|
|
|
|
770 |
"ignore": ["[email protected]", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="],
|
771 |
|
772 |
"import-fresh": ["[email protected]", "", { "dependencies": { "parent-module": "^1.0.0", "resolve-from": "^4.0.0" } }, "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ=="],
|
@@ -1093,6 +1099,8 @@
|
|
1093 |
|
1094 |
"react-dropzone": ["[email protected]", "", { "dependencies": { "attr-accept": "^2.2.4", "file-selector": "^2.1.0", "prop-types": "^15.8.1" }, "peerDependencies": { "react": ">= 16.8 || 18.0.0" } }, "sha512-U792j+x0rcwH/U/Slv/OBNU/LGFYbDLHKKiJoPhNaOianayZevCt4Y5S0CraPssH/6/wT6xhKDfzdXUgCBS0HQ=="],
|
1095 |
|
|
|
|
|
1096 |
"react-is": ["[email protected]", "", {}, "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ=="],
|
1097 |
|
1098 |
"react-markdown": ["[email protected]", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "hast-util-to-jsx-runtime": "^2.0.0", "html-url-attributes": "^3.0.0", "mdast-util-to-hast": "^13.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.0.0", "unified": "^11.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" }, "peerDependencies": { "@types/react": ">=18", "react": ">=18" } }, "sha512-xaijuJB0kzGiUdG7nc2MOMDUDBWPyGAjZtUrow9XxUeua8IqeP+VlIfAZ3bphpcLTnSZXz6z9jcVC/TCwbfgdw=="],
|
@@ -1271,6 +1279,8 @@
|
|
1271 |
|
1272 |
"vite": ["[email protected]", "", { "dependencies": { "esbuild": "^0.24.2", "postcss": "^8.5.2", "rollup": "^4.30.1" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", "jiti": ">=1.21.0", "less": "*", "lightningcss": "^1.21.0", "sass": "*", "sass-embedded": "*", "stylus": "*", "sugarss": "*", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["@types/node", "jiti", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": { "vite": "bin/vite.js" } }, "sha512-4GgM54XrwRfrOp297aIYspIti66k56v16ZnqHvrIM7mG+HjDlAwS7p+Srr7J6fGvEdOJ5JcQ/D9T7HhtdXDTzA=="],
|
1273 |
|
|
|
|
|
1274 |
"which": ["[email protected]", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="],
|
1275 |
|
1276 |
"which-boxed-primitive": ["[email protected]", "", { "dependencies": { "is-bigint": "^1.1.0", "is-boolean-object": "^1.2.1", "is-number-object": "^1.1.1", "is-string": "^1.1.1", "is-symbol": "^1.1.1" } }, "sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA=="],
|
|
|
34 |
"cmdk": "^1.0.4",
|
35 |
"graphology": "^0.26.0",
|
36 |
"graphology-generators": "^0.11.2",
|
37 |
+
"i18next": "^24.2.2",
|
38 |
"lucide-react": "^0.475.0",
|
39 |
"minisearch": "^7.1.2",
|
40 |
"react": "^19.0.0",
|
41 |
"react-dom": "^19.0.0",
|
42 |
"react-dropzone": "^14.3.6",
|
43 |
+
"react-i18next": "^15.4.1",
|
44 |
"react-markdown": "^9.1.0",
|
45 |
"react-number-format": "^5.4.3",
|
46 |
"react-syntax-highlighter": "^15.6.1",
|
|
|
767 |
|
768 |
"hoist-non-react-statics": ["[email protected]", "", { "dependencies": { "react-is": "^16.7.0" } }, "sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw=="],
|
769 |
|
770 |
+
"html-parse-stringify": ["[email protected]", "", { "dependencies": { "void-elements": "3.1.0" } }, "sha512-KknJ50kTInJ7qIScF3jeaFRpMpE8/lfiTdzf/twXyPBLAGrLRTmkz3AdTnKeh40X8k9L2fdYwEp/42WGXIRGcg=="],
|
771 |
+
|
772 |
"html-url-attributes": ["[email protected]", "", {}, "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ=="],
|
773 |
|
774 |
+
"i18next": ["[email protected]", "", { "dependencies": { "@babel/runtime": "^7.23.2" }, "peerDependencies": { "typescript": "^5" }, "optionalPeers": ["typescript"] }, "sha512-NE6i86lBCKRYZa5TaUDkU5S4HFgLIEJRLr3Whf2psgaxBleQ2LC1YW1Vc+SCgkAW7VEzndT6al6+CzegSUHcTQ=="],
|
775 |
+
|
776 |
"ignore": ["[email protected]", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="],
|
777 |
|
778 |
"import-fresh": ["[email protected]", "", { "dependencies": { "parent-module": "^1.0.0", "resolve-from": "^4.0.0" } }, "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ=="],
|
|
|
1099 |
|
1100 |
"react-dropzone": ["[email protected]", "", { "dependencies": { "attr-accept": "^2.2.4", "file-selector": "^2.1.0", "prop-types": "^15.8.1" }, "peerDependencies": { "react": ">= 16.8 || 18.0.0" } }, "sha512-U792j+x0rcwH/U/Slv/OBNU/LGFYbDLHKKiJoPhNaOianayZevCt4Y5S0CraPssH/6/wT6xhKDfzdXUgCBS0HQ=="],
|
1101 |
|
1102 |
+
"react-i18next": ["[email protected]", "", { "dependencies": { "@babel/runtime": "^7.25.0", "html-parse-stringify": "^3.0.1" }, "peerDependencies": { "i18next": ">= 23.2.3", "react": ">= 16.8.0" } }, "sha512-ahGab+IaSgZmNPYXdV1n+OYky95TGpFwnKRflX/16dY04DsYYKHtVLjeny7sBSCREEcoMbAgSkFiGLF5g5Oofw=="],
|
1103 |
+
|
1104 |
"react-is": ["[email protected]", "", {}, "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ=="],
|
1105 |
|
1106 |
"react-markdown": ["[email protected]", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "hast-util-to-jsx-runtime": "^2.0.0", "html-url-attributes": "^3.0.0", "mdast-util-to-hast": "^13.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.0.0", "unified": "^11.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" }, "peerDependencies": { "@types/react": ">=18", "react": ">=18" } }, "sha512-xaijuJB0kzGiUdG7nc2MOMDUDBWPyGAjZtUrow9XxUeua8IqeP+VlIfAZ3bphpcLTnSZXz6z9jcVC/TCwbfgdw=="],
|
|
|
1279 |
|
1280 |
"vite": ["[email protected]", "", { "dependencies": { "esbuild": "^0.24.2", "postcss": "^8.5.2", "rollup": "^4.30.1" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", "jiti": ">=1.21.0", "less": "*", "lightningcss": "^1.21.0", "sass": "*", "sass-embedded": "*", "stylus": "*", "sugarss": "*", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["@types/node", "jiti", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": { "vite": "bin/vite.js" } }, "sha512-4GgM54XrwRfrOp297aIYspIti66k56v16ZnqHvrIM7mG+HjDlAwS7p+Srr7J6fGvEdOJ5JcQ/D9T7HhtdXDTzA=="],
|
1281 |
|
1282 |
+
"void-elements": ["[email protected]", "", {}, "sha512-Dhxzh5HZuiHQhbvTW9AMetFfBHDMYpo23Uo9btPXgdYP+3T5S+p+jgNy7spra+veYhBP2dCSgxR/i2Y02h5/6w=="],
|
1283 |
+
|
1284 |
"which": ["[email protected]", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="],
|
1285 |
|
1286 |
"which-boxed-primitive": ["[email protected]", "", { "dependencies": { "is-bigint": "^1.1.0", "is-boolean-object": "^1.2.1", "is-number-object": "^1.1.1", "is-string": "^1.1.1", "is-symbol": "^1.1.1" } }, "sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA=="],
|
lightrag_webui/package.json
CHANGED
@@ -43,11 +43,13 @@
|
|
43 |
"cmdk": "^1.0.4",
|
44 |
"graphology": "^0.26.0",
|
45 |
"graphology-generators": "^0.11.2",
|
|
|
46 |
"lucide-react": "^0.475.0",
|
47 |
"minisearch": "^7.1.2",
|
48 |
"react": "^19.0.0",
|
49 |
"react-dom": "^19.0.0",
|
50 |
"react-dropzone": "^14.3.6",
|
|
|
51 |
"react-markdown": "^9.1.0",
|
52 |
"react-number-format": "^5.4.3",
|
53 |
"react-syntax-highlighter": "^15.6.1",
|
|
|
43 |
"cmdk": "^1.0.4",
|
44 |
"graphology": "^0.26.0",
|
45 |
"graphology-generators": "^0.11.2",
|
46 |
+
"i18next": "^24.2.2",
|
47 |
"lucide-react": "^0.475.0",
|
48 |
"minisearch": "^7.1.2",
|
49 |
"react": "^19.0.0",
|
50 |
"react-dom": "^19.0.0",
|
51 |
"react-dropzone": "^14.3.6",
|
52 |
+
"react-i18next": "^15.4.1",
|
53 |
"react-markdown": "^9.1.0",
|
54 |
"react-number-format": "^5.4.3",
|
55 |
"react-syntax-highlighter": "^15.6.1",
|
lightrag_webui/src/components/ThemeToggle.tsx
CHANGED
@@ -3,6 +3,7 @@ import useTheme from '@/hooks/useTheme'
|
|
3 |
import { MoonIcon, SunIcon } from 'lucide-react'
|
4 |
import { useCallback } from 'react'
|
5 |
import { controlButtonVariant } from '@/lib/constants'
|
|
|
6 |
|
7 |
/**
|
8 |
* Component that toggles the theme between light and dark.
|
@@ -11,13 +12,14 @@ export default function ThemeToggle() {
|
|
11 |
const { theme, setTheme } = useTheme()
|
12 |
const setLight = useCallback(() => setTheme('light'), [setTheme])
|
13 |
const setDark = useCallback(() => setTheme('dark'), [setTheme])
|
|
|
14 |
|
15 |
if (theme === 'dark') {
|
16 |
return (
|
17 |
<Button
|
18 |
onClick={setLight}
|
19 |
variant={controlButtonVariant}
|
20 |
-
tooltip=
|
21 |
size="icon"
|
22 |
side="bottom"
|
23 |
>
|
@@ -29,7 +31,7 @@ export default function ThemeToggle() {
|
|
29 |
<Button
|
30 |
onClick={setDark}
|
31 |
variant={controlButtonVariant}
|
32 |
-
tooltip=
|
33 |
size="icon"
|
34 |
side="bottom"
|
35 |
>
|
|
|
3 |
import { MoonIcon, SunIcon } from 'lucide-react'
|
4 |
import { useCallback } from 'react'
|
5 |
import { controlButtonVariant } from '@/lib/constants'
|
6 |
+
import { useTranslation } from 'react-i18next'
|
7 |
|
8 |
/**
|
9 |
* Component that toggles the theme between light and dark.
|
|
|
12 |
const { theme, setTheme } = useTheme()
|
13 |
const setLight = useCallback(() => setTheme('light'), [setTheme])
|
14 |
const setDark = useCallback(() => setTheme('dark'), [setTheme])
|
15 |
+
const { t } = useTranslation()
|
16 |
|
17 |
if (theme === 'dark') {
|
18 |
return (
|
19 |
<Button
|
20 |
onClick={setLight}
|
21 |
variant={controlButtonVariant}
|
22 |
+
tooltip={t('header.themeToggle.switchToLight')}
|
23 |
size="icon"
|
24 |
side="bottom"
|
25 |
>
|
|
|
31 |
<Button
|
32 |
onClick={setDark}
|
33 |
variant={controlButtonVariant}
|
34 |
+
tooltip={t('header.themeToggle.switchToDark')}
|
35 |
size="icon"
|
36 |
side="bottom"
|
37 |
>
|
lightrag_webui/src/components/documents/ClearDocumentsDialog.tsx
CHANGED
@@ -13,38 +13,40 @@ import { errorMessage } from '@/lib/utils'
|
|
13 |
import { clearDocuments } from '@/api/lightrag'
|
14 |
|
15 |
import { EraserIcon } from 'lucide-react'
|
|
|
16 |
|
17 |
export default function ClearDocumentsDialog() {
|
|
|
18 |
const [open, setOpen] = useState(false)
|
19 |
|
20 |
const handleClear = useCallback(async () => {
|
21 |
try {
|
22 |
const result = await clearDocuments()
|
23 |
if (result.status === 'success') {
|
24 |
-
toast.success('
|
25 |
setOpen(false)
|
26 |
} else {
|
27 |
-
toast.error(
|
28 |
}
|
29 |
} catch (err) {
|
30 |
-
toast.error('
|
31 |
}
|
32 |
}, [setOpen])
|
33 |
|
34 |
return (
|
35 |
<Dialog open={open} onOpenChange={setOpen}>
|
36 |
<DialogTrigger asChild>
|
37 |
-
<Button variant="outline" side="bottom" tooltip='
|
38 |
-
<EraserIcon/>
|
39 |
</Button>
|
40 |
</DialogTrigger>
|
41 |
<DialogContent className="sm:max-w-xl" onCloseAutoFocus={(e) => e.preventDefault()}>
|
42 |
<DialogHeader>
|
43 |
-
<DialogTitle>
|
44 |
-
<DialogDescription>
|
45 |
</DialogHeader>
|
46 |
<Button variant="destructive" onClick={handleClear}>
|
47 |
-
|
48 |
</Button>
|
49 |
</DialogContent>
|
50 |
</Dialog>
|
|
|
13 |
import { clearDocuments } from '@/api/lightrag'
|
14 |
|
15 |
import { EraserIcon } from 'lucide-react'
|
16 |
+
import { useTranslation } from 'react-i18next'
|
17 |
|
18 |
export default function ClearDocumentsDialog() {
|
19 |
+
const { t } = useTranslation()
|
20 |
const [open, setOpen] = useState(false)
|
21 |
|
22 |
const handleClear = useCallback(async () => {
|
23 |
try {
|
24 |
const result = await clearDocuments()
|
25 |
if (result.status === 'success') {
|
26 |
+
toast.success(t('documentPanel.clearDocuments.success'))
|
27 |
setOpen(false)
|
28 |
} else {
|
29 |
+
toast.error(t('documentPanel.clearDocuments.failed', { message: result.message }))
|
30 |
}
|
31 |
} catch (err) {
|
32 |
+
toast.error(t('documentPanel.clearDocuments.error', { error: errorMessage(err) }))
|
33 |
}
|
34 |
}, [setOpen])
|
35 |
|
36 |
return (
|
37 |
<Dialog open={open} onOpenChange={setOpen}>
|
38 |
<DialogTrigger asChild>
|
39 |
+
<Button variant="outline" side="bottom" tooltip={t('documentPanel.clearDocuments.tooltip')} size="sm">
|
40 |
+
<EraserIcon/> {t('documentPanel.clearDocuments.button')}
|
41 |
</Button>
|
42 |
</DialogTrigger>
|
43 |
<DialogContent className="sm:max-w-xl" onCloseAutoFocus={(e) => e.preventDefault()}>
|
44 |
<DialogHeader>
|
45 |
+
<DialogTitle>{t('documentPanel.clearDocuments.title')}</DialogTitle>
|
46 |
+
<DialogDescription>{t('documentPanel.clearDocuments.confirm')}</DialogDescription>
|
47 |
</DialogHeader>
|
48 |
<Button variant="destructive" onClick={handleClear}>
|
49 |
+
{t('documentPanel.clearDocuments.confirmButton')}
|
50 |
</Button>
|
51 |
</DialogContent>
|
52 |
</Dialog>
|
lightrag_webui/src/components/documents/UploadDocumentsDialog.tsx
CHANGED
@@ -14,8 +14,10 @@ import { errorMessage } from '@/lib/utils'
|
|
14 |
import { uploadDocument } from '@/api/lightrag'
|
15 |
|
16 |
import { UploadIcon } from 'lucide-react'
|
|
|
17 |
|
18 |
export default function UploadDocumentsDialog() {
|
|
|
19 |
const [open, setOpen] = useState(false)
|
20 |
const [isUploading, setIsUploading] = useState(false)
|
21 |
const [progresses, setProgresses] = useState<Record<string, number>>({})
|
@@ -29,24 +31,24 @@ export default function UploadDocumentsDialog() {
|
|
29 |
filesToUpload.map(async (file) => {
|
30 |
try {
|
31 |
const result = await uploadDocument(file, (percentCompleted: number) => {
|
32 |
-
console.debug(
|
33 |
setProgresses((pre) => ({
|
34 |
...pre,
|
35 |
[file.name]: percentCompleted
|
36 |
}))
|
37 |
})
|
38 |
if (result.status === 'success') {
|
39 |
-
toast.success(
|
40 |
} else {
|
41 |
-
toast.error(
|
42 |
}
|
43 |
} catch (err) {
|
44 |
-
toast.error(
|
45 |
}
|
46 |
})
|
47 |
)
|
48 |
} catch (err) {
|
49 |
-
toast.error('
|
50 |
} finally {
|
51 |
setIsUploading(false)
|
52 |
// setOpen(false)
|
@@ -66,21 +68,21 @@ export default function UploadDocumentsDialog() {
|
|
66 |
}}
|
67 |
>
|
68 |
<DialogTrigger asChild>
|
69 |
-
<Button variant="default" side="bottom" tooltip=
|
70 |
-
<UploadIcon />
|
71 |
</Button>
|
72 |
</DialogTrigger>
|
73 |
<DialogContent className="sm:max-w-xl" onCloseAutoFocus={(e) => e.preventDefault()}>
|
74 |
<DialogHeader>
|
75 |
-
<DialogTitle>
|
76 |
<DialogDescription>
|
77 |
-
|
78 |
</DialogDescription>
|
79 |
</DialogHeader>
|
80 |
<FileUploader
|
81 |
maxFileCount={Infinity}
|
82 |
maxSize={200 * 1024 * 1024}
|
83 |
-
description=
|
84 |
onUpload={handleDocumentsUpload}
|
85 |
progresses={progresses}
|
86 |
disabled={isUploading}
|
|
|
14 |
import { uploadDocument } from '@/api/lightrag'
|
15 |
|
16 |
import { UploadIcon } from 'lucide-react'
|
17 |
+
import { useTranslation } from 'react-i18next'
|
18 |
|
19 |
export default function UploadDocumentsDialog() {
|
20 |
+
const { t } = useTranslation()
|
21 |
const [open, setOpen] = useState(false)
|
22 |
const [isUploading, setIsUploading] = useState(false)
|
23 |
const [progresses, setProgresses] = useState<Record<string, number>>({})
|
|
|
31 |
filesToUpload.map(async (file) => {
|
32 |
try {
|
33 |
const result = await uploadDocument(file, (percentCompleted: number) => {
|
34 |
+
console.debug(t('documentPanel.uploadDocuments.uploading', { name: file.name, percent: percentCompleted }))
|
35 |
setProgresses((pre) => ({
|
36 |
...pre,
|
37 |
[file.name]: percentCompleted
|
38 |
}))
|
39 |
})
|
40 |
if (result.status === 'success') {
|
41 |
+
toast.success(t('documentPanel.uploadDocuments.success', { name: file.name }))
|
42 |
} else {
|
43 |
+
toast.error(t('documentPanel.uploadDocuments.failed', { name: file.name, message: result.message }))
|
44 |
}
|
45 |
} catch (err) {
|
46 |
+
toast.error(t('documentPanel.uploadDocuments.error', { name: file.name, error: errorMessage(err) }))
|
47 |
}
|
48 |
})
|
49 |
)
|
50 |
} catch (err) {
|
51 |
+
toast.error(t('documentPanel.uploadDocuments.generalError', { error: errorMessage(err) }))
|
52 |
} finally {
|
53 |
setIsUploading(false)
|
54 |
// setOpen(false)
|
|
|
68 |
}}
|
69 |
>
|
70 |
<DialogTrigger asChild>
|
71 |
+
<Button variant="default" side="bottom" tooltip={t('documentPanel.uploadDocuments.tooltip')} size="sm">
|
72 |
+
<UploadIcon /> {t('documentPanel.uploadDocuments.button')}
|
73 |
</Button>
|
74 |
</DialogTrigger>
|
75 |
<DialogContent className="sm:max-w-xl" onCloseAutoFocus={(e) => e.preventDefault()}>
|
76 |
<DialogHeader>
|
77 |
+
<DialogTitle>{t('documentPanel.uploadDocuments.title')}</DialogTitle>
|
78 |
<DialogDescription>
|
79 |
+
{t('documentPanel.uploadDocuments.description')}
|
80 |
</DialogDescription>
|
81 |
</DialogHeader>
|
82 |
<FileUploader
|
83 |
maxFileCount={Infinity}
|
84 |
maxSize={200 * 1024 * 1024}
|
85 |
+
description={t('documentPanel.uploadDocuments.fileTypes')}
|
86 |
onUpload={handleDocumentsUpload}
|
87 |
progresses={progresses}
|
88 |
disabled={isUploading}
|
lightrag_webui/src/components/graph/FullScreenControl.tsx
CHANGED
@@ -2,21 +2,23 @@ import { useFullScreen } from '@react-sigma/core'
|
|
2 |
import { MaximizeIcon, MinimizeIcon } from 'lucide-react'
|
3 |
import { controlButtonVariant } from '@/lib/constants'
|
4 |
import Button from '@/components/ui/Button'
|
|
|
5 |
|
6 |
/**
|
7 |
* Component that toggles full screen mode.
|
8 |
*/
|
9 |
const FullScreenControl = () => {
|
10 |
const { isFullScreen, toggle } = useFullScreen()
|
|
|
11 |
|
12 |
return (
|
13 |
<>
|
14 |
{isFullScreen ? (
|
15 |
-
<Button variant={controlButtonVariant} onClick={toggle} tooltip=
|
16 |
<MinimizeIcon />
|
17 |
</Button>
|
18 |
) : (
|
19 |
-
<Button variant={controlButtonVariant} onClick={toggle} tooltip=
|
20 |
<MaximizeIcon />
|
21 |
</Button>
|
22 |
)}
|
|
|
2 |
import { MaximizeIcon, MinimizeIcon } from 'lucide-react'
|
3 |
import { controlButtonVariant } from '@/lib/constants'
|
4 |
import Button from '@/components/ui/Button'
|
5 |
+
import { useTranslation } from 'react-i18next'
|
6 |
|
7 |
/**
|
8 |
* Component that toggles full screen mode.
|
9 |
*/
|
10 |
const FullScreenControl = () => {
|
11 |
const { isFullScreen, toggle } = useFullScreen()
|
12 |
+
const { t } = useTranslation()
|
13 |
|
14 |
return (
|
15 |
<>
|
16 |
{isFullScreen ? (
|
17 |
+
<Button variant={controlButtonVariant} onClick={toggle} tooltip={t('graphPanel.sideBar.fullScreenControl.windowed')} size="icon">
|
18 |
<MinimizeIcon />
|
19 |
</Button>
|
20 |
) : (
|
21 |
+
<Button variant={controlButtonVariant} onClick={toggle} tooltip={t('graphPanel.sideBar.fullScreenControl.fullScreen')} size="icon">
|
22 |
<MaximizeIcon />
|
23 |
</Button>
|
24 |
)}
|
lightrag_webui/src/components/graph/GraphLabels.tsx
CHANGED
@@ -5,6 +5,7 @@ import { useSettingsStore } from '@/stores/settings'
|
|
5 |
import { useGraphStore } from '@/stores/graph'
|
6 |
import { labelListLimit } from '@/lib/constants'
|
7 |
import MiniSearch from 'minisearch'
|
|
|
8 |
|
9 |
const lastGraph: any = {
|
10 |
graph: null,
|
@@ -13,6 +14,7 @@ const lastGraph: any = {
|
|
13 |
}
|
14 |
|
15 |
const GraphLabels = () => {
|
|
|
16 |
const label = useSettingsStore.use.queryLabel()
|
17 |
const graph = useGraphStore.use.sigmaGraph()
|
18 |
|
@@ -69,7 +71,7 @@ const GraphLabels = () => {
|
|
69 |
|
70 |
return result.length <= labelListLimit
|
71 |
? result
|
72 |
-
: [...result.slice(0, labelListLimit),
|
73 |
},
|
74 |
[getSearchEngine]
|
75 |
)
|
@@ -84,14 +86,14 @@ const GraphLabels = () => {
|
|
84 |
className="ml-2"
|
85 |
triggerClassName="max-h-8"
|
86 |
searchInputClassName="max-h-8"
|
87 |
-
triggerTooltip=
|
88 |
fetcher={fetchData}
|
89 |
renderOption={(item) => <div>{item}</div>}
|
90 |
getOptionValue={(item) => item}
|
91 |
getDisplayValue={(item) => <div>{item}</div>}
|
92 |
notFound={<div className="py-6 text-center text-sm">No labels found</div>}
|
93 |
-
label=
|
94 |
-
placeholder=
|
95 |
value={label !== null ? label : ''}
|
96 |
onChange={setQueryLabel}
|
97 |
/>
|
|
|
5 |
import { useGraphStore } from '@/stores/graph'
|
6 |
import { labelListLimit } from '@/lib/constants'
|
7 |
import MiniSearch from 'minisearch'
|
8 |
+
import { useTranslation } from 'react-i18next'
|
9 |
|
10 |
const lastGraph: any = {
|
11 |
graph: null,
|
|
|
14 |
}
|
15 |
|
16 |
const GraphLabels = () => {
|
17 |
+
const { t } = useTranslation()
|
18 |
const label = useSettingsStore.use.queryLabel()
|
19 |
const graph = useGraphStore.use.sigmaGraph()
|
20 |
|
|
|
71 |
|
72 |
return result.length <= labelListLimit
|
73 |
? result
|
74 |
+
: [...result.slice(0, labelListLimit), t('graphLabels.andOthers', { count: result.length - labelListLimit })]
|
75 |
},
|
76 |
[getSearchEngine]
|
77 |
)
|
|
|
86 |
className="ml-2"
|
87 |
triggerClassName="max-h-8"
|
88 |
searchInputClassName="max-h-8"
|
89 |
+
triggerTooltip={t('graphPanel.graphLabels.selectTooltip')}
|
90 |
fetcher={fetchData}
|
91 |
renderOption={(item) => <div>{item}</div>}
|
92 |
getOptionValue={(item) => item}
|
93 |
getDisplayValue={(item) => <div>{item}</div>}
|
94 |
notFound={<div className="py-6 text-center text-sm">No labels found</div>}
|
95 |
+
label={t('graphPanel.graphLabels.label')}
|
96 |
+
placeholder={t('graphPanel.graphLabels.placeholder')}
|
97 |
value={label !== null ? label : ''}
|
98 |
onChange={setQueryLabel}
|
99 |
/>
|
lightrag_webui/src/components/graph/GraphSearch.tsx
CHANGED
@@ -9,6 +9,7 @@ import { AsyncSearch } from '@/components/ui/AsyncSearch'
|
|
9 |
import { searchResultLimit } from '@/lib/constants'
|
10 |
import { useGraphStore } from '@/stores/graph'
|
11 |
import MiniSearch from 'minisearch'
|
|
|
12 |
|
13 |
interface OptionItem {
|
14 |
id: string
|
@@ -44,6 +45,7 @@ export const GraphSearchInput = ({
|
|
44 |
onFocus?: GraphSearchInputProps['onFocus']
|
45 |
value?: GraphSearchInputProps['value']
|
46 |
}) => {
|
|
|
47 |
const graph = useGraphStore.use.sigmaGraph()
|
48 |
|
49 |
const searchEngine = useMemo(() => {
|
@@ -97,7 +99,7 @@ export const GraphSearchInput = ({
|
|
97 |
{
|
98 |
type: 'message',
|
99 |
id: messageId,
|
100 |
-
message:
|
101 |
}
|
102 |
]
|
103 |
},
|
@@ -118,7 +120,7 @@ export const GraphSearchInput = ({
|
|
118 |
if (id !== messageId && onFocus) onFocus(id ? { id, type: 'nodes' } : null)
|
119 |
}}
|
120 |
label={'item'}
|
121 |
-
placeholder=
|
122 |
/>
|
123 |
)
|
124 |
}
|
|
|
9 |
import { searchResultLimit } from '@/lib/constants'
|
10 |
import { useGraphStore } from '@/stores/graph'
|
11 |
import MiniSearch from 'minisearch'
|
12 |
+
import { useTranslation } from 'react-i18next'
|
13 |
|
14 |
interface OptionItem {
|
15 |
id: string
|
|
|
45 |
onFocus?: GraphSearchInputProps['onFocus']
|
46 |
value?: GraphSearchInputProps['value']
|
47 |
}) => {
|
48 |
+
const { t } = useTranslation()
|
49 |
const graph = useGraphStore.use.sigmaGraph()
|
50 |
|
51 |
const searchEngine = useMemo(() => {
|
|
|
99 |
{
|
100 |
type: 'message',
|
101 |
id: messageId,
|
102 |
+
message: t('graphPanel.search.message', { count: result.length - searchResultLimit })
|
103 |
}
|
104 |
]
|
105 |
},
|
|
|
120 |
if (id !== messageId && onFocus) onFocus(id ? { id, type: 'nodes' } : null)
|
121 |
}}
|
122 |
label={'item'}
|
123 |
+
placeholder={t('graphPanel.search.placeholder')}
|
124 |
/>
|
125 |
)
|
126 |
}
|
lightrag_webui/src/components/graph/LayoutsControl.tsx
CHANGED
@@ -16,6 +16,7 @@ import { controlButtonVariant } from '@/lib/constants'
|
|
16 |
import { useSettingsStore } from '@/stores/settings'
|
17 |
|
18 |
import { GripIcon, PlayIcon, PauseIcon } from 'lucide-react'
|
|
|
19 |
|
20 |
type LayoutName =
|
21 |
| 'Circular'
|
@@ -28,6 +29,7 @@ type LayoutName =
|
|
28 |
const WorkerLayoutControl = ({ layout, autoRunFor }: WorkerLayoutControlProps) => {
|
29 |
const sigma = useSigma()
|
30 |
const { stop, start, isRunning } = layout
|
|
|
31 |
|
32 |
/**
|
33 |
* Init component when Sigma or component settings change.
|
@@ -61,7 +63,7 @@ const WorkerLayoutControl = ({ layout, autoRunFor }: WorkerLayoutControlProps) =
|
|
61 |
<Button
|
62 |
size="icon"
|
63 |
onClick={() => (isRunning ? stop() : start())}
|
64 |
-
tooltip={isRunning ? '
|
65 |
variant={controlButtonVariant}
|
66 |
>
|
67 |
{isRunning ? <PauseIcon /> : <PlayIcon />}
|
@@ -74,6 +76,7 @@ const WorkerLayoutControl = ({ layout, autoRunFor }: WorkerLayoutControlProps) =
|
|
74 |
*/
|
75 |
const LayoutsControl = () => {
|
76 |
const sigma = useSigma()
|
|
|
77 |
const [layout, setLayout] = useState<LayoutName>('Circular')
|
78 |
const [opened, setOpened] = useState<boolean>(false)
|
79 |
|
@@ -149,7 +152,7 @@ const LayoutsControl = () => {
|
|
149 |
size="icon"
|
150 |
variant={controlButtonVariant}
|
151 |
onClick={() => setOpened((e: boolean) => !e)}
|
152 |
-
tooltip=
|
153 |
>
|
154 |
<GripIcon />
|
155 |
</Button>
|
@@ -166,7 +169,7 @@ const LayoutsControl = () => {
|
|
166 |
key={name}
|
167 |
className="cursor-pointer text-xs"
|
168 |
>
|
169 |
-
{name}
|
170 |
</CommandItem>
|
171 |
))}
|
172 |
</CommandGroup>
|
|
|
16 |
import { useSettingsStore } from '@/stores/settings'
|
17 |
|
18 |
import { GripIcon, PlayIcon, PauseIcon } from 'lucide-react'
|
19 |
+
import { useTranslation } from 'react-i18next'
|
20 |
|
21 |
type LayoutName =
|
22 |
| 'Circular'
|
|
|
29 |
const WorkerLayoutControl = ({ layout, autoRunFor }: WorkerLayoutControlProps) => {
|
30 |
const sigma = useSigma()
|
31 |
const { stop, start, isRunning } = layout
|
32 |
+
const { t } = useTranslation()
|
33 |
|
34 |
/**
|
35 |
* Init component when Sigma or component settings change.
|
|
|
63 |
<Button
|
64 |
size="icon"
|
65 |
onClick={() => (isRunning ? stop() : start())}
|
66 |
+
tooltip={isRunning ? t('graphPanel.sideBar.layoutsControl.stopAnimation') : t('graphPanel.sideBar.layoutsControl.startAnimation')}
|
67 |
variant={controlButtonVariant}
|
68 |
>
|
69 |
{isRunning ? <PauseIcon /> : <PlayIcon />}
|
|
|
76 |
*/
|
77 |
const LayoutsControl = () => {
|
78 |
const sigma = useSigma()
|
79 |
+
const { t } = useTranslation()
|
80 |
const [layout, setLayout] = useState<LayoutName>('Circular')
|
81 |
const [opened, setOpened] = useState<boolean>(false)
|
82 |
|
|
|
152 |
size="icon"
|
153 |
variant={controlButtonVariant}
|
154 |
onClick={() => setOpened((e: boolean) => !e)}
|
155 |
+
tooltip={t('graphPanel.sideBar.layoutsControl.layoutGraph')}
|
156 |
>
|
157 |
<GripIcon />
|
158 |
</Button>
|
|
|
169 |
key={name}
|
170 |
className="cursor-pointer text-xs"
|
171 |
>
|
172 |
+
{t(`graphPanel.sideBar.layoutsControl.layouts.${name}`)}
|
173 |
</CommandItem>
|
174 |
))}
|
175 |
</CommandGroup>
|
lightrag_webui/src/components/graph/PropertiesView.tsx
CHANGED
@@ -2,6 +2,7 @@ import { useEffect, useState } from 'react'
|
|
2 |
import { useGraphStore, RawNodeType, RawEdgeType } from '@/stores/graph'
|
3 |
import Text from '@/components/ui/Text'
|
4 |
import useLightragGraph from '@/hooks/useLightragGraph'
|
|
|
5 |
|
6 |
/**
|
7 |
* Component that view properties of elements in graph.
|
@@ -147,21 +148,22 @@ const PropertyRow = ({
|
|
147 |
}
|
148 |
|
149 |
const NodePropertiesView = ({ node }: { node: NodeType }) => {
|
|
|
150 |
return (
|
151 |
<div className="flex flex-col gap-2">
|
152 |
-
<label className="text-md pl-1 font-bold tracking-wide text-sky-300">
|
153 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
154 |
-
<PropertyRow name={'
|
155 |
<PropertyRow
|
156 |
-
name={'
|
157 |
value={node.labels.join(', ')}
|
158 |
onClick={() => {
|
159 |
useGraphStore.getState().setSelectedNode(node.id, true)
|
160 |
}}
|
161 |
/>
|
162 |
-
<PropertyRow name={'
|
163 |
</div>
|
164 |
-
<label className="text-md pl-1 font-bold tracking-wide text-yellow-400/90">
|
165 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
166 |
{Object.keys(node.properties)
|
167 |
.sort()
|
@@ -172,7 +174,7 @@ const NodePropertiesView = ({ node }: { node: NodeType }) => {
|
|
172 |
{node.relationships.length > 0 && (
|
173 |
<>
|
174 |
<label className="text-md pl-1 font-bold tracking-wide text-teal-600/90">
|
175 |
-
|
176 |
</label>
|
177 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
178 |
{node.relationships.map(({ type, id, label }) => {
|
@@ -195,28 +197,29 @@ const NodePropertiesView = ({ node }: { node: NodeType }) => {
|
|
195 |
}
|
196 |
|
197 |
const EdgePropertiesView = ({ edge }: { edge: EdgeType }) => {
|
|
|
198 |
return (
|
199 |
<div className="flex flex-col gap-2">
|
200 |
-
<label className="text-md pl-1 font-bold tracking-wide text-teal-600">
|
201 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
202 |
-
<PropertyRow name={'
|
203 |
-
{edge.type && <PropertyRow name={'
|
204 |
<PropertyRow
|
205 |
-
name={'
|
206 |
value={edge.sourceNode ? edge.sourceNode.labels.join(', ') : edge.source}
|
207 |
onClick={() => {
|
208 |
useGraphStore.getState().setSelectedNode(edge.source, true)
|
209 |
}}
|
210 |
/>
|
211 |
<PropertyRow
|
212 |
-
name={'
|
213 |
value={edge.targetNode ? edge.targetNode.labels.join(', ') : edge.target}
|
214 |
onClick={() => {
|
215 |
useGraphStore.getState().setSelectedNode(edge.target, true)
|
216 |
}}
|
217 |
/>
|
218 |
</div>
|
219 |
-
<label className="text-md pl-1 font-bold tracking-wide text-yellow-400/90">
|
220 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
221 |
{Object.keys(edge.properties)
|
222 |
.sort()
|
|
|
2 |
import { useGraphStore, RawNodeType, RawEdgeType } from '@/stores/graph'
|
3 |
import Text from '@/components/ui/Text'
|
4 |
import useLightragGraph from '@/hooks/useLightragGraph'
|
5 |
+
import { useTranslation } from 'react-i18next'
|
6 |
|
7 |
/**
|
8 |
* Component that view properties of elements in graph.
|
|
|
148 |
}
|
149 |
|
150 |
const NodePropertiesView = ({ node }: { node: NodeType }) => {
|
151 |
+
const { t } = useTranslation()
|
152 |
return (
|
153 |
<div className="flex flex-col gap-2">
|
154 |
+
<label className="text-md pl-1 font-bold tracking-wide text-sky-300">{t('graphPanel.propertiesView.node.title')}</label>
|
155 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
156 |
+
<PropertyRow name={t('graphPanel.propertiesView.node.id')} value={node.id} />
|
157 |
<PropertyRow
|
158 |
+
name={t('graphPanel.propertiesView.node.labels')}
|
159 |
value={node.labels.join(', ')}
|
160 |
onClick={() => {
|
161 |
useGraphStore.getState().setSelectedNode(node.id, true)
|
162 |
}}
|
163 |
/>
|
164 |
+
<PropertyRow name={t('graphPanel.propertiesView.node.degree')} value={node.degree} />
|
165 |
</div>
|
166 |
+
<label className="text-md pl-1 font-bold tracking-wide text-yellow-400/90">{t('graphPanel.propertiesView.node.properties')}</label>
|
167 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
168 |
{Object.keys(node.properties)
|
169 |
.sort()
|
|
|
174 |
{node.relationships.length > 0 && (
|
175 |
<>
|
176 |
<label className="text-md pl-1 font-bold tracking-wide text-teal-600/90">
|
177 |
+
{t('graphPanel.propertiesView.node.relationships')}
|
178 |
</label>
|
179 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
180 |
{node.relationships.map(({ type, id, label }) => {
|
|
|
197 |
}
|
198 |
|
199 |
const EdgePropertiesView = ({ edge }: { edge: EdgeType }) => {
|
200 |
+
const { t } = useTranslation()
|
201 |
return (
|
202 |
<div className="flex flex-col gap-2">
|
203 |
+
<label className="text-md pl-1 font-bold tracking-wide text-teal-600">{t('graphPanel.propertiesView.edge.title')}</label>
|
204 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
205 |
+
<PropertyRow name={t('graphPanel.propertiesView.edge.id')} value={edge.id} />
|
206 |
+
{edge.type && <PropertyRow name={t('graphPanel.propertiesView.edge.type')} value={edge.type} />}
|
207 |
<PropertyRow
|
208 |
+
name={t('graphPanel.propertiesView.edge.source')}
|
209 |
value={edge.sourceNode ? edge.sourceNode.labels.join(', ') : edge.source}
|
210 |
onClick={() => {
|
211 |
useGraphStore.getState().setSelectedNode(edge.source, true)
|
212 |
}}
|
213 |
/>
|
214 |
<PropertyRow
|
215 |
+
name={t('graphPanel.propertiesView.edge.target')}
|
216 |
value={edge.targetNode ? edge.targetNode.labels.join(', ') : edge.target}
|
217 |
onClick={() => {
|
218 |
useGraphStore.getState().setSelectedNode(edge.target, true)
|
219 |
}}
|
220 |
/>
|
221 |
</div>
|
222 |
+
<label className="text-md pl-1 font-bold tracking-wide text-yellow-400/90">{t('graphPanel.propertiesView.edge.properties')}</label>
|
223 |
<div className="bg-primary/5 max-h-96 overflow-auto rounded p-1">
|
224 |
{Object.keys(edge.properties)
|
225 |
.sort()
|
lightrag_webui/src/components/graph/Settings.tsx
CHANGED
@@ -10,6 +10,7 @@ import { useSettingsStore } from '@/stores/settings'
|
|
10 |
import { useBackendState } from '@/stores/state'
|
11 |
|
12 |
import { SettingsIcon } from 'lucide-react'
|
|
|
13 |
|
14 |
/**
|
15 |
* Component that displays a checkbox with a label.
|
@@ -204,10 +205,12 @@ export default function Settings() {
|
|
204 |
[setTempApiKey]
|
205 |
)
|
206 |
|
|
|
|
|
207 |
return (
|
208 |
<Popover open={opened} onOpenChange={setOpened}>
|
209 |
<PopoverTrigger asChild>
|
210 |
-
<Button variant={controlButtonVariant} tooltip="
|
211 |
<SettingsIcon />
|
212 |
</Button>
|
213 |
</PopoverTrigger>
|
@@ -221,7 +224,7 @@ export default function Settings() {
|
|
221 |
<LabeledCheckBox
|
222 |
checked={enableHealthCheck}
|
223 |
onCheckedChange={setEnableHealthCheck}
|
224 |
-
label="
|
225 |
/>
|
226 |
|
227 |
<Separator />
|
@@ -229,12 +232,12 @@ export default function Settings() {
|
|
229 |
<LabeledCheckBox
|
230 |
checked={showPropertyPanel}
|
231 |
onCheckedChange={setShowPropertyPanel}
|
232 |
-
label="
|
233 |
/>
|
234 |
<LabeledCheckBox
|
235 |
checked={showNodeSearchBar}
|
236 |
onCheckedChange={setShowNodeSearchBar}
|
237 |
-
label="
|
238 |
/>
|
239 |
|
240 |
<Separator />
|
@@ -242,12 +245,12 @@ export default function Settings() {
|
|
242 |
<LabeledCheckBox
|
243 |
checked={showNodeLabel}
|
244 |
onCheckedChange={setShowNodeLabel}
|
245 |
-
label="
|
246 |
/>
|
247 |
<LabeledCheckBox
|
248 |
checked={enableNodeDrag}
|
249 |
onCheckedChange={setEnableNodeDrag}
|
250 |
-
label="
|
251 |
/>
|
252 |
|
253 |
<Separator />
|
@@ -255,51 +258,50 @@ export default function Settings() {
|
|
255 |
<LabeledCheckBox
|
256 |
checked={showEdgeLabel}
|
257 |
onCheckedChange={setShowEdgeLabel}
|
258 |
-
label="
|
259 |
/>
|
260 |
<LabeledCheckBox
|
261 |
checked={enableHideUnselectedEdges}
|
262 |
onCheckedChange={setEnableHideUnselectedEdges}
|
263 |
-
label="
|
264 |
/>
|
265 |
<LabeledCheckBox
|
266 |
checked={enableEdgeEvents}
|
267 |
onCheckedChange={setEnableEdgeEvents}
|
268 |
-
label="
|
269 |
/>
|
270 |
|
271 |
<Separator />
|
272 |
<LabeledNumberInput
|
273 |
-
label="
|
274 |
min={1}
|
275 |
value={graphQueryMaxDepth}
|
276 |
onEditFinished={setGraphQueryMaxDepth}
|
277 |
/>
|
278 |
<LabeledNumberInput
|
279 |
-
label="
|
280 |
min={0}
|
281 |
value={graphMinDegree}
|
282 |
onEditFinished={setGraphMinDegree}
|
283 |
/>
|
284 |
<LabeledNumberInput
|
285 |
-
label="
|
286 |
min={1}
|
287 |
max={20}
|
288 |
value={graphLayoutMaxIterations}
|
289 |
onEditFinished={setGraphLayoutMaxIterations}
|
290 |
/>
|
291 |
-
|
292 |
<Separator />
|
293 |
|
294 |
<div className="flex flex-col gap-2">
|
295 |
-
<label className="text-sm font-medium">
|
296 |
<form className="flex h-6 gap-2" onSubmit={(e) => e.preventDefault()}>
|
297 |
<div className="w-0 flex-1">
|
298 |
<Input
|
299 |
type="password"
|
300 |
value={tempApiKey}
|
301 |
onChange={handleTempApiKeyChange}
|
302 |
-
placeholder="
|
303 |
className="max-h-full w-full min-w-0"
|
304 |
autoComplete="off"
|
305 |
/>
|
@@ -310,7 +312,7 @@ export default function Settings() {
|
|
310 |
size="sm"
|
311 |
className="max-h-full shrink-0"
|
312 |
>
|
313 |
-
|
314 |
</Button>
|
315 |
</form>
|
316 |
</div>
|
|
|
10 |
import { useBackendState } from '@/stores/state'
|
11 |
|
12 |
import { SettingsIcon } from 'lucide-react'
|
13 |
+
import { useTranslation } from "react-i18next";
|
14 |
|
15 |
/**
|
16 |
* Component that displays a checkbox with a label.
|
|
|
205 |
[setTempApiKey]
|
206 |
)
|
207 |
|
208 |
+
const { t } = useTranslation();
|
209 |
+
|
210 |
return (
|
211 |
<Popover open={opened} onOpenChange={setOpened}>
|
212 |
<PopoverTrigger asChild>
|
213 |
+
<Button variant={controlButtonVariant} tooltip={t("graphPanel.sideBar.settings.settings")} size="icon">
|
214 |
<SettingsIcon />
|
215 |
</Button>
|
216 |
</PopoverTrigger>
|
|
|
224 |
<LabeledCheckBox
|
225 |
checked={enableHealthCheck}
|
226 |
onCheckedChange={setEnableHealthCheck}
|
227 |
+
label={t("graphPanel.sideBar.settings.healthCheck")}
|
228 |
/>
|
229 |
|
230 |
<Separator />
|
|
|
232 |
<LabeledCheckBox
|
233 |
checked={showPropertyPanel}
|
234 |
onCheckedChange={setShowPropertyPanel}
|
235 |
+
label={t("graphPanel.sideBar.settings.showPropertyPanel")}
|
236 |
/>
|
237 |
<LabeledCheckBox
|
238 |
checked={showNodeSearchBar}
|
239 |
onCheckedChange={setShowNodeSearchBar}
|
240 |
+
label={t("graphPanel.sideBar.settings.showSearchBar")}
|
241 |
/>
|
242 |
|
243 |
<Separator />
|
|
|
245 |
<LabeledCheckBox
|
246 |
checked={showNodeLabel}
|
247 |
onCheckedChange={setShowNodeLabel}
|
248 |
+
label={t("graphPanel.sideBar.settings.showNodeLabel")}
|
249 |
/>
|
250 |
<LabeledCheckBox
|
251 |
checked={enableNodeDrag}
|
252 |
onCheckedChange={setEnableNodeDrag}
|
253 |
+
label={t("graphPanel.sideBar.settings.nodeDraggable")}
|
254 |
/>
|
255 |
|
256 |
<Separator />
|
|
|
258 |
<LabeledCheckBox
|
259 |
checked={showEdgeLabel}
|
260 |
onCheckedChange={setShowEdgeLabel}
|
261 |
+
label={t("graphPanel.sideBar.settings.showEdgeLabel")}
|
262 |
/>
|
263 |
<LabeledCheckBox
|
264 |
checked={enableHideUnselectedEdges}
|
265 |
onCheckedChange={setEnableHideUnselectedEdges}
|
266 |
+
label={t("graphPanel.sideBar.settings.hideUnselectedEdges")}
|
267 |
/>
|
268 |
<LabeledCheckBox
|
269 |
checked={enableEdgeEvents}
|
270 |
onCheckedChange={setEnableEdgeEvents}
|
271 |
+
label={t("graphPanel.sideBar.settings.edgeEvents")}
|
272 |
/>
|
273 |
|
274 |
<Separator />
|
275 |
<LabeledNumberInput
|
276 |
+
label={t("graphPanel.sideBar.settings.maxQueryDepth")}
|
277 |
min={1}
|
278 |
value={graphQueryMaxDepth}
|
279 |
onEditFinished={setGraphQueryMaxDepth}
|
280 |
/>
|
281 |
<LabeledNumberInput
|
282 |
+
label={t("graphPanel.sideBar.settings.minDegree")}
|
283 |
min={0}
|
284 |
value={graphMinDegree}
|
285 |
onEditFinished={setGraphMinDegree}
|
286 |
/>
|
287 |
<LabeledNumberInput
|
288 |
+
label={t("graphPanel.sideBar.settings.maxLayoutIterations")}
|
289 |
min={1}
|
290 |
max={20}
|
291 |
value={graphLayoutMaxIterations}
|
292 |
onEditFinished={setGraphLayoutMaxIterations}
|
293 |
/>
|
|
|
294 |
<Separator />
|
295 |
|
296 |
<div className="flex flex-col gap-2">
|
297 |
+
<label className="text-sm font-medium">{t("graphPanel.sideBar.settings.apiKey")}</label>
|
298 |
<form className="flex h-6 gap-2" onSubmit={(e) => e.preventDefault()}>
|
299 |
<div className="w-0 flex-1">
|
300 |
<Input
|
301 |
type="password"
|
302 |
value={tempApiKey}
|
303 |
onChange={handleTempApiKeyChange}
|
304 |
+
placeholder={t("graphPanel.sideBar.settings.enterYourAPIkey")}
|
305 |
className="max-h-full w-full min-w-0"
|
306 |
autoComplete="off"
|
307 |
/>
|
|
|
312 |
size="sm"
|
313 |
className="max-h-full shrink-0"
|
314 |
>
|
315 |
+
{t("graphPanel.sideBar.settings.save")}
|
316 |
</Button>
|
317 |
</form>
|
318 |
</div>
|
lightrag_webui/src/components/graph/StatusCard.tsx
CHANGED
@@ -1,58 +1,60 @@
|
|
1 |
import { LightragStatus } from '@/api/lightrag'
|
|
|
2 |
|
3 |
const StatusCard = ({ status }: { status: LightragStatus | null }) => {
|
|
|
4 |
if (!status) {
|
5 |
-
return <div className="text-muted-foreground text-sm">
|
6 |
}
|
7 |
|
8 |
return (
|
9 |
<div className="min-w-[300px] space-y-3 text-sm">
|
10 |
<div className="space-y-1">
|
11 |
-
<h4 className="font-medium">
|
12 |
<div className="text-muted-foreground grid grid-cols-2 gap-1">
|
13 |
-
<span>
|
14 |
<span className="truncate">{status.working_directory}</span>
|
15 |
-
<span>
|
16 |
<span className="truncate">{status.input_directory}</span>
|
17 |
</div>
|
18 |
</div>
|
19 |
|
20 |
<div className="space-y-1">
|
21 |
-
<h4 className="font-medium">
|
22 |
<div className="text-muted-foreground grid grid-cols-2 gap-1">
|
23 |
-
<span>
|
24 |
<span>{status.configuration.llm_binding}</span>
|
25 |
-
<span>
|
26 |
<span>{status.configuration.llm_binding_host}</span>
|
27 |
-
<span>
|
28 |
<span>{status.configuration.llm_model}</span>
|
29 |
-
<span>
|
30 |
<span>{status.configuration.max_tokens}</span>
|
31 |
</div>
|
32 |
</div>
|
33 |
|
34 |
<div className="space-y-1">
|
35 |
-
<h4 className="font-medium">
|
36 |
<div className="text-muted-foreground grid grid-cols-2 gap-1">
|
37 |
-
<span>
|
38 |
<span>{status.configuration.embedding_binding}</span>
|
39 |
-
<span>
|
40 |
<span>{status.configuration.embedding_binding_host}</span>
|
41 |
-
<span>
|
42 |
<span>{status.configuration.embedding_model}</span>
|
43 |
</div>
|
44 |
</div>
|
45 |
|
46 |
<div className="space-y-1">
|
47 |
-
<h4 className="font-medium">
|
48 |
<div className="text-muted-foreground grid grid-cols-2 gap-1">
|
49 |
-
<span>
|
50 |
<span>{status.configuration.kv_storage}</span>
|
51 |
-
<span>
|
52 |
<span>{status.configuration.doc_status_storage}</span>
|
53 |
-
<span>
|
54 |
<span>{status.configuration.graph_storage}</span>
|
55 |
-
<span>
|
56 |
<span>{status.configuration.vector_storage}</span>
|
57 |
</div>
|
58 |
</div>
|
|
|
1 |
import { LightragStatus } from '@/api/lightrag'
|
2 |
+
import { useTranslation } from 'react-i18next'
|
3 |
|
4 |
const StatusCard = ({ status }: { status: LightragStatus | null }) => {
|
5 |
+
const { t } = useTranslation()
|
6 |
if (!status) {
|
7 |
+
return <div className="text-muted-foreground text-sm">{t('graphPanel.statusCard.unavailable')}</div>
|
8 |
}
|
9 |
|
10 |
return (
|
11 |
<div className="min-w-[300px] space-y-3 text-sm">
|
12 |
<div className="space-y-1">
|
13 |
+
<h4 className="font-medium">{t('graphPanel.statusCard.storageInfo')}</h4>
|
14 |
<div className="text-muted-foreground grid grid-cols-2 gap-1">
|
15 |
+
<span>{t('graphPanel.statusCard.workingDirectory')}:</span>
|
16 |
<span className="truncate">{status.working_directory}</span>
|
17 |
+
<span>{t('graphPanel.statusCard.inputDirectory')}:</span>
|
18 |
<span className="truncate">{status.input_directory}</span>
|
19 |
</div>
|
20 |
</div>
|
21 |
|
22 |
<div className="space-y-1">
|
23 |
+
<h4 className="font-medium">{t('graphPanel.statusCard.llmConfig')}</h4>
|
24 |
<div className="text-muted-foreground grid grid-cols-2 gap-1">
|
25 |
+
<span>{t('graphPanel.statusCard.llmBinding')}:</span>
|
26 |
<span>{status.configuration.llm_binding}</span>
|
27 |
+
<span>{t('graphPanel.statusCard.llmBindingHost')}:</span>
|
28 |
<span>{status.configuration.llm_binding_host}</span>
|
29 |
+
<span>{t('graphPanel.statusCard.llmModel')}:</span>
|
30 |
<span>{status.configuration.llm_model}</span>
|
31 |
+
<span>{t('graphPanel.statusCard.maxTokens')}:</span>
|
32 |
<span>{status.configuration.max_tokens}</span>
|
33 |
</div>
|
34 |
</div>
|
35 |
|
36 |
<div className="space-y-1">
|
37 |
+
<h4 className="font-medium">{t('graphPanel.statusCard.embeddingConfig')}</h4>
|
38 |
<div className="text-muted-foreground grid grid-cols-2 gap-1">
|
39 |
+
<span>{t('graphPanel.statusCard.embeddingBinding')}:</span>
|
40 |
<span>{status.configuration.embedding_binding}</span>
|
41 |
+
<span>{t('graphPanel.statusCard.embeddingBindingHost')}:</span>
|
42 |
<span>{status.configuration.embedding_binding_host}</span>
|
43 |
+
<span>{t('graphPanel.statusCard.embeddingModel')}:</span>
|
44 |
<span>{status.configuration.embedding_model}</span>
|
45 |
</div>
|
46 |
</div>
|
47 |
|
48 |
<div className="space-y-1">
|
49 |
+
<h4 className="font-medium">{t('graphPanel.statusCard.storageConfig')}</h4>
|
50 |
<div className="text-muted-foreground grid grid-cols-2 gap-1">
|
51 |
+
<span>{t('graphPanel.statusCard.kvStorage')}:</span>
|
52 |
<span>{status.configuration.kv_storage}</span>
|
53 |
+
<span>{t('graphPanel.statusCard.docStatusStorage')}:</span>
|
54 |
<span>{status.configuration.doc_status_storage}</span>
|
55 |
+
<span>{t('graphPanel.statusCard.graphStorage')}:</span>
|
56 |
<span>{status.configuration.graph_storage}</span>
|
57 |
+
<span>{t('graphPanel.statusCard.vectorStorage')}:</span>
|
58 |
<span>{status.configuration.vector_storage}</span>
|
59 |
</div>
|
60 |
</div>
|
lightrag_webui/src/components/graph/StatusIndicator.tsx
CHANGED
@@ -3,8 +3,10 @@ import { useBackendState } from '@/stores/state'
|
|
3 |
import { useEffect, useState } from 'react'
|
4 |
import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/Popover'
|
5 |
import StatusCard from '@/components/graph/StatusCard'
|
|
|
6 |
|
7 |
const StatusIndicator = () => {
|
|
|
8 |
const health = useBackendState.use.health()
|
9 |
const lastCheckTime = useBackendState.use.lastCheckTime()
|
10 |
const status = useBackendState.use.status()
|
@@ -33,7 +35,7 @@ const StatusIndicator = () => {
|
|
33 |
)}
|
34 |
/>
|
35 |
<span className="text-muted-foreground text-xs">
|
36 |
-
{health ? '
|
37 |
</span>
|
38 |
</div>
|
39 |
</PopoverTrigger>
|
|
|
3 |
import { useEffect, useState } from 'react'
|
4 |
import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/Popover'
|
5 |
import StatusCard from '@/components/graph/StatusCard'
|
6 |
+
import { useTranslation } from 'react-i18next'
|
7 |
|
8 |
const StatusIndicator = () => {
|
9 |
+
const { t } = useTranslation()
|
10 |
const health = useBackendState.use.health()
|
11 |
const lastCheckTime = useBackendState.use.lastCheckTime()
|
12 |
const status = useBackendState.use.status()
|
|
|
35 |
)}
|
36 |
/>
|
37 |
<span className="text-muted-foreground text-xs">
|
38 |
+
{health ? t('graphPanel.statusIndicator.connected') : t('graphPanel.statusIndicator.disconnected')}
|
39 |
</span>
|
40 |
</div>
|
41 |
</PopoverTrigger>
|
lightrag_webui/src/components/graph/ZoomControl.tsx
CHANGED
@@ -3,12 +3,14 @@ import { useCallback } from 'react'
|
|
3 |
import Button from '@/components/ui/Button'
|
4 |
import { ZoomInIcon, ZoomOutIcon, FullscreenIcon } from 'lucide-react'
|
5 |
import { controlButtonVariant } from '@/lib/constants'
|
|
|
6 |
|
7 |
/**
|
8 |
* Component that provides zoom controls for the graph viewer.
|
9 |
*/
|
10 |
const ZoomControl = () => {
|
11 |
const { zoomIn, zoomOut, reset } = useCamera({ duration: 200, factor: 1.5 })
|
|
|
12 |
|
13 |
const handleZoomIn = useCallback(() => zoomIn(), [zoomIn])
|
14 |
const handleZoomOut = useCallback(() => zoomOut(), [zoomOut])
|
@@ -16,16 +18,16 @@ const ZoomControl = () => {
|
|
16 |
|
17 |
return (
|
18 |
<>
|
19 |
-
<Button variant={controlButtonVariant} onClick={handleZoomIn} tooltip="
|
20 |
<ZoomInIcon />
|
21 |
</Button>
|
22 |
-
<Button variant={controlButtonVariant} onClick={handleZoomOut} tooltip="
|
23 |
<ZoomOutIcon />
|
24 |
</Button>
|
25 |
<Button
|
26 |
variant={controlButtonVariant}
|
27 |
onClick={handleResetZoom}
|
28 |
-
tooltip="
|
29 |
size="icon"
|
30 |
>
|
31 |
<FullscreenIcon />
|
|
|
3 |
import Button from '@/components/ui/Button'
|
4 |
import { ZoomInIcon, ZoomOutIcon, FullscreenIcon } from 'lucide-react'
|
5 |
import { controlButtonVariant } from '@/lib/constants'
|
6 |
+
import { useTranslation } from "react-i18next";
|
7 |
|
8 |
/**
|
9 |
* Component that provides zoom controls for the graph viewer.
|
10 |
*/
|
11 |
const ZoomControl = () => {
|
12 |
const { zoomIn, zoomOut, reset } = useCamera({ duration: 200, factor: 1.5 })
|
13 |
+
const { t } = useTranslation();
|
14 |
|
15 |
const handleZoomIn = useCallback(() => zoomIn(), [zoomIn])
|
16 |
const handleZoomOut = useCallback(() => zoomOut(), [zoomOut])
|
|
|
18 |
|
19 |
return (
|
20 |
<>
|
21 |
+
<Button variant={controlButtonVariant} onClick={handleZoomIn} tooltip={t("graphPanel.sideBar.zoomControl.zoomIn")} size="icon">
|
22 |
<ZoomInIcon />
|
23 |
</Button>
|
24 |
+
<Button variant={controlButtonVariant} onClick={handleZoomOut} tooltip={t("graphPanel.sideBar.zoomControl.zoomOut")} size="icon">
|
25 |
<ZoomOutIcon />
|
26 |
</Button>
|
27 |
<Button
|
28 |
variant={controlButtonVariant}
|
29 |
onClick={handleResetZoom}
|
30 |
+
tooltip={t("graphPanel.sideBar.zoomControl.resetZoom")}
|
31 |
size="icon"
|
32 |
>
|
33 |
<FullscreenIcon />
|
lightrag_webui/src/components/retrieval/ChatMessage.tsx
CHANGED
@@ -15,18 +15,21 @@ import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'
|
|
15 |
import { oneLight, oneDark } from 'react-syntax-highlighter/dist/cjs/styles/prism'
|
16 |
|
17 |
import { LoaderIcon, CopyIcon } from 'lucide-react'
|
|
|
18 |
|
19 |
export type MessageWithError = Message & {
|
20 |
isError?: boolean
|
21 |
}
|
22 |
|
23 |
export const ChatMessage = ({ message }: { message: MessageWithError }) => {
|
|
|
|
|
24 |
const handleCopyMarkdown = useCallback(async () => {
|
25 |
if (message.content) {
|
26 |
try {
|
27 |
await navigator.clipboard.writeText(message.content)
|
28 |
} catch (err) {
|
29 |
-
console.error('
|
30 |
}
|
31 |
}
|
32 |
}, [message])
|
@@ -57,7 +60,7 @@ export const ChatMessage = ({ message }: { message: MessageWithError }) => {
|
|
57 |
<Button
|
58 |
onClick={handleCopyMarkdown}
|
59 |
className="absolute right-0 bottom-0 size-6 rounded-md opacity-20 transition-opacity hover:opacity-100"
|
60 |
-
tooltip=
|
61 |
variant="default"
|
62 |
size="icon"
|
63 |
>
|
|
|
15 |
import { oneLight, oneDark } from 'react-syntax-highlighter/dist/cjs/styles/prism'
|
16 |
|
17 |
import { LoaderIcon, CopyIcon } from 'lucide-react'
|
18 |
+
import { useTranslation } from 'react-i18next'
|
19 |
|
20 |
export type MessageWithError = Message & {
|
21 |
isError?: boolean
|
22 |
}
|
23 |
|
24 |
export const ChatMessage = ({ message }: { message: MessageWithError }) => {
|
25 |
+
const { t } = useTranslation()
|
26 |
+
|
27 |
const handleCopyMarkdown = useCallback(async () => {
|
28 |
if (message.content) {
|
29 |
try {
|
30 |
await navigator.clipboard.writeText(message.content)
|
31 |
} catch (err) {
|
32 |
+
console.error(t('chat.copyError'), err)
|
33 |
}
|
34 |
}
|
35 |
}, [message])
|
|
|
60 |
<Button
|
61 |
onClick={handleCopyMarkdown}
|
62 |
className="absolute right-0 bottom-0 size-6 rounded-md opacity-20 transition-opacity hover:opacity-100"
|
63 |
+
tooltip={t('retrievePanel.chatMessage.copyTooltip')}
|
64 |
variant="default"
|
65 |
size="icon"
|
66 |
>
|
lightrag_webui/src/components/retrieval/QuerySettings.tsx
CHANGED
@@ -14,8 +14,10 @@ import {
|
|
14 |
SelectValue
|
15 |
} from '@/components/ui/Select'
|
16 |
import { useSettingsStore } from '@/stores/settings'
|
|
|
17 |
|
18 |
export default function QuerySettings() {
|
|
|
19 |
const querySettings = useSettingsStore((state) => state.querySettings)
|
20 |
|
21 |
const handleChange = useCallback((key: keyof QueryRequest, value: any) => {
|
@@ -25,8 +27,8 @@ export default function QuerySettings() {
|
|
25 |
return (
|
26 |
<Card className="flex shrink-0 flex-col">
|
27 |
<CardHeader className="px-4 pt-4 pb-2">
|
28 |
-
<CardTitle>
|
29 |
-
<CardDescription>
|
30 |
</CardHeader>
|
31 |
<CardContent className="m-0 flex grow flex-col p-0 text-xs">
|
32 |
<div className="relative size-full">
|
@@ -35,8 +37,8 @@ export default function QuerySettings() {
|
|
35 |
<>
|
36 |
<Text
|
37 |
className="ml-1"
|
38 |
-
text=
|
39 |
-
tooltip=
|
40 |
side="left"
|
41 |
/>
|
42 |
<Select
|
@@ -48,11 +50,11 @@ export default function QuerySettings() {
|
|
48 |
</SelectTrigger>
|
49 |
<SelectContent>
|
50 |
<SelectGroup>
|
51 |
-
<SelectItem value="naive">
|
52 |
-
<SelectItem value="local">
|
53 |
-
<SelectItem value="global">
|
54 |
-
<SelectItem value="hybrid">
|
55 |
-
<SelectItem value="mix">
|
56 |
</SelectGroup>
|
57 |
</SelectContent>
|
58 |
</Select>
|
@@ -62,8 +64,8 @@ export default function QuerySettings() {
|
|
62 |
<>
|
63 |
<Text
|
64 |
className="ml-1"
|
65 |
-
text=
|
66 |
-
tooltip=
|
67 |
side="left"
|
68 |
/>
|
69 |
<Select
|
@@ -75,9 +77,9 @@ export default function QuerySettings() {
|
|
75 |
</SelectTrigger>
|
76 |
<SelectContent>
|
77 |
<SelectGroup>
|
78 |
-
<SelectItem value="Multiple Paragraphs">
|
79 |
-
<SelectItem value="Single Paragraph">
|
80 |
-
<SelectItem value="Bullet Points">
|
81 |
</SelectGroup>
|
82 |
</SelectContent>
|
83 |
</Select>
|
@@ -87,8 +89,8 @@ export default function QuerySettings() {
|
|
87 |
<>
|
88 |
<Text
|
89 |
className="ml-1"
|
90 |
-
text=
|
91 |
-
tooltip=
|
92 |
side="left"
|
93 |
/>
|
94 |
<NumberInput
|
@@ -97,7 +99,7 @@ export default function QuerySettings() {
|
|
97 |
value={querySettings.top_k}
|
98 |
onValueChange={(v) => handleChange('top_k', v)}
|
99 |
min={1}
|
100 |
-
placeholder=
|
101 |
/>
|
102 |
</>
|
103 |
|
@@ -106,8 +108,8 @@ export default function QuerySettings() {
|
|
106 |
<>
|
107 |
<Text
|
108 |
className="ml-1"
|
109 |
-
text=
|
110 |
-
tooltip=
|
111 |
side="left"
|
112 |
/>
|
113 |
<NumberInput
|
@@ -116,14 +118,14 @@ export default function QuerySettings() {
|
|
116 |
value={querySettings.max_token_for_text_unit}
|
117 |
onValueChange={(v) => handleChange('max_token_for_text_unit', v)}
|
118 |
min={1}
|
119 |
-
placeholder=
|
120 |
/>
|
121 |
</>
|
122 |
|
123 |
<>
|
124 |
<Text
|
125 |
-
text=
|
126 |
-
tooltip=
|
127 |
side="left"
|
128 |
/>
|
129 |
<NumberInput
|
@@ -132,15 +134,15 @@ export default function QuerySettings() {
|
|
132 |
value={querySettings.max_token_for_global_context}
|
133 |
onValueChange={(v) => handleChange('max_token_for_global_context', v)}
|
134 |
min={1}
|
135 |
-
placeholder=
|
136 |
/>
|
137 |
</>
|
138 |
|
139 |
<>
|
140 |
<Text
|
141 |
className="ml-1"
|
142 |
-
text=
|
143 |
-
tooltip=
|
144 |
side="left"
|
145 |
/>
|
146 |
<NumberInput
|
@@ -149,7 +151,7 @@ export default function QuerySettings() {
|
|
149 |
value={querySettings.max_token_for_local_context}
|
150 |
onValueChange={(v) => handleChange('max_token_for_local_context', v)}
|
151 |
min={1}
|
152 |
-
placeholder=
|
153 |
/>
|
154 |
</>
|
155 |
</>
|
@@ -158,8 +160,8 @@ export default function QuerySettings() {
|
|
158 |
<>
|
159 |
<Text
|
160 |
className="ml-1"
|
161 |
-
text=
|
162 |
-
tooltip=
|
163 |
side="left"
|
164 |
/>
|
165 |
<NumberInput
|
@@ -170,7 +172,7 @@ export default function QuerySettings() {
|
|
170 |
value={querySettings.history_turns}
|
171 |
onValueChange={(v) => handleChange('history_turns', v)}
|
172 |
min={0}
|
173 |
-
placeholder=
|
174 |
/>
|
175 |
</>
|
176 |
|
@@ -179,8 +181,8 @@ export default function QuerySettings() {
|
|
179 |
<>
|
180 |
<Text
|
181 |
className="ml-1"
|
182 |
-
text=
|
183 |
-
tooltip=
|
184 |
side="left"
|
185 |
/>
|
186 |
<Input
|
@@ -194,15 +196,15 @@ export default function QuerySettings() {
|
|
194 |
.filter((k) => k !== '')
|
195 |
handleChange('hl_keywords', keywords)
|
196 |
}}
|
197 |
-
placeholder=
|
198 |
/>
|
199 |
</>
|
200 |
|
201 |
<>
|
202 |
<Text
|
203 |
className="ml-1"
|
204 |
-
text=
|
205 |
-
tooltip=
|
206 |
side="left"
|
207 |
/>
|
208 |
<Input
|
@@ -216,7 +218,7 @@ export default function QuerySettings() {
|
|
216 |
.filter((k) => k !== '')
|
217 |
handleChange('ll_keywords', keywords)
|
218 |
}}
|
219 |
-
placeholder=
|
220 |
/>
|
221 |
</>
|
222 |
</>
|
@@ -226,8 +228,8 @@ export default function QuerySettings() {
|
|
226 |
<div className="flex items-center gap-2">
|
227 |
<Text
|
228 |
className="ml-1"
|
229 |
-
text=
|
230 |
-
tooltip=
|
231 |
side="left"
|
232 |
/>
|
233 |
<div className="grow" />
|
@@ -242,8 +244,8 @@ export default function QuerySettings() {
|
|
242 |
<div className="flex items-center gap-2">
|
243 |
<Text
|
244 |
className="ml-1"
|
245 |
-
text=
|
246 |
-
tooltip=
|
247 |
side="left"
|
248 |
/>
|
249 |
<div className="grow" />
|
@@ -258,8 +260,8 @@ export default function QuerySettings() {
|
|
258 |
<div className="flex items-center gap-2">
|
259 |
<Text
|
260 |
className="ml-1"
|
261 |
-
text=
|
262 |
-
tooltip=
|
263 |
side="left"
|
264 |
/>
|
265 |
<div className="grow" />
|
|
|
14 |
SelectValue
|
15 |
} from '@/components/ui/Select'
|
16 |
import { useSettingsStore } from '@/stores/settings'
|
17 |
+
import { useTranslation } from 'react-i18next'
|
18 |
|
19 |
export default function QuerySettings() {
|
20 |
+
const { t } = useTranslation()
|
21 |
const querySettings = useSettingsStore((state) => state.querySettings)
|
22 |
|
23 |
const handleChange = useCallback((key: keyof QueryRequest, value: any) => {
|
|
|
27 |
return (
|
28 |
<Card className="flex shrink-0 flex-col">
|
29 |
<CardHeader className="px-4 pt-4 pb-2">
|
30 |
+
<CardTitle>{t('retrievePanel.querySettings.parametersTitle')}</CardTitle>
|
31 |
+
<CardDescription>{t('retrievePanel.querySettings.parametersDescription')}</CardDescription>
|
32 |
</CardHeader>
|
33 |
<CardContent className="m-0 flex grow flex-col p-0 text-xs">
|
34 |
<div className="relative size-full">
|
|
|
37 |
<>
|
38 |
<Text
|
39 |
className="ml-1"
|
40 |
+
text={t('retrievePanel.querySettings.queryMode')}
|
41 |
+
tooltip={t('retrievePanel.querySettings.queryModeTooltip')}
|
42 |
side="left"
|
43 |
/>
|
44 |
<Select
|
|
|
50 |
</SelectTrigger>
|
51 |
<SelectContent>
|
52 |
<SelectGroup>
|
53 |
+
<SelectItem value="naive">{t('retrievePanel.querySettings.queryModeOptions.naive')}</SelectItem>
|
54 |
+
<SelectItem value="local">{t('retrievePanel.querySettings.queryModeOptions.local')}</SelectItem>
|
55 |
+
<SelectItem value="global">{t('retrievePanel.querySettings.queryModeOptions.global')}</SelectItem>
|
56 |
+
<SelectItem value="hybrid">{t('retrievePanel.querySettings.queryModeOptions.hybrid')}</SelectItem>
|
57 |
+
<SelectItem value="mix">{t('retrievePanel.querySettings.queryModeOptions.mix')}</SelectItem>
|
58 |
</SelectGroup>
|
59 |
</SelectContent>
|
60 |
</Select>
|
|
|
64 |
<>
|
65 |
<Text
|
66 |
className="ml-1"
|
67 |
+
text={t('retrievePanel.querySettings.responseFormat')}
|
68 |
+
tooltip={t('retrievePanel.querySettings.responseFormatTooltip')}
|
69 |
side="left"
|
70 |
/>
|
71 |
<Select
|
|
|
77 |
</SelectTrigger>
|
78 |
<SelectContent>
|
79 |
<SelectGroup>
|
80 |
+
<SelectItem value="Multiple Paragraphs">{t('retrievePanel.querySettings.responseFormatOptions.multipleParagraphs')}</SelectItem>
|
81 |
+
<SelectItem value="Single Paragraph">{t('retrievePanel.querySettings.responseFormatOptions.singleParagraph')}</SelectItem>
|
82 |
+
<SelectItem value="Bullet Points">{t('retrievePanel.querySettings.responseFormatOptions.bulletPoints')}</SelectItem>
|
83 |
</SelectGroup>
|
84 |
</SelectContent>
|
85 |
</Select>
|
|
|
89 |
<>
|
90 |
<Text
|
91 |
className="ml-1"
|
92 |
+
text={t('retrievePanel.querySettings.topK')}
|
93 |
+
tooltip={t('retrievePanel.querySettings.topKTooltip')}
|
94 |
side="left"
|
95 |
/>
|
96 |
<NumberInput
|
|
|
99 |
value={querySettings.top_k}
|
100 |
onValueChange={(v) => handleChange('top_k', v)}
|
101 |
min={1}
|
102 |
+
placeholder={t('retrievePanel.querySettings.topKPlaceholder')}
|
103 |
/>
|
104 |
</>
|
105 |
|
|
|
108 |
<>
|
109 |
<Text
|
110 |
className="ml-1"
|
111 |
+
text={t('retrievePanel.querySettings.maxTokensTextUnit')}
|
112 |
+
tooltip={t('retrievePanel.querySettings.maxTokensTextUnitTooltip')}
|
113 |
side="left"
|
114 |
/>
|
115 |
<NumberInput
|
|
|
118 |
value={querySettings.max_token_for_text_unit}
|
119 |
onValueChange={(v) => handleChange('max_token_for_text_unit', v)}
|
120 |
min={1}
|
121 |
+
placeholder={t('retrievePanel.querySettings.maxTokensTextUnit')}
|
122 |
/>
|
123 |
</>
|
124 |
|
125 |
<>
|
126 |
<Text
|
127 |
+
text={t('retrievePanel.querySettings.maxTokensGlobalContext')}
|
128 |
+
tooltip={t('retrievePanel.querySettings.maxTokensGlobalContextTooltip')}
|
129 |
side="left"
|
130 |
/>
|
131 |
<NumberInput
|
|
|
134 |
value={querySettings.max_token_for_global_context}
|
135 |
onValueChange={(v) => handleChange('max_token_for_global_context', v)}
|
136 |
min={1}
|
137 |
+
placeholder={t('retrievePanel.querySettings.maxTokensGlobalContext')}
|
138 |
/>
|
139 |
</>
|
140 |
|
141 |
<>
|
142 |
<Text
|
143 |
className="ml-1"
|
144 |
+
text={t('retrievePanel.querySettings.maxTokensLocalContext')}
|
145 |
+
tooltip={t('retrievePanel.querySettings.maxTokensLocalContextTooltip')}
|
146 |
side="left"
|
147 |
/>
|
148 |
<NumberInput
|
|
|
151 |
value={querySettings.max_token_for_local_context}
|
152 |
onValueChange={(v) => handleChange('max_token_for_local_context', v)}
|
153 |
min={1}
|
154 |
+
placeholder={t('retrievePanel.querySettings.maxTokensLocalContext')}
|
155 |
/>
|
156 |
</>
|
157 |
</>
|
|
|
160 |
<>
|
161 |
<Text
|
162 |
className="ml-1"
|
163 |
+
text={t('retrievePanel.querySettings.historyTurns')}
|
164 |
+
tooltip={t('retrievePanel.querySettings.historyTurnsTooltip')}
|
165 |
side="left"
|
166 |
/>
|
167 |
<NumberInput
|
|
|
172 |
value={querySettings.history_turns}
|
173 |
onValueChange={(v) => handleChange('history_turns', v)}
|
174 |
min={0}
|
175 |
+
placeholder={t('retrievePanel.querySettings.historyTurnsPlaceholder')}
|
176 |
/>
|
177 |
</>
|
178 |
|
|
|
181 |
<>
|
182 |
<Text
|
183 |
className="ml-1"
|
184 |
+
text={t('retrievePanel.querySettings.hlKeywords')}
|
185 |
+
tooltip={t('retrievePanel.querySettings.hlKeywordsTooltip')}
|
186 |
side="left"
|
187 |
/>
|
188 |
<Input
|
|
|
196 |
.filter((k) => k !== '')
|
197 |
handleChange('hl_keywords', keywords)
|
198 |
}}
|
199 |
+
placeholder={t('retrievePanel.querySettings.hlkeywordsPlaceHolder')}
|
200 |
/>
|
201 |
</>
|
202 |
|
203 |
<>
|
204 |
<Text
|
205 |
className="ml-1"
|
206 |
+
text={t('retrievePanel.querySettings.llKeywords')}
|
207 |
+
tooltip={t('retrievePanel.querySettings.llKeywordsTooltip')}
|
208 |
side="left"
|
209 |
/>
|
210 |
<Input
|
|
|
218 |
.filter((k) => k !== '')
|
219 |
handleChange('ll_keywords', keywords)
|
220 |
}}
|
221 |
+
placeholder={t('retrievePanel.querySettings.hlkeywordsPlaceHolder')}
|
222 |
/>
|
223 |
</>
|
224 |
</>
|
|
|
228 |
<div className="flex items-center gap-2">
|
229 |
<Text
|
230 |
className="ml-1"
|
231 |
+
text={t('retrievePanel.querySettings.onlyNeedContext')}
|
232 |
+
tooltip={t('retrievePanel.querySettings.onlyNeedContextTooltip')}
|
233 |
side="left"
|
234 |
/>
|
235 |
<div className="grow" />
|
|
|
244 |
<div className="flex items-center gap-2">
|
245 |
<Text
|
246 |
className="ml-1"
|
247 |
+
text={t('retrievePanel.querySettings.onlyNeedPrompt')}
|
248 |
+
tooltip={t('retrievePanel.querySettings.onlyNeedPromptTooltip')}
|
249 |
side="left"
|
250 |
/>
|
251 |
<div className="grow" />
|
|
|
260 |
<div className="flex items-center gap-2">
|
261 |
<Text
|
262 |
className="ml-1"
|
263 |
+
text={t('retrievePanel.querySettings.streamResponse')}
|
264 |
+
tooltip={t('retrievePanel.querySettings.streamResponseTooltip')}
|
265 |
side="left"
|
266 |
/>
|
267 |
<div className="grow" />
|
lightrag_webui/src/features/DocumentManager.tsx
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import { useState, useEffect, useCallback } from 'react'
|
|
|
2 |
import Button from '@/components/ui/Button'
|
3 |
import {
|
4 |
Table,
|
@@ -22,6 +23,7 @@ import { useBackendState } from '@/stores/state'
|
|
22 |
import { RefreshCwIcon } from 'lucide-react'
|
23 |
|
24 |
export default function DocumentManager() {
|
|
|
25 |
const health = useBackendState.use.health()
|
26 |
const [docs, setDocs] = useState<DocsStatusesResponse | null>(null)
|
27 |
|
@@ -44,7 +46,7 @@ export default function DocumentManager() {
|
|
44 |
setDocs(null)
|
45 |
}
|
46 |
} catch (err) {
|
47 |
-
toast.error('
|
48 |
}
|
49 |
}, [setDocs])
|
50 |
|
@@ -57,7 +59,7 @@ export default function DocumentManager() {
|
|
57 |
const { status } = await scanNewDocuments()
|
58 |
toast.message(status)
|
59 |
} catch (err) {
|
60 |
-
toast.error('
|
61 |
}
|
62 |
}, [])
|
63 |
|
@@ -69,7 +71,7 @@ export default function DocumentManager() {
|
|
69 |
try {
|
70 |
await fetchDocuments()
|
71 |
} catch (err) {
|
72 |
-
toast.error('
|
73 |
}
|
74 |
}, 5000)
|
75 |
return () => clearInterval(interval)
|
@@ -78,7 +80,7 @@ export default function DocumentManager() {
|
|
78 |
return (
|
79 |
<Card className="!size-full !rounded-none !border-none">
|
80 |
<CardHeader>
|
81 |
-
<CardTitle className="text-lg">
|
82 |
</CardHeader>
|
83 |
<CardContent className="space-y-4">
|
84 |
<div className="flex gap-2">
|
@@ -86,10 +88,10 @@ export default function DocumentManager() {
|
|
86 |
variant="outline"
|
87 |
onClick={scanDocuments}
|
88 |
side="bottom"
|
89 |
-
tooltip=
|
90 |
size="sm"
|
91 |
>
|
92 |
-
<RefreshCwIcon />
|
93 |
</Button>
|
94 |
<div className="flex-1" />
|
95 |
<ClearDocumentsDialog />
|
@@ -98,29 +100,29 @@ export default function DocumentManager() {
|
|
98 |
|
99 |
<Card>
|
100 |
<CardHeader>
|
101 |
-
<CardTitle>
|
102 |
-
<CardDescription>
|
103 |
</CardHeader>
|
104 |
|
105 |
<CardContent>
|
106 |
{!docs && (
|
107 |
<EmptyCard
|
108 |
-
title=
|
109 |
-
description=
|
110 |
/>
|
111 |
)}
|
112 |
{docs && (
|
113 |
<Table>
|
114 |
<TableHeader>
|
115 |
<TableRow>
|
116 |
-
<TableHead>
|
117 |
-
<TableHead>
|
118 |
-
<TableHead>
|
119 |
-
<TableHead>
|
120 |
-
<TableHead>
|
121 |
-
<TableHead>
|
122 |
-
<TableHead>
|
123 |
-
<TableHead>
|
124 |
</TableRow>
|
125 |
</TableHeader>
|
126 |
<TableBody className="text-sm">
|
@@ -137,13 +139,13 @@ export default function DocumentManager() {
|
|
137 |
</TableCell>
|
138 |
<TableCell>
|
139 |
{status === 'processed' && (
|
140 |
-
<span className="text-green-600">
|
141 |
)}
|
142 |
{status === 'processing' && (
|
143 |
-
<span className="text-blue-600">
|
144 |
)}
|
145 |
-
{status === 'pending' && <span className="text-yellow-600">
|
146 |
-
{status === 'failed' && <span className="text-red-600">
|
147 |
{doc.error && (
|
148 |
<span className="ml-2 text-red-500" title={doc.error}>
|
149 |
⚠️
|
|
|
1 |
import { useState, useEffect, useCallback } from 'react'
|
2 |
+
import { useTranslation } from 'react-i18next'
|
3 |
import Button from '@/components/ui/Button'
|
4 |
import {
|
5 |
Table,
|
|
|
23 |
import { RefreshCwIcon } from 'lucide-react'
|
24 |
|
25 |
export default function DocumentManager() {
|
26 |
+
const { t } = useTranslation()
|
27 |
const health = useBackendState.use.health()
|
28 |
const [docs, setDocs] = useState<DocsStatusesResponse | null>(null)
|
29 |
|
|
|
46 |
setDocs(null)
|
47 |
}
|
48 |
} catch (err) {
|
49 |
+
toast.error(t('documentPanel.documentManager.errors.loadFailed', { error: errorMessage(err) }))
|
50 |
}
|
51 |
}, [setDocs])
|
52 |
|
|
|
59 |
const { status } = await scanNewDocuments()
|
60 |
toast.message(status)
|
61 |
} catch (err) {
|
62 |
+
toast.error(t('documentPanel.documentManager.errors.scanFailed', { error: errorMessage(err) }))
|
63 |
}
|
64 |
}, [])
|
65 |
|
|
|
71 |
try {
|
72 |
await fetchDocuments()
|
73 |
} catch (err) {
|
74 |
+
toast.error(t('documentPanel.documentManager.errors.scanProgressFailed', { error: errorMessage(err) }))
|
75 |
}
|
76 |
}, 5000)
|
77 |
return () => clearInterval(interval)
|
|
|
80 |
return (
|
81 |
<Card className="!size-full !rounded-none !border-none">
|
82 |
<CardHeader>
|
83 |
+
<CardTitle className="text-lg">{t('documentPanel.documentManager.title')}</CardTitle>
|
84 |
</CardHeader>
|
85 |
<CardContent className="space-y-4">
|
86 |
<div className="flex gap-2">
|
|
|
88 |
variant="outline"
|
89 |
onClick={scanDocuments}
|
90 |
side="bottom"
|
91 |
+
tooltip={t('documentPanel.documentManager.scanTooltip')}
|
92 |
size="sm"
|
93 |
>
|
94 |
+
<RefreshCwIcon /> {t('documentPanel.documentManager.scanButton')}
|
95 |
</Button>
|
96 |
<div className="flex-1" />
|
97 |
<ClearDocumentsDialog />
|
|
|
100 |
|
101 |
<Card>
|
102 |
<CardHeader>
|
103 |
+
<CardTitle>{t('documentPanel.documentManager.uploadedTitle')}</CardTitle>
|
104 |
+
<CardDescription>{t('documentPanel.documentManager.uploadedDescription')}</CardDescription>
|
105 |
</CardHeader>
|
106 |
|
107 |
<CardContent>
|
108 |
{!docs && (
|
109 |
<EmptyCard
|
110 |
+
title={t('documentPanel.documentManager.emptyTitle')}
|
111 |
+
description={t('documentPanel.documentManager.emptyDescription')}
|
112 |
/>
|
113 |
)}
|
114 |
{docs && (
|
115 |
<Table>
|
116 |
<TableHeader>
|
117 |
<TableRow>
|
118 |
+
<TableHead>{t('documentPanel.documentManager.columns.id')}</TableHead>
|
119 |
+
<TableHead>{t('documentPanel.documentManager.columns.summary')}</TableHead>
|
120 |
+
<TableHead>{t('documentPanel.documentManager.columns.status')}</TableHead>
|
121 |
+
<TableHead>{t('documentPanel.documentManager.columns.length')}</TableHead>
|
122 |
+
<TableHead>{t('documentPanel.documentManager.columns.chunks')}</TableHead>
|
123 |
+
<TableHead>{t('documentPanel.documentManager.columns.created')}</TableHead>
|
124 |
+
<TableHead>{t('documentPanel.documentManager.columns.updated')}</TableHead>
|
125 |
+
<TableHead>{t('documentPanel.documentManager.columns.metadata')}</TableHead>
|
126 |
</TableRow>
|
127 |
</TableHeader>
|
128 |
<TableBody className="text-sm">
|
|
|
139 |
</TableCell>
|
140 |
<TableCell>
|
141 |
{status === 'processed' && (
|
142 |
+
<span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
|
143 |
)}
|
144 |
{status === 'processing' && (
|
145 |
+
<span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
|
146 |
)}
|
147 |
+
{status === 'pending' && <span className="text-yellow-600">{t('documentPanel.documentManager.status.pending')}</span>}
|
148 |
+
{status === 'failed' && <span className="text-red-600">{t('documentPanel.documentManager.status.failed')}</span>}
|
149 |
{doc.error && (
|
150 |
<span className="ml-2 text-red-500" title={doc.error}>
|
151 |
⚠️
|
lightrag_webui/src/features/RetrievalTesting.tsx
CHANGED
@@ -8,8 +8,10 @@ import { useDebounce } from '@/hooks/useDebounce'
|
|
8 |
import QuerySettings from '@/components/retrieval/QuerySettings'
|
9 |
import { ChatMessage, MessageWithError } from '@/components/retrieval/ChatMessage'
|
10 |
import { EraserIcon, SendIcon } from 'lucide-react'
|
|
|
11 |
|
12 |
export default function RetrievalTesting() {
|
|
|
13 |
const [messages, setMessages] = useState<MessageWithError[]>(
|
14 |
() => useSettingsStore.getState().retrievalHistory || []
|
15 |
)
|
@@ -89,7 +91,7 @@ export default function RetrievalTesting() {
|
|
89 |
}
|
90 |
} catch (err) {
|
91 |
// Handle error
|
92 |
-
updateAssistantMessage(
|
93 |
} finally {
|
94 |
// Clear loading and add messages to state
|
95 |
setIsLoading(false)
|
@@ -98,7 +100,7 @@ export default function RetrievalTesting() {
|
|
98 |
.setRetrievalHistory([...prevMessages, userMessage, assistantMessage])
|
99 |
}
|
100 |
},
|
101 |
-
[inputValue, isLoading, messages, setMessages]
|
102 |
)
|
103 |
|
104 |
const debouncedMessages = useDebounce(messages, 100)
|
@@ -117,7 +119,7 @@ export default function RetrievalTesting() {
|
|
117 |
<div className="flex min-h-0 flex-1 flex-col gap-2">
|
118 |
{messages.length === 0 ? (
|
119 |
<div className="text-muted-foreground flex h-full items-center justify-center text-lg">
|
120 |
-
|
121 |
</div>
|
122 |
) : (
|
123 |
messages.map((message, idx) => (
|
@@ -143,18 +145,18 @@ export default function RetrievalTesting() {
|
|
143 |
size="sm"
|
144 |
>
|
145 |
<EraserIcon />
|
146 |
-
|
147 |
</Button>
|
148 |
<Input
|
149 |
className="flex-1"
|
150 |
value={inputValue}
|
151 |
onChange={(e) => setInputValue(e.target.value)}
|
152 |
-
placeholder=
|
153 |
disabled={isLoading}
|
154 |
/>
|
155 |
<Button type="submit" variant="default" disabled={isLoading} size="sm">
|
156 |
<SendIcon />
|
157 |
-
|
158 |
</Button>
|
159 |
</form>
|
160 |
</div>
|
|
|
8 |
import QuerySettings from '@/components/retrieval/QuerySettings'
|
9 |
import { ChatMessage, MessageWithError } from '@/components/retrieval/ChatMessage'
|
10 |
import { EraserIcon, SendIcon } from 'lucide-react'
|
11 |
+
import { useTranslation } from 'react-i18next'
|
12 |
|
13 |
export default function RetrievalTesting() {
|
14 |
+
const { t } = useTranslation()
|
15 |
const [messages, setMessages] = useState<MessageWithError[]>(
|
16 |
() => useSettingsStore.getState().retrievalHistory || []
|
17 |
)
|
|
|
91 |
}
|
92 |
} catch (err) {
|
93 |
// Handle error
|
94 |
+
updateAssistantMessage(`${t('retrievePanel.retrieval.error')}\n${errorMessage(err)}`, true)
|
95 |
} finally {
|
96 |
// Clear loading and add messages to state
|
97 |
setIsLoading(false)
|
|
|
100 |
.setRetrievalHistory([...prevMessages, userMessage, assistantMessage])
|
101 |
}
|
102 |
},
|
103 |
+
[inputValue, isLoading, messages, setMessages, t]
|
104 |
)
|
105 |
|
106 |
const debouncedMessages = useDebounce(messages, 100)
|
|
|
119 |
<div className="flex min-h-0 flex-1 flex-col gap-2">
|
120 |
{messages.length === 0 ? (
|
121 |
<div className="text-muted-foreground flex h-full items-center justify-center text-lg">
|
122 |
+
{t('retrievePanel.retrieval.startPrompt')}
|
123 |
</div>
|
124 |
) : (
|
125 |
messages.map((message, idx) => (
|
|
|
145 |
size="sm"
|
146 |
>
|
147 |
<EraserIcon />
|
148 |
+
{t('retrievePanel.retrieval.clear')}
|
149 |
</Button>
|
150 |
<Input
|
151 |
className="flex-1"
|
152 |
value={inputValue}
|
153 |
onChange={(e) => setInputValue(e.target.value)}
|
154 |
+
placeholder={t('retrievePanel.retrieval.placeholder')}
|
155 |
disabled={isLoading}
|
156 |
/>
|
157 |
<Button type="submit" variant="default" disabled={isLoading} size="sm">
|
158 |
<SendIcon />
|
159 |
+
{t('retrievePanel.retrieval.send')}
|
160 |
</Button>
|
161 |
</form>
|
162 |
</div>
|
lightrag_webui/src/features/SiteHeader.tsx
CHANGED
@@ -4,6 +4,7 @@ import ThemeToggle from '@/components/ThemeToggle'
|
|
4 |
import { TabsList, TabsTrigger } from '@/components/ui/Tabs'
|
5 |
import { useSettingsStore } from '@/stores/settings'
|
6 |
import { cn } from '@/lib/utils'
|
|
|
7 |
|
8 |
import { ZapIcon, GithubIcon } from 'lucide-react'
|
9 |
|
@@ -29,21 +30,22 @@ function NavigationTab({ value, currentTab, children }: NavigationTabProps) {
|
|
29 |
|
30 |
function TabsNavigation() {
|
31 |
const currentTab = useSettingsStore.use.currentTab()
|
|
|
32 |
|
33 |
return (
|
34 |
<div className="flex h-8 self-center">
|
35 |
<TabsList className="h-full gap-2">
|
36 |
<NavigationTab value="documents" currentTab={currentTab}>
|
37 |
-
|
38 |
</NavigationTab>
|
39 |
<NavigationTab value="knowledge-graph" currentTab={currentTab}>
|
40 |
-
|
41 |
</NavigationTab>
|
42 |
<NavigationTab value="retrieval" currentTab={currentTab}>
|
43 |
-
|
44 |
</NavigationTab>
|
45 |
<NavigationTab value="api" currentTab={currentTab}>
|
46 |
-
|
47 |
</NavigationTab>
|
48 |
</TabsList>
|
49 |
</div>
|
@@ -51,6 +53,7 @@ function TabsNavigation() {
|
|
51 |
}
|
52 |
|
53 |
export default function SiteHeader() {
|
|
|
54 |
return (
|
55 |
<header className="border-border/40 bg-background/95 supports-[backdrop-filter]:bg-background/60 sticky top-0 z-50 flex h-10 w-full border-b px-4 backdrop-blur">
|
56 |
<a href="/" className="mr-6 flex items-center gap-2">
|
@@ -64,7 +67,7 @@ export default function SiteHeader() {
|
|
64 |
</div>
|
65 |
|
66 |
<nav className="flex items-center">
|
67 |
-
<Button variant="ghost" size="icon" side="bottom" tooltip=
|
68 |
<a href={SiteInfo.github} target="_blank" rel="noopener noreferrer">
|
69 |
<GithubIcon className="size-4" aria-hidden="true" />
|
70 |
</a>
|
|
|
4 |
import { TabsList, TabsTrigger } from '@/components/ui/Tabs'
|
5 |
import { useSettingsStore } from '@/stores/settings'
|
6 |
import { cn } from '@/lib/utils'
|
7 |
+
import { useTranslation } from 'react-i18next'
|
8 |
|
9 |
import { ZapIcon, GithubIcon } from 'lucide-react'
|
10 |
|
|
|
30 |
|
31 |
function TabsNavigation() {
|
32 |
const currentTab = useSettingsStore.use.currentTab()
|
33 |
+
const { t } = useTranslation()
|
34 |
|
35 |
return (
|
36 |
<div className="flex h-8 self-center">
|
37 |
<TabsList className="h-full gap-2">
|
38 |
<NavigationTab value="documents" currentTab={currentTab}>
|
39 |
+
{t('header.documents')}
|
40 |
</NavigationTab>
|
41 |
<NavigationTab value="knowledge-graph" currentTab={currentTab}>
|
42 |
+
{t('header.knowledgeGraph')}
|
43 |
</NavigationTab>
|
44 |
<NavigationTab value="retrieval" currentTab={currentTab}>
|
45 |
+
{t('header.retrieval')}
|
46 |
</NavigationTab>
|
47 |
<NavigationTab value="api" currentTab={currentTab}>
|
48 |
+
{t('header.api')}
|
49 |
</NavigationTab>
|
50 |
</TabsList>
|
51 |
</div>
|
|
|
53 |
}
|
54 |
|
55 |
export default function SiteHeader() {
|
56 |
+
const { t } = useTranslation()
|
57 |
return (
|
58 |
<header className="border-border/40 bg-background/95 supports-[backdrop-filter]:bg-background/60 sticky top-0 z-50 flex h-10 w-full border-b px-4 backdrop-blur">
|
59 |
<a href="/" className="mr-6 flex items-center gap-2">
|
|
|
67 |
</div>
|
68 |
|
69 |
<nav className="flex items-center">
|
70 |
+
<Button variant="ghost" size="icon" side="bottom" tooltip={t('header.projectRepository')}>
|
71 |
<a href={SiteInfo.github} target="_blank" rel="noopener noreferrer">
|
72 |
<GithubIcon className="size-4" aria-hidden="true" />
|
73 |
</a>
|
lightrag_webui/src/i18n.js
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import i18n from "i18next";
|
2 |
+
import { initReactI18next } from "react-i18next";
|
3 |
+
|
4 |
+
import en from "./locales/en.json";
|
5 |
+
import zh from "./locales/zh.json";
|
6 |
+
|
7 |
+
i18n
|
8 |
+
.use(initReactI18next)
|
9 |
+
.init({
|
10 |
+
resources: {
|
11 |
+
en: { translation: en },
|
12 |
+
zh: { translation: zh }
|
13 |
+
},
|
14 |
+
lng: "en", // default
|
15 |
+
fallbackLng: "en",
|
16 |
+
interpolation: {
|
17 |
+
escapeValue: false
|
18 |
+
}
|
19 |
+
});
|
20 |
+
|
21 |
+
export default i18n;
|
lightrag_webui/src/locales/en.json
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"header": {
|
3 |
+
"documents": "Documents",
|
4 |
+
"knowledgeGraph": "Knowledge Graph",
|
5 |
+
"retrieval": "Retrieval",
|
6 |
+
"api": "API",
|
7 |
+
"projectRepository": "Project Repository",
|
8 |
+
"themeToggle": {
|
9 |
+
"switchToLight": "Switch to light theme",
|
10 |
+
"switchToDark": "Switch to dark theme"
|
11 |
+
}
|
12 |
+
},
|
13 |
+
"documentPanel": {
|
14 |
+
"clearDocuments": {
|
15 |
+
"button": "Clear",
|
16 |
+
"tooltip": "Clear documents",
|
17 |
+
"title": "Clear Documents",
|
18 |
+
"confirm": "Do you really want to clear all documents?",
|
19 |
+
"confirmButton": "YES",
|
20 |
+
"success": "Documents cleared successfully",
|
21 |
+
"failed": "Clear Documents Failed:\n{{message}}",
|
22 |
+
"error": "Clear Documents Failed:\n{{error}}"
|
23 |
+
},
|
24 |
+
"uploadDocuments": {
|
25 |
+
"button": "Upload",
|
26 |
+
"tooltip": "Upload documents",
|
27 |
+
"title": "Upload Documents",
|
28 |
+
"description": "Drag and drop your documents here or click to browse.",
|
29 |
+
"uploading": "Uploading {{name}}: {{percent}}%",
|
30 |
+
"success": "Upload Success:\n{{name}} uploaded successfully",
|
31 |
+
"failed": "Upload Failed:\n{{name}}\n{{message}}",
|
32 |
+
"error": "Upload Failed:\n{{name}}\n{{error}}",
|
33 |
+
"generalError": "Upload Failed\n{{error}}",
|
34 |
+
"fileTypes": "Supported types: TXT, MD, DOCX, PDF, PPTX, RTF, ODT, EPUB, HTML, HTM, TEX, JSON, XML, YAML, YML, CSV, LOG, CONF, INI, PROPERTIES, SQL, BAT, SH, C, CPP, PY, JAVA, JS, TS, SWIFT, GO, RB, PHP, CSS, SCSS, LESS"
|
35 |
+
},
|
36 |
+
"documentManager": {
|
37 |
+
"title": "Document Management",
|
38 |
+
"scanButton": "Scan",
|
39 |
+
"scanTooltip": "Scan documents",
|
40 |
+
"uploadedTitle": "Uploaded Documents",
|
41 |
+
"uploadedDescription": "List of uploaded documents and their statuses.",
|
42 |
+
"emptyTitle": "No Documents",
|
43 |
+
"emptyDescription": "There are no uploaded documents yet.",
|
44 |
+
"columns": {
|
45 |
+
"id": "ID",
|
46 |
+
"summary": "Summary",
|
47 |
+
"status": "Status",
|
48 |
+
"length": "Length",
|
49 |
+
"chunks": "Chunks",
|
50 |
+
"created": "Created",
|
51 |
+
"updated": "Updated",
|
52 |
+
"metadata": "Metadata"
|
53 |
+
},
|
54 |
+
"status": {
|
55 |
+
"completed": "Completed",
|
56 |
+
"processing": "Processing",
|
57 |
+
"pending": "Pending",
|
58 |
+
"failed": "Failed"
|
59 |
+
},
|
60 |
+
"errors": {
|
61 |
+
"loadFailed": "Failed to load documents\n{{error}}",
|
62 |
+
"scanFailed": "Failed to scan documents\n{{error}}",
|
63 |
+
"scanProgressFailed": "Failed to get scan progress\n{{error}}"
|
64 |
+
}
|
65 |
+
}
|
66 |
+
},
|
67 |
+
"graphPanel": {
|
68 |
+
"sideBar": {
|
69 |
+
"settings": {
|
70 |
+
"settings": "Settings",
|
71 |
+
"healthCheck": "Health Check",
|
72 |
+
"showPropertyPanel": "Show Property Panel",
|
73 |
+
"showSearchBar": "Show Search Bar",
|
74 |
+
"showNodeLabel": "Show Node Label",
|
75 |
+
"nodeDraggable": "Node Draggable",
|
76 |
+
"showEdgeLabel": "Show Edge Label",
|
77 |
+
"hideUnselectedEdges": "Hide Unselected Edges",
|
78 |
+
"edgeEvents": "Edge Events",
|
79 |
+
"maxQueryDepth": "Max Query Depth",
|
80 |
+
"minDegree": "Minimum Degree",
|
81 |
+
"maxLayoutIterations": "Max Layout Iterations",
|
82 |
+
"apiKey": "API Key",
|
83 |
+
"enterYourAPIkey": "Enter your API key",
|
84 |
+
"save": "Save"
|
85 |
+
},
|
86 |
+
|
87 |
+
"zoomControl": {
|
88 |
+
"zoomIn": "Zoom In",
|
89 |
+
"zoomOut": "Zoom Out",
|
90 |
+
"resetZoom": "Reset Zoom"
|
91 |
+
},
|
92 |
+
|
93 |
+
"layoutsControl": {
|
94 |
+
"startAnimation": "Start the layout animation",
|
95 |
+
"stopAnimation": "Stop the layout animation",
|
96 |
+
"layoutGraph": "Layout Graph",
|
97 |
+
"layouts": {
|
98 |
+
"Circular": "Circular",
|
99 |
+
"Circlepack": "Circlepack",
|
100 |
+
"Random": "Random",
|
101 |
+
"Noverlaps": "Noverlaps",
|
102 |
+
"Force Directed": "Force Directed",
|
103 |
+
"Force Atlas": "Force Atlas"
|
104 |
+
}
|
105 |
+
},
|
106 |
+
|
107 |
+
"fullScreenControl": {
|
108 |
+
"fullScreen": "Full Screen",
|
109 |
+
"windowed": "Windowed"
|
110 |
+
}
|
111 |
+
},
|
112 |
+
"statusIndicator": {
|
113 |
+
"connected": "Connected",
|
114 |
+
"disconnected": "Disconnected"
|
115 |
+
},
|
116 |
+
"statusCard": {
|
117 |
+
"unavailable": "Status information unavailable",
|
118 |
+
"storageInfo": "Storage Info",
|
119 |
+
"workingDirectory": "Working Directory",
|
120 |
+
"inputDirectory": "Input Directory",
|
121 |
+
"llmConfig": "LLM Configuration",
|
122 |
+
"llmBinding": "LLM Binding",
|
123 |
+
"llmBindingHost": "LLM Binding Host",
|
124 |
+
"llmModel": "LLM Model",
|
125 |
+
"maxTokens": "Max Tokens",
|
126 |
+
"embeddingConfig": "Embedding Configuration",
|
127 |
+
"embeddingBinding": "Embedding Binding",
|
128 |
+
"embeddingBindingHost": "Embedding Binding Host",
|
129 |
+
"embeddingModel": "Embedding Model",
|
130 |
+
"storageConfig": "Storage Configuration",
|
131 |
+
"kvStorage": "KV Storage",
|
132 |
+
"docStatusStorage": "Doc Status Storage",
|
133 |
+
"graphStorage": "Graph Storage",
|
134 |
+
"vectorStorage": "Vector Storage"
|
135 |
+
},
|
136 |
+
"propertiesView": {
|
137 |
+
"node": {
|
138 |
+
"title": "Node",
|
139 |
+
"id": "ID",
|
140 |
+
"labels": "Labels",
|
141 |
+
"degree": "Degree",
|
142 |
+
"properties": "Properties",
|
143 |
+
"relationships": "Relationships"
|
144 |
+
},
|
145 |
+
"edge": {
|
146 |
+
"title": "Relationship",
|
147 |
+
"id": "ID",
|
148 |
+
"type": "Type",
|
149 |
+
"source": "Source",
|
150 |
+
"target": "Target",
|
151 |
+
"properties": "Properties"
|
152 |
+
}
|
153 |
+
},
|
154 |
+
"search": {
|
155 |
+
"placeholder": "Search nodes...",
|
156 |
+
"message": "And {count} others"
|
157 |
+
},
|
158 |
+
"graphLabels": {
|
159 |
+
"selectTooltip": "Select query label",
|
160 |
+
"noLabels": "No labels found",
|
161 |
+
"label": "Label",
|
162 |
+
"placeholder": "Search labels...",
|
163 |
+
"andOthers": "And {count} others"
|
164 |
+
}
|
165 |
+
},
|
166 |
+
"retrievePanel": {
|
167 |
+
"chatMessage": {
|
168 |
+
"copyTooltip": "Copy to clipboard",
|
169 |
+
"copyError": "Failed to copy text to clipboard"
|
170 |
+
},
|
171 |
+
"retrieval": {
|
172 |
+
"startPrompt": "Start a retrieval by typing your query below",
|
173 |
+
"clear": "Clear",
|
174 |
+
"send": "Send",
|
175 |
+
"placeholder": "Type your query...",
|
176 |
+
"error": "Error: Failed to get response"
|
177 |
+
},
|
178 |
+
"querySettings": {
|
179 |
+
"parametersTitle": "Parameters",
|
180 |
+
"parametersDescription": "Configure your query parameters",
|
181 |
+
|
182 |
+
"queryMode": "Query Mode",
|
183 |
+
"queryModeTooltip": "Select the retrieval strategy:\n• Naive: Basic search without advanced techniques\n• Local: Context-dependent information retrieval\n• Global: Utilizes global knowledge base\n• Hybrid: Combines local and global retrieval\n• Mix: Integrates knowledge graph with vector retrieval",
|
184 |
+
"queryModeOptions": {
|
185 |
+
"naive": "Naive",
|
186 |
+
"local": "Local",
|
187 |
+
"global": "Global",
|
188 |
+
"hybrid": "Hybrid",
|
189 |
+
"mix": "Mix"
|
190 |
+
},
|
191 |
+
|
192 |
+
"responseFormat": "Response Format",
|
193 |
+
"responseFormatTooltip": "Defines the response format. Examples:\n• Multiple Paragraphs\n• Single Paragraph\n• Bullet Points",
|
194 |
+
"responseFormatOptions": {
|
195 |
+
"multipleParagraphs": "Multiple Paragraphs",
|
196 |
+
"singleParagraph": "Single Paragraph",
|
197 |
+
"bulletPoints": "Bullet Points"
|
198 |
+
},
|
199 |
+
|
200 |
+
"topK": "Top K Results",
|
201 |
+
"topKTooltip": "Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode",
|
202 |
+
"topKPlaceholder": "Number of results",
|
203 |
+
|
204 |
+
"maxTokensTextUnit": "Max Tokens for Text Unit",
|
205 |
+
"maxTokensTextUnitTooltip": "Maximum number of tokens allowed for each retrieved text chunk",
|
206 |
+
|
207 |
+
"maxTokensGlobalContext": "Max Tokens for Global Context",
|
208 |
+
"maxTokensGlobalContextTooltip": "Maximum number of tokens allocated for relationship descriptions in global retrieval",
|
209 |
+
|
210 |
+
"maxTokensLocalContext": "Max Tokens for Local Context",
|
211 |
+
"maxTokensLocalContextTooltip": "Maximum number of tokens allocated for entity descriptions in local retrieval",
|
212 |
+
|
213 |
+
"historyTurns": "History Turns",
|
214 |
+
"historyTurnsTooltip": "Number of complete conversation turns (user-assistant pairs) to consider in the response context",
|
215 |
+
"historyTurnsPlaceholder": "Number of history turns",
|
216 |
+
|
217 |
+
"hlKeywords": "High-Level Keywords",
|
218 |
+
"hlKeywordsTooltip": "List of high-level keywords to prioritize in retrieval. Separate with commas",
|
219 |
+
"hlkeywordsPlaceHolder": "Enter keywords",
|
220 |
+
|
221 |
+
"llKeywords": "Low-Level Keywords",
|
222 |
+
"llKeywordsTooltip": "List of low-level keywords to refine retrieval focus. Separate with commas",
|
223 |
+
|
224 |
+
"onlyNeedContext": "Only Need Context",
|
225 |
+
"onlyNeedContextTooltip": "If True, only returns the retrieved context without generating a response",
|
226 |
+
|
227 |
+
"onlyNeedPrompt": "Only Need Prompt",
|
228 |
+
"onlyNeedPromptTooltip": "If True, only returns the generated prompt without producing a response",
|
229 |
+
|
230 |
+
"streamResponse": "Stream Response",
|
231 |
+
"streamResponseTooltip": "If True, enables streaming output for real-time responses"
|
232 |
+
}
|
233 |
+
}
|
234 |
+
}
|
lightrag_webui/src/locales/zh.json
ADDED
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"header": {
|
3 |
+
"documents": "文档",
|
4 |
+
"knowledgeGraph": "知识图谱",
|
5 |
+
"retrieval": "检索",
|
6 |
+
"api": "API",
|
7 |
+
"projectRepository": "项目仓库",
|
8 |
+
"themeToggle": {
|
9 |
+
"switchToLight": "切换到亮色主题",
|
10 |
+
"switchToDark": "切换到暗色主题"
|
11 |
+
}
|
12 |
+
},
|
13 |
+
"documentPanel": {
|
14 |
+
"clearDocuments": {
|
15 |
+
"button": "清除",
|
16 |
+
"tooltip": "清除文档",
|
17 |
+
"title": "清除文档",
|
18 |
+
"confirm": "您确定要清除所有文档吗?",
|
19 |
+
"confirmButton": "确定",
|
20 |
+
"success": "文档已成功清除",
|
21 |
+
"failed": "清除文档失败:\n{{message}}",
|
22 |
+
"error": "清除文档失败:\n{{error}}"
|
23 |
+
},
|
24 |
+
"uploadDocuments": {
|
25 |
+
"button": "上传",
|
26 |
+
"tooltip": "上传文档",
|
27 |
+
"title": "上传文档",
|
28 |
+
"description": "拖放文档到此处或点击浏览。",
|
29 |
+
"uploading": "正在上传 {{name}}: {{percent}}%",
|
30 |
+
"success": "上传成功:\n{{name}} 上传成功",
|
31 |
+
"failed": "上传失败:\n{{name}}\n{{message}}",
|
32 |
+
"error": "上传失败:\n{{name}}\n{{error}}",
|
33 |
+
"generalError": "上传失败\n{{error}}",
|
34 |
+
"fileTypes": "支持的文件类型: TXT, MD, DOCX, PDF, PPTX, RTF, ODT, EPUB, HTML, HTM, TEX, JSON, XML, YAML, YML, CSV, LOG, CONF, INI, PROPERTIES, SQL, BAT, SH, C, CPP, PY, JAVA, JS, TS, SWIFT, GO, RB, PHP, CSS, SCSS, LESS"
|
35 |
+
},
|
36 |
+
"documentManager": {
|
37 |
+
"title": "文档管理",
|
38 |
+
"scanButton": "扫描",
|
39 |
+
"scanTooltip": "扫描文档",
|
40 |
+
"uploadedTitle": "已上传文档",
|
41 |
+
"uploadedDescription": "已上传文档及其状态列表。",
|
42 |
+
"emptyTitle": "暂无文档",
|
43 |
+
"emptyDescription": "尚未上传任何文档。",
|
44 |
+
"columns": {
|
45 |
+
"id": "ID",
|
46 |
+
"summary": "摘要",
|
47 |
+
"status": "状态",
|
48 |
+
"length": "长度",
|
49 |
+
"chunks": "分块",
|
50 |
+
"created": "创建时间",
|
51 |
+
"updated": "更新时间",
|
52 |
+
"metadata": "元数据"
|
53 |
+
},
|
54 |
+
"status": {
|
55 |
+
"completed": "已完成",
|
56 |
+
"processing": "处理中",
|
57 |
+
"pending": "待处理",
|
58 |
+
"failed": "失败"
|
59 |
+
},
|
60 |
+
"errors": {
|
61 |
+
"loadFailed": "加载文档失败\n{{error}}",
|
62 |
+
"scanFailed": "扫描文档失败\n{{error}}",
|
63 |
+
"scanProgressFailed": "获取扫描进度失败\n{{error}}"
|
64 |
+
}
|
65 |
+
}
|
66 |
+
},
|
67 |
+
"graphPanel": {
|
68 |
+
"sideBar": {
|
69 |
+
"settings": {
|
70 |
+
"settings": "设置",
|
71 |
+
"healthCheck": "健康检查",
|
72 |
+
"showPropertyPanel": "显示属性面板",
|
73 |
+
"showSearchBar": "显示搜索栏",
|
74 |
+
"showNodeLabel": "显示节点标签",
|
75 |
+
"nodeDraggable": "节点可拖动",
|
76 |
+
"showEdgeLabel": "显示边标签",
|
77 |
+
"hideUnselectedEdges": "隐藏未选中边",
|
78 |
+
"edgeEvents": "边事件",
|
79 |
+
"maxQueryDepth": "最大查询深度",
|
80 |
+
"minDegree": "最小度数",
|
81 |
+
"maxLayoutIterations": "最大布局迭代次数",
|
82 |
+
"apiKey": "API 密钥",
|
83 |
+
"enterYourAPIkey": "输入您的 API 密钥",
|
84 |
+
"save": "保存"
|
85 |
+
},
|
86 |
+
|
87 |
+
"zoomControl": {
|
88 |
+
"zoomIn": "放大",
|
89 |
+
"zoomOut": "缩小",
|
90 |
+
"resetZoom": "重置缩放"
|
91 |
+
},
|
92 |
+
|
93 |
+
"layoutsControl": {
|
94 |
+
"startAnimation": "开始布局动画",
|
95 |
+
"stopAnimation": "停止布局动画",
|
96 |
+
"layoutGraph": "布局图",
|
97 |
+
"layouts": {
|
98 |
+
"Circular": "环形布局",
|
99 |
+
"Circlepack": "圆形打包布局",
|
100 |
+
"Random": "随机布局",
|
101 |
+
"Noverlaps": "无重叠布局",
|
102 |
+
"Force Directed": "力导向布局",
|
103 |
+
"Force Atlas": "力导向图谱布局"
|
104 |
+
}
|
105 |
+
},
|
106 |
+
|
107 |
+
"fullScreenControl": {
|
108 |
+
"fullScreen": "全屏",
|
109 |
+
"windowed": "窗口模式"
|
110 |
+
}
|
111 |
+
},
|
112 |
+
"statusIndicator": {
|
113 |
+
"connected": "已连接",
|
114 |
+
"disconnected": "未连接"
|
115 |
+
},
|
116 |
+
"statusCard": {
|
117 |
+
"unavailable": "状态信息不可用",
|
118 |
+
"storageInfo": "存储信息",
|
119 |
+
"workingDirectory": "工作目录",
|
120 |
+
"inputDirectory": "输入目录",
|
121 |
+
"llmConfig": "LLM 配置",
|
122 |
+
"llmBinding": "LLM 绑定",
|
123 |
+
"llmBindingHost": "LLM 绑定主机",
|
124 |
+
"llmModel": "LLM 模型",
|
125 |
+
"maxTokens": "最大 Token 数",
|
126 |
+
"embeddingConfig": "嵌入配置",
|
127 |
+
"embeddingBinding": "嵌入绑定",
|
128 |
+
"embeddingBindingHost": "嵌入绑定主机",
|
129 |
+
"embeddingModel": "嵌入模型",
|
130 |
+
"storageConfig": "存储配置",
|
131 |
+
"kvStorage": "KV 存储",
|
132 |
+
"docStatusStorage": "文档状态存储",
|
133 |
+
"graphStorage": "图存储",
|
134 |
+
"vectorStorage": "向量存储"
|
135 |
+
},
|
136 |
+
"propertiesView": {
|
137 |
+
"node": {
|
138 |
+
"title": "节点",
|
139 |
+
"id": "ID",
|
140 |
+
"labels": "标签",
|
141 |
+
"degree": "度数",
|
142 |
+
"properties": "属性",
|
143 |
+
"relationships": "关系"
|
144 |
+
},
|
145 |
+
"edge": {
|
146 |
+
"title": "关系",
|
147 |
+
"id": "ID",
|
148 |
+
"type": "类型",
|
149 |
+
"source": "源",
|
150 |
+
"target": "目标",
|
151 |
+
"properties": "属性"
|
152 |
+
}
|
153 |
+
},
|
154 |
+
"search": {
|
155 |
+
"placeholder": "搜索节点...",
|
156 |
+
"message": "以及其它 {{count}} 项"
|
157 |
+
},
|
158 |
+
"graphLabels": {
|
159 |
+
"selectTooltip": "选择查询标签",
|
160 |
+
"noLabels": "未找到标签",
|
161 |
+
"label": "标签",
|
162 |
+
"placeholder": "搜索标签...",
|
163 |
+
"andOthers": "以及其它 {{count}} 个"
|
164 |
+
}
|
165 |
+
},
|
166 |
+
"retrievePanel": {
|
167 |
+
"chatMessage": {
|
168 |
+
"copyTooltip": "复制到剪贴板",
|
169 |
+
"copyError": "无法复制文本到剪贴板"
|
170 |
+
},
|
171 |
+
|
172 |
+
"retrieval": {
|
173 |
+
"startPrompt": "在下面输入您的查询以开始检索",
|
174 |
+
"clear": "清除",
|
175 |
+
"send": "发送",
|
176 |
+
"placeholder": "输入您的查询...",
|
177 |
+
"error": "错误:无法获取响应"
|
178 |
+
},
|
179 |
+
"querySettings": {
|
180 |
+
"parametersTitle": "参数设置",
|
181 |
+
"parametersDescription": "配置查询参数",
|
182 |
+
|
183 |
+
"queryMode": "查询模式",
|
184 |
+
"queryModeTooltip": "选择检索策略:\n• 朴素:不使用高级技术的基本搜索\n• 本地:基于上下文的信息检索\n• 全局:利用全局知识库\n• 混合:结合本地和全局检索\n• 综合:集成知识图谱与向量检索",
|
185 |
+
"queryModeOptions": {
|
186 |
+
"naive": "朴素",
|
187 |
+
"local": "本地",
|
188 |
+
"global": "全局",
|
189 |
+
"hybrid": "混合",
|
190 |
+
"mix": "综合"
|
191 |
+
},
|
192 |
+
|
193 |
+
"responseFormat": "响应格式",
|
194 |
+
"responseFormatTooltip": "定义响应格式。例如:\n• 多个段落\n• 单个段落\n• 项目符号",
|
195 |
+
"responseFormatOptions": {
|
196 |
+
"multipleParagraphs": "多个段落",
|
197 |
+
"singleParagraph": "单个段落",
|
198 |
+
"bulletPoints": "项目符号"
|
199 |
+
},
|
200 |
+
|
201 |
+
"topK": "Top K 结果数",
|
202 |
+
"topKTooltip": "要检索的前 K 个项目数量。在“本地”模式下表示实体,在“全局”模式下表示关系",
|
203 |
+
"topKPlaceholder": "结果数",
|
204 |
+
|
205 |
+
"maxTokensTextUnit": "文本单元最大 Token 数",
|
206 |
+
"maxTokensTextUnitTooltip": "每个检索到的文本块允许的最大 Token 数",
|
207 |
+
|
208 |
+
"maxTokensGlobalContext": "全局上下文最大 Token 数",
|
209 |
+
"maxTokensGlobalContextTooltip": "在全局检索中为关系描述分配的最大 Token 数",
|
210 |
+
|
211 |
+
"maxTokensLocalContext": "本地上下文最大 Token 数",
|
212 |
+
"maxTokensLocalContextTooltip": "在本地检索中为实体描述分配的最大 Token 数",
|
213 |
+
|
214 |
+
"historyTurns": "历史轮次",
|
215 |
+
"historyTurnsTooltip": "在响应上下文中考虑的完整对话轮次(用户-助手对)",
|
216 |
+
"historyTurnsPlaceholder": "历史轮次的数量",
|
217 |
+
|
218 |
+
"hlKeywords": "高级关键词",
|
219 |
+
"hlKeywordsTooltip": "检索时优先考虑的高级关键词。请用逗号分隔",
|
220 |
+
"hlkeywordsPlaceHolder": "输入关键词",
|
221 |
+
|
222 |
+
"llKeywords": "低级关键词",
|
223 |
+
"llKeywordsTooltip": "用于优化检索焦点的低级关键词。请用逗号分隔",
|
224 |
+
|
225 |
+
"onlyNeedContext": "仅需要上下文",
|
226 |
+
"onlyNeedContextTooltip": "如果为 True,则仅返回检索到的上下文,而不会生成回复",
|
227 |
+
|
228 |
+
"onlyNeedPrompt": "仅需要提示",
|
229 |
+
"onlyNeedPromptTooltip": "如果为 True,则仅返回生成的提示,而不会生成回复",
|
230 |
+
|
231 |
+
"streamResponse": "流式响应",
|
232 |
+
"streamResponseTooltip": "如果为 True,则启用流式输出以获得实时响应"
|
233 |
+
}
|
234 |
+
}
|
235 |
+
}
|
lightrag_webui/src/main.tsx
CHANGED
@@ -2,6 +2,8 @@ import { StrictMode } from 'react'
|
|
2 |
import { createRoot } from 'react-dom/client'
|
3 |
import './index.css'
|
4 |
import App from './App.tsx'
|
|
|
|
|
5 |
|
6 |
createRoot(document.getElementById('root')!).render(
|
7 |
<StrictMode>
|
|
|
2 |
import { createRoot } from 'react-dom/client'
|
3 |
import './index.css'
|
4 |
import App from './App.tsx'
|
5 |
+
import "./i18n";
|
6 |
+
|
7 |
|
8 |
createRoot(document.getElementById('root')!).render(
|
9 |
<StrictMode>
|