Spaces:

rm-lht
/

lightrag

Configuration error

App Files Files Community

gzdaniel committed on Jul 5

Commit

bf17f9e

2 Parent(s): 2760b95 46e075f

Merge branch 'main' into add-Memgraph-graph-db

Browse files

Files changed (8) hide show

README-zh.md +2 -0
README.md +2 -0
env.example +18 -1
lightrag/kg/{age_impl.py → deprecated/age_impl.py} +0 -0
lightrag/kg/faiss_impl.py +4 -10
lightrag/operate.py +21 -0
pyproject.toml +93 -0
setup.py +4 -105

README-zh.md CHANGED Viewed

@@ -757,6 +757,8 @@ async def initialize_rag():
 <details>
 <summary> <b>使用Faiss进行存储</b> </summary>
 - 安装所需依赖：

 <details>
 <summary> <b>使用Faiss进行存储</b> </summary>
+在使用Faiss向量数据库之前必须手工安装`faiss-cpu`或`faiss-gpu`。
 - 安装所需依赖：

README.md CHANGED Viewed

@@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterpri
 <details>
 <summary> <b>Using Faiss for Storage</b> </summary>
 - Install the required dependencies:

 <details>
 <summary> <b>Using Faiss for Storage</b> </summary>
+You must manually install faiss-cpu or faiss-gpu before using FAISS vector db.
+Manually install `faiss-cpu` or `faiss-gpu` before using FAISS vector db.
 - Install the required dependencies:

env.example CHANGED Viewed

@@ -108,11 +108,28 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
 # AZURE_EMBEDDING_ENDPOINT=your_endpoint
 # AZURE_EMBEDDING_API_KEY=your_api_key
 ### Data storage selection
 # LIGHTRAG_KV_STORAGE=PGKVStorage
-# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
 # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
 # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
 ### PostgreSQL Configuration
 POSTGRES_HOST=localhost

 # AZURE_EMBEDDING_ENDPOINT=your_endpoint
 # AZURE_EMBEDDING_API_KEY=your_api_key
+###########################
 ### Data storage selection
+###########################
+### PostgreSQL
 # LIGHTRAG_KV_STORAGE=PGKVStorage
 # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
+# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
+# LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
+### MongoDB
+# LIGHTRAG_KV_STORAGE=MongoKVStorage
+# LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
+# LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
+# LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
+### KV Storage
+# LIGHTRAG_KV_STORAGE=RedisKVStorage
+# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
+### Vector Storage
+# LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
+# LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
+### Graph Storage
 # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
+# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
 ### PostgreSQL Configuration
 POSTGRES_HOST=localhost

lightrag/kg/{age_impl.py → deprecated/age_impl.py} RENAMED Viewed

File without changes

lightrag/kg/faiss_impl.py CHANGED Viewed

@@ -4,9 +4,7 @@ import asyncio
 from typing import Any, final
 import json
 import numpy as np
 from dataclasses import dataclass
-import pipmaster as pm
 from lightrag.utils import logger, compute_mdhash_id
 from lightrag.base import BaseVectorStorage
@@ -17,11 +15,7 @@ from .shared_storage import (
     set_all_update_flags,
 )
-USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1"
-FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu"
-if not pm.is_installed(FAISS_PACKAGE):
-    pm.install(FAISS_PACKAGE)
 import faiss  # type: ignore
@@ -165,7 +159,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
             meta["__vector__"] = embeddings[i].tolist()
             self._id_to_meta.update({fid: meta})
-        logger.info(f"Upserted {len(list_data)} vectors into Faiss index.")
         return [m["__id__"] for m in list_data]
     async def query(
@@ -228,7 +222,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
         2. Only one process should be updating the storage at a time before index_done_callback,
            KG-storage-log should be used to avoid data corruption
         """
-        logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
         to_remove = []
         for cid in ids:
             fid = self._find_faiss_id_by_custom_id(cid)
@@ -330,7 +324,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
         and rebuild in-memory structures so we can query.
         """
         if not os.path.exists(self._faiss_index_file):
-            logger.warning("No existing Faiss index file found. Starting fresh.")
             return
         try:

 from typing import Any, final
 import json
 import numpy as np
 from dataclasses import dataclass
 from lightrag.utils import logger, compute_mdhash_id
 from lightrag.base import BaseVectorStorage
     set_all_update_flags,
 )
+# You must manually install faiss-cpu or faiss-gpu before using FAISS vector db
 import faiss  # type: ignore
             meta["__vector__"] = embeddings[i].tolist()
             self._id_to_meta.update({fid: meta})
+        logger.debug(f"Upserted {len(list_data)} vectors into Faiss index.")
         return [m["__id__"] for m in list_data]
     async def query(
         2. Only one process should be updating the storage at a time before index_done_callback,
            KG-storage-log should be used to avoid data corruption
         """
+        logger.debug(f"Deleting {len(ids)} vectors from {self.namespace}")
         to_remove = []
         for cid in ids:
             fid = self._find_faiss_id_by_custom_id(cid)
         and rebuild in-memory structures so we can query.
         """
         if not os.path.exists(self._faiss_index_file):
+            logger.warning(f"No existing Faiss index file found for {self.namespace}")
             return
         try:

lightrag/operate.py CHANGED Viewed

@@ -168,6 +168,13 @@ async def _handle_single_entity_extraction(
     # Normalize entity name
     entity_name = normalize_extracted_info(entity_name, is_entity=True)
     # Clean and validate entity type
     entity_type = clean_str(record_attributes[2]).strip('"')
     if not entity_type.strip() or entity_type.startswith('("'):
@@ -209,6 +216,20 @@ async def _handle_single_relationship_extraction(
     # Normalize source and target entity names
     source = normalize_extracted_info(source, is_entity=True)
     target = normalize_extracted_info(target, is_entity=True)
     if source == target:
         logger.debug(
             f"Relationship source and target are the same in: {record_attributes}"

     # Normalize entity name
     entity_name = normalize_extracted_info(entity_name, is_entity=True)
+    # Check if entity name became empty after normalization
+    if not entity_name or not entity_name.strip():
+        logger.warning(
+            f"Entity extraction error: entity name became empty after normalization. Original: '{record_attributes[1]}'"
+        )
+        return None
     # Clean and validate entity type
     entity_type = clean_str(record_attributes[2]).strip('"')
     if not entity_type.strip() or entity_type.startswith('("'):
     # Normalize source and target entity names
     source = normalize_extracted_info(source, is_entity=True)
     target = normalize_extracted_info(target, is_entity=True)
+    # Check if source or target became empty after normalization
+    if not source or not source.strip():
+        logger.warning(
+            f"Relationship extraction error: source entity became empty after normalization. Original: '{record_attributes[1]}'"
+        )
+        return None
+    if not target or not target.strip():
+        logger.warning(
+            f"Relationship extraction error: target entity became empty after normalization. Original: '{record_attributes[2]}'"
+        )
+        return None
     if source == target:
         logger.debug(
             f"Relationship source and target are the same in: {record_attributes}"

pyproject.toml ADDED Viewed

	@@ -0,0 +1,93 @@

+[build-system]
+requires = ["setuptools>=64", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "lightrag-hku"
+dynamic = ["version"]
+authors = [
+    {name = "Zirui Guo"}
+]
+description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.9"
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Developers",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = [
+    "aiohttp",
+    "configparser",
+    "dotenv",
+    "future",
+    "numpy",
+    "pandas>=2.0.0",
+    "pipmaster",
+    "pydantic",
+    "python-dotenv",
+    "pyuca",
+    "setuptools",
+    "tenacity",
+    "tiktoken",
+    "xlsxwriter>=3.1.0",
+]
+[project.optional-dependencies]
+api = [
+    # Core dependencies
+    "aiohttp",
+    "configparser",
+    "dotenv",
+    "future",
+    "numpy",
+    "openai",
+    "pandas>=2.0.0",
+    "pipmaster",
+    "pydantic",
+    "python-dotenv",
+    "pyuca",
+    "setuptools",
+    "tenacity",
+    "tiktoken",
+    "xlsxwriter>=3.1.0",
+    # API-specific dependencies
+    "aiofiles",
+    "ascii_colors",
+    "asyncpg",
+    "distro",
+    "fastapi",
+    "httpcore",
+    "httpx",
+    "jiter",
+    "passlib[bcrypt]",
+    "PyJWT",
+    "python-jose[cryptography]",
+    "python-multipart",
+    "pytz",
+    "uvicorn",
+]
+[project.scripts]
+lightrag-server = "lightrag.api.lightrag_server:main"
+lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
+[project.urls]
+Homepage = "https://github.com/HKUDS/LightRAG"
+Documentation = "https://github.com/HKUDS/LightRAG"
+Repository = "https://github.com/HKUDS/LightRAG"
+"Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues"
+[tool.setuptools]
+packages = ["lightrag"]
+include-package-data = true
+[tool.setuptools.dynamic]
+version = {attr = "lightrag.__version__"}
+[tool.setuptools.package-data]
+lightrag = ["api/webui/**/*"]

setup.py CHANGED Viewed

@@ -1,107 +1,6 @@
-import setuptools
-from pathlib import Path
-# Reading the long description from README.md
-def read_long_description():
-    try:
-        return Path("README.md").read_text(encoding="utf-8")
-    except FileNotFoundError:
-        return "A description of LightRAG is currently unavailable."
-# Retrieving metadata from __init__.py
-def retrieve_metadata():
-    vars2find = ["__author__", "__version__", "__url__"]
-    vars2readme = {}
-    try:
-        with open("./lightrag/__init__.py") as f:
-            for line in f.readlines():
-                for v in vars2find:
-                    if line.startswith(v):
-                        line = (
-                            line.replace(" ", "")
-                            .replace('"', "")
-                            .replace("'", "")
-                            .strip()
-                        )
-                        vars2readme[v] = line.split("=")[1]
-    except FileNotFoundError:
-        raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
-    # Checking if all required variables are found
-    missing_vars = [v for v in vars2find if v not in vars2readme]
-    if missing_vars:
-        raise ValueError(
-            f"Missing required metadata variables in __init__.py: {missing_vars}"
-        )
-    return vars2readme
-# Reading dependencies from requirements.txt
-def read_requirements(file_path="requirements.txt"):
-    deps = []
-    try:
-        with open(file_path) as f:
-            deps = [
-                line.strip() for line in f if line.strip() and not line.startswith("#")
-            ]
-    except FileNotFoundError:
-        print(f"Warning: '{file_path}' not found. No dependencies will be installed.")
-    return deps
-def read_api_requirements():
-    return read_requirements("lightrag/api/requirements.txt")
-def read_extra_requirements():
-    return read_requirements("lightrag/tools/lightrag_visualizer/requirements.txt")
-metadata = retrieve_metadata()
-long_description = read_long_description()
-requirements = read_requirements()
-setuptools.setup(
-    name="lightrag-hku",
-    url=metadata["__url__"],
-    version=metadata["__version__"],
-    author=metadata["__author__"],
-    description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    packages=setuptools.find_packages(
-        exclude=("tests*", "docs*")
-    ),  # Automatically find packages
-    classifiers=[
-        "Development Status :: 4 - Beta",
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-        "Intended Audience :: Developers",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-    ],
-    python_requires=">=3.9",
-    install_requires=requirements,
-    include_package_data=True,  # Includes non-code files from MANIFEST.in
-    project_urls={  # Additional project metadata
-        "Documentation": metadata.get("__url__", ""),
-        "Source": metadata.get("__url__", ""),
-        "Tracker": f"{metadata.get('__url__', '')}/issues"
-        if metadata.get("__url__")
-        else "",
-    },
-    extras_require={
-        "api": requirements + read_api_requirements(),
-        "tools": read_extra_requirements(),  # API requirements as optional
-    },
-    entry_points={
-        "console_scripts": [
-            "lightrag-server=lightrag.api.lightrag_server:main [api]",
-            "lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]",
-            "lightrag-viewer=lightrag.tools.lightrag_visualizer.graph_visualizer:main [tools]",
-        ],
-    },
-)

+# Minimal setup.py for backward compatibility
+# Primary configuration is now in pyproject.toml
+from setuptools import setup
+setup()