gzdaniel committed on
Commit
bf17f9e
·
2 Parent(s): 2760b95 46e075f

Merge branch 'main' into add-Memgraph-graph-db

Browse files
README-zh.md CHANGED
@@ -757,6 +757,8 @@ async def initialize_rag():
757
 
758
  <details>
759
  <summary> <b>使用Faiss进行存储</b> </summary>
 
 
760
 
761
  - 安装所需依赖:
762
 
 
757
 
758
  <details>
759
  <summary> <b>使用Faiss进行存储</b> </summary>
760
+ 在使用Faiss向量数据库之前必须手工安装`faiss-cpu`或`faiss-gpu`。
761
+
762
 
763
  - 安装所需依赖:
764
 
README.md CHANGED
@@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterpri
819
 
820
  <details>
821
  <summary> <b>Using Faiss for Storage</b> </summary>
 
 
822
 
823
  - Install the required dependencies:
824
 
 
819
 
820
  <details>
821
  <summary> <b>Using Faiss for Storage</b> </summary>
822
+ You must manually install `faiss-cpu` or `faiss-gpu` before using the Faiss vector database.
823
+
824
 
825
  - Install the required dependencies:
826
 
env.example CHANGED
@@ -108,11 +108,28 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
108
  # AZURE_EMBEDDING_ENDPOINT=your_endpoint
109
  # AZURE_EMBEDDING_API_KEY=your_api_key
110
 
 
111
  ### Data storage selection
 
 
112
  # LIGHTRAG_KV_STORAGE=PGKVStorage
113
- # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
114
  # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
 
116
 
117
  ### PostgreSQL Configuration
118
  POSTGRES_HOST=localhost
 
108
  # AZURE_EMBEDDING_ENDPOINT=your_endpoint
109
  # AZURE_EMBEDDING_API_KEY=your_api_key
110
 
111
+ ###########################
112
  ### Data storage selection
113
+ ###########################
114
+ ### PostgreSQL
115
  # LIGHTRAG_KV_STORAGE=PGKVStorage
 
116
  # LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
117
+ # LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
118
+ # LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
119
+ ### MongoDB
120
+ # LIGHTRAG_KV_STORAGE=MongoKVStorage
121
+ # LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
122
+ # LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
123
+ # LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
124
+ ### KV Storage
125
+ # LIGHTRAG_KV_STORAGE=RedisKVStorage
126
+ # LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
127
+ ### Vector Storage
128
+ # LIGHTRAG_VECTOR_STORAGE=FaissVectorDBStorage
129
+ # LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
130
+ ### Graph Storage
131
  # LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
132
+ # LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
133
 
134
  ### PostgreSQL Configuration
135
  POSTGRES_HOST=localhost
lightrag/kg/{age_impl.py → deprecated/age_impl.py} RENAMED
File without changes
lightrag/kg/faiss_impl.py CHANGED
@@ -4,9 +4,7 @@ import asyncio
4
  from typing import Any, final
5
  import json
6
  import numpy as np
7
-
8
  from dataclasses import dataclass
9
- import pipmaster as pm
10
 
11
  from lightrag.utils import logger, compute_mdhash_id
12
  from lightrag.base import BaseVectorStorage
@@ -17,11 +15,7 @@ from .shared_storage import (
17
  set_all_update_flags,
18
  )
19
 
20
- USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1"
21
- FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu"
22
- if not pm.is_installed(FAISS_PACKAGE):
23
- pm.install(FAISS_PACKAGE)
24
-
25
  import faiss # type: ignore
26
 
27
 
@@ -165,7 +159,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
165
  meta["__vector__"] = embeddings[i].tolist()
166
  self._id_to_meta.update({fid: meta})
167
 
168
- logger.info(f"Upserted {len(list_data)} vectors into Faiss index.")
169
  return [m["__id__"] for m in list_data]
170
 
171
  async def query(
@@ -228,7 +222,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
228
  2. Only one process should be updating the storage at a time before index_done_callback,
229
  KG-storage-log should be used to avoid data corruption
230
  """
231
- logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
232
  to_remove = []
233
  for cid in ids:
234
  fid = self._find_faiss_id_by_custom_id(cid)
@@ -330,7 +324,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
330
  and rebuild in-memory structures so we can query.
331
  """
332
  if not os.path.exists(self._faiss_index_file):
333
- logger.warning("No existing Faiss index file found. Starting fresh.")
334
  return
335
 
336
  try:
 
4
  from typing import Any, final
5
  import json
6
  import numpy as np
 
7
  from dataclasses import dataclass
 
8
 
9
  from lightrag.utils import logger, compute_mdhash_id
10
  from lightrag.base import BaseVectorStorage
 
15
  set_all_update_flags,
16
  )
17
 
18
+ # You must manually install faiss-cpu or faiss-gpu before using FAISS vector db
 
 
 
 
19
  import faiss # type: ignore
20
 
21
 
 
159
  meta["__vector__"] = embeddings[i].tolist()
160
  self._id_to_meta.update({fid: meta})
161
 
162
+ logger.debug(f"Upserted {len(list_data)} vectors into Faiss index.")
163
  return [m["__id__"] for m in list_data]
164
 
165
  async def query(
 
222
  2. Only one process should be updating the storage at a time before index_done_callback,
223
  KG-storage-log should be used to avoid data corruption
224
  """
225
+ logger.debug(f"Deleting {len(ids)} vectors from {self.namespace}")
226
  to_remove = []
227
  for cid in ids:
228
  fid = self._find_faiss_id_by_custom_id(cid)
 
324
  and rebuild in-memory structures so we can query.
325
  """
326
  if not os.path.exists(self._faiss_index_file):
327
+ logger.warning(f"No existing Faiss index file found for {self.namespace}")
328
  return
329
 
330
  try:
lightrag/operate.py CHANGED
@@ -168,6 +168,13 @@ async def _handle_single_entity_extraction(
168
  # Normalize entity name
169
  entity_name = normalize_extracted_info(entity_name, is_entity=True)
170
 
 
 
 
 
 
 
 
171
  # Clean and validate entity type
172
  entity_type = clean_str(record_attributes[2]).strip('"')
173
  if not entity_type.strip() or entity_type.startswith('("'):
@@ -209,6 +216,20 @@ async def _handle_single_relationship_extraction(
209
  # Normalize source and target entity names
210
  source = normalize_extracted_info(source, is_entity=True)
211
  target = normalize_extracted_info(target, is_entity=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  if source == target:
213
  logger.debug(
214
  f"Relationship source and target are the same in: {record_attributes}"
 
168
  # Normalize entity name
169
  entity_name = normalize_extracted_info(entity_name, is_entity=True)
170
 
171
+ # Check if entity name became empty after normalization
172
+ if not entity_name or not entity_name.strip():
173
+ logger.warning(
174
+ f"Entity extraction error: entity name became empty after normalization. Original: '{record_attributes[1]}'"
175
+ )
176
+ return None
177
+
178
  # Clean and validate entity type
179
  entity_type = clean_str(record_attributes[2]).strip('"')
180
  if not entity_type.strip() or entity_type.startswith('("'):
 
216
  # Normalize source and target entity names
217
  source = normalize_extracted_info(source, is_entity=True)
218
  target = normalize_extracted_info(target, is_entity=True)
219
+
220
+ # Check if source or target became empty after normalization
221
+ if not source or not source.strip():
222
+ logger.warning(
223
+ f"Relationship extraction error: source entity became empty after normalization. Original: '{record_attributes[1]}'"
224
+ )
225
+ return None
226
+
227
+ if not target or not target.strip():
228
+ logger.warning(
229
+ f"Relationship extraction error: target entity became empty after normalization. Original: '{record_attributes[2]}'"
230
+ )
231
+ return None
232
+
233
  if source == target:
234
  logger.debug(
235
  f"Relationship source and target are the same in: {record_attributes}"
pyproject.toml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=64", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "lightrag-hku"
7
+ dynamic = ["version"]
8
+ authors = [
9
+ {name = "Zirui Guo"}
10
+ ]
11
+ description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
12
+ readme = "README.md"
13
+ license = {text = "MIT"}
14
+ requires-python = ">=3.9"
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Programming Language :: Python :: 3",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: OS Independent",
20
+ "Intended Audience :: Developers",
21
+ "Topic :: Software Development :: Libraries :: Python Modules",
22
+ ]
23
+ dependencies = [
24
+ "aiohttp",
25
+ "configparser",
26
+ "dotenv",
27
+ "future",
28
+ "numpy",
29
+ "pandas>=2.0.0",
30
+ "pipmaster",
31
+ "pydantic",
32
+ "python-dotenv",
33
+ "pyuca",
34
+ "setuptools",
35
+ "tenacity",
36
+ "tiktoken",
37
+ "xlsxwriter>=3.1.0",
38
+ ]
39
+
40
+ [project.optional-dependencies]
41
+ api = [
42
+ # Core dependencies
43
+ "aiohttp",
44
+ "configparser",
45
+ "dotenv",
46
+ "future",
47
+ "numpy",
48
+ "openai",
49
+ "pandas>=2.0.0",
50
+ "pipmaster",
51
+ "pydantic",
52
+ "python-dotenv",
53
+ "pyuca",
54
+ "setuptools",
55
+ "tenacity",
56
+ "tiktoken",
57
+ "xlsxwriter>=3.1.0",
58
+ # API-specific dependencies
59
+ "aiofiles",
60
+ "ascii_colors",
61
+ "asyncpg",
62
+ "distro",
63
+ "fastapi",
64
+ "httpcore",
65
+ "httpx",
66
+ "jiter",
67
+ "passlib[bcrypt]",
68
+ "PyJWT",
69
+ "python-jose[cryptography]",
70
+ "python-multipart",
71
+ "pytz",
72
+ "uvicorn",
73
+ ]
74
+
75
+ [project.scripts]
76
+ lightrag-server = "lightrag.api.lightrag_server:main"
77
+ lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
78
+
79
+ [project.urls]
80
+ Homepage = "https://github.com/HKUDS/LightRAG"
81
+ Documentation = "https://github.com/HKUDS/LightRAG"
82
+ Repository = "https://github.com/HKUDS/LightRAG"
83
+ "Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues"
84
+
85
+ [tool.setuptools]
86
+ packages = ["lightrag"]
87
+ include-package-data = true
88
+
89
+ [tool.setuptools.dynamic]
90
+ version = {attr = "lightrag.__version__"}
91
+
92
+ [tool.setuptools.package-data]
93
+ lightrag = ["api/webui/**/*"]
setup.py CHANGED
@@ -1,107 +1,6 @@
1
- import setuptools
2
- from pathlib import Path
3
 
 
4
 
5
- # Reading the long description from README.md
6
- def read_long_description():
7
- try:
8
- return Path("README.md").read_text(encoding="utf-8")
9
- except FileNotFoundError:
10
- return "A description of LightRAG is currently unavailable."
11
-
12
-
13
- # Retrieving metadata from __init__.py
14
- def retrieve_metadata():
15
- vars2find = ["__author__", "__version__", "__url__"]
16
- vars2readme = {}
17
- try:
18
- with open("./lightrag/__init__.py") as f:
19
- for line in f.readlines():
20
- for v in vars2find:
21
- if line.startswith(v):
22
- line = (
23
- line.replace(" ", "")
24
- .replace('"', "")
25
- .replace("'", "")
26
- .strip()
27
- )
28
- vars2readme[v] = line.split("=")[1]
29
- except FileNotFoundError:
30
- raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
31
-
32
- # Checking if all required variables are found
33
- missing_vars = [v for v in vars2find if v not in vars2readme]
34
- if missing_vars:
35
- raise ValueError(
36
- f"Missing required metadata variables in __init__.py: {missing_vars}"
37
- )
38
-
39
- return vars2readme
40
-
41
-
42
- # Reading dependencies from requirements.txt
43
- def read_requirements(file_path="requirements.txt"):
44
- deps = []
45
- try:
46
- with open(file_path) as f:
47
- deps = [
48
- line.strip() for line in f if line.strip() and not line.startswith("#")
49
- ]
50
- except FileNotFoundError:
51
- print(f"Warning: '{file_path}' not found. No dependencies will be installed.")
52
- return deps
53
-
54
-
55
- def read_api_requirements():
56
- return read_requirements("lightrag/api/requirements.txt")
57
-
58
-
59
- def read_extra_requirements():
60
- return read_requirements("lightrag/tools/lightrag_visualizer/requirements.txt")
61
-
62
-
63
- metadata = retrieve_metadata()
64
- long_description = read_long_description()
65
- requirements = read_requirements()
66
-
67
- setuptools.setup(
68
- name="lightrag-hku",
69
- url=metadata["__url__"],
70
- version=metadata["__version__"],
71
- author=metadata["__author__"],
72
- description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
73
- long_description=long_description,
74
- long_description_content_type="text/markdown",
75
- packages=setuptools.find_packages(
76
- exclude=("tests*", "docs*")
77
- ), # Automatically find packages
78
- classifiers=[
79
- "Development Status :: 4 - Beta",
80
- "Programming Language :: Python :: 3",
81
- "License :: OSI Approved :: MIT License",
82
- "Operating System :: OS Independent",
83
- "Intended Audience :: Developers",
84
- "Topic :: Software Development :: Libraries :: Python Modules",
85
- ],
86
- python_requires=">=3.9",
87
- install_requires=requirements,
88
- include_package_data=True, # Includes non-code files from MANIFEST.in
89
- project_urls={ # Additional project metadata
90
- "Documentation": metadata.get("__url__", ""),
91
- "Source": metadata.get("__url__", ""),
92
- "Tracker": f"{metadata.get('__url__', '')}/issues"
93
- if metadata.get("__url__")
94
- else "",
95
- },
96
- extras_require={
97
- "api": requirements + read_api_requirements(),
98
- "tools": read_extra_requirements(), # API requirements as optional
99
- },
100
- entry_points={
101
- "console_scripts": [
102
- "lightrag-server=lightrag.api.lightrag_server:main [api]",
103
- "lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]",
104
- "lightrag-viewer=lightrag.tools.lightrag_visualizer.graph_visualizer:main [tools]",
105
- ],
106
- },
107
- )
 
1
+ # Minimal setup.py for backward compatibility
2
+ # Primary configuration is now in pyproject.toml
3
 
4
+ from setuptools import setup
5
 
6
+ setup()