Move max_graph_nodes to global config
Browse files
- lightrag/api/config.py +3 -0
- lightrag/api/lightrag_server.py +3 -1
- lightrag/kg/mongo_impl.py +17 -12
- lightrag/kg/neo4j_impl.py +8 -4
- lightrag/kg/networkx_impl.py +8 -3
- lightrag/kg/postgres_impl.py +8 -5
- lightrag/lightrag.py +11 -2
lightrag/api/config.py
CHANGED
@@ -244,6 +244,9 @@ def parse_args() -> argparse.Namespace:
|
|
244 |
# Get MAX_PARALLEL_INSERT from environment
|
245 |
args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)
|
246 |
|
|
|
|
|
|
|
247 |
# Handle openai-ollama special case
|
248 |
if args.llm_binding == "openai-ollama":
|
249 |
args.llm_binding = "openai"
|
|
|
244 |
# Get MAX_PARALLEL_INSERT from environment
|
245 |
args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)
|
246 |
|
247 |
+
# Get MAX_GRAPH_NODES from environment
|
248 |
+
args.max_graph_nodes = get_env_value("MAX_GRAPH_NODES", 1000, int)
|
249 |
+
|
250 |
# Handle openai-ollama special case
|
251 |
if args.llm_binding == "openai-ollama":
|
252 |
args.llm_binding = "openai"
|
lightrag/api/lightrag_server.py
CHANGED
@@ -326,6 +326,7 @@ def create_app(args):
|
|
326 |
enable_llm_cache=args.enable_llm_cache,
|
327 |
auto_manage_storages_states=False,
|
328 |
max_parallel_insert=args.max_parallel_insert,
|
|
|
329 |
addon_params={"language": args.summary_language},
|
330 |
)
|
331 |
else: # azure_openai
|
@@ -353,6 +354,7 @@ def create_app(args):
|
|
353 |
enable_llm_cache=args.enable_llm_cache,
|
354 |
auto_manage_storages_states=False,
|
355 |
max_parallel_insert=args.max_parallel_insert,
|
|
|
356 |
addon_params={"language": args.summary_language},
|
357 |
)
|
358 |
|
@@ -475,7 +477,7 @@ def create_app(args):
|
|
475 |
"enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
|
476 |
"enable_llm_cache": args.enable_llm_cache,
|
477 |
"workspace": args.workspace,
|
478 |
-
"max_graph_nodes":
|
479 |
},
|
480 |
"auth_mode": auth_mode,
|
481 |
"pipeline_busy": pipeline_status.get("busy", False),
|
|
|
326 |
enable_llm_cache=args.enable_llm_cache,
|
327 |
auto_manage_storages_states=False,
|
328 |
max_parallel_insert=args.max_parallel_insert,
|
329 |
+
max_graph_nodes=args.max_graph_nodes,
|
330 |
addon_params={"language": args.summary_language},
|
331 |
)
|
332 |
else: # azure_openai
|
|
|
354 |
enable_llm_cache=args.enable_llm_cache,
|
355 |
auto_manage_storages_states=False,
|
356 |
max_parallel_insert=args.max_parallel_insert,
|
357 |
+
max_graph_nodes=args.max_graph_nodes,
|
358 |
addon_params={"language": args.summary_language},
|
359 |
)
|
360 |
|
|
|
477 |
"enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
|
478 |
"enable_llm_cache": args.enable_llm_cache,
|
479 |
"workspace": args.workspace,
|
480 |
+
"max_graph_nodes": args.max_graph_nodes,
|
481 |
},
|
482 |
"auth_mode": auth_mode,
|
483 |
"pipeline_busy": pipeline_status.get("busy", False),
|
lightrag/kg/mongo_impl.py
CHANGED
@@ -34,8 +34,6 @@ from pymongo.errors import PyMongoError # type: ignore
|
|
34 |
config = configparser.ConfigParser()
|
35 |
config.read("config.ini", "utf-8")
|
36 |
|
37 |
-
# Get maximum number of graph nodes from environment variable, default is 1000
|
38 |
-
MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
|
39 |
GRAPH_BFS_MODE = os.getenv("MONGO_GRAPH_BFS_MODE", "bidirectional")
|
40 |
|
41 |
|
@@ -883,7 +881,7 @@ class MongoGraphStorage(BaseGraphStorage):
|
|
883 |
)
|
884 |
|
885 |
async def get_knowledge_graph_all_by_degree(
|
886 |
-
self, max_depth: int
|
887 |
) -> KnowledgeGraph:
|
888 |
"""
|
889 |
It's possible that the node with one or multiple relationships is retrieved,
|
@@ -961,9 +959,9 @@ class MongoGraphStorage(BaseGraphStorage):
|
|
961 |
node_labels: list[str],
|
962 |
seen_nodes: set[str],
|
963 |
result: KnowledgeGraph,
|
964 |
-
depth: int
|
965 |
-
max_depth: int
|
966 |
-
max_nodes: int
|
967 |
) -> KnowledgeGraph:
|
968 |
if depth > max_depth or len(result.nodes) > max_nodes:
|
969 |
return result
|
@@ -1006,9 +1004,9 @@ class MongoGraphStorage(BaseGraphStorage):
|
|
1006 |
async def get_knowledge_subgraph_bidirectional_bfs(
|
1007 |
self,
|
1008 |
node_label: str,
|
1009 |
-
depth
|
1010 |
-
max_depth: int
|
1011 |
-
max_nodes: int
|
1012 |
) -> KnowledgeGraph:
|
1013 |
seen_nodes = set()
|
1014 |
seen_edges = set()
|
@@ -1038,7 +1036,7 @@ class MongoGraphStorage(BaseGraphStorage):
|
|
1038 |
return result
|
1039 |
|
1040 |
async def get_knowledge_subgraph_in_out_bound_bfs(
|
1041 |
-
self, node_label: str, max_depth: int
|
1042 |
) -> KnowledgeGraph:
|
1043 |
seen_nodes = set()
|
1044 |
seen_edges = set()
|
@@ -1152,7 +1150,7 @@ class MongoGraphStorage(BaseGraphStorage):
|
|
1152 |
self,
|
1153 |
node_label: str,
|
1154 |
max_depth: int = 3,
|
1155 |
-
max_nodes: int =
|
1156 |
) -> KnowledgeGraph:
|
1157 |
"""
|
1158 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
@@ -1160,7 +1158,7 @@ class MongoGraphStorage(BaseGraphStorage):
|
|
1160 |
Args:
|
1161 |
node_label: Label of the starting node, * means all nodes
|
1162 |
max_depth: Maximum depth of the subgraph, Defaults to 3
|
1163 |
-
max_nodes:
|
1164 |
|
1165 |
Returns:
|
1166 |
KnowledgeGraph object containing nodes and edges, with an is_truncated flag
|
@@ -1184,6 +1182,13 @@ class MongoGraphStorage(BaseGraphStorage):
|
|
1184 |
C → B
|
1185 |
C → D
|
1186 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1187 |
result = KnowledgeGraph()
|
1188 |
start = time.perf_counter()
|
1189 |
|
|
|
34 |
config = configparser.ConfigParser()
|
35 |
config.read("config.ini", "utf-8")
|
36 |
|
|
|
|
|
37 |
GRAPH_BFS_MODE = os.getenv("MONGO_GRAPH_BFS_MODE", "bidirectional")
|
38 |
|
39 |
|
|
|
881 |
)
|
882 |
|
883 |
async def get_knowledge_graph_all_by_degree(
|
884 |
+
self, max_depth: int, max_nodes: int
|
885 |
) -> KnowledgeGraph:
|
886 |
"""
|
887 |
It's possible that the node with one or multiple relationships is retrieved,
|
|
|
959 |
node_labels: list[str],
|
960 |
seen_nodes: set[str],
|
961 |
result: KnowledgeGraph,
|
962 |
+
depth: int,
|
963 |
+
max_depth: int,
|
964 |
+
max_nodes: int,
|
965 |
) -> KnowledgeGraph:
|
966 |
if depth > max_depth or len(result.nodes) > max_nodes:
|
967 |
return result
|
|
|
1004 |
async def get_knowledge_subgraph_bidirectional_bfs(
|
1005 |
self,
|
1006 |
node_label: str,
|
1007 |
+
depth: int,
|
1008 |
+
max_depth: int,
|
1009 |
+
max_nodes: int,
|
1010 |
) -> KnowledgeGraph:
|
1011 |
seen_nodes = set()
|
1012 |
seen_edges = set()
|
|
|
1036 |
return result
|
1037 |
|
1038 |
async def get_knowledge_subgraph_in_out_bound_bfs(
|
1039 |
+
self, node_label: str, max_depth: int, max_nodes: int
|
1040 |
) -> KnowledgeGraph:
|
1041 |
seen_nodes = set()
|
1042 |
seen_edges = set()
|
|
|
1150 |
self,
|
1151 |
node_label: str,
|
1152 |
max_depth: int = 3,
|
1153 |
+
max_nodes: int = None,
|
1154 |
) -> KnowledgeGraph:
|
1155 |
"""
|
1156 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
|
|
1158 |
Args:
|
1159 |
node_label: Label of the starting node, * means all nodes
|
1160 |
max_depth: Maximum depth of the subgraph, Defaults to 3
|
1161 |
+
max_nodes: Maximum nodes to return, Defaults to global_config max_graph_nodes
|
1162 |
|
1163 |
Returns:
|
1164 |
KnowledgeGraph object containing nodes and edges, with an is_truncated flag
|
|
|
1182 |
C → B
|
1183 |
C → D
|
1184 |
"""
|
1185 |
+
# Use global_config max_graph_nodes as default if max_nodes is None
|
1186 |
+
if max_nodes is None:
|
1187 |
+
max_nodes = self.global_config.get("max_graph_nodes", 1000)
|
1188 |
+
else:
|
1189 |
+
# Limit max_nodes to not exceed global_config max_graph_nodes
|
1190 |
+
max_nodes = min(max_nodes, self.global_config.get("max_graph_nodes", 1000))
|
1191 |
+
|
1192 |
result = KnowledgeGraph()
|
1193 |
start = time.perf_counter()
|
1194 |
|
lightrag/kg/neo4j_impl.py
CHANGED
@@ -36,9 +36,6 @@ from dotenv import load_dotenv
|
|
36 |
# the OS environment variables take precedence over the .env file
|
37 |
load_dotenv(dotenv_path=".env", override=False)
|
38 |
|
39 |
-
# Get maximum number of graph nodes from environment variable, default is 1000
|
40 |
-
MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
|
41 |
-
|
42 |
config = configparser.ConfigParser()
|
43 |
config.read("config.ini", "utf-8")
|
44 |
|
@@ -902,7 +899,7 @@ class Neo4JStorage(BaseGraphStorage):
|
|
902 |
self,
|
903 |
node_label: str,
|
904 |
max_depth: int = 3,
|
905 |
-
max_nodes: int =
|
906 |
) -> KnowledgeGraph:
|
907 |
"""
|
908 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
@@ -916,6 +913,13 @@ class Neo4JStorage(BaseGraphStorage):
|
|
916 |
KnowledgeGraph object containing nodes and edges, with an is_truncated flag
|
917 |
indicating whether the graph was truncated due to max_nodes limit
|
918 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
919 |
workspace_label = self._get_workspace_label()
|
920 |
result = KnowledgeGraph()
|
921 |
seen_nodes = set()
|
|
|
36 |
# the OS environment variables take precedence over the .env file
|
37 |
load_dotenv(dotenv_path=".env", override=False)
|
38 |
|
|
|
|
|
|
|
39 |
config = configparser.ConfigParser()
|
40 |
config.read("config.ini", "utf-8")
|
41 |
|
|
|
899 |
self,
|
900 |
node_label: str,
|
901 |
max_depth: int = 3,
|
902 |
+
max_nodes: int = None,
|
903 |
) -> KnowledgeGraph:
|
904 |
"""
|
905 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
|
|
913 |
KnowledgeGraph object containing nodes and edges, with an is_truncated flag
|
914 |
indicating whether the graph was truncated due to max_nodes limit
|
915 |
"""
|
916 |
+
# Get max_nodes from global_config if not provided
|
917 |
+
if max_nodes is None:
|
918 |
+
max_nodes = self.global_config.get("max_graph_nodes", 1000)
|
919 |
+
else:
|
920 |
+
# Limit max_nodes to not exceed global_config max_graph_nodes
|
921 |
+
max_nodes = min(max_nodes, self.global_config.get("max_graph_nodes", 1000))
|
922 |
+
|
923 |
workspace_label = self._get_workspace_label()
|
924 |
result = KnowledgeGraph()
|
925 |
seen_nodes = set()
|
lightrag/kg/networkx_impl.py
CHANGED
@@ -26,8 +26,6 @@ from dotenv import load_dotenv
|
|
26 |
# the OS environment variables take precedence over the .env file
|
27 |
load_dotenv(dotenv_path=".env", override=False)
|
28 |
|
29 |
-
MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
|
30 |
-
|
31 |
|
32 |
@final
|
33 |
@dataclass
|
@@ -218,7 +216,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
218 |
self,
|
219 |
node_label: str,
|
220 |
max_depth: int = 3,
|
221 |
-
max_nodes: int =
|
222 |
) -> KnowledgeGraph:
|
223 |
"""
|
224 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
@@ -232,6 +230,13 @@ class NetworkXStorage(BaseGraphStorage):
|
|
232 |
KnowledgeGraph object containing nodes and edges, with an is_truncated flag
|
233 |
indicating whether the graph was truncated due to max_nodes limit
|
234 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
graph = await self._get_graph()
|
236 |
|
237 |
result = KnowledgeGraph()
|
|
|
26 |
# the OS environment variables take precedence over the .env file
|
27 |
load_dotenv(dotenv_path=".env", override=False)
|
28 |
|
|
|
|
|
29 |
|
30 |
@final
|
31 |
@dataclass
|
|
|
216 |
self,
|
217 |
node_label: str,
|
218 |
max_depth: int = 3,
|
219 |
+
max_nodes: int = None,
|
220 |
) -> KnowledgeGraph:
|
221 |
"""
|
222 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
|
|
230 |
KnowledgeGraph object containing nodes and edges, with an is_truncated flag
|
231 |
indicating whether the graph was truncated due to max_nodes limit
|
232 |
"""
|
233 |
+
# Get max_nodes from global_config if not provided
|
234 |
+
if max_nodes is None:
|
235 |
+
max_nodes = self.global_config.get("max_graph_nodes", 1000)
|
236 |
+
else:
|
237 |
+
# Limit max_nodes to not exceed global_config max_graph_nodes
|
238 |
+
max_nodes = min(max_nodes, self.global_config.get("max_graph_nodes", 1000))
|
239 |
+
|
240 |
graph = await self._get_graph()
|
241 |
|
242 |
result = KnowledgeGraph()
|
lightrag/kg/postgres_impl.py
CHANGED
@@ -45,9 +45,6 @@ from dotenv import load_dotenv
|
|
45 |
# the OS environment variables take precedence over the .env file
|
46 |
load_dotenv(dotenv_path=".env", override=False)
|
47 |
|
48 |
-
# Get maximum number of graph nodes from environment variable, default is 1000
|
49 |
-
MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
|
50 |
-
|
51 |
|
52 |
class PostgreSQLDB:
|
53 |
def __init__(self, config: dict[str, Any], **kwargs: Any):
|
@@ -2819,7 +2816,7 @@ class PGGraphStorage(BaseGraphStorage):
|
|
2819 |
self,
|
2820 |
node_label: str,
|
2821 |
max_depth: int = 3,
|
2822 |
-
max_nodes: int =
|
2823 |
) -> KnowledgeGraph:
|
2824 |
"""
|
2825 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
@@ -2827,12 +2824,18 @@ class PGGraphStorage(BaseGraphStorage):
|
|
2827 |
Args:
|
2828 |
node_label: Label of the starting node, * means all nodes
|
2829 |
max_depth: Maximum depth of the subgraph, Defaults to 3
|
2830 |
-
max_nodes:
|
2831 |
|
2832 |
Returns:
|
2833 |
KnowledgeGraph object containing nodes and edges, with an is_truncated flag
|
2834 |
indicating whether the graph was truncated due to max_nodes limit
|
2835 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
2836 |
kg = KnowledgeGraph()
|
2837 |
|
2838 |
# Handle wildcard query - get all nodes
|
|
|
45 |
# the OS environment variables take precedence over the .env file
|
46 |
load_dotenv(dotenv_path=".env", override=False)
|
47 |
|
|
|
|
|
|
|
48 |
|
49 |
class PostgreSQLDB:
|
50 |
def __init__(self, config: dict[str, Any], **kwargs: Any):
|
|
|
2816 |
self,
|
2817 |
node_label: str,
|
2818 |
max_depth: int = 3,
|
2819 |
+
max_nodes: int = None,
|
2820 |
) -> KnowledgeGraph:
|
2821 |
"""
|
2822 |
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
|
|
2824 |
Args:
|
2825 |
node_label: Label of the starting node, * means all nodes
|
2826 |
max_depth: Maximum depth of the subgraph, Defaults to 3
|
2827 |
+
max_nodes: Maximum nodes to return, Defaults to global_config max_graph_nodes
|
2828 |
|
2829 |
Returns:
|
2830 |
KnowledgeGraph object containing nodes and edges, with an is_truncated flag
|
2831 |
indicating whether the graph was truncated due to max_nodes limit
|
2832 |
"""
|
2833 |
+
# Use global_config max_graph_nodes as default if max_nodes is None
|
2834 |
+
if max_nodes is None:
|
2835 |
+
max_nodes = self.global_config.get("max_graph_nodes", 1000)
|
2836 |
+
else:
|
2837 |
+
# Limit max_nodes to not exceed global_config max_graph_nodes
|
2838 |
+
max_nodes = min(max_nodes, self.global_config.get("max_graph_nodes", 1000))
|
2839 |
kg = KnowledgeGraph()
|
2840 |
|
2841 |
# Handle wildcard query - get all nodes
|
lightrag/lightrag.py
CHANGED
@@ -258,6 +258,9 @@ class LightRAG:
|
|
258 |
max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 2)))
|
259 |
"""Maximum number of parallel insert operations."""
|
260 |
|
|
|
|
|
|
|
261 |
addon_params: dict[str, Any] = field(
|
262 |
default_factory=lambda: {
|
263 |
"language": get_env_value("SUMMARY_LANGUAGE", "English", str)
|
@@ -526,18 +529,24 @@ class LightRAG:
|
|
526 |
self,
|
527 |
node_label: str,
|
528 |
max_depth: int = 3,
|
529 |
-
max_nodes: int =
|
530 |
) -> KnowledgeGraph:
|
531 |
"""Get knowledge graph for a given label
|
532 |
|
533 |
Args:
|
534 |
node_label (str): Label to get knowledge graph for
|
535 |
max_depth (int): Maximum depth of graph
|
536 |
-
max_nodes (int, optional): Maximum number of nodes to return. Defaults to
|
537 |
|
538 |
Returns:
|
539 |
KnowledgeGraph: Knowledge graph containing nodes and edges
|
540 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
541 |
|
542 |
return await self.chunk_entity_relation_graph.get_knowledge_graph(
|
543 |
node_label, max_depth, max_nodes
|
|
|
258 |
max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 2)))
|
259 |
"""Maximum number of parallel insert operations."""
|
260 |
|
261 |
+
max_graph_nodes: int = field(default=get_env_value("MAX_GRAPH_NODES", 1000, int))
|
262 |
+
"""Maximum number of graph nodes to return in knowledge graph queries."""
|
263 |
+
|
264 |
addon_params: dict[str, Any] = field(
|
265 |
default_factory=lambda: {
|
266 |
"language": get_env_value("SUMMARY_LANGUAGE", "English", str)
|
|
|
529 |
self,
|
530 |
node_label: str,
|
531 |
max_depth: int = 3,
|
532 |
+
max_nodes: int = None,
|
533 |
) -> KnowledgeGraph:
|
534 |
"""Get knowledge graph for a given label
|
535 |
|
536 |
Args:
|
537 |
node_label (str): Label to get knowledge graph for
|
538 |
max_depth (int): Maximum depth of graph
|
539 |
+
max_nodes (int, optional): Maximum number of nodes to return. Defaults to self.max_graph_nodes.
|
540 |
|
541 |
Returns:
|
542 |
KnowledgeGraph: Knowledge graph containing nodes and edges
|
543 |
"""
|
544 |
+
# Use self.max_graph_nodes as default if max_nodes is None
|
545 |
+
if max_nodes is None:
|
546 |
+
max_nodes = self.max_graph_nodes
|
547 |
+
else:
|
548 |
+
# Limit max_nodes to not exceed self.max_graph_nodes
|
549 |
+
max_nodes = min(max_nodes, self.max_graph_nodes)
|
550 |
|
551 |
return await self.chunk_entity_relation_graph.get_knowledge_graph(
|
552 |
node_label, max_depth, max_nodes
|