gzdaniel committed on
Commit
4fa1a37
·
1 Parent(s): 8089f33

Move max_graph_nodes to global config

Browse files
lightrag/api/config.py CHANGED
@@ -244,6 +244,9 @@ def parse_args() -> argparse.Namespace:
244
  # Get MAX_PARALLEL_INSERT from environment
245
  args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)
246
 
 
 
 
247
  # Handle openai-ollama special case
248
  if args.llm_binding == "openai-ollama":
249
  args.llm_binding = "openai"
 
244
  # Get MAX_PARALLEL_INSERT from environment
245
  args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)
246
 
247
+ # Get MAX_GRAPH_NODES from environment
248
+ args.max_graph_nodes = get_env_value("MAX_GRAPH_NODES", 1000, int)
249
+
250
  # Handle openai-ollama special case
251
  if args.llm_binding == "openai-ollama":
252
  args.llm_binding = "openai"
lightrag/api/lightrag_server.py CHANGED
@@ -326,6 +326,7 @@ def create_app(args):
326
  enable_llm_cache=args.enable_llm_cache,
327
  auto_manage_storages_states=False,
328
  max_parallel_insert=args.max_parallel_insert,
 
329
  addon_params={"language": args.summary_language},
330
  )
331
  else: # azure_openai
@@ -353,6 +354,7 @@ def create_app(args):
353
  enable_llm_cache=args.enable_llm_cache,
354
  auto_manage_storages_states=False,
355
  max_parallel_insert=args.max_parallel_insert,
 
356
  addon_params={"language": args.summary_language},
357
  )
358
 
@@ -475,7 +477,7 @@ def create_app(args):
475
  "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
476
  "enable_llm_cache": args.enable_llm_cache,
477
  "workspace": args.workspace,
478
- "max_graph_nodes": os.getenv("MAX_GRAPH_NODES"),
479
  },
480
  "auth_mode": auth_mode,
481
  "pipeline_busy": pipeline_status.get("busy", False),
 
326
  enable_llm_cache=args.enable_llm_cache,
327
  auto_manage_storages_states=False,
328
  max_parallel_insert=args.max_parallel_insert,
329
+ max_graph_nodes=args.max_graph_nodes,
330
  addon_params={"language": args.summary_language},
331
  )
332
  else: # azure_openai
 
354
  enable_llm_cache=args.enable_llm_cache,
355
  auto_manage_storages_states=False,
356
  max_parallel_insert=args.max_parallel_insert,
357
+ max_graph_nodes=args.max_graph_nodes,
358
  addon_params={"language": args.summary_language},
359
  )
360
 
 
477
  "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
478
  "enable_llm_cache": args.enable_llm_cache,
479
  "workspace": args.workspace,
480
+ "max_graph_nodes": args.max_graph_nodes,
481
  },
482
  "auth_mode": auth_mode,
483
  "pipeline_busy": pipeline_status.get("busy", False),
lightrag/kg/mongo_impl.py CHANGED
@@ -34,8 +34,6 @@ from pymongo.errors import PyMongoError # type: ignore
34
  config = configparser.ConfigParser()
35
  config.read("config.ini", "utf-8")
36
 
37
- # Get maximum number of graph nodes from environment variable, default is 1000
38
- MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
39
  GRAPH_BFS_MODE = os.getenv("MONGO_GRAPH_BFS_MODE", "bidirectional")
40
 
41
 
@@ -883,7 +881,7 @@ class MongoGraphStorage(BaseGraphStorage):
883
  )
884
 
885
  async def get_knowledge_graph_all_by_degree(
886
- self, max_depth: int = 3, max_nodes: int = MAX_GRAPH_NODES
887
  ) -> KnowledgeGraph:
888
  """
889
  It's possible that the node with one or multiple relationships is retrieved,
@@ -961,9 +959,9 @@ class MongoGraphStorage(BaseGraphStorage):
961
  node_labels: list[str],
962
  seen_nodes: set[str],
963
  result: KnowledgeGraph,
964
- depth: int = 0,
965
- max_depth: int = 3,
966
- max_nodes: int = MAX_GRAPH_NODES,
967
  ) -> KnowledgeGraph:
968
  if depth > max_depth or len(result.nodes) > max_nodes:
969
  return result
@@ -1006,9 +1004,9 @@ class MongoGraphStorage(BaseGraphStorage):
1006
  async def get_knowledge_subgraph_bidirectional_bfs(
1007
  self,
1008
  node_label: str,
1009
- depth=0,
1010
- max_depth: int = 3,
1011
- max_nodes: int = MAX_GRAPH_NODES,
1012
  ) -> KnowledgeGraph:
1013
  seen_nodes = set()
1014
  seen_edges = set()
@@ -1038,7 +1036,7 @@ class MongoGraphStorage(BaseGraphStorage):
1038
  return result
1039
 
1040
  async def get_knowledge_subgraph_in_out_bound_bfs(
1041
- self, node_label: str, max_depth: int = 3, max_nodes: int = MAX_GRAPH_NODES
1042
  ) -> KnowledgeGraph:
1043
  seen_nodes = set()
1044
  seen_edges = set()
@@ -1152,7 +1150,7 @@ class MongoGraphStorage(BaseGraphStorage):
1152
  self,
1153
  node_label: str,
1154
  max_depth: int = 3,
1155
- max_nodes: int = MAX_GRAPH_NODES,
1156
  ) -> KnowledgeGraph:
1157
  """
1158
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
@@ -1160,7 +1158,7 @@ class MongoGraphStorage(BaseGraphStorage):
1160
  Args:
1161
  node_label: Label of the starting node, * means all nodes
1162
  max_depth: Maximum depth of the subgraph, Defaults to 3
1163
- max_nodes: Maxiumu nodes to return, Defaults to 1000
1164
 
1165
  Returns:
1166
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
@@ -1184,6 +1182,13 @@ class MongoGraphStorage(BaseGraphStorage):
1184
  C → B
1185
  C → D
1186
  """
 
 
 
 
 
 
 
1187
  result = KnowledgeGraph()
1188
  start = time.perf_counter()
1189
 
 
34
  config = configparser.ConfigParser()
35
  config.read("config.ini", "utf-8")
36
 
 
 
37
  GRAPH_BFS_MODE = os.getenv("MONGO_GRAPH_BFS_MODE", "bidirectional")
38
 
39
 
 
881
  )
882
 
883
  async def get_knowledge_graph_all_by_degree(
884
+ self, max_depth: int, max_nodes: int
885
  ) -> KnowledgeGraph:
886
  """
887
  It's possible that the node with one or multiple relationships is retrieved,
 
959
  node_labels: list[str],
960
  seen_nodes: set[str],
961
  result: KnowledgeGraph,
962
+ depth: int,
963
+ max_depth: int,
964
+ max_nodes: int,
965
  ) -> KnowledgeGraph:
966
  if depth > max_depth or len(result.nodes) > max_nodes:
967
  return result
 
1004
  async def get_knowledge_subgraph_bidirectional_bfs(
1005
  self,
1006
  node_label: str,
1007
+ depth: int,
1008
+ max_depth: int,
1009
+ max_nodes: int,
1010
  ) -> KnowledgeGraph:
1011
  seen_nodes = set()
1012
  seen_edges = set()
 
1036
  return result
1037
 
1038
  async def get_knowledge_subgraph_in_out_bound_bfs(
1039
+ self, node_label: str, max_depth: int, max_nodes: int
1040
  ) -> KnowledgeGraph:
1041
  seen_nodes = set()
1042
  seen_edges = set()
 
1150
  self,
1151
  node_label: str,
1152
  max_depth: int = 3,
1153
+ max_nodes: int = None,
1154
  ) -> KnowledgeGraph:
1155
  """
1156
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
 
1158
  Args:
1159
  node_label: Label of the starting node, * means all nodes
1160
  max_depth: Maximum depth of the subgraph, Defaults to 3
1161
+ max_nodes: Maximum nodes to return. Defaults to global_config max_graph_nodes; larger values are capped at that limit
1162
 
1163
  Returns:
1164
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
 
1182
  C → B
1183
  C → D
1184
  """
1185
+ # Use global_config max_graph_nodes as default if max_nodes is None
1186
+ if max_nodes is None:
1187
+ max_nodes = self.global_config.get("max_graph_nodes", 1000)
1188
+ else:
1189
+ # Limit max_nodes to not exceed global_config max_graph_nodes
1190
+ max_nodes = min(max_nodes, self.global_config.get("max_graph_nodes", 1000))
1191
+
1192
  result = KnowledgeGraph()
1193
  start = time.perf_counter()
1194
 
lightrag/kg/neo4j_impl.py CHANGED
@@ -36,9 +36,6 @@ from dotenv import load_dotenv
36
  # the OS environment variables take precedence over the .env file
37
  load_dotenv(dotenv_path=".env", override=False)
38
 
39
- # Get maximum number of graph nodes from environment variable, default is 1000
40
- MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
41
-
42
  config = configparser.ConfigParser()
43
  config.read("config.ini", "utf-8")
44
 
@@ -902,7 +899,7 @@ class Neo4JStorage(BaseGraphStorage):
902
  self,
903
  node_label: str,
904
  max_depth: int = 3,
905
- max_nodes: int = MAX_GRAPH_NODES,
906
  ) -> KnowledgeGraph:
907
  """
908
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
@@ -916,6 +913,13 @@ class Neo4JStorage(BaseGraphStorage):
916
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
917
  indicating whether the graph was truncated due to max_nodes limit
918
  """
 
 
 
 
 
 
 
919
  workspace_label = self._get_workspace_label()
920
  result = KnowledgeGraph()
921
  seen_nodes = set()
 
36
  # the OS environment variables take precedence over the .env file
37
  load_dotenv(dotenv_path=".env", override=False)
38
 
 
 
 
39
  config = configparser.ConfigParser()
40
  config.read("config.ini", "utf-8")
41
 
 
899
  self,
900
  node_label: str,
901
  max_depth: int = 3,
902
+ max_nodes: int = None,
903
  ) -> KnowledgeGraph:
904
  """
905
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
 
913
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
914
  indicating whether the graph was truncated due to max_nodes limit
915
  """
916
+ # Get max_nodes from global_config if not provided
917
+ if max_nodes is None:
918
+ max_nodes = self.global_config.get("max_graph_nodes", 1000)
919
+ else:
920
+ # Limit max_nodes to not exceed global_config max_graph_nodes
921
+ max_nodes = min(max_nodes, self.global_config.get("max_graph_nodes", 1000))
922
+
923
  workspace_label = self._get_workspace_label()
924
  result = KnowledgeGraph()
925
  seen_nodes = set()
lightrag/kg/networkx_impl.py CHANGED
@@ -26,8 +26,6 @@ from dotenv import load_dotenv
26
  # the OS environment variables take precedence over the .env file
27
  load_dotenv(dotenv_path=".env", override=False)
28
 
29
- MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
30
-
31
 
32
  @final
33
  @dataclass
@@ -218,7 +216,7 @@ class NetworkXStorage(BaseGraphStorage):
218
  self,
219
  node_label: str,
220
  max_depth: int = 3,
221
- max_nodes: int = MAX_GRAPH_NODES,
222
  ) -> KnowledgeGraph:
223
  """
224
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
@@ -232,6 +230,13 @@ class NetworkXStorage(BaseGraphStorage):
232
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
233
  indicating whether the graph was truncated due to max_nodes limit
234
  """
 
 
 
 
 
 
 
235
  graph = await self._get_graph()
236
 
237
  result = KnowledgeGraph()
 
26
  # the OS environment variables take precedence over the .env file
27
  load_dotenv(dotenv_path=".env", override=False)
28
 
 
 
29
 
30
  @final
31
  @dataclass
 
216
  self,
217
  node_label: str,
218
  max_depth: int = 3,
219
+ max_nodes: int = None,
220
  ) -> KnowledgeGraph:
221
  """
222
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
 
230
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
231
  indicating whether the graph was truncated due to max_nodes limit
232
  """
233
+ # Get max_nodes from global_config if not provided
234
+ if max_nodes is None:
235
+ max_nodes = self.global_config.get("max_graph_nodes", 1000)
236
+ else:
237
+ # Limit max_nodes to not exceed global_config max_graph_nodes
238
+ max_nodes = min(max_nodes, self.global_config.get("max_graph_nodes", 1000))
239
+
240
  graph = await self._get_graph()
241
 
242
  result = KnowledgeGraph()
lightrag/kg/postgres_impl.py CHANGED
@@ -45,9 +45,6 @@ from dotenv import load_dotenv
45
  # the OS environment variables take precedence over the .env file
46
  load_dotenv(dotenv_path=".env", override=False)
47
 
48
- # Get maximum number of graph nodes from environment variable, default is 1000
49
- MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
50
-
51
 
52
  class PostgreSQLDB:
53
  def __init__(self, config: dict[str, Any], **kwargs: Any):
@@ -2819,7 +2816,7 @@ class PGGraphStorage(BaseGraphStorage):
2819
  self,
2820
  node_label: str,
2821
  max_depth: int = 3,
2822
- max_nodes: int = MAX_GRAPH_NODES,
2823
  ) -> KnowledgeGraph:
2824
  """
2825
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
@@ -2827,12 +2824,18 @@ class PGGraphStorage(BaseGraphStorage):
2827
  Args:
2828
  node_label: Label of the starting node, * means all nodes
2829
  max_depth: Maximum depth of the subgraph, Defaults to 3
2830
- max_nodes: Maxiumu nodes to return, Defaults to 1000
2831
 
2832
  Returns:
2833
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
2834
  indicating whether the graph was truncated due to max_nodes limit
2835
  """
 
 
 
 
 
 
2836
  kg = KnowledgeGraph()
2837
 
2838
  # Handle wildcard query - get all nodes
 
45
  # the OS environment variables take precedence over the .env file
46
  load_dotenv(dotenv_path=".env", override=False)
47
 
 
 
 
48
 
49
  class PostgreSQLDB:
50
  def __init__(self, config: dict[str, Any], **kwargs: Any):
 
2816
  self,
2817
  node_label: str,
2818
  max_depth: int = 3,
2819
+ max_nodes: int = None,
2820
  ) -> KnowledgeGraph:
2821
  """
2822
  Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
 
2824
  Args:
2825
  node_label: Label of the starting node, * means all nodes
2826
  max_depth: Maximum depth of the subgraph, Defaults to 3
2827
+ max_nodes: Maximum nodes to return. Defaults to global_config max_graph_nodes; larger values are capped at that limit
2828
 
2829
  Returns:
2830
  KnowledgeGraph object containing nodes and edges, with an is_truncated flag
2831
  indicating whether the graph was truncated due to max_nodes limit
2832
  """
2833
+ # Use global_config max_graph_nodes as default if max_nodes is None
2834
+ if max_nodes is None:
2835
+ max_nodes = self.global_config.get("max_graph_nodes", 1000)
2836
+ else:
2837
+ # Limit max_nodes to not exceed global_config max_graph_nodes
2838
+ max_nodes = min(max_nodes, self.global_config.get("max_graph_nodes", 1000))
2839
  kg = KnowledgeGraph()
2840
 
2841
  # Handle wildcard query - get all nodes
lightrag/lightrag.py CHANGED
@@ -258,6 +258,9 @@ class LightRAG:
258
  max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 2)))
259
  """Maximum number of parallel insert operations."""
260
 
 
 
 
261
  addon_params: dict[str, Any] = field(
262
  default_factory=lambda: {
263
  "language": get_env_value("SUMMARY_LANGUAGE", "English", str)
@@ -526,18 +529,24 @@ class LightRAG:
526
  self,
527
  node_label: str,
528
  max_depth: int = 3,
529
- max_nodes: int = 1000,
530
  ) -> KnowledgeGraph:
531
  """Get knowledge graph for a given label
532
 
533
  Args:
534
  node_label (str): Label to get knowledge graph for
535
  max_depth (int): Maximum depth of graph
536
- max_nodes (int, optional): Maximum number of nodes to return. Defaults to 1000.
537
 
538
  Returns:
539
  KnowledgeGraph: Knowledge graph containing nodes and edges
540
  """
 
 
 
 
 
 
541
 
542
  return await self.chunk_entity_relation_graph.get_knowledge_graph(
543
  node_label, max_depth, max_nodes
 
258
  max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 2)))
259
  """Maximum number of parallel insert operations."""
260
 
261
+ max_graph_nodes: int = field(default=get_env_value("MAX_GRAPH_NODES", 1000, int))
262
+ """Maximum number of graph nodes to return in knowledge graph queries."""
263
+
264
  addon_params: dict[str, Any] = field(
265
  default_factory=lambda: {
266
  "language": get_env_value("SUMMARY_LANGUAGE", "English", str)
 
529
  self,
530
  node_label: str,
531
  max_depth: int = 3,
532
+ max_nodes: int = None,
533
  ) -> KnowledgeGraph:
534
  """Get knowledge graph for a given label
535
 
536
  Args:
537
  node_label (str): Label to get knowledge graph for
538
  max_depth (int): Maximum depth of graph
539
+ max_nodes (int, optional): Maximum number of nodes to return. Defaults to self.max_graph_nodes; larger values are capped at self.max_graph_nodes.
540
 
541
  Returns:
542
  KnowledgeGraph: Knowledge graph containing nodes and edges
543
  """
544
+ # Use self.max_graph_nodes as default if max_nodes is None
545
+ if max_nodes is None:
546
+ max_nodes = self.max_graph_nodes
547
+ else:
548
+ # Limit max_nodes to not exceed self.max_graph_nodes
549
+ max_nodes = min(max_nodes, self.max_graph_nodes)
550
 
551
  return await self.chunk_entity_relation_graph.get_knowledge_graph(
552
  node_label, max_depth, max_nodes