yangdx
committed on
Commit
·
d0b1ac0
1
Parent(s):
72ece4e
Add max nodes limit for graph retrieval of networkX
Browse files
• Set MAX_GRAPH_NODES env var (default 1000)
• Change edge type to "RELATED"
- .env.example +1 -0
- lightrag/api/routers/graph_routes.py +17 -2
- lightrag/kg/networkx_impl.py +8 -6
.env.example
CHANGED
@@ -3,6 +3,7 @@
|
|
3 |
# PORT=9621
|
4 |
# WORKERS=1
|
5 |
# NAMESPACE_PREFIX=lightrag # separating data from difference Lightrag instances
|
|
|
6 |
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
|
7 |
|
8 |
### Optional SSL Configuration
|
|
|
3 |
# PORT=9621
|
4 |
# WORKERS=1
|
5 |
# NAMESPACE_PREFIX=lightrag # separating data from difference Lightrag instances
|
6 |
+
# MAX_GRAPH_NODES=1000 # Max nodes returned from graph retrieval
|
7 |
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
|
8 |
|
9 |
### Optional SSL Configuration
|
lightrag/api/routers/graph_routes.py
CHANGED
@@ -16,12 +16,27 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
|
|
16 |
|
17 |
@router.get("/graph/label/list", dependencies=[Depends(optional_api_key)])
|
18 |
async def get_graph_labels():
|
19 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
20 |
return await rag.get_graph_labels()
|
21 |
|
22 |
@router.get("/graphs", dependencies=[Depends(optional_api_key)])
|
23 |
async def get_knowledge_graph(label: str, max_depth: int = 3):
|
24 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
return await rag.get_knowledge_graph(node_label=label, max_depth=max_depth)
|
26 |
|
27 |
return router
|
|
|
16 |
|
17 |
@router.get("/graph/label/list", dependencies=[Depends(optional_api_key)])
|
18 |
async def get_graph_labels():
|
19 |
+
"""
|
20 |
+
Get all graph labels
|
21 |
+
|
22 |
+
Returns:
|
23 |
+
List[str]: List of graph labels
|
24 |
+
"""
|
25 |
return await rag.get_graph_labels()
|
26 |
|
27 |
@router.get("/graphs", dependencies=[Depends(optional_api_key)])
|
28 |
async def get_knowledge_graph(label: str, max_depth: int = 3):
|
29 |
+
"""
|
30 |
+
Get knowledge graph for a specific label.
|
31 |
+
Maximum number of nodes is limited to env MAX_GRAPH_NODES (default: 1000)
|
32 |
+
|
33 |
+
Args:
|
34 |
+
label (str): Label to get knowledge graph for
|
35 |
+
max_depth (int, optional): Maximum depth of graph. Defaults to 3.
|
36 |
+
|
37 |
+
Returns:
|
38 |
+
Dict[str, List[str]]: Knowledge graph for label
|
39 |
+
"""
|
40 |
return await rag.get_knowledge_graph(node_label=label, max_depth=max_depth)
|
41 |
|
42 |
return router
|
lightrag/kg/networkx_impl.py
CHANGED
@@ -24,6 +24,8 @@ from .shared_storage import (
|
|
24 |
is_multiprocess,
|
25 |
)
|
26 |
|
|
|
|
|
27 |
|
28 |
@final
|
29 |
@dataclass
|
@@ -234,6 +236,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
234 |
) -> KnowledgeGraph:
|
235 |
"""
|
236 |
Get complete connected subgraph for specified node (including the starting node itself)
|
|
|
237 |
|
238 |
Args:
|
239 |
node_label: Label of the starting node
|
@@ -269,18 +272,17 @@ class NetworkXStorage(BaseGraphStorage):
|
|
269 |
subgraph = nx.ego_graph(graph, nodes_to_explore[0], radius=max_depth)
|
270 |
|
271 |
# Check if number of nodes exceeds max_graph_nodes
|
272 |
-
|
273 |
-
if len(subgraph.nodes()) > max_graph_nodes:
|
274 |
origin_nodes = len(subgraph.nodes())
|
275 |
node_degrees = dict(subgraph.degree())
|
276 |
top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[
|
277 |
-
:
|
278 |
]
|
279 |
top_node_ids = [node[0] for node in top_nodes]
|
280 |
-
# Create new subgraph
|
281 |
subgraph = subgraph.subgraph(top_node_ids)
|
282 |
logger.info(
|
283 |
-
f"Reduced graph from {origin_nodes} nodes to {
|
284 |
)
|
285 |
|
286 |
# Add nodes to result
|
@@ -320,7 +322,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
320 |
result.edges.append(
|
321 |
KnowledgeGraphEdge(
|
322 |
id=edge_id,
|
323 |
-
type="
|
324 |
source=str(source),
|
325 |
target=str(target),
|
326 |
properties=edge_data,
|
|
|
24 |
is_multiprocess,
|
25 |
)
|
26 |
|
27 |
+
MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
|
28 |
+
|
29 |
|
30 |
@final
|
31 |
@dataclass
|
|
|
236 |
) -> KnowledgeGraph:
|
237 |
"""
|
238 |
Get complete connected subgraph for specified node (including the starting node itself)
|
239 |
+
Maximum number of nodes is limited to env MAX_GRAPH_NODES (default: 1000)
|
240 |
|
241 |
Args:
|
242 |
node_label: Label of the starting node
|
|
|
272 |
subgraph = nx.ego_graph(graph, nodes_to_explore[0], radius=max_depth)
|
273 |
|
274 |
# Check if number of nodes exceeds max_graph_nodes
|
275 |
+
if len(subgraph.nodes()) > MAX_GRAPH_NODES:
|
|
|
276 |
origin_nodes = len(subgraph.nodes())
|
277 |
node_degrees = dict(subgraph.degree())
|
278 |
top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[
|
279 |
+
:MAX_GRAPH_NODES
|
280 |
]
|
281 |
top_node_ids = [node[0] for node in top_nodes]
|
282 |
+
# Create new subgraph and keep nodes only with most degree
|
283 |
subgraph = subgraph.subgraph(top_node_ids)
|
284 |
logger.info(
|
285 |
+
f"Reduced graph from {origin_nodes} nodes to {MAX_GRAPH_NODES} nodes (depth={max_depth})"
|
286 |
)
|
287 |
|
288 |
# Add nodes to result
|
|
|
322 |
result.edges.append(
|
323 |
KnowledgeGraphEdge(
|
324 |
id=edge_id,
|
325 |
+
type="RELATED",
|
326 |
source=str(source),
|
327 |
target=str(target),
|
328 |
properties=edge_data,
|