zrguo
committed on
Commit
·
cc5aef9
1
Parent(s):
e976257
Update operate.py
Browse files - lightrag/operate.py +25 -21
lightrag/operate.py
CHANGED
@@ -453,9 +453,7 @@ async def _rebuild_single_entity(
|
|
453 |
|
454 |
# Helper function to update entity in both graph and vector storage
|
455 |
async def _update_entity_storage(
|
456 |
-
final_description: str,
|
457 |
-
entity_type: str,
|
458 |
-
file_paths: set[str]
|
459 |
):
|
460 |
# Update entity in graph storage
|
461 |
updated_entity_data = {
|
@@ -463,7 +461,9 @@ async def _rebuild_single_entity(
|
|
463 |
"description": final_description,
|
464 |
"entity_type": entity_type,
|
465 |
"source_id": GRAPH_FIELD_SEP.join(chunk_ids),
|
466 |
-
"file_path": GRAPH_FIELD_SEP.join(file_paths)
|
|
|
|
|
467 |
}
|
468 |
await knowledge_graph_inst.upsert_node(entity_name, updated_entity_data)
|
469 |
|
@@ -474,7 +474,9 @@ async def _rebuild_single_entity(
|
|
474 |
try:
|
475 |
await entities_vdb.delete([entity_vdb_id])
|
476 |
except Exception as e:
|
477 |
-
logger.debug(
|
|
|
|
|
478 |
|
479 |
# Insert new vector record
|
480 |
entity_content = f"{entity_name}\n{final_description}"
|
@@ -510,36 +512,38 @@ async def _rebuild_single_entity(
|
|
510 |
all_entity_data.extend(chunk_entities[chunk_id][entity_name])
|
511 |
|
512 |
if not all_entity_data:
|
513 |
-
logger.warning(
|
514 |
-
|
|
|
|
|
515 |
# Get all edges connected to this entity
|
516 |
edges = await knowledge_graph_inst.get_node_edges(entity_name)
|
517 |
if not edges:
|
518 |
logger.warning(f"No relationships found for entity {entity_name}")
|
519 |
return
|
520 |
-
|
521 |
# Collect relationship data to extract entity information
|
522 |
relationship_descriptions = []
|
523 |
file_paths = set()
|
524 |
-
|
525 |
# Get edge data for all connected relationships
|
526 |
for src_id, tgt_id in edges:
|
527 |
edge_data = await knowledge_graph_inst.get_edge(src_id, tgt_id)
|
528 |
if edge_data:
|
529 |
if edge_data.get("description"):
|
530 |
relationship_descriptions.append(edge_data["description"])
|
531 |
-
|
532 |
if edge_data.get("file_path"):
|
533 |
edge_file_paths = edge_data["file_path"].split(GRAPH_FIELD_SEP)
|
534 |
file_paths.update(edge_file_paths)
|
535 |
-
|
536 |
# Generate description from relationships or fallback to current
|
537 |
if relationship_descriptions:
|
538 |
combined_description = GRAPH_FIELD_SEP.join(relationship_descriptions)
|
539 |
final_description = await _generate_final_description(combined_description)
|
540 |
else:
|
541 |
final_description = current_entity.get("description", "")
|
542 |
-
|
543 |
entity_type = current_entity.get("entity_type", "UNKNOWN")
|
544 |
await _update_entity_storage(final_description, entity_type, file_paths)
|
545 |
return
|
@@ -635,11 +639,12 @@ async def _rebuild_single_relationship(
|
|
635 |
if keywords
|
636 |
else current_relationship.get("keywords", "")
|
637 |
)
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
)
|
|
|
643 |
|
644 |
# Use summary if description is too long
|
645 |
if len(combined_description) > global_config["summary_to_max_tokens"]:
|
@@ -657,7 +662,7 @@ async def _rebuild_single_relationship(
|
|
657 |
**current_relationship,
|
658 |
"description": final_description,
|
659 |
"keywords": combined_keywords,
|
660 |
-
"weight":
|
661 |
"source_id": GRAPH_FIELD_SEP.join(chunk_ids),
|
662 |
"file_path": GRAPH_FIELD_SEP.join(file_paths)
|
663 |
if file_paths
|
@@ -688,7 +693,7 @@ async def _rebuild_single_relationship(
|
|
688 |
"content": rel_content,
|
689 |
"keywords": combined_keywords,
|
690 |
"description": final_description,
|
691 |
-
"weight":
|
692 |
"file_path": updated_relationship_data["file_path"],
|
693 |
}
|
694 |
}
|
@@ -838,8 +843,7 @@ async def _merge_edges_then_upsert(
|
|
838 |
)
|
839 |
|
840 |
# Process edges_data with None checks
|
841 |
-
|
842 |
-
weight = sum(all_weights) / len(all_weights)
|
843 |
description = GRAPH_FIELD_SEP.join(
|
844 |
sorted(
|
845 |
set(
|
|
|
453 |
|
454 |
# Helper function to update entity in both graph and vector storage
|
455 |
async def _update_entity_storage(
|
456 |
+
final_description: str, entity_type: str, file_paths: set[str]
|
|
|
|
|
457 |
):
|
458 |
# Update entity in graph storage
|
459 |
updated_entity_data = {
|
|
|
461 |
"description": final_description,
|
462 |
"entity_type": entity_type,
|
463 |
"source_id": GRAPH_FIELD_SEP.join(chunk_ids),
|
464 |
+
"file_path": GRAPH_FIELD_SEP.join(file_paths)
|
465 |
+
if file_paths
|
466 |
+
else current_entity.get("file_path", "unknown_source"),
|
467 |
}
|
468 |
await knowledge_graph_inst.upsert_node(entity_name, updated_entity_data)
|
469 |
|
|
|
474 |
try:
|
475 |
await entities_vdb.delete([entity_vdb_id])
|
476 |
except Exception as e:
|
477 |
+
logger.debug(
|
478 |
+
f"Could not delete old entity vector record {entity_vdb_id}: {e}"
|
479 |
+
)
|
480 |
|
481 |
# Insert new vector record
|
482 |
entity_content = f"{entity_name}\n{final_description}"
|
|
|
512 |
all_entity_data.extend(chunk_entities[chunk_id][entity_name])
|
513 |
|
514 |
if not all_entity_data:
|
515 |
+
logger.warning(
|
516 |
+
f"No cached entity data found for {entity_name}, trying to rebuild from relationships"
|
517 |
+
)
|
518 |
+
|
519 |
# Get all edges connected to this entity
|
520 |
edges = await knowledge_graph_inst.get_node_edges(entity_name)
|
521 |
if not edges:
|
522 |
logger.warning(f"No relationships found for entity {entity_name}")
|
523 |
return
|
524 |
+
|
525 |
# Collect relationship data to extract entity information
|
526 |
relationship_descriptions = []
|
527 |
file_paths = set()
|
528 |
+
|
529 |
# Get edge data for all connected relationships
|
530 |
for src_id, tgt_id in edges:
|
531 |
edge_data = await knowledge_graph_inst.get_edge(src_id, tgt_id)
|
532 |
if edge_data:
|
533 |
if edge_data.get("description"):
|
534 |
relationship_descriptions.append(edge_data["description"])
|
535 |
+
|
536 |
if edge_data.get("file_path"):
|
537 |
edge_file_paths = edge_data["file_path"].split(GRAPH_FIELD_SEP)
|
538 |
file_paths.update(edge_file_paths)
|
539 |
+
|
540 |
# Generate description from relationships or fallback to current
|
541 |
if relationship_descriptions:
|
542 |
combined_description = GRAPH_FIELD_SEP.join(relationship_descriptions)
|
543 |
final_description = await _generate_final_description(combined_description)
|
544 |
else:
|
545 |
final_description = current_entity.get("description", "")
|
546 |
+
|
547 |
entity_type = current_entity.get("entity_type", "UNKNOWN")
|
548 |
await _update_entity_storage(final_description, entity_type, file_paths)
|
549 |
return
|
|
|
639 |
if keywords
|
640 |
else current_relationship.get("keywords", "")
|
641 |
)
|
642 |
+
# weight = (
|
643 |
+
# sum(weights) / len(weights)
|
644 |
+
# if weights
|
645 |
+
# else current_relationship.get("weight", 1.0)
|
646 |
+
# )
|
647 |
+
weight = sum(weights) if weights else current_relationship.get("weight", 1.0)
|
648 |
|
649 |
# Use summary if description is too long
|
650 |
if len(combined_description) > global_config["summary_to_max_tokens"]:
|
|
|
662 |
**current_relationship,
|
663 |
"description": final_description,
|
664 |
"keywords": combined_keywords,
|
665 |
+
"weight": weight,
|
666 |
"source_id": GRAPH_FIELD_SEP.join(chunk_ids),
|
667 |
"file_path": GRAPH_FIELD_SEP.join(file_paths)
|
668 |
if file_paths
|
|
|
693 |
"content": rel_content,
|
694 |
"keywords": combined_keywords,
|
695 |
"description": final_description,
|
696 |
+
"weight": weight,
|
697 |
"file_path": updated_relationship_data["file_path"],
|
698 |
}
|
699 |
}
|
|
|
843 |
)
|
844 |
|
845 |
# Process edges_data with None checks
|
846 |
+
weight = sum([dp["weight"] for dp in edges_data] + already_weights)
|
|
|
847 |
description = GRAPH_FIELD_SEP.join(
|
848 |
sorted(
|
849 |
set(
|