zrguo
committed on
Commit
·
e976257
1
Parent(s):
a346fa4
Update operate.py
Browse files · lightrag/operate.py (+90, -49)
lightrag/operate.py
CHANGED
@@ -451,6 +451,58 @@ async def _rebuild_single_entity(
|
|
451 |
if not current_entity:
|
452 |
return
|
453 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
454 |
# Collect all entity data from relevant chunks
|
455 |
all_entity_data = []
|
456 |
for chunk_id in chunk_ids:
|
@@ -458,10 +510,41 @@ async def _rebuild_single_entity(
|
|
458 |
all_entity_data.extend(chunk_entities[chunk_id][entity_name])
|
459 |
|
460 |
if not all_entity_data:
|
461 |
-
logger.warning(f"No cached entity data found for {entity_name}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
462 |
return
|
463 |
|
464 |
-
#
|
465 |
descriptions = []
|
466 |
entity_types = []
|
467 |
file_paths = set()
|
@@ -488,52 +571,9 @@ async def _rebuild_single_entity(
|
|
488 |
else current_entity.get("entity_type", "UNKNOWN")
|
489 |
)
|
490 |
|
491 |
-
#
|
492 |
-
|
493 |
-
|
494 |
-
entity_name,
|
495 |
-
combined_description,
|
496 |
-
global_config,
|
497 |
-
llm_response_cache=llm_response_cache,
|
498 |
-
)
|
499 |
-
else:
|
500 |
-
final_description = combined_description
|
501 |
-
|
502 |
-
# Update entity in graph storage
|
503 |
-
updated_entity_data = {
|
504 |
-
**current_entity,
|
505 |
-
"description": final_description,
|
506 |
-
"entity_type": entity_type,
|
507 |
-
"source_id": GRAPH_FIELD_SEP.join(chunk_ids),
|
508 |
-
"file_path": GRAPH_FIELD_SEP.join(file_paths)
|
509 |
-
if file_paths
|
510 |
-
else current_entity.get("file_path", "unknown_source"),
|
511 |
-
}
|
512 |
-
await knowledge_graph_inst.upsert_node(entity_name, updated_entity_data)
|
513 |
-
|
514 |
-
# Update entity in vector database
|
515 |
-
entity_vdb_id = compute_mdhash_id(entity_name, prefix="ent-")
|
516 |
-
|
517 |
-
# Delete old vector record first
|
518 |
-
try:
|
519 |
-
await entities_vdb.delete([entity_vdb_id])
|
520 |
-
except Exception as e:
|
521 |
-
logger.debug(f"Could not delete old entity vector record {entity_vdb_id}: {e}")
|
522 |
-
|
523 |
-
# Insert new vector record
|
524 |
-
entity_content = f"{entity_name}\n{final_description}"
|
525 |
-
await entities_vdb.upsert(
|
526 |
-
{
|
527 |
-
entity_vdb_id: {
|
528 |
-
"content": entity_content,
|
529 |
-
"entity_name": entity_name,
|
530 |
-
"source_id": updated_entity_data["source_id"],
|
531 |
-
"description": final_description,
|
532 |
-
"entity_type": entity_type,
|
533 |
-
"file_path": updated_entity_data["file_path"],
|
534 |
-
}
|
535 |
-
}
|
536 |
-
)
|
537 |
|
538 |
|
539 |
async def _rebuild_single_relationship(
|
@@ -798,7 +838,8 @@ async def _merge_edges_then_upsert(
|
|
798 |
)
|
799 |
|
800 |
# Process edges_data with None checks
|
801 |
-
|
|
|
802 |
description = GRAPH_FIELD_SEP.join(
|
803 |
sorted(
|
804 |
set(
|
|
|
451 |
if not current_entity:
|
452 |
return
|
453 |
|
454 |
+
# Helper function to update entity in both graph and vector storage
|
455 |
+
async def _update_entity_storage(
|
456 |
+
final_description: str,
|
457 |
+
entity_type: str,
|
458 |
+
file_paths: set[str]
|
459 |
+
):
|
460 |
+
# Update entity in graph storage
|
461 |
+
updated_entity_data = {
|
462 |
+
**current_entity,
|
463 |
+
"description": final_description,
|
464 |
+
"entity_type": entity_type,
|
465 |
+
"source_id": GRAPH_FIELD_SEP.join(chunk_ids),
|
466 |
+
"file_path": GRAPH_FIELD_SEP.join(file_paths) if file_paths else current_entity.get("file_path", "unknown_source"),
|
467 |
+
}
|
468 |
+
await knowledge_graph_inst.upsert_node(entity_name, updated_entity_data)
|
469 |
+
|
470 |
+
# Update entity in vector database
|
471 |
+
entity_vdb_id = compute_mdhash_id(entity_name, prefix="ent-")
|
472 |
+
|
473 |
+
# Delete old vector record first
|
474 |
+
try:
|
475 |
+
await entities_vdb.delete([entity_vdb_id])
|
476 |
+
except Exception as e:
|
477 |
+
logger.debug(f"Could not delete old entity vector record {entity_vdb_id}: {e}")
|
478 |
+
|
479 |
+
# Insert new vector record
|
480 |
+
entity_content = f"{entity_name}\n{final_description}"
|
481 |
+
await entities_vdb.upsert(
|
482 |
+
{
|
483 |
+
entity_vdb_id: {
|
484 |
+
"content": entity_content,
|
485 |
+
"entity_name": entity_name,
|
486 |
+
"source_id": updated_entity_data["source_id"],
|
487 |
+
"description": final_description,
|
488 |
+
"entity_type": entity_type,
|
489 |
+
"file_path": updated_entity_data["file_path"],
|
490 |
+
}
|
491 |
+
}
|
492 |
+
)
|
493 |
+
|
494 |
+
# Helper function to generate final description with optional LLM summary
|
495 |
+
async def _generate_final_description(combined_description: str) -> str:
|
496 |
+
if len(combined_description) > global_config["summary_to_max_tokens"]:
|
497 |
+
return await _handle_entity_relation_summary(
|
498 |
+
entity_name,
|
499 |
+
combined_description,
|
500 |
+
global_config,
|
501 |
+
llm_response_cache=llm_response_cache,
|
502 |
+
)
|
503 |
+
else:
|
504 |
+
return combined_description
|
505 |
+
|
506 |
# Collect all entity data from relevant chunks
|
507 |
all_entity_data = []
|
508 |
for chunk_id in chunk_ids:
|
|
|
510 |
all_entity_data.extend(chunk_entities[chunk_id][entity_name])
|
511 |
|
512 |
if not all_entity_data:
|
513 |
+
logger.warning(f"No cached entity data found for {entity_name}, trying to rebuild from relationships")
|
514 |
+
|
515 |
+
# Get all edges connected to this entity
|
516 |
+
edges = await knowledge_graph_inst.get_node_edges(entity_name)
|
517 |
+
if not edges:
|
518 |
+
logger.warning(f"No relationships found for entity {entity_name}")
|
519 |
+
return
|
520 |
+
|
521 |
+
# Collect relationship data to extract entity information
|
522 |
+
relationship_descriptions = []
|
523 |
+
file_paths = set()
|
524 |
+
|
525 |
+
# Get edge data for all connected relationships
|
526 |
+
for src_id, tgt_id in edges:
|
527 |
+
edge_data = await knowledge_graph_inst.get_edge(src_id, tgt_id)
|
528 |
+
if edge_data:
|
529 |
+
if edge_data.get("description"):
|
530 |
+
relationship_descriptions.append(edge_data["description"])
|
531 |
+
|
532 |
+
if edge_data.get("file_path"):
|
533 |
+
edge_file_paths = edge_data["file_path"].split(GRAPH_FIELD_SEP)
|
534 |
+
file_paths.update(edge_file_paths)
|
535 |
+
|
536 |
+
# Generate description from relationships or fallback to current
|
537 |
+
if relationship_descriptions:
|
538 |
+
combined_description = GRAPH_FIELD_SEP.join(relationship_descriptions)
|
539 |
+
final_description = await _generate_final_description(combined_description)
|
540 |
+
else:
|
541 |
+
final_description = current_entity.get("description", "")
|
542 |
+
|
543 |
+
entity_type = current_entity.get("entity_type", "UNKNOWN")
|
544 |
+
await _update_entity_storage(final_description, entity_type, file_paths)
|
545 |
return
|
546 |
|
547 |
+
# Process cached entity data
|
548 |
descriptions = []
|
549 |
entity_types = []
|
550 |
file_paths = set()
|
|
|
571 |
else current_entity.get("entity_type", "UNKNOWN")
|
572 |
)
|
573 |
|
574 |
+
# Generate final description and update storage
|
575 |
+
final_description = await _generate_final_description(combined_description)
|
576 |
+
await _update_entity_storage(final_description, entity_type, file_paths)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
577 |
|
578 |
|
579 |
async def _rebuild_single_relationship(
|
|
|
838 |
)
|
839 |
|
840 |
# Process edges_data with None checks
|
841 |
+
all_weights = [dp["weight"] for dp in edges_data] + already_weights
|
842 |
+
weight = sum(all_weights) / len(all_weights)
|
843 |
description = GRAPH_FIELD_SEP.join(
|
844 |
sorted(
|
845 |
set(
|