zrguo committed on
Commit
cc5aef9
·
1 Parent(s): e976257

Update operate.py

Browse files
Files changed (1) hide show
  1. lightrag/operate.py +25 -21
lightrag/operate.py CHANGED
@@ -453,9 +453,7 @@ async def _rebuild_single_entity(
453
 
454
  # Helper function to update entity in both graph and vector storage
455
  async def _update_entity_storage(
456
- final_description: str,
457
- entity_type: str,
458
- file_paths: set[str]
459
  ):
460
  # Update entity in graph storage
461
  updated_entity_data = {
@@ -463,7 +461,9 @@ async def _rebuild_single_entity(
463
  "description": final_description,
464
  "entity_type": entity_type,
465
  "source_id": GRAPH_FIELD_SEP.join(chunk_ids),
466
- "file_path": GRAPH_FIELD_SEP.join(file_paths) if file_paths else current_entity.get("file_path", "unknown_source"),
 
 
467
  }
468
  await knowledge_graph_inst.upsert_node(entity_name, updated_entity_data)
469
 
@@ -474,7 +474,9 @@ async def _rebuild_single_entity(
474
  try:
475
  await entities_vdb.delete([entity_vdb_id])
476
  except Exception as e:
477
- logger.debug(f"Could not delete old entity vector record {entity_vdb_id}: {e}")
 
 
478
 
479
  # Insert new vector record
480
  entity_content = f"{entity_name}\n{final_description}"
@@ -510,36 +512,38 @@ async def _rebuild_single_entity(
510
  all_entity_data.extend(chunk_entities[chunk_id][entity_name])
511
 
512
  if not all_entity_data:
513
- logger.warning(f"No cached entity data found for {entity_name}, trying to rebuild from relationships")
514
-
 
 
515
  # Get all edges connected to this entity
516
  edges = await knowledge_graph_inst.get_node_edges(entity_name)
517
  if not edges:
518
  logger.warning(f"No relationships found for entity {entity_name}")
519
  return
520
-
521
  # Collect relationship data to extract entity information
522
  relationship_descriptions = []
523
  file_paths = set()
524
-
525
  # Get edge data for all connected relationships
526
  for src_id, tgt_id in edges:
527
  edge_data = await knowledge_graph_inst.get_edge(src_id, tgt_id)
528
  if edge_data:
529
  if edge_data.get("description"):
530
  relationship_descriptions.append(edge_data["description"])
531
-
532
  if edge_data.get("file_path"):
533
  edge_file_paths = edge_data["file_path"].split(GRAPH_FIELD_SEP)
534
  file_paths.update(edge_file_paths)
535
-
536
  # Generate description from relationships or fallback to current
537
  if relationship_descriptions:
538
  combined_description = GRAPH_FIELD_SEP.join(relationship_descriptions)
539
  final_description = await _generate_final_description(combined_description)
540
  else:
541
  final_description = current_entity.get("description", "")
542
-
543
  entity_type = current_entity.get("entity_type", "UNKNOWN")
544
  await _update_entity_storage(final_description, entity_type, file_paths)
545
  return
@@ -635,11 +639,12 @@ async def _rebuild_single_relationship(
635
  if keywords
636
  else current_relationship.get("keywords", "")
637
  )
638
- avg_weight = (
639
- sum(weights) / len(weights)
640
- if weights
641
- else current_relationship.get("weight", 1.0)
642
- )
 
643
 
644
  # Use summary if description is too long
645
  if len(combined_description) > global_config["summary_to_max_tokens"]:
@@ -657,7 +662,7 @@ async def _rebuild_single_relationship(
657
  **current_relationship,
658
  "description": final_description,
659
  "keywords": combined_keywords,
660
- "weight": avg_weight,
661
  "source_id": GRAPH_FIELD_SEP.join(chunk_ids),
662
  "file_path": GRAPH_FIELD_SEP.join(file_paths)
663
  if file_paths
@@ -688,7 +693,7 @@ async def _rebuild_single_relationship(
688
  "content": rel_content,
689
  "keywords": combined_keywords,
690
  "description": final_description,
691
- "weight": avg_weight,
692
  "file_path": updated_relationship_data["file_path"],
693
  }
694
  }
@@ -838,8 +843,7 @@ async def _merge_edges_then_upsert(
838
  )
839
 
840
  # Process edges_data with None checks
841
- all_weights = [dp["weight"] for dp in edges_data] + already_weights
842
- weight = sum(all_weights) / len(all_weights)
843
  description = GRAPH_FIELD_SEP.join(
844
  sorted(
845
  set(
 
453
 
454
  # Helper function to update entity in both graph and vector storage
455
  async def _update_entity_storage(
456
+ final_description: str, entity_type: str, file_paths: set[str]
 
 
457
  ):
458
  # Update entity in graph storage
459
  updated_entity_data = {
 
461
  "description": final_description,
462
  "entity_type": entity_type,
463
  "source_id": GRAPH_FIELD_SEP.join(chunk_ids),
464
+ "file_path": GRAPH_FIELD_SEP.join(file_paths)
465
+ if file_paths
466
+ else current_entity.get("file_path", "unknown_source"),
467
  }
468
  await knowledge_graph_inst.upsert_node(entity_name, updated_entity_data)
469
 
 
474
  try:
475
  await entities_vdb.delete([entity_vdb_id])
476
  except Exception as e:
477
+ logger.debug(
478
+ f"Could not delete old entity vector record {entity_vdb_id}: {e}"
479
+ )
480
 
481
  # Insert new vector record
482
  entity_content = f"{entity_name}\n{final_description}"
 
512
  all_entity_data.extend(chunk_entities[chunk_id][entity_name])
513
 
514
  if not all_entity_data:
515
+ logger.warning(
516
+ f"No cached entity data found for {entity_name}, trying to rebuild from relationships"
517
+ )
518
+
519
  # Get all edges connected to this entity
520
  edges = await knowledge_graph_inst.get_node_edges(entity_name)
521
  if not edges:
522
  logger.warning(f"No relationships found for entity {entity_name}")
523
  return
524
+
525
  # Collect relationship data to extract entity information
526
  relationship_descriptions = []
527
  file_paths = set()
528
+
529
  # Get edge data for all connected relationships
530
  for src_id, tgt_id in edges:
531
  edge_data = await knowledge_graph_inst.get_edge(src_id, tgt_id)
532
  if edge_data:
533
  if edge_data.get("description"):
534
  relationship_descriptions.append(edge_data["description"])
535
+
536
  if edge_data.get("file_path"):
537
  edge_file_paths = edge_data["file_path"].split(GRAPH_FIELD_SEP)
538
  file_paths.update(edge_file_paths)
539
+
540
  # Generate description from relationships or fallback to current
541
  if relationship_descriptions:
542
  combined_description = GRAPH_FIELD_SEP.join(relationship_descriptions)
543
  final_description = await _generate_final_description(combined_description)
544
  else:
545
  final_description = current_entity.get("description", "")
546
+
547
  entity_type = current_entity.get("entity_type", "UNKNOWN")
548
  await _update_entity_storage(final_description, entity_type, file_paths)
549
  return
 
639
  if keywords
640
  else current_relationship.get("keywords", "")
641
  )
642
+ # weight = (
643
+ # sum(weights) / len(weights)
644
+ # if weights
645
+ # else current_relationship.get("weight", 1.0)
646
+ # )
647
+ weight = sum(weights) if weights else current_relationship.get("weight", 1.0)
648
 
649
  # Use summary if description is too long
650
  if len(combined_description) > global_config["summary_to_max_tokens"]:
 
662
  **current_relationship,
663
  "description": final_description,
664
  "keywords": combined_keywords,
665
+ "weight": weight,
666
  "source_id": GRAPH_FIELD_SEP.join(chunk_ids),
667
  "file_path": GRAPH_FIELD_SEP.join(file_paths)
668
  if file_paths
 
693
  "content": rel_content,
694
  "keywords": combined_keywords,
695
  "description": final_description,
696
+ "weight": weight,
697
  "file_path": updated_relationship_data["file_path"],
698
  }
699
  }
 
843
  )
844
 
845
  # Process edges_data with None checks
846
+ weight = sum([dp["weight"] for dp in edges_data] + already_weights)
 
847
  description = GRAPH_FIELD_SEP.join(
848
  sorted(
849
  set(