zrguo commited on
Commit
e976257
·
1 Parent(s): a346fa4

Update operate.py

Browse files
Files changed (1) hide show
  1. lightrag/operate.py +90 -49
lightrag/operate.py CHANGED
@@ -451,6 +451,58 @@ async def _rebuild_single_entity(
451
  if not current_entity:
452
  return
453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
  # Collect all entity data from relevant chunks
455
  all_entity_data = []
456
  for chunk_id in chunk_ids:
@@ -458,10 +510,41 @@ async def _rebuild_single_entity(
458
  all_entity_data.extend(chunk_entities[chunk_id][entity_name])
459
 
460
  if not all_entity_data:
461
- logger.warning(f"No cached entity data found for {entity_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  return
463
 
464
- # Merge descriptions and get the most common entity type
465
  descriptions = []
466
  entity_types = []
467
  file_paths = set()
@@ -488,52 +571,9 @@ async def _rebuild_single_entity(
488
  else current_entity.get("entity_type", "UNKNOWN")
489
  )
490
 
491
- # Use summary if description is too long
492
- if len(combined_description) > global_config["summary_to_max_tokens"]:
493
- final_description = await _handle_entity_relation_summary(
494
- entity_name,
495
- combined_description,
496
- global_config,
497
- llm_response_cache=llm_response_cache,
498
- )
499
- else:
500
- final_description = combined_description
501
-
502
- # Update entity in graph storage
503
- updated_entity_data = {
504
- **current_entity,
505
- "description": final_description,
506
- "entity_type": entity_type,
507
- "source_id": GRAPH_FIELD_SEP.join(chunk_ids),
508
- "file_path": GRAPH_FIELD_SEP.join(file_paths)
509
- if file_paths
510
- else current_entity.get("file_path", "unknown_source"),
511
- }
512
- await knowledge_graph_inst.upsert_node(entity_name, updated_entity_data)
513
-
514
- # Update entity in vector database
515
- entity_vdb_id = compute_mdhash_id(entity_name, prefix="ent-")
516
-
517
- # Delete old vector record first
518
- try:
519
- await entities_vdb.delete([entity_vdb_id])
520
- except Exception as e:
521
- logger.debug(f"Could not delete old entity vector record {entity_vdb_id}: {e}")
522
-
523
- # Insert new vector record
524
- entity_content = f"{entity_name}\n{final_description}"
525
- await entities_vdb.upsert(
526
- {
527
- entity_vdb_id: {
528
- "content": entity_content,
529
- "entity_name": entity_name,
530
- "source_id": updated_entity_data["source_id"],
531
- "description": final_description,
532
- "entity_type": entity_type,
533
- "file_path": updated_entity_data["file_path"],
534
- }
535
- }
536
- )
537
 
538
 
539
  async def _rebuild_single_relationship(
@@ -798,7 +838,8 @@ async def _merge_edges_then_upsert(
798
  )
799
 
800
  # Process edges_data with None checks
801
- weight = sum([dp["weight"] for dp in edges_data] + already_weights)
 
802
  description = GRAPH_FIELD_SEP.join(
803
  sorted(
804
  set(
 
451
  if not current_entity:
452
  return
453
 
454
+ # Helper function to update entity in both graph and vector storage
455
+ async def _update_entity_storage(
456
+ final_description: str,
457
+ entity_type: str,
458
+ file_paths: set[str]
459
+ ):
460
+ # Update entity in graph storage
461
+ updated_entity_data = {
462
+ **current_entity,
463
+ "description": final_description,
464
+ "entity_type": entity_type,
465
+ "source_id": GRAPH_FIELD_SEP.join(chunk_ids),
466
+ "file_path": GRAPH_FIELD_SEP.join(file_paths) if file_paths else current_entity.get("file_path", "unknown_source"),
467
+ }
468
+ await knowledge_graph_inst.upsert_node(entity_name, updated_entity_data)
469
+
470
+ # Update entity in vector database
471
+ entity_vdb_id = compute_mdhash_id(entity_name, prefix="ent-")
472
+
473
+ # Delete old vector record first
474
+ try:
475
+ await entities_vdb.delete([entity_vdb_id])
476
+ except Exception as e:
477
+ logger.debug(f"Could not delete old entity vector record {entity_vdb_id}: {e}")
478
+
479
+ # Insert new vector record
480
+ entity_content = f"{entity_name}\n{final_description}"
481
+ await entities_vdb.upsert(
482
+ {
483
+ entity_vdb_id: {
484
+ "content": entity_content,
485
+ "entity_name": entity_name,
486
+ "source_id": updated_entity_data["source_id"],
487
+ "description": final_description,
488
+ "entity_type": entity_type,
489
+ "file_path": updated_entity_data["file_path"],
490
+ }
491
+ }
492
+ )
493
+
494
+ # Helper function to generate final description with optional LLM summary
495
+ async def _generate_final_description(combined_description: str) -> str:
496
+ if len(combined_description) > global_config["summary_to_max_tokens"]:
497
+ return await _handle_entity_relation_summary(
498
+ entity_name,
499
+ combined_description,
500
+ global_config,
501
+ llm_response_cache=llm_response_cache,
502
+ )
503
+ else:
504
+ return combined_description
505
+
506
  # Collect all entity data from relevant chunks
507
  all_entity_data = []
508
  for chunk_id in chunk_ids:
 
510
  all_entity_data.extend(chunk_entities[chunk_id][entity_name])
511
 
512
  if not all_entity_data:
513
+ logger.warning(f"No cached entity data found for {entity_name}, trying to rebuild from relationships")
514
+
515
+ # Get all edges connected to this entity
516
+ edges = await knowledge_graph_inst.get_node_edges(entity_name)
517
+ if not edges:
518
+ logger.warning(f"No relationships found for entity {entity_name}")
519
+ return
520
+
521
+ # Collect relationship data to extract entity information
522
+ relationship_descriptions = []
523
+ file_paths = set()
524
+
525
+ # Get edge data for all connected relationships
526
+ for src_id, tgt_id in edges:
527
+ edge_data = await knowledge_graph_inst.get_edge(src_id, tgt_id)
528
+ if edge_data:
529
+ if edge_data.get("description"):
530
+ relationship_descriptions.append(edge_data["description"])
531
+
532
+ if edge_data.get("file_path"):
533
+ edge_file_paths = edge_data["file_path"].split(GRAPH_FIELD_SEP)
534
+ file_paths.update(edge_file_paths)
535
+
536
+ # Generate description from relationships or fallback to current
537
+ if relationship_descriptions:
538
+ combined_description = GRAPH_FIELD_SEP.join(relationship_descriptions)
539
+ final_description = await _generate_final_description(combined_description)
540
+ else:
541
+ final_description = current_entity.get("description", "")
542
+
543
+ entity_type = current_entity.get("entity_type", "UNKNOWN")
544
+ await _update_entity_storage(final_description, entity_type, file_paths)
545
  return
546
 
547
+ # Process cached entity data
548
  descriptions = []
549
  entity_types = []
550
  file_paths = set()
 
571
  else current_entity.get("entity_type", "UNKNOWN")
572
  )
573
 
574
+ # Generate final description and update storage
575
+ final_description = await _generate_final_description(combined_description)
576
+ await _update_entity_storage(final_description, entity_type, file_paths)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
 
578
 
579
  async def _rebuild_single_relationship(
 
838
  )
839
 
840
  # Process edges_data with None checks
841
+ all_weights = [dp["weight"] for dp in edges_data] + already_weights
842
+ weight = sum(all_weights) / len(all_weights)
843
  description = GRAPH_FIELD_SEP.join(
844
  sorted(
845
  set(