yangdx committed on
Commit
8fe10bd
·
1 Parent(s): 50ed665

Only merge new entities/edges during gleaning

Browse files

- Restrict gleaning to new entity names
- Only add edges with new keys
- Prevent similar descriptions of the same entity or edge

Files changed (1) hide show
  1. lightrag/operate.py +5 -3
lightrag/operate.py CHANGED
@@ -613,11 +613,13 @@ async def extract_entities(
613
  glean_result, chunk_key, file_path
614
  )
615
 
616
- # Merge results
617
  for entity_name, entities in glean_nodes.items():
618
- maybe_nodes[entity_name].extend(entities)
 
619
  for edge_key, edges in glean_edges.items():
620
- maybe_edges[edge_key].extend(edges)
 
621
 
622
  if now_glean_index == entity_extract_max_gleaning - 1:
623
  break
 
613
  glean_result, chunk_key, file_path
614
  )
615
 
616
+ # Merge results - only add entities and edges with new names
617
  for entity_name, entities in glean_nodes.items():
618
+ if entity_name not in maybe_nodes: # Only accept entities with new name in gleaning stage
619
+ maybe_nodes[entity_name].extend(entities)
620
  for edge_key, edges in glean_edges.items():
621
+ if edge_key not in maybe_edges: # Only accept edges with new name in gleaning stage
622
+ maybe_edges[edge_key].extend(edges)
623
 
624
  if now_glean_index == entity_extract_max_gleaning - 1:
625
  break