yangdx
committed on
Commit
·
8fe10bd
1
Parent(s):
50ed665
Only merge new entities/edges during gleaning
Browse files
- Restrict gleaning to new entity names
- Only add edges with new keys
- Prevent similar descriptions of the same entity or edge
- lightrag/operate.py +5 -3
lightrag/operate.py
CHANGED
@@ -613,11 +613,13 @@ async def extract_entities(
|
|
613 |
glean_result, chunk_key, file_path
|
614 |
)
|
615 |
|
616 |
-
# Merge results
|
617 |
for entity_name, entities in glean_nodes.items():
|
618 |
-
maybe_nodes
|
|
|
619 |
for edge_key, edges in glean_edges.items():
|
620 |
-
maybe_edges
|
|
|
621 |
|
622 |
if now_glean_index == entity_extract_max_gleaning - 1:
|
623 |
break
|
|
|
613 |
glean_result, chunk_key, file_path
|
614 |
)
|
615 |
|
616 |
+
# Merge results - only add entities and edges with new names
|
617 |
for entity_name, entities in glean_nodes.items():
|
618 |
+
if entity_name not in maybe_nodes: # Only accept entities with new name in gleaning stage
|
619 |
+
maybe_nodes[entity_name].extend(entities)
|
620 |
for edge_key, edges in glean_edges.items():
|
621 |
+
if edge_key not in maybe_edges: # Only accept edges with new name in gleaning stage
|
622 |
+
maybe_edges[edge_key].extend(edges)
|
623 |
|
624 |
if now_glean_index == entity_extract_max_gleaning - 1:
|
625 |
break
|