Merge pull request #1550 from danielaskdd/keyword-join-with-comma
Browse files- lightrag/operate.py +20 -8
lightrag/operate.py
CHANGED
@@ -218,7 +218,11 @@ async def _handle_single_relationship_extraction(
|
|
218 |
edge_description = clean_str(record_attributes[3])
|
219 |
edge_description = normalize_extracted_info(edge_description)
|
220 |
|
221 |
-
edge_keywords =
|
|
|
|
|
|
|
|
|
222 |
edge_source_id = chunk_key
|
223 |
weight = (
|
224 |
float(record_attributes[-1].strip('"').strip("'"))
|
@@ -388,14 +392,22 @@ async def _merge_edges_then_upsert(
|
|
388 |
)
|
389 |
)
|
390 |
)
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
396 |
)
|
397 |
-
|
398 |
-
)
|
|
|
399 |
source_id = GRAPH_FIELD_SEP.join(
|
400 |
set(
|
401 |
[dp["source_id"] for dp in edges_data if dp.get("source_id")]
|
|
|
218 |
edge_description = clean_str(record_attributes[3])
|
219 |
edge_description = normalize_extracted_info(edge_description)
|
220 |
|
221 |
+
edge_keywords = normalize_extracted_info(
|
222 |
+
clean_str(record_attributes[4]), is_entity=True
|
223 |
+
)
|
224 |
+
edge_keywords = edge_keywords.replace(",", ",")
|
225 |
+
|
226 |
edge_source_id = chunk_key
|
227 |
weight = (
|
228 |
float(record_attributes[-1].strip('"').strip("'"))
|
|
|
392 |
)
|
393 |
)
|
394 |
)
|
395 |
+
|
396 |
+
# Split all existing and new keywords into individual terms, then combine and deduplicate
|
397 |
+
all_keywords = set()
|
398 |
+
# Process already_keywords (which are comma-separated)
|
399 |
+
for keyword_str in already_keywords:
|
400 |
+
if keyword_str: # Skip empty strings
|
401 |
+
all_keywords.update(k.strip() for k in keyword_str.split(",") if k.strip())
|
402 |
+
# Process new keywords from edges_data
|
403 |
+
for edge in edges_data:
|
404 |
+
if edge.get("keywords"):
|
405 |
+
all_keywords.update(
|
406 |
+
k.strip() for k in edge["keywords"].split(",") if k.strip()
|
407 |
)
|
408 |
+
# Join all unique keywords with commas
|
409 |
+
keywords = ",".join(sorted(all_keywords))
|
410 |
+
|
411 |
source_id = GRAPH_FIELD_SEP.join(
|
412 |
set(
|
413 |
[dp["source_id"] for dp in edges_data if dp.get("source_id")]
|