Daniel.y committed on
Commit
ba10d28
·
unverified ·
2 Parent(s): 902a5b6 93f0c6d

Merge pull request #1550 from danielaskdd/keyword-join-with-comma

Browse files
Files changed (1) hide show
  1. lightrag/operate.py +20 -8
lightrag/operate.py CHANGED
@@ -218,7 +218,11 @@ async def _handle_single_relationship_extraction(
218
  edge_description = clean_str(record_attributes[3])
219
  edge_description = normalize_extracted_info(edge_description)
220
 
221
- edge_keywords = clean_str(record_attributes[4]).strip('"').strip("'")
 
 
 
 
222
  edge_source_id = chunk_key
223
  weight = (
224
  float(record_attributes[-1].strip('"').strip("'"))
@@ -388,14 +392,22 @@ async def _merge_edges_then_upsert(
388
  )
389
  )
390
  )
391
- keywords = GRAPH_FIELD_SEP.join(
392
- sorted(
393
- set(
394
- [dp["keywords"] for dp in edges_data if dp.get("keywords")]
395
- + already_keywords
 
 
 
 
 
 
 
396
  )
397
- )
398
- )
 
399
  source_id = GRAPH_FIELD_SEP.join(
400
  set(
401
  [dp["source_id"] for dp in edges_data if dp.get("source_id")]
 
218
  edge_description = clean_str(record_attributes[3])
219
  edge_description = normalize_extracted_info(edge_description)
220
 
221
+ edge_keywords = normalize_extracted_info(
222
+ clean_str(record_attributes[4]), is_entity=True
223
+ )
224
+ edge_keywords = edge_keywords.replace(",", ",")
225
+
226
  edge_source_id = chunk_key
227
  weight = (
228
  float(record_attributes[-1].strip('"').strip("'"))
 
392
  )
393
  )
394
  )
395
+
396
+ # Split all existing and new keywords into individual terms, then combine and deduplicate
397
+ all_keywords = set()
398
+ # Process already_keywords (which are comma-separated)
399
+ for keyword_str in already_keywords:
400
+ if keyword_str: # Skip empty strings
401
+ all_keywords.update(k.strip() for k in keyword_str.split(",") if k.strip())
402
+ # Process new keywords from edges_data
403
+ for edge in edges_data:
404
+ if edge.get("keywords"):
405
+ all_keywords.update(
406
+ k.strip() for k in edge["keywords"].split(",") if k.strip()
407
  )
408
+ # Join all unique keywords with commas
409
+ keywords = ",".join(sorted(all_keywords))
410
+
411
  source_id = GRAPH_FIELD_SEP.join(
412
  set(
413
  [dp["source_id"] for dp in edges_data if dp.get("source_id")]