wiltshirek committed on
Commit
aa1c267
·
2 Parent(s): ee3779b ae5772f

Merge branch 'HKUDS:main' into main

Browse files
Files changed (2) hide show
  1. lightrag/operate.py +10 -16
  2. lightrag/utils.py +45 -5
lightrag/operate.py CHANGED
@@ -15,6 +15,7 @@ from .utils import (
15
  pack_user_ass_to_openai_messages,
16
  split_string_by_multi_markers,
17
  truncate_list_by_token_size,
 
18
  )
19
  from .base import (
20
  BaseGraphStorage,
@@ -1006,35 +1007,28 @@ def combine_contexts(high_level_context, low_level_context):
1006
  ll_entities, ll_relationships, ll_sources = extract_sections(low_level_context)
1007
 
1008
  # Combine and deduplicate the entities
1009
- combined_entities_set = set(
1010
- filter(None, hl_entities.strip().split("\n") + ll_entities.strip().split("\n"))
1011
- )
1012
- combined_entities = "\n".join(combined_entities_set)
1013
-
1014
  # Combine and deduplicate the relationships
1015
- combined_relationships_set = set(
1016
- filter(
1017
- None,
1018
- hl_relationships.strip().split("\n") + ll_relationships.strip().split("\n"),
1019
- )
1020
- )
1021
- combined_relationships = "\n".join(combined_relationships_set)
1022
 
1023
  # Combine and deduplicate the sources
1024
- combined_sources_set = set(
1025
- filter(None, hl_sources.strip().split("\n") + ll_sources.strip().split("\n"))
1026
- )
1027
- combined_sources = "\n".join(combined_sources_set)
1028
 
1029
  # Format the combined context
1030
  return f"""
1031
  -----Entities-----
1032
  ```csv
1033
  {combined_entities}
 
1034
  -----Relationships-----
 
1035
  {combined_relationships}
 
1036
  -----Sources-----
 
1037
  {combined_sources}
 
1038
  """
1039
 
1040
 
 
15
  pack_user_ass_to_openai_messages,
16
  split_string_by_multi_markers,
17
  truncate_list_by_token_size,
18
+ process_combine_contexts,
19
  )
20
  from .base import (
21
  BaseGraphStorage,
 
1007
  ll_entities, ll_relationships, ll_sources = extract_sections(low_level_context)
1008
 
1009
  # Combine and deduplicate the entities
1010
+ combined_entities = process_combine_contexts(hl_entities, ll_entities)
1011
+
 
 
 
1012
  # Combine and deduplicate the relationships
1013
+ combined_relationships = process_combine_contexts(hl_relationships, ll_relationships)
 
 
 
 
 
 
1014
 
1015
  # Combine and deduplicate the sources
1016
+ combined_sources = process_combine_contexts(hl_sources, ll_sources)
 
 
 
1017
 
1018
  # Format the combined context
1019
  return f"""
1020
  -----Entities-----
1021
  ```csv
1022
  {combined_entities}
1023
+ ```
1024
  -----Relationships-----
1025
+ ```csv
1026
  {combined_relationships}
1027
+ ```
1028
  -----Sources-----
1029
+ ```csv
1030
  {combined_sources}
1031
+ ``
1032
  """
1033
 
1034
 
lightrag/utils.py CHANGED
@@ -1,5 +1,7 @@
1
  import asyncio
2
  import html
 
 
3
  import json
4
  import logging
5
  import os
@@ -7,7 +9,7 @@ import re
7
  from dataclasses import dataclass
8
  from functools import wraps
9
  from hashlib import md5
10
- from typing import Any, Union
11
  import xml.etree.ElementTree as ET
12
 
13
  import numpy as np
@@ -174,11 +176,17 @@ def truncate_list_by_token_size(list_data: list, key: callable, max_token_size:
174
  return list_data[:i]
175
  return list_data
176
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- def list_of_list_to_csv(data: list[list]):
179
- return "\n".join(
180
- [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
181
- )
182
 
183
 
184
  def save_data_to_file(data, file_name):
@@ -244,3 +252,35 @@ def xml_to_json(xml_file):
244
  except Exception as e:
245
  print(f"An error occurred: {e}")
246
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import asyncio
2
  import html
3
+ import io
4
+ import csv
5
  import json
6
  import logging
7
  import os
 
9
  from dataclasses import dataclass
10
  from functools import wraps
11
  from hashlib import md5
12
+ from typing import Any, Union,List
13
  import xml.etree.ElementTree as ET
14
 
15
  import numpy as np
 
176
  return list_data[:i]
177
  return list_data
178
 
179
def list_of_list_to_csv(data: List[List[str]]) -> str:
    """Serialize a list of rows into CSV text.

    Each inner list becomes one CSV record, written with the ``csv``
    module's default dialect (comma delimiter, minimal quoting, and the
    writer's default line terminator after every record).

    Args:
        data: Rows to serialize; each row is a list of field values.

    Returns:
        The CSV text, or an empty string when ``data`` has no rows.
    """
    buffer = io.StringIO()
    csv.writer(buffer).writerows(data)
    return buffer.getvalue()
184
def csv_string_to_list(csv_string: str) -> List[List[str]]:
    """Parse CSV text into a list of rows.

    The text is read with the ``csv`` module's default dialect, so quoted
    fields may contain commas.

    Args:
        csv_string: The CSV text to parse.

    Returns:
        One list of field strings per record, in input order. A blank line
        in the input yields an empty list; empty input yields ``[]``.
    """
    rows = csv.reader(io.StringIO(csv_string))
    return list(rows)
188
+
189
 
 
 
 
 
190
 
191
 
192
  def save_data_to_file(data, file_name):
 
252
  except Exception as e:
253
  print(f"An error occurred: {e}")
254
  return None
255
+
256
def process_combine_contexts(hl, ll):
    """Merge two CSV context tables into one de-duplicated table.

    Both inputs are parsed as CSV text whose first record is a header and
    whose first column is a row id. The id column is dropped before rows are
    compared, so rows that differ only by id count as duplicates, and ids
    are reassigned starting at 1 in the result.

    Rows keep first-seen order (all ``hl`` rows, then any new ``ll`` rows),
    so the output is deterministic from run to run.

    Args:
        hl: CSV text of the high-level context; may be empty.
        ll: CSV text of the low-level context; may be empty.

    Returns:
        A string whose first line is the header fields joined by a comma
        followed by a tab, and whose remaining lines are
        "<new id>, tab, <remaining fields joined by commas>", one per unique
        row, separated by newlines. Returns an empty string when neither
        input contains any record.
    """

    def _parse(text):
        # Same parsing as csv_string_to_list(text.strip()), inlined so this
        # function does not depend on a sibling definition.
        return list(csv.reader(io.StringIO(text.strip())))

    rows_hl = _parse(hl)
    rows_ll = _parse(ll)

    # The header comes from whichever input is present; the low-level header
    # wins when both are.
    header = None
    if rows_hl:
        header, rows_hl = rows_hl[0], rows_hl[1:]
    if rows_ll:
        header, rows_ll = rows_ll[0], rows_ll[1:]
    if header is None:
        return ""

    # Drop the id column and flatten each remaining row to a comparable
    # string; blank records and rows left empty without their id are skipped.
    bodies = [",".join(row[1:]) for row in rows_hl + rows_ll if row]

    # dict.fromkeys de-duplicates while preserving insertion order, unlike a
    # set, whose iteration order for strings changes between interpreter runs.
    unique_bodies = dict.fromkeys(body for body in bodies if body)

    lines = [",\t".join(header)]
    lines.extend(
        f"{index},\t{body}" for index, body in enumerate(unique_bodies, start=1)
    )
    return "\n".join(lines)