zrguo committed on
Commit
96cece4
·
2 Parent(s): ddcc625 baafbb4

Merge pull request #183 from gogoswift/main

Browse files
Files changed (2) hide show
  1. lightrag/operate.py +10 -16
  2. lightrag/utils.py +54 -5
lightrag/operate.py CHANGED
@@ -15,6 +15,7 @@ from .utils import (
15
  pack_user_ass_to_openai_messages,
16
  split_string_by_multi_markers,
17
  truncate_list_by_token_size,
 
18
  )
19
  from .base import (
20
  BaseGraphStorage,
@@ -1003,35 +1004,28 @@ def combine_contexts(high_level_context, low_level_context):
1003
  ll_entities, ll_relationships, ll_sources = extract_sections(low_level_context)
1004
 
1005
  # Combine and deduplicate the entities
1006
- combined_entities_set = set(
1007
- filter(None, hl_entities.strip().split("\n") + ll_entities.strip().split("\n"))
1008
- )
1009
- combined_entities = "\n".join(combined_entities_set)
1010
-
1011
  # Combine and deduplicate the relationships
1012
- combined_relationships_set = set(
1013
- filter(
1014
- None,
1015
- hl_relationships.strip().split("\n") + ll_relationships.strip().split("\n"),
1016
- )
1017
- )
1018
- combined_relationships = "\n".join(combined_relationships_set)
1019
 
1020
  # Combine and deduplicate the sources
1021
- combined_sources_set = set(
1022
- filter(None, hl_sources.strip().split("\n") + ll_sources.strip().split("\n"))
1023
- )
1024
- combined_sources = "\n".join(combined_sources_set)
1025
 
1026
  # Format the combined context
1027
  return f"""
1028
  -----Entities-----
1029
  ```csv
1030
  {combined_entities}
 
1031
  -----Relationships-----
 
1032
  {combined_relationships}
 
1033
  -----Sources-----
 
1034
  {combined_sources}
 
1035
  """
1036
 
1037
 
 
15
  pack_user_ass_to_openai_messages,
16
  split_string_by_multi_markers,
17
  truncate_list_by_token_size,
18
+ process_combine_contexts,
19
  )
20
  from .base import (
21
  BaseGraphStorage,
 
1004
  ll_entities, ll_relationships, ll_sources = extract_sections(low_level_context)
1005
 
1006
  # Combine and deduplicate the entities
1007
+ combined_entities = process_combine_contexts(hl_entities, ll_entities)
1008
+
 
 
 
1009
  # Combine and deduplicate the relationships
1010
+ combined_relationships = process_combine_contexts(hl_relationships, ll_relationships)
 
 
 
 
 
 
1011
 
1012
  # Combine and deduplicate the sources
1013
+ combined_sources = process_combine_contexts(hl_sources, ll_sources)
 
 
 
1014
 
1015
  # Format the combined context
1016
  return f"""
1017
  -----Entities-----
1018
  ```csv
1019
  {combined_entities}
1020
+ ```
1021
  -----Relationships-----
1022
+ ```csv
1023
  {combined_relationships}
1024
+ ```
1025
  -----Sources-----
1026
+ ```csv
1027
  {combined_sources}
1028
+ ``
1029
  """
1030
 
1031
 
lightrag/utils.py CHANGED
@@ -1,5 +1,7 @@
1
  import asyncio
2
  import html
 
 
3
  import json
4
  import logging
5
  import os
@@ -7,7 +9,7 @@ import re
7
  from dataclasses import dataclass
8
  from functools import wraps
9
  from hashlib import md5
10
- from typing import Any, Union
11
  import xml.etree.ElementTree as ET
12
 
13
  import numpy as np
@@ -175,10 +177,21 @@ def truncate_list_by_token_size(list_data: list, key: callable, max_token_size:
175
  return list_data
176
 
177
 
178
- def list_of_list_to_csv(data: list[list]):
179
- return "\n".join(
180
- [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
181
- )
 
 
 
 
 
 
 
 
 
 
 
182
 
183
 
184
  def save_data_to_file(data, file_name):
@@ -244,3 +257,39 @@ def xml_to_json(xml_file):
244
  except Exception as e:
245
  print(f"An error occurred: {e}")
246
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import asyncio
2
  import html
3
+ import io
4
+ import csv
5
  import json
6
  import logging
7
  import os
 
9
  from dataclasses import dataclass
10
  from functools import wraps
11
  from hashlib import md5
12
+ from typing import Any, Union,List
13
  import xml.etree.ElementTree as ET
14
 
15
  import numpy as np
 
177
  return list_data
178
 
179
 
180
+ # def list_of_list_to_csv(data: list[list]):
181
+ # return "\n".join(
182
+ # [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
183
+ # )
184
def list_of_list_to_csv(data: List[List[str]]) -> str:
    """Serialize a list of rows into CSV text.

    Each inner list becomes one CSV record written with the ``csv`` module's
    default dialect, so fields containing commas, quotes or newlines are
    quoted as needed and every record (including the last) ends with the
    writer's default line terminator.

    Args:
        data: Rows to serialize; each row is a list of field values.

    Returns:
        The CSV text, or an empty string when ``data`` is empty.
    """
    with io.StringIO() as buffer:
        csv.writer(buffer).writerows(data)
        # Read the text out before the buffer is closed by the ``with`` exit.
        return buffer.getvalue()
189
def csv_string_to_list(csv_string: str) -> List[List[str]]:
    """Parse CSV text into a list of rows.

    The text is read with the ``csv`` module's default dialect, so quoted
    fields are unquoted and a blank line yields an empty row.

    Args:
        csv_string: The CSV text to parse.

    Returns:
        One list of field strings per record; an empty list for empty input.
    """
    source = io.StringIO(csv_string)
    return list(csv.reader(source))
193
+
194
+
195
 
196
 
197
  def save_data_to_file(data, file_name):
 
257
  except Exception as e:
258
  print(f"An error occurred: {e}")
259
  return None
260
+
261
+ # Merge helper used by hybrid retrieval
262
def process_combine_contexts(hl: str, ll: str) -> str:
    """Merge two CSV context tables into one de-duplicated, renumbered table.

    The first row of each non-empty input is treated as its header and the
    first column of every remaining row is discarded (presumably the old
    running index -- confirm against the callers). The remaining fields of
    each row are compared as a whole; duplicates are dropped and the
    surviving rows are numbered again starting from 1.

    Rows keep the order in which they are first seen (all of ``hl``, then
    ``ll``), so the result is identical from run to run.

    Args:
        hl: CSV text of the high-level context; may be empty.
        ll: CSV text of the low-level context; may be empty.

    Returns:
        The header fields joined by a comma and a tab, followed by one line
        per unique row prefixed with its new index. An empty string when
        both inputs are empty.
    """
    rows_hl = csv_string_to_list(hl.strip())
    rows_ll = csv_string_to_list(ll.strip())

    # Split the header off each table. When both tables are present the
    # low-level header is the one that ends up in the output.
    header = None
    if rows_hl:
        header, rows_hl = rows_hl[0], rows_hl[1:]
    if rows_ll:
        header, rows_ll = rows_ll[0], rows_ll[1:]
    if header is None:
        return ""

    # Drop the first column of every non-blank row and flatten the rest to a
    # single string that serves as the de-duplication key.
    # NOTE(review): joining already-parsed fields with a bare comma discards
    # the CSV quoting, so a field that itself contains a comma is no longer
    # distinguishable in the output. Kept as-is for output compatibility.
    flattened = [",".join(row[1:]) for row in rows_hl + rows_ll if row]

    # dict.fromkeys removes duplicates while keeping first-seen order. A set
    # would make the row order (and therefore the indices assigned below)
    # vary between interpreter runs because of string hash randomization.
    unique_rows = dict.fromkeys(item for item in flattened if item)

    lines = [",\t".join(header)]
    lines.extend(
        f"{index},\t{item}" for index, item in enumerate(unique_rows, start=1)
    )
    return "\n".join(lines)