zrguo committed on
Commit
ae5772f
·
1 Parent(s): 96cece4

Update utils.py

Browse files
Files changed (1) hide show
  1. lightrag/utils.py +3 -12
lightrag/utils.py CHANGED
@@ -176,11 +176,6 @@ def truncate_list_by_token_size(list_data: list, key: callable, max_token_size:
176
  return list_data[:i]
177
  return list_data
178
 
179
-
180
- # def list_of_list_to_csv(data: list[list]):
181
- # return "\n".join(
182
- # [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
183
- # )
184
  def list_of_list_to_csv(data: List[List[str]]) -> str:
185
  output = io.StringIO()
186
  writer = csv.writer(output)
@@ -258,12 +253,11 @@ def xml_to_json(xml_file):
258
  print(f"An error occurred: {e}")
259
  return None
260
 
261
- #混合检索中的合并函数
262
  def process_combine_contexts(hl, ll):
263
  header = None
264
  list_hl = csv_string_to_list(hl.strip())
265
  list_ll = csv_string_to_list(ll.strip())
266
- # 去掉第一个元素(如果不为空)
267
  if list_hl:
268
  header=list_hl[0]
269
  list_hl = list_hl[1:]
@@ -272,24 +266,21 @@ def process_combine_contexts(hl, ll):
272
  list_ll = list_ll[1:]
273
  if header is None:
274
  return ""
275
- # 去掉每个子元素中的第一个元素(如果不为空),再转为一维数组,用于合并去重
276
  if list_hl:
277
  list_hl = [','.join(item[1:]) for item in list_hl if item]
278
  if list_ll:
279
  list_ll = [','.join(item[1:]) for item in list_ll if item]
280
 
281
- # 合并并去重
282
  combined_sources_set = set(
283
  filter(None, list_hl + list_ll)
284
  )
285
 
286
- # 创建包含头部的新列表
287
  combined_sources = [",\t".join(header)]
288
- # 为 combined_sources_set 中的每个元素添加自增数字
289
  for i, item in enumerate(combined_sources_set, start=1):
290
  combined_sources.append(f"{i},\t{item}")
291
 
292
- # 将列表转换为字符串,子元素之间用换行符分隔
293
  combined_sources = "\n".join(combined_sources)
294
 
295
  return combined_sources
 
176
  return list_data[:i]
177
  return list_data
178
 
 
 
 
 
 
179
  def list_of_list_to_csv(data: List[List[str]]) -> str:
180
  output = io.StringIO()
181
  writer = csv.writer(output)
 
253
  print(f"An error occurred: {e}")
254
  return None
255
 
 
256
  def process_combine_contexts(hl, ll):
257
  header = None
258
  list_hl = csv_string_to_list(hl.strip())
259
  list_ll = csv_string_to_list(ll.strip())
260
+
261
  if list_hl:
262
  header=list_hl[0]
263
  list_hl = list_hl[1:]
 
266
  list_ll = list_ll[1:]
267
  if header is None:
268
  return ""
269
+
270
  if list_hl:
271
  list_hl = [','.join(item[1:]) for item in list_hl if item]
272
  if list_ll:
273
  list_ll = [','.join(item[1:]) for item in list_ll if item]
274
 
 
275
  combined_sources_set = set(
276
  filter(None, list_hl + list_ll)
277
  )
278
 
 
279
  combined_sources = [",\t".join(header)]
280
+
281
  for i, item in enumerate(combined_sources_set, start=1):
282
  combined_sources.append(f"{i},\t{item}")
283
 
 
284
  combined_sources = "\n".join(combined_sources)
285
 
286
  return combined_sources