Merge branch 'HKUDS:main' into main

Files changed:
- lightrag/operate.py  +10 -16
- lightrag/utils.py  +45 -5
lightrag/operate.py

@@ -15,6 +15,7 @@ from .utils import (
     pack_user_ass_to_openai_messages,
     split_string_by_multi_markers,
     truncate_list_by_token_size,
+    process_combine_contexts,
 )
 from .base import (
     BaseGraphStorage,
@@ -1006,35 +1007,28 @@ def combine_contexts(high_level_context, low_level_context):
     ll_entities, ll_relationships, ll_sources = extract_sections(low_level_context)

     # Combine and deduplicate the entities
-    combined_entities_set = set(
-        filter(None, hl_entities.strip().split("\n") + ll_entities.strip().split("\n"))
-    )
-    combined_entities = "\n".join(combined_entities_set)
-
+    combined_entities = process_combine_contexts(hl_entities, ll_entities)
+
     # Combine and deduplicate the relationships
-    combined_relationships_set = set(
-        filter(
-            None,
-            hl_relationships.strip().split("\n") + ll_relationships.strip().split("\n"),
-        )
-    )
-    combined_relationships = "\n".join(combined_relationships_set)
+    combined_relationships = process_combine_contexts(hl_relationships, ll_relationships)

     # Combine and deduplicate the sources
-    combined_sources_set = set(
-        filter(None, hl_sources.strip().split("\n") + ll_sources.strip().split("\n"))
-    )
-    combined_sources = "\n".join(combined_sources_set)
+    combined_sources = process_combine_contexts(hl_sources, ll_sources)

     # Format the combined context
     return f"""
 -----Entities-----
 ```csv
 {combined_entities}
+```
 -----Relationships-----
+```csv
 {combined_relationships}
+```
 -----Sources-----
+```csv
 {combined_sources}
+``
 """
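Why the hand-rolled deduplication could be dropped: the old per-section code deduplicated raw lines, so the same entity reappearing under a different row id was kept twice. Below is a minimal sketch on made-up strings (standard library only, old logic reproduced from the removed lines) of the limitation that the process_combine_contexts helper, added in lightrag/utils.py below, is presumably meant to address:

# Old per-section logic from the removed lines, run on toy inputs.
hl_entities = "1,ALICE,person\n2,BOB,person"
ll_entities = "1,BOB,person\n2,CAROL,person"

combined_entities_set = set(
    filter(None, hl_entities.strip().split("\n") + ll_entities.strip().split("\n"))
)
combined_entities = "\n".join(combined_entities_set)

# "1,BOB,person" and "2,BOB,person" both survive because the leading row id
# differs; process_combine_contexts strips the id column before deduplicating
# and renumbers the merged rows, so BOB would be kept only once.
print(combined_entities)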
lightrag/utils.py

@@ -1,5 +1,7 @@
 import asyncio
 import html
+import io
+import csv
 import json
 import logging
 import os
@@ -7,7 +9,7 @@ import re
 from dataclasses import dataclass
 from functools import wraps
 from hashlib import md5
-from typing import Any, Union
+from typing import Any, Union,List
 import xml.etree.ElementTree as ET

 import numpy as np
@@ -174,11 +176,17 @@ def truncate_list_by_token_size(list_data: list, key: callable, max_token_size:
             return list_data[:i]
     return list_data


-def list_of_list_to_csv(data: list[list]):
-    return "\n".join(
-        [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
-    )
+def list_of_list_to_csv(data: List[List[str]]) -> str:
+    output = io.StringIO()
+    writer = csv.writer(output)
+    writer.writerows(data)
+    return output.getvalue()
+def csv_string_to_list(csv_string: str) -> List[List[str]]:
+    output = io.StringIO(csv_string)
+    reader = csv.reader(output)
+    return [row for row in reader]
+


 def save_data_to_file(data, file_name):
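A quick round-trip check of the two new helpers; the rows are made-up values, and the import assumes this branch of lightrag is installed:

from lightrag.utils import list_of_list_to_csv, csv_string_to_list

rows = [
    ["id", "entity", "description"],
    ["1", "ALICE", "engineer, based in Berlin"],  # embedded comma on purpose
]

csv_text = list_of_list_to_csv(rows)
# csv.writer quotes the field containing a comma and terminates lines with
# \r\n, so the value survives the round trip; the old ",\t".join() version had
# no quoting, so a field containing a comma could not be split apart reliably.
assert csv_string_to_list(csv_text) == rows

csv_string_to_list is the inverse that process_combine_contexts in the next hunk relies on.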
@@ -244,3 +252,35 @@ def xml_to_json(xml_file):
     except Exception as e:
         print(f"An error occurred: {e}")
         return None
+
+def process_combine_contexts(hl, ll):
+    header = None
+    list_hl = csv_string_to_list(hl.strip())
+    list_ll = csv_string_to_list(ll.strip())
+
+    if list_hl:
+        header=list_hl[0]
+        list_hl = list_hl[1:]
+    if list_ll:
+        header = list_ll[0]
+        list_ll = list_ll[1:]
+    if header is None:
+        return ""
+
+    if list_hl:
+        list_hl = [','.join(item[1:]) for item in list_hl if item]
+    if list_ll:
+        list_ll = [','.join(item[1:]) for item in list_ll if item]
+
+    combined_sources_set = set(
+        filter(None, list_hl + list_ll)
+    )
+
+    combined_sources = [",\t".join(header)]
+
+    for i, item in enumerate(combined_sources_set, start=1):
+        combined_sources.append(f"{i},\t{item}")
+
+    combined_sources = "\n".join(combined_sources)
+
+    return combined_sources