fix format
Browse files- lightrag/kg/neo4j_impl.py +1 -4
- lightrag/operate.py +55 -31
lightrag/kg/neo4j_impl.py
CHANGED
@@ -280,10 +280,7 @@ class Neo4JStorage(BaseGraphStorage):
|
|
280 |
MATCH (start:`{entity_name_label_source}`)-[r]->(end:`{entity_name_label_target}`)
|
281 |
RETURN properties(r) as edge_properties
|
282 |
LIMIT 1
|
283 |
-
"""
|
284 |
-
entity_name_label_source=entity_name_label_source,
|
285 |
-
entity_name_label_target=entity_name_label_target,
|
286 |
-
)
|
287 |
|
288 |
result = await session.run(query)
|
289 |
record = await result.single()
|
|
|
280 |
MATCH (start:`{entity_name_label_source}`)-[r]->(end:`{entity_name_label_target}`)
|
281 |
RETURN properties(r) as edge_properties
|
282 |
LIMIT 1
|
283 |
+
"""
|
|
|
|
|
|
|
284 |
|
285 |
result = await session.run(query)
|
286 |
record = await result.single()
|
lightrag/operate.py
CHANGED
@@ -141,17 +141,18 @@ async def _handle_single_entity_extraction(
|
|
141 |
if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
|
142 |
return None
|
143 |
# add this record as a node in the G
|
144 |
-
entity_name = clean_str(record_attributes[1].
|
145 |
if not entity_name.strip():
|
146 |
return None
|
147 |
-
entity_type = clean_str(record_attributes[2].
|
148 |
-
entity_description = clean_str(record_attributes[3])
|
149 |
entity_source_id = chunk_key
|
150 |
return dict(
|
151 |
entity_name=entity_name,
|
152 |
entity_type=entity_type,
|
153 |
description=entity_description,
|
154 |
source_id=entity_source_id,
|
|
|
155 |
)
|
156 |
|
157 |
|
@@ -162,14 +163,15 @@ async def _handle_single_relationship_extraction(
|
|
162 |
if len(record_attributes) < 5 or record_attributes[0] != '"relationship"':
|
163 |
return None
|
164 |
# add this record as edge
|
165 |
-
source = clean_str(record_attributes[1].
|
166 |
-
target = clean_str(record_attributes[2].
|
167 |
-
edge_description = clean_str(record_attributes[3])
|
168 |
-
|
169 |
-
edge_keywords = clean_str(record_attributes[4])
|
170 |
edge_source_id = chunk_key
|
171 |
weight = (
|
172 |
-
float(record_attributes[-1]
|
|
|
|
|
173 |
)
|
174 |
return dict(
|
175 |
src_id=source,
|
@@ -547,9 +549,13 @@ async def extract_entities(
|
|
547 |
if entity_vdb is not None:
|
548 |
data_for_vdb = {
|
549 |
compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
|
550 |
-
"content": dp["entity_name"] + dp["description"],
|
551 |
"entity_name": dp["entity_name"],
|
|
|
|
|
552 |
"source_id": dp["source_id"],
|
|
|
|
|
|
|
553 |
}
|
554 |
for dp in all_entities_data
|
555 |
}
|
@@ -560,11 +566,9 @@ async def extract_entities(
|
|
560 |
compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
|
561 |
"src_id": dp["src_id"],
|
562 |
"tgt_id": dp["tgt_id"],
|
|
|
|
|
563 |
"source_id": dp["source_id"],
|
564 |
-
"content": dp["keywords"]
|
565 |
-
+ dp["src_id"]
|
566 |
-
+ dp["tgt_id"]
|
567 |
-
+ dp["description"],
|
568 |
"metadata": {
|
569 |
"created_at": dp.get("metadata", {}).get("created_at", time.time())
|
570 |
},
|
@@ -960,7 +964,7 @@ async def mix_kg_vector_query(
|
|
960 |
stream=query_param.stream,
|
961 |
)
|
962 |
|
963 |
-
#
|
964 |
if isinstance(response, str) and len(response) > len(sys_prompt):
|
965 |
response = (
|
966 |
response.replace(sys_prompt, "")
|
@@ -972,7 +976,7 @@ async def mix_kg_vector_query(
|
|
972 |
.strip()
|
973 |
)
|
974 |
|
975 |
-
# 7. Save cache -
|
976 |
await save_to_cache(
|
977 |
hashing_kv,
|
978 |
CacheData(
|
@@ -1128,8 +1132,19 @@ async def _get_node_data(
|
|
1128 |
)
|
1129 |
|
1130 |
# build prompt
|
1131 |
-
entites_section_list = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1132 |
for i, n in enumerate(node_datas):
|
|
|
|
|
|
|
1133 |
entites_section_list.append(
|
1134 |
[
|
1135 |
i,
|
@@ -1137,6 +1152,7 @@ async def _get_node_data(
|
|
1137 |
n.get("entity_type", "UNKNOWN"),
|
1138 |
n.get("description", "UNKNOWN"),
|
1139 |
n["rank"],
|
|
|
1140 |
]
|
1141 |
)
|
1142 |
entities_context = list_of_list_to_csv(entites_section_list)
|
@@ -1401,6 +1417,10 @@ async def _get_edge_data(
|
|
1401 |
|
1402 |
entites_section_list = [["id", "entity", "type", "description", "rank"]]
|
1403 |
for i, n in enumerate(use_entities):
|
|
|
|
|
|
|
|
|
1404 |
entites_section_list.append(
|
1405 |
[
|
1406 |
i,
|
@@ -1408,6 +1428,7 @@ async def _get_edge_data(
|
|
1408 |
n.get("entity_type", "UNKNOWN"),
|
1409 |
n.get("description", "UNKNOWN"),
|
1410 |
n["rank"],
|
|
|
1411 |
]
|
1412 |
)
|
1413 |
entities_context = list_of_list_to_csv(entites_section_list)
|
@@ -1766,6 +1787,8 @@ async def kg_query_with_keywords(
|
|
1766 |
system_prompt=sys_prompt,
|
1767 |
stream=query_param.stream,
|
1768 |
)
|
|
|
|
|
1769 |
if isinstance(response, str) and len(response) > len(sys_prompt):
|
1770 |
response = (
|
1771 |
response.replace(sys_prompt, "")
|
@@ -1777,18 +1800,19 @@ async def kg_query_with_keywords(
|
|
1777 |
.strip()
|
1778 |
)
|
1779 |
|
1780 |
-
|
1781 |
-
|
1782 |
-
|
1783 |
-
|
1784 |
-
|
1785 |
-
|
1786 |
-
|
1787 |
-
|
1788 |
-
|
1789 |
-
|
1790 |
-
|
1791 |
-
|
1792 |
-
|
1793 |
-
|
|
|
1794 |
return response
|
|
|
141 |
if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
|
142 |
return None
|
143 |
# add this record as a node in the G
|
144 |
+
entity_name = clean_str(record_attributes[1]).strip('"')
|
145 |
if not entity_name.strip():
|
146 |
return None
|
147 |
+
entity_type = clean_str(record_attributes[2]).strip('"')
|
148 |
+
entity_description = clean_str(record_attributes[3]).strip('"')
|
149 |
entity_source_id = chunk_key
|
150 |
return dict(
|
151 |
entity_name=entity_name,
|
152 |
entity_type=entity_type,
|
153 |
description=entity_description,
|
154 |
source_id=entity_source_id,
|
155 |
+
metadata={"created_at": time.time()},
|
156 |
)
|
157 |
|
158 |
|
|
|
163 |
if len(record_attributes) < 5 or record_attributes[0] != '"relationship"':
|
164 |
return None
|
165 |
# add this record as edge
|
166 |
+
source = clean_str(record_attributes[1]).strip('"')
|
167 |
+
target = clean_str(record_attributes[2]).strip('"')
|
168 |
+
edge_description = clean_str(record_attributes[3]).strip('"')
|
169 |
+
edge_keywords = clean_str(record_attributes[4]).strip('"')
|
|
|
170 |
edge_source_id = chunk_key
|
171 |
weight = (
|
172 |
+
float(record_attributes[-1].strip('"'))
|
173 |
+
if is_float_regex(record_attributes[-1])
|
174 |
+
else 1.0
|
175 |
)
|
176 |
return dict(
|
177 |
src_id=source,
|
|
|
549 |
if entity_vdb is not None:
|
550 |
data_for_vdb = {
|
551 |
compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
|
|
|
552 |
"entity_name": dp["entity_name"],
|
553 |
+
"entity_type": dp["entity_type"],
|
554 |
+
"content": f"{dp['entity_name']}\n{dp['description']}",
|
555 |
"source_id": dp["source_id"],
|
556 |
+
"metadata": {
|
557 |
+
"created_at": dp.get("metadata", {}).get("created_at", time.time())
|
558 |
+
},
|
559 |
}
|
560 |
for dp in all_entities_data
|
561 |
}
|
|
|
566 |
compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
|
567 |
"src_id": dp["src_id"],
|
568 |
"tgt_id": dp["tgt_id"],
|
569 |
+
"keywords": dp["keywords"],
|
570 |
+
"content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
|
571 |
"source_id": dp["source_id"],
|
|
|
|
|
|
|
|
|
572 |
"metadata": {
|
573 |
"created_at": dp.get("metadata", {}).get("created_at", time.time())
|
574 |
},
|
|
|
964 |
stream=query_param.stream,
|
965 |
)
|
966 |
|
967 |
+
# Clean up response content
|
968 |
if isinstance(response, str) and len(response) > len(sys_prompt):
|
969 |
response = (
|
970 |
response.replace(sys_prompt, "")
|
|
|
976 |
.strip()
|
977 |
)
|
978 |
|
979 |
+
# 7. Save cache - Only cache after collecting complete response
|
980 |
await save_to_cache(
|
981 |
hashing_kv,
|
982 |
CacheData(
|
|
|
1132 |
)
|
1133 |
|
1134 |
# build prompt
|
1135 |
+
entites_section_list = [
|
1136 |
+
[
|
1137 |
+
"id",
|
1138 |
+
"entity",
|
1139 |
+
"type",
|
1140 |
+
"description",
|
1141 |
+
"rank" "created_at",
|
1142 |
+
]
|
1143 |
+
]
|
1144 |
for i, n in enumerate(node_datas):
|
1145 |
+
created_at = n.get("created_at", "UNKNOWN")
|
1146 |
+
if isinstance(created_at, (int, float)):
|
1147 |
+
created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
|
1148 |
entites_section_list.append(
|
1149 |
[
|
1150 |
i,
|
|
|
1152 |
n.get("entity_type", "UNKNOWN"),
|
1153 |
n.get("description", "UNKNOWN"),
|
1154 |
n["rank"],
|
1155 |
+
created_at,
|
1156 |
]
|
1157 |
)
|
1158 |
entities_context = list_of_list_to_csv(entites_section_list)
|
|
|
1417 |
|
1418 |
entites_section_list = [["id", "entity", "type", "description", "rank"]]
|
1419 |
for i, n in enumerate(use_entities):
|
1420 |
+
created_at = e.get("created_at", "Unknown")
|
1421 |
+
# Convert timestamp to readable format
|
1422 |
+
if isinstance(created_at, (int, float)):
|
1423 |
+
created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
|
1424 |
entites_section_list.append(
|
1425 |
[
|
1426 |
i,
|
|
|
1428 |
n.get("entity_type", "UNKNOWN"),
|
1429 |
n.get("description", "UNKNOWN"),
|
1430 |
n["rank"],
|
1431 |
+
created_at,
|
1432 |
]
|
1433 |
)
|
1434 |
entities_context = list_of_list_to_csv(entites_section_list)
|
|
|
1787 |
system_prompt=sys_prompt,
|
1788 |
stream=query_param.stream,
|
1789 |
)
|
1790 |
+
|
1791 |
+
# 清理响应内容
|
1792 |
if isinstance(response, str) and len(response) > len(sys_prompt):
|
1793 |
response = (
|
1794 |
response.replace(sys_prompt, "")
|
|
|
1800 |
.strip()
|
1801 |
)
|
1802 |
|
1803 |
+
# 7. Save cache - 只有在收集完整响应后才缓存
|
1804 |
+
await save_to_cache(
|
1805 |
+
hashing_kv,
|
1806 |
+
CacheData(
|
1807 |
+
args_hash=args_hash,
|
1808 |
+
content=response,
|
1809 |
+
prompt=query,
|
1810 |
+
quantized=quantized,
|
1811 |
+
min_val=min_val,
|
1812 |
+
max_val=max_val,
|
1813 |
+
mode=query_param.mode,
|
1814 |
+
cache_type="query",
|
1815 |
+
),
|
1816 |
+
)
|
1817 |
+
|
1818 |
return response
|