LarFii committed on
Commit
8225c04
·
1 Parent(s): 1c6f17d

fix format

Browse files
Files changed (2) hide show
  1. lightrag/kg/neo4j_impl.py +1 -4
  2. lightrag/operate.py +55 -31
lightrag/kg/neo4j_impl.py CHANGED
@@ -280,10 +280,7 @@ class Neo4JStorage(BaseGraphStorage):
280
  MATCH (start:`{entity_name_label_source}`)-[r]->(end:`{entity_name_label_target}`)
281
  RETURN properties(r) as edge_properties
282
  LIMIT 1
283
- """.format(
284
- entity_name_label_source=entity_name_label_source,
285
- entity_name_label_target=entity_name_label_target,
286
- )
287
 
288
  result = await session.run(query)
289
  record = await result.single()
 
280
  MATCH (start:`{entity_name_label_source}`)-[r]->(end:`{entity_name_label_target}`)
281
  RETURN properties(r) as edge_properties
282
  LIMIT 1
283
+ """
 
 
 
284
 
285
  result = await session.run(query)
286
  record = await result.single()
lightrag/operate.py CHANGED
@@ -141,17 +141,18 @@ async def _handle_single_entity_extraction(
141
  if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
142
  return None
143
  # add this record as a node in the G
144
- entity_name = clean_str(record_attributes[1].upper())
145
  if not entity_name.strip():
146
  return None
147
- entity_type = clean_str(record_attributes[2].upper())
148
- entity_description = clean_str(record_attributes[3])
149
  entity_source_id = chunk_key
150
  return dict(
151
  entity_name=entity_name,
152
  entity_type=entity_type,
153
  description=entity_description,
154
  source_id=entity_source_id,
 
155
  )
156
 
157
 
@@ -162,14 +163,15 @@ async def _handle_single_relationship_extraction(
162
  if len(record_attributes) < 5 or record_attributes[0] != '"relationship"':
163
  return None
164
  # add this record as edge
165
- source = clean_str(record_attributes[1].upper())
166
- target = clean_str(record_attributes[2].upper())
167
- edge_description = clean_str(record_attributes[3])
168
-
169
- edge_keywords = clean_str(record_attributes[4])
170
  edge_source_id = chunk_key
171
  weight = (
172
- float(record_attributes[-1]) if is_float_regex(record_attributes[-1]) else 1.0
 
 
173
  )
174
  return dict(
175
  src_id=source,
@@ -547,9 +549,13 @@ async def extract_entities(
547
  if entity_vdb is not None:
548
  data_for_vdb = {
549
  compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
550
- "content": dp["entity_name"] + dp["description"],
551
  "entity_name": dp["entity_name"],
 
 
552
  "source_id": dp["source_id"],
 
 
 
553
  }
554
  for dp in all_entities_data
555
  }
@@ -560,11 +566,9 @@ async def extract_entities(
560
  compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
561
  "src_id": dp["src_id"],
562
  "tgt_id": dp["tgt_id"],
 
 
563
  "source_id": dp["source_id"],
564
- "content": dp["keywords"]
565
- + dp["src_id"]
566
- + dp["tgt_id"]
567
- + dp["description"],
568
  "metadata": {
569
  "created_at": dp.get("metadata", {}).get("created_at", time.time())
570
  },
@@ -960,7 +964,7 @@ async def mix_kg_vector_query(
960
  stream=query_param.stream,
961
  )
962
 
963
- # 清理响应内容
964
  if isinstance(response, str) and len(response) > len(sys_prompt):
965
  response = (
966
  response.replace(sys_prompt, "")
@@ -972,7 +976,7 @@ async def mix_kg_vector_query(
972
  .strip()
973
  )
974
 
975
- # 7. Save cache - 只有在收集完整响应后才缓存
976
  await save_to_cache(
977
  hashing_kv,
978
  CacheData(
@@ -1128,8 +1132,19 @@ async def _get_node_data(
1128
  )
1129
 
1130
  # build prompt
1131
- entites_section_list = [["id", "entity", "type", "description", "rank"]]
 
 
 
 
 
 
 
 
1132
  for i, n in enumerate(node_datas):
 
 
 
1133
  entites_section_list.append(
1134
  [
1135
  i,
@@ -1137,6 +1152,7 @@ async def _get_node_data(
1137
  n.get("entity_type", "UNKNOWN"),
1138
  n.get("description", "UNKNOWN"),
1139
  n["rank"],
 
1140
  ]
1141
  )
1142
  entities_context = list_of_list_to_csv(entites_section_list)
@@ -1401,6 +1417,10 @@ async def _get_edge_data(
1401
 
1402
  entites_section_list = [["id", "entity", "type", "description", "rank"]]
1403
  for i, n in enumerate(use_entities):
 
 
 
 
1404
  entites_section_list.append(
1405
  [
1406
  i,
@@ -1408,6 +1428,7 @@ async def _get_edge_data(
1408
  n.get("entity_type", "UNKNOWN"),
1409
  n.get("description", "UNKNOWN"),
1410
  n["rank"],
 
1411
  ]
1412
  )
1413
  entities_context = list_of_list_to_csv(entites_section_list)
@@ -1766,6 +1787,8 @@ async def kg_query_with_keywords(
1766
  system_prompt=sys_prompt,
1767
  stream=query_param.stream,
1768
  )
 
 
1769
  if isinstance(response, str) and len(response) > len(sys_prompt):
1770
  response = (
1771
  response.replace(sys_prompt, "")
@@ -1777,18 +1800,19 @@ async def kg_query_with_keywords(
1777
  .strip()
1778
  )
1779
 
1780
- # Save to cache
1781
- await save_to_cache(
1782
- hashing_kv,
1783
- CacheData(
1784
- args_hash=args_hash,
1785
- content=response,
1786
- prompt=query,
1787
- quantized=quantized,
1788
- min_val=min_val,
1789
- max_val=max_val,
1790
- mode=query_param.mode,
1791
- cache_type="query",
1792
- ),
1793
- )
 
1794
  return response
 
141
  if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
142
  return None
143
  # add this record as a node in the G
144
+ entity_name = clean_str(record_attributes[1]).strip('"')
145
  if not entity_name.strip():
146
  return None
147
+ entity_type = clean_str(record_attributes[2]).strip('"')
148
+ entity_description = clean_str(record_attributes[3]).strip('"')
149
  entity_source_id = chunk_key
150
  return dict(
151
  entity_name=entity_name,
152
  entity_type=entity_type,
153
  description=entity_description,
154
  source_id=entity_source_id,
155
+ metadata={"created_at": time.time()},
156
  )
157
 
158
 
 
163
  if len(record_attributes) < 5 or record_attributes[0] != '"relationship"':
164
  return None
165
  # add this record as edge
166
+ source = clean_str(record_attributes[1]).strip('"')
167
+ target = clean_str(record_attributes[2]).strip('"')
168
+ edge_description = clean_str(record_attributes[3]).strip('"')
169
+ edge_keywords = clean_str(record_attributes[4]).strip('"')
 
170
  edge_source_id = chunk_key
171
  weight = (
172
+ float(record_attributes[-1].strip('"'))
173
+ if is_float_regex(record_attributes[-1])
174
+ else 1.0
175
  )
176
  return dict(
177
  src_id=source,
 
549
  if entity_vdb is not None:
550
  data_for_vdb = {
551
  compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
 
552
  "entity_name": dp["entity_name"],
553
+ "entity_type": dp["entity_type"],
554
+ "content": f"{dp['entity_name']}\n{dp['description']}",
555
  "source_id": dp["source_id"],
556
+ "metadata": {
557
+ "created_at": dp.get("metadata", {}).get("created_at", time.time())
558
+ },
559
  }
560
  for dp in all_entities_data
561
  }
 
566
  compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
567
  "src_id": dp["src_id"],
568
  "tgt_id": dp["tgt_id"],
569
+ "keywords": dp["keywords"],
570
+ "content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
571
  "source_id": dp["source_id"],
 
 
 
 
572
  "metadata": {
573
  "created_at": dp.get("metadata", {}).get("created_at", time.time())
574
  },
 
964
  stream=query_param.stream,
965
  )
966
 
967
+ # Clean up response content
968
  if isinstance(response, str) and len(response) > len(sys_prompt):
969
  response = (
970
  response.replace(sys_prompt, "")
 
976
  .strip()
977
  )
978
 
979
+ # 7. Save cache - Only cache after collecting complete response
980
  await save_to_cache(
981
  hashing_kv,
982
  CacheData(
 
1132
  )
1133
 
1134
  # build prompt
1135
+ entites_section_list = [
1136
+ [
1137
+ "id",
1138
+ "entity",
1139
+ "type",
1140
+ "description",
1141
+ "rank" "created_at",
1142
+ ]
1143
+ ]
1144
  for i, n in enumerate(node_datas):
1145
+ created_at = n.get("created_at", "UNKNOWN")
1146
+ if isinstance(created_at, (int, float)):
1147
+ created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1148
  entites_section_list.append(
1149
  [
1150
  i,
 
1152
  n.get("entity_type", "UNKNOWN"),
1153
  n.get("description", "UNKNOWN"),
1154
  n["rank"],
1155
+ created_at,
1156
  ]
1157
  )
1158
  entities_context = list_of_list_to_csv(entites_section_list)
 
1417
 
1418
  entites_section_list = [["id", "entity", "type", "description", "rank"]]
1419
  for i, n in enumerate(use_entities):
1420
+ created_at = e.get("created_at", "Unknown")
1421
+ # Convert timestamp to readable format
1422
+ if isinstance(created_at, (int, float)):
1423
+ created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1424
  entites_section_list.append(
1425
  [
1426
  i,
 
1428
  n.get("entity_type", "UNKNOWN"),
1429
  n.get("description", "UNKNOWN"),
1430
  n["rank"],
1431
+ created_at,
1432
  ]
1433
  )
1434
  entities_context = list_of_list_to_csv(entites_section_list)
 
1787
  system_prompt=sys_prompt,
1788
  stream=query_param.stream,
1789
  )
1790
+
1791
+ # 清理响应内容
1792
  if isinstance(response, str) and len(response) > len(sys_prompt):
1793
  response = (
1794
  response.replace(sys_prompt, "")
 
1800
  .strip()
1801
  )
1802
 
1803
+ # 7. Save cache - 只有在收集完整响应后才缓存
1804
+ await save_to_cache(
1805
+ hashing_kv,
1806
+ CacheData(
1807
+ args_hash=args_hash,
1808
+ content=response,
1809
+ prompt=query,
1810
+ quantized=quantized,
1811
+ min_val=min_val,
1812
+ max_val=max_val,
1813
+ mode=query_param.mode,
1814
+ cache_type="query",
1815
+ ),
1816
+ )
1817
+
1818
  return response