gzdaniel committed on
Commit
808f02f
·
2 Parent(s): 9cd52bc cdf717a

Merge branch 'fix-graphml-json-mapping'

Browse files
examples/graph_visual_with_neo4j.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import json
3
- from lightrag.utils import xml_to_json
4
  from neo4j import GraphDatabase
5
 
6
  # Constants
@@ -14,6 +14,66 @@ NEO4J_USERNAME = "neo4j"
14
  NEO4J_PASSWORD = "your_password"
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def convert_xml_to_json(xml_path, output_path):
18
  """Converts XML file to JSON and saves the output."""
19
  if not os.path.exists(xml_path):
 
1
  import os
2
  import json
3
+ import xml.etree.ElementTree as ET
4
  from neo4j import GraphDatabase
5
 
6
  # Constants
 
14
  NEO4J_PASSWORD = "your_password"
15
 
16
 
17
_GRAPHML_NS = "http://graphml.graphdrawing.org/xmlns"

# Fallback <data key="..."> ids, used only when the file does not declare a
# <key> element for the corresponding attribute name.
_DEFAULT_NODE_KEY_IDS = {
    "entity_type": "d1",
    "description": "d2",
    "source_id": "d3",
}
_DEFAULT_EDGE_KEY_IDS = {
    "weight": "d5",
    "description": "d6",
    "keywords": "d7",
    "source_id": "d8",
}


def _graphml_key_ids(root, domain, defaults):
    """Map attribute names to <data> key ids for ``domain`` ("node" or "edge").

    Ids come from the file's own ``<key attr.name=... for=...>`` declarations;
    names that are not declared keep the id given in ``defaults``.
    """
    key_ids = dict(defaults)
    for key in root.iter(f"{{{_GRAPHML_NS}}}key"):
        name = key.get("attr.name")
        key_id = key.get("id")
        # A <key> without a "for" attribute applies to every element kind.
        if name in key_ids and key_id and key.get("for", "all") in (domain, "all"):
            key_ids[name] = key_id
    return key_ids


def _graphml_data(element):
    """Return ``{key id: text}`` for the direct <data> children of ``element``."""
    values = {}
    for data in element.findall(f"{{{_GRAPHML_NS}}}data"):
        # Keep the first occurrence of a key, matching Element.find() semantics.
        values.setdefault(data.get("key"), data.text)
    return values


def _data_text(values, key_id, strip_quotes=False):
    """Return the text stored under ``key_id``; "" when missing or empty."""
    text = values.get(key_id)
    if text is None:
        return ""
    return text.strip('"') if strip_quotes else text


def _data_weight(values, key_id):
    """Return the weight stored under ``key_id`` as float; 0.0 when missing or empty."""
    text = values.get(key_id)
    if text is None or not text.strip():
        return 0.0
    # A non-numeric weight still raises and is reported by the caller.
    return float(text)


def xml_to_json(xml_file):
    """Convert a GraphML file into a dict of node and edge records.

    The ``<data>`` key ids for each field are resolved from the file's
    ``<key>`` declarations (matched on ``attr.name`` and ``for``), so the
    result does not depend on the order in which the keys were numbered.
    When a field has no declaration, the historical ids are used
    (nodes: d1/d2/d3, edges: d5/d6/d7/d8).

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        ``{"nodes": [...], "edges": [...]}`` where each node has ``id``,
        ``entity_type``, ``description`` and ``source_id`` and each edge has
        ``source``, ``target``, ``weight``, ``description``, ``keywords`` and
        ``source_id``. Missing or empty ``<data>`` elements yield ``""``
        (``0.0`` for ``weight``). Returns ``None`` if the file cannot be
        parsed or any other error occurs; the error is printed.
    """
    try:
        tree = ET.parse(xml_file)
        root = tree.getroot()

        # Print the root element's tag and attributes to confirm the file has been correctly loaded
        print(f"Root element: {root.tag}")
        print(f"Root attributes: {root.attrib}")

        node_keys = _graphml_key_ids(root, "node", _DEFAULT_NODE_KEY_IDS)
        edge_keys = _graphml_key_ids(root, "edge", _DEFAULT_EDGE_KEY_IDS)

        data = {"nodes": [], "edges": []}

        for node in root.iter(f"{{{_GRAPHML_NS}}}node"):
            values = _graphml_data(node)
            data["nodes"].append(
                {
                    # "id" is mandatory in GraphML; a node without one is an
                    # error and is reported through the handler below.
                    "id": node.get("id").strip('"'),
                    "entity_type": _data_text(
                        values, node_keys["entity_type"], strip_quotes=True
                    ),
                    "description": _data_text(values, node_keys["description"]),
                    "source_id": _data_text(values, node_keys["source_id"]),
                }
            )

        for edge in root.iter(f"{{{_GRAPHML_NS}}}edge"):
            values = _graphml_data(edge)
            data["edges"].append(
                {
                    "source": edge.get("source").strip('"'),
                    "target": edge.get("target").strip('"'),
                    "weight": _data_weight(values, edge_keys["weight"]),
                    "description": _data_text(values, edge_keys["description"]),
                    "keywords": _data_text(values, edge_keys["keywords"]),
                    "source_id": _data_text(values, edge_keys["source_id"]),
                }
            )

        # Print the number of nodes and edges found
        print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")

        return data
    except ET.ParseError as e:
        print(f"Error parsing XML file: {e}")
        return None
    except Exception as e:
        # Best-effort boundary: callers expect None rather than an exception.
        print(f"An error occurred: {e}")
        return None
75
+
76
+
77
  def convert_xml_to_json(xml_path, output_path):
78
  """Converts XML file to JSON and saves the output."""
79
  if not os.path.exists(xml_path):
lightrag/utils.py CHANGED
@@ -13,7 +13,6 @@ from dataclasses import dataclass
13
  from functools import wraps
14
  from hashlib import md5
15
  from typing import Any, Protocol, Callable, TYPE_CHECKING, List
16
- import xml.etree.ElementTree as ET
17
  import numpy as np
18
  from lightrag.prompt import PROMPTS
19
  from dotenv import load_dotenv
@@ -753,71 +752,6 @@ def truncate_list_by_token_size(
753
  return list_data
754
 
755
 
756
- def save_data_to_file(data, file_name):
757
- with open(file_name, "w", encoding="utf-8") as f:
758
- json.dump(data, f, ensure_ascii=False, indent=4)
759
-
760
-
761
- def xml_to_json(xml_file):
762
- try:
763
- tree = ET.parse(xml_file)
764
- root = tree.getroot()
765
-
766
- # Print the root element's tag and attributes to confirm the file has been correctly loaded
767
- print(f"Root element: {root.tag}")
768
- print(f"Root attributes: {root.attrib}")
769
-
770
- data = {"nodes": [], "edges": []}
771
-
772
- # Use namespace
773
- namespace = {"": "http://graphml.graphdrawing.org/xmlns"}
774
-
775
- for node in root.findall(".//node", namespace):
776
- node_data = {
777
- "id": node.get("id").strip('"'),
778
- "entity_type": node.find("./data[@key='d0']", namespace).text.strip('"')
779
- if node.find("./data[@key='d0']", namespace) is not None
780
- else "",
781
- "description": node.find("./data[@key='d1']", namespace).text
782
- if node.find("./data[@key='d1']", namespace) is not None
783
- else "",
784
- "source_id": node.find("./data[@key='d2']", namespace).text
785
- if node.find("./data[@key='d2']", namespace) is not None
786
- else "",
787
- }
788
- data["nodes"].append(node_data)
789
-
790
- for edge in root.findall(".//edge", namespace):
791
- edge_data = {
792
- "source": edge.get("source").strip('"'),
793
- "target": edge.get("target").strip('"'),
794
- "weight": float(edge.find("./data[@key='d3']", namespace).text)
795
- if edge.find("./data[@key='d3']", namespace) is not None
796
- else 0.0,
797
- "description": edge.find("./data[@key='d4']", namespace).text
798
- if edge.find("./data[@key='d4']", namespace) is not None
799
- else "",
800
- "keywords": edge.find("./data[@key='d5']", namespace).text
801
- if edge.find("./data[@key='d5']", namespace) is not None
802
- else "",
803
- "source_id": edge.find("./data[@key='d6']", namespace).text
804
- if edge.find("./data[@key='d6']", namespace) is not None
805
- else "",
806
- }
807
- data["edges"].append(edge_data)
808
-
809
- # Print the number of nodes and edges found
810
- print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")
811
-
812
- return data
813
- except ET.ParseError as e:
814
- print(f"Error parsing XML file: {e}")
815
- return None
816
- except Exception as e:
817
- print(f"An error occurred: {e}")
818
- return None
819
-
820
-
821
  def process_combine_contexts(*context_lists):
822
  """
823
  Combine multiple context lists and remove duplicate content
 
13
  from functools import wraps
14
  from hashlib import md5
15
  from typing import Any, Protocol, Callable, TYPE_CHECKING, List
 
16
  import numpy as np
17
  from lightrag.prompt import PROMPTS
18
  from dotenv import load_dotenv
 
752
  return list_data
753
 
754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755
  def process_combine_contexts(*context_lists):
756
  """
757
  Combine multiple context lists and remove duplicate content