Merge branch 'fix-graphml-json-mapping'
Browse files- examples/graph_visual_with_neo4j.py +61 -1
- lightrag/utils.py +0 -66
examples/graph_visual_with_neo4j.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
import json
|
3 |
-
|
4 |
from neo4j import GraphDatabase
|
5 |
|
6 |
# Constants
|
@@ -14,6 +14,66 @@ NEO4J_USERNAME = "neo4j"
|
|
14 |
NEO4J_PASSWORD = "your_password"
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def convert_xml_to_json(xml_path, output_path):
|
18 |
"""Converts XML file to JSON and saves the output."""
|
19 |
if not os.path.exists(xml_path):
|
|
|
1 |
import os
|
2 |
import json
|
3 |
+
import xml.etree.ElementTree as ET
|
4 |
from neo4j import GraphDatabase
|
5 |
|
6 |
# Constants
|
|
|
14 |
NEO4J_PASSWORD = "your_password"
|
15 |
|
16 |
|
17 |
+
def xml_to_json(xml_file):
    """Convert a GraphML file into a dict of node and edge records.

    The ``<data>`` key ids are resolved from the file's own ``<key>``
    declarations (matched on ``attr.name``), so the conversion keeps working
    when the writer adds or reorders attributes. Only when the file declares
    no keys for nodes/edges are the hard-coded ids (``d1``..``d3`` for nodes,
    ``d5``..``d8`` for edges) used.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        ``{"nodes": [...], "edges": [...]}`` on success. Missing or empty
        text fields become ``""`` and a missing or empty weight becomes
        ``0.0``. ``None`` is returned (after printing a message) when the
        file cannot be read, parsed or converted.
    """
    # An explicit prefix avoids relying on the empty-prefix default namespace.
    ns = {"g": "http://graphml.graphdrawing.org/xmlns"}
    fallback_node_keys = {
        "entity_type": "d1",
        "description": "d2",
        "source_id": "d3",
    }
    fallback_edge_keys = {
        "weight": "d5",
        "description": "d6",
        "keywords": "d7",
        "source_id": "d8",
    }

    def declared_keys(root, domain, fallback):
        """Map attribute name -> key id for ``domain`` ("node" or "edge")."""
        declared = {}
        for key in root.findall(".//g:key", ns):
            name = key.get("attr.name")
            key_id = key.get("id")
            scope = key.get("for", "all")  # GraphML defaults ``for`` to "all"
            if not name or not key_id or scope not in (domain, "all"):
                continue
            # A domain-specific declaration wins over a generic "all" one.
            if scope == domain or name not in declared:
                declared[name] = key_id
        return declared or fallback

    def data_values(element):
        """Collect the element's ``<data>`` children as key id -> text."""
        values = {}
        for item in element.findall("g:data", ns):
            # Keep the first occurrence, as ``find`` would.
            values.setdefault(item.get("key"), item.text)
        return values

    def text_field(values, key_id):
        """Return the text stored under ``key_id`` or "" when absent/empty."""
        if key_id is None:
            return ""
        text = values.get(key_id)
        return "" if text is None else text

    try:
        tree = ET.parse(xml_file)
        root = tree.getroot()

        # Print the root element's tag and attributes to confirm the file
        # has been correctly loaded.
        print(f"Root element: {root.tag}")
        print(f"Root attributes: {root.attrib}")

        node_keys = declared_keys(root, "node", fallback_node_keys)
        edge_keys = declared_keys(root, "edge", fallback_edge_keys)

        data = {"nodes": [], "edges": []}

        for node in root.findall(".//g:node", ns):
            values = data_values(node)
            data["nodes"].append(
                {
                    "id": (node.get("id") or "").strip('"'),
                    "entity_type": text_field(
                        values, node_keys.get("entity_type")
                    ).strip('"'),
                    "description": text_field(
                        values, node_keys.get("description")
                    ),
                    "source_id": text_field(values, node_keys.get("source_id")),
                }
            )

        for edge in root.findall(".//g:edge", ns):
            values = data_values(edge)
            weight_text = text_field(values, edge_keys.get("weight"))
            data["edges"].append(
                {
                    "source": (edge.get("source") or "").strip('"'),
                    "target": (edge.get("target") or "").strip('"'),
                    # A non-numeric weight still raises and is reported below.
                    "weight": float(weight_text) if weight_text.strip() else 0.0,
                    "description": text_field(
                        values, edge_keys.get("description")
                    ),
                    "keywords": text_field(values, edge_keys.get("keywords")),
                    "source_id": text_field(values, edge_keys.get("source_id")),
                }
            )

        # Print the number of nodes and edges found
        print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")

        return data
    except ET.ParseError as e:
        print(f"Error parsing XML file: {e}")
        return None
    except Exception as e:
        # Callers rely on ``None`` for any failure (e.g. unreadable file,
        # non-numeric weight), so this boundary stays broad but reports it.
        print(f"An error occurred: {e}")
        return None
|
75 |
+
|
76 |
+
|
77 |
def convert_xml_to_json(xml_path, output_path):
|
78 |
"""Converts XML file to JSON and saves the output."""
|
79 |
if not os.path.exists(xml_path):
|
lightrag/utils.py
CHANGED
@@ -13,7 +13,6 @@ from dataclasses import dataclass
|
|
13 |
from functools import wraps
|
14 |
from hashlib import md5
|
15 |
from typing import Any, Protocol, Callable, TYPE_CHECKING, List
|
16 |
-
import xml.etree.ElementTree as ET
|
17 |
import numpy as np
|
18 |
from lightrag.prompt import PROMPTS
|
19 |
from dotenv import load_dotenv
|
@@ -753,71 +752,6 @@ def truncate_list_by_token_size(
|
|
753 |
return list_data
|
754 |
|
755 |
|
756 |
-
def save_data_to_file(data, file_name):
    """Write ``data`` to ``file_name`` as UTF-8 JSON indented by four spaces.

    Non-ASCII characters are written as-is rather than escaped. An existing
    file at ``file_name`` is overwritten.
    """
    with open(file_name, mode="w", encoding="utf-8") as out_file:
        json.dump(data, out_file, indent=4, ensure_ascii=False)
|
759 |
-
|
760 |
-
|
761 |
-
def xml_to_json(xml_file):
    """Convert a GraphML file into a dict of node and edge records.

    The ``<data>`` key ids are resolved from the file's own ``<key>``
    declarations (matched on ``attr.name``), so the conversion does not depend
    on the order in which the writer numbered its attributes. Only when the
    file declares no keys for nodes/edges are the hard-coded ids (``d0``..``d2``
    for nodes, ``d3``..``d6`` for edges) used.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        ``{"nodes": [...], "edges": [...]}`` on success. Missing or empty
        text fields become ``""`` and a missing or empty weight becomes
        ``0.0``. ``None`` is returned (after printing a message) when the
        file cannot be read, parsed or converted.
    """
    # An explicit prefix avoids relying on the empty-prefix default namespace.
    ns = {"g": "http://graphml.graphdrawing.org/xmlns"}
    fallback_node_keys = {
        "entity_type": "d0",
        "description": "d1",
        "source_id": "d2",
    }
    fallback_edge_keys = {
        "weight": "d3",
        "description": "d4",
        "keywords": "d5",
        "source_id": "d6",
    }

    def declared_keys(root, domain, fallback):
        """Map attribute name -> key id for ``domain`` ("node" or "edge")."""
        declared = {}
        for key in root.findall(".//g:key", ns):
            name = key.get("attr.name")
            key_id = key.get("id")
            scope = key.get("for", "all")  # GraphML defaults ``for`` to "all"
            if not name or not key_id or scope not in (domain, "all"):
                continue
            # A domain-specific declaration wins over a generic "all" one.
            if scope == domain or name not in declared:
                declared[name] = key_id
        return declared or fallback

    def data_values(element):
        """Collect the element's ``<data>`` children as key id -> text."""
        values = {}
        for item in element.findall("g:data", ns):
            # Keep the first occurrence, as ``find`` would.
            values.setdefault(item.get("key"), item.text)
        return values

    def text_field(values, key_id):
        """Return the text stored under ``key_id`` or "" when absent/empty."""
        if key_id is None:
            return ""
        text = values.get(key_id)
        return "" if text is None else text

    try:
        tree = ET.parse(xml_file)
        root = tree.getroot()

        # Print the root element's tag and attributes to confirm the file
        # has been correctly loaded.
        print(f"Root element: {root.tag}")
        print(f"Root attributes: {root.attrib}")

        node_keys = declared_keys(root, "node", fallback_node_keys)
        edge_keys = declared_keys(root, "edge", fallback_edge_keys)

        data = {"nodes": [], "edges": []}

        for node in root.findall(".//g:node", ns):
            values = data_values(node)
            data["nodes"].append(
                {
                    "id": (node.get("id") or "").strip('"'),
                    "entity_type": text_field(
                        values, node_keys.get("entity_type")
                    ).strip('"'),
                    "description": text_field(
                        values, node_keys.get("description")
                    ),
                    "source_id": text_field(values, node_keys.get("source_id")),
                }
            )

        for edge in root.findall(".//g:edge", ns):
            values = data_values(edge)
            weight_text = text_field(values, edge_keys.get("weight"))
            data["edges"].append(
                {
                    "source": (edge.get("source") or "").strip('"'),
                    "target": (edge.get("target") or "").strip('"'),
                    # A non-numeric weight still raises and is reported below.
                    "weight": float(weight_text) if weight_text.strip() else 0.0,
                    "description": text_field(
                        values, edge_keys.get("description")
                    ),
                    "keywords": text_field(values, edge_keys.get("keywords")),
                    "source_id": text_field(values, edge_keys.get("source_id")),
                }
            )

        # Print the number of nodes and edges found
        print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")

        return data
    except ET.ParseError as e:
        print(f"Error parsing XML file: {e}")
        return None
    except Exception as e:
        # Callers rely on ``None`` for any failure (e.g. unreadable file,
        # non-numeric weight), so this boundary stays broad but reports it.
        print(f"An error occurred: {e}")
        return None
|
819 |
-
|
820 |
-
|
821 |
def process_combine_contexts(*context_lists):
|
822 |
"""
|
823 |
Combine multiple context lists and remove duplicate content
|
|
|
13 |
from functools import wraps
|
14 |
from hashlib import md5
|
15 |
from typing import Any, Protocol, Callable, TYPE_CHECKING, List
|
|
|
16 |
import numpy as np
|
17 |
from lightrag.prompt import PROMPTS
|
18 |
from dotenv import load_dotenv
|
|
|
752 |
return list_data
|
753 |
|
754 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
755 |
def process_combine_contexts(*context_lists):
|
756 |
"""
|
757 |
Combine multiple context lists and remove duplicate content
|