zrguo
committed on
Commit
·
beb4bc2
1
Parent(s):
a2e05db
Update insert_custom_kg
Browse files- README.md +60 -53
- lightrag/lightrag.py +15 -4
- lightrag/operate.py +1 -0
- lightrag/utils_graph.py +7 -2
README.md
CHANGED
@@ -900,59 +900,66 @@ All operations are available in both synchronous and asynchronous versions. The
|
|
900 |
|
901 |
```python
|
902 |
custom_kg = {
|
903 |
-
|
904 |
-
|
905 |
-
|
906 |
-
|
907 |
-
|
908 |
-
|
909 |
-
|
910 |
-
|
911 |
-
|
912 |
-
|
913 |
-
|
914 |
-
|
915 |
-
|
916 |
-
|
917 |
-
|
918 |
-
|
919 |
-
|
920 |
-
|
921 |
-
|
922 |
-
|
923 |
-
|
924 |
-
|
925 |
-
|
926 |
-
|
927 |
-
|
928 |
-
|
929 |
-
|
930 |
-
|
931 |
-
|
932 |
-
|
933 |
-
|
934 |
-
|
935 |
-
|
936 |
-
|
937 |
-
|
938 |
-
|
939 |
-
|
940 |
-
|
941 |
-
|
942 |
-
|
943 |
-
|
944 |
-
|
945 |
-
|
946 |
-
|
947 |
-
|
948 |
-
|
949 |
-
|
950 |
-
|
951 |
-
|
952 |
-
|
953 |
-
|
954 |
-
|
955 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
956 |
|
957 |
rag.insert_custom_kg(custom_kg)
|
958 |
```
|
|
|
900 |
|
901 |
```python
|
902 |
custom_kg = {
|
903 |
+
"chunks": [
|
904 |
+
{
|
905 |
+
"content": "Alice and Bob are collaborating on quantum computing research.",
|
906 |
+
"source_id": "doc-1",
|
907 |
+
"file_path": "test_file",
|
908 |
+
}
|
909 |
+
],
|
910 |
+
"entities": [
|
911 |
+
{
|
912 |
+
"entity_name": "Alice",
|
913 |
+
"entity_type": "person",
|
914 |
+
"description": "Alice is a researcher specializing in quantum physics.",
|
915 |
+
"source_id": "doc-1",
|
916 |
+
"file_path": "test_file"
|
917 |
+
},
|
918 |
+
{
|
919 |
+
"entity_name": "Bob",
|
920 |
+
"entity_type": "person",
|
921 |
+
"description": "Bob is a mathematician.",
|
922 |
+
"source_id": "doc-1",
|
923 |
+
"file_path": "test_file"
|
924 |
+
},
|
925 |
+
{
|
926 |
+
"entity_name": "Quantum Computing",
|
927 |
+
"entity_type": "technology",
|
928 |
+
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
|
929 |
+
"source_id": "doc-1",
|
930 |
+
"file_path": "test_file"
|
931 |
+
}
|
932 |
+
],
|
933 |
+
"relationships": [
|
934 |
+
{
|
935 |
+
"src_id": "Alice",
|
936 |
+
"tgt_id": "Bob",
|
937 |
+
"description": "Alice and Bob are research partners.",
|
938 |
+
"keywords": "collaboration research",
|
939 |
+
"weight": 1.0,
|
940 |
+
"source_id": "doc-1",
|
941 |
+
"file_path": "test_file"
|
942 |
+
},
|
943 |
+
{
|
944 |
+
"src_id": "Alice",
|
945 |
+
"tgt_id": "Quantum Computing",
|
946 |
+
"description": "Alice conducts research on quantum computing.",
|
947 |
+
"keywords": "research expertise",
|
948 |
+
"weight": 1.0,
|
949 |
+
"source_id": "doc-1",
|
950 |
+
"file_path": "test_file"
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"src_id": "Bob",
|
954 |
+
"tgt_id": "Quantum Computing",
|
955 |
+
"description": "Bob researches quantum computing.",
|
956 |
+
"keywords": "research application",
|
957 |
+
"weight": 1.0,
|
958 |
+
"source_id": "doc-1",
|
959 |
+
"file_path": "test_file"
|
960 |
+
}
|
961 |
+
]
|
962 |
+
}
|
963 |
|
964 |
rag.insert_custom_kg(custom_kg)
|
965 |
```
|
lightrag/lightrag.py
CHANGED
@@ -4,6 +4,7 @@ import traceback
|
|
4 |
import asyncio
|
5 |
import configparser
|
6 |
import os
|
|
|
7 |
import warnings
|
8 |
from dataclasses import asdict, dataclass, field
|
9 |
from datetime import datetime, timezone
|
@@ -1235,7 +1236,6 @@ class LightRAG:
|
|
1235 |
self,
|
1236 |
custom_kg: dict[str, Any],
|
1237 |
full_doc_id: str = None,
|
1238 |
-
file_path: str = "custom_kg",
|
1239 |
) -> None:
|
1240 |
update_storage = False
|
1241 |
try:
|
@@ -1245,6 +1245,7 @@ class LightRAG:
|
|
1245 |
for chunk_data in custom_kg.get("chunks", []):
|
1246 |
chunk_content = clean_text(chunk_data["content"])
|
1247 |
source_id = chunk_data["source_id"]
|
|
|
1248 |
tokens = len(self.tokenizer.encode(chunk_content))
|
1249 |
chunk_order_index = (
|
1250 |
0
|
@@ -1261,7 +1262,7 @@ class LightRAG:
|
|
1261 |
"full_doc_id": full_doc_id
|
1262 |
if full_doc_id is not None
|
1263 |
else source_id,
|
1264 |
-
"file_path": file_path,
|
1265 |
"status": DocStatus.PROCESSED,
|
1266 |
}
|
1267 |
all_chunks_data[chunk_id] = chunk_entry
|
@@ -1282,6 +1283,7 @@ class LightRAG:
|
|
1282 |
description = entity_data.get("description", "No description provided")
|
1283 |
source_chunk_id = entity_data.get("source_id", "UNKNOWN")
|
1284 |
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
|
|
|
1285 |
|
1286 |
# Log if source_id is UNKNOWN
|
1287 |
if source_id == "UNKNOWN":
|
@@ -1296,6 +1298,7 @@ class LightRAG:
|
|
1296 |
"description": description,
|
1297 |
"source_id": source_id,
|
1298 |
"file_path": file_path,
|
|
|
1299 |
}
|
1300 |
# Insert node data into the knowledge graph
|
1301 |
await self.chunk_entity_relation_graph.upsert_node(
|
@@ -1315,6 +1318,7 @@ class LightRAG:
|
|
1315 |
weight = relationship_data.get("weight", 1.0)
|
1316 |
source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
|
1317 |
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
|
|
|
1318 |
|
1319 |
# Log if source_id is UNKNOWN
|
1320 |
if source_id == "UNKNOWN":
|
@@ -1334,6 +1338,8 @@ class LightRAG:
|
|
1334 |
"source_id": source_id,
|
1335 |
"description": "UNKNOWN",
|
1336 |
"entity_type": "UNKNOWN",
|
|
|
|
|
1337 |
},
|
1338 |
)
|
1339 |
|
@@ -1346,8 +1352,11 @@ class LightRAG:
|
|
1346 |
"description": description,
|
1347 |
"keywords": keywords,
|
1348 |
"source_id": source_id,
|
|
|
|
|
1349 |
},
|
1350 |
)
|
|
|
1351 |
edge_data: dict[str, str] = {
|
1352 |
"src_id": src_id,
|
1353 |
"tgt_id": tgt_id,
|
@@ -1355,6 +1364,8 @@ class LightRAG:
|
|
1355 |
"keywords": keywords,
|
1356 |
"source_id": source_id,
|
1357 |
"weight": weight,
|
|
|
|
|
1358 |
}
|
1359 |
all_relationships_data.append(edge_data)
|
1360 |
update_storage = True
|
@@ -1367,7 +1378,7 @@ class LightRAG:
|
|
1367 |
"source_id": dp["source_id"],
|
1368 |
"description": dp["description"],
|
1369 |
"entity_type": dp["entity_type"],
|
1370 |
-
"file_path": file_path,
|
1371 |
}
|
1372 |
for dp in all_entities_data
|
1373 |
}
|
@@ -1383,7 +1394,7 @@ class LightRAG:
|
|
1383 |
"keywords": dp["keywords"],
|
1384 |
"description": dp["description"],
|
1385 |
"weight": dp["weight"],
|
1386 |
-
"file_path": file_path,
|
1387 |
}
|
1388 |
for dp in all_relationships_data
|
1389 |
}
|
|
|
4 |
import asyncio
|
5 |
import configparser
|
6 |
import os
|
7 |
+
import time
|
8 |
import warnings
|
9 |
from dataclasses import asdict, dataclass, field
|
10 |
from datetime import datetime, timezone
|
|
|
1236 |
self,
|
1237 |
custom_kg: dict[str, Any],
|
1238 |
full_doc_id: str = None,
|
|
|
1239 |
) -> None:
|
1240 |
update_storage = False
|
1241 |
try:
|
|
|
1245 |
for chunk_data in custom_kg.get("chunks", []):
|
1246 |
chunk_content = clean_text(chunk_data["content"])
|
1247 |
source_id = chunk_data["source_id"]
|
1248 |
+
file_path = chunk_data.get("file_path", "custom_kg")
|
1249 |
tokens = len(self.tokenizer.encode(chunk_content))
|
1250 |
chunk_order_index = (
|
1251 |
0
|
|
|
1262 |
"full_doc_id": full_doc_id
|
1263 |
if full_doc_id is not None
|
1264 |
else source_id,
|
1265 |
+
"file_path": file_path,
|
1266 |
"status": DocStatus.PROCESSED,
|
1267 |
}
|
1268 |
all_chunks_data[chunk_id] = chunk_entry
|
|
|
1283 |
description = entity_data.get("description", "No description provided")
|
1284 |
source_chunk_id = entity_data.get("source_id", "UNKNOWN")
|
1285 |
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
|
1286 |
+
file_path = entity_data.get("file_path", "custom_kg")
|
1287 |
|
1288 |
# Log if source_id is UNKNOWN
|
1289 |
if source_id == "UNKNOWN":
|
|
|
1298 |
"description": description,
|
1299 |
"source_id": source_id,
|
1300 |
"file_path": file_path,
|
1301 |
+
"created_at": int(time.time()),
|
1302 |
}
|
1303 |
# Insert node data into the knowledge graph
|
1304 |
await self.chunk_entity_relation_graph.upsert_node(
|
|
|
1318 |
weight = relationship_data.get("weight", 1.0)
|
1319 |
source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
|
1320 |
source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
|
1321 |
+
file_path = relationship_data.get("file_path", "custom_kg")
|
1322 |
|
1323 |
# Log if source_id is UNKNOWN
|
1324 |
if source_id == "UNKNOWN":
|
|
|
1338 |
"source_id": source_id,
|
1339 |
"description": "UNKNOWN",
|
1340 |
"entity_type": "UNKNOWN",
|
1341 |
+
"file_path": file_path,
|
1342 |
+
"created_at": int(time.time()),
|
1343 |
},
|
1344 |
)
|
1345 |
|
|
|
1352 |
"description": description,
|
1353 |
"keywords": keywords,
|
1354 |
"source_id": source_id,
|
1355 |
+
"file_path": file_path,
|
1356 |
+
"created_at": int(time.time()),
|
1357 |
},
|
1358 |
)
|
1359 |
+
|
1360 |
edge_data: dict[str, str] = {
|
1361 |
"src_id": src_id,
|
1362 |
"tgt_id": tgt_id,
|
|
|
1364 |
"keywords": keywords,
|
1365 |
"source_id": source_id,
|
1366 |
"weight": weight,
|
1367 |
+
"file_path": file_path,
|
1368 |
+
"created_at": int(time.time()),
|
1369 |
}
|
1370 |
all_relationships_data.append(edge_data)
|
1371 |
update_storage = True
|
|
|
1378 |
"source_id": dp["source_id"],
|
1379 |
"description": dp["description"],
|
1380 |
"entity_type": dp["entity_type"],
|
1381 |
+
"file_path": dp.get("file_path", "custom_kg"),
|
1382 |
}
|
1383 |
for dp in all_entities_data
|
1384 |
}
|
|
|
1394 |
"keywords": dp["keywords"],
|
1395 |
"description": dp["description"],
|
1396 |
"weight": dp["weight"],
|
1397 |
+
"file_path": dp.get("file_path", "custom_kg"),
|
1398 |
}
|
1399 |
for dp in all_relationships_data
|
1400 |
}
|
lightrag/operate.py
CHANGED
@@ -496,6 +496,7 @@ async def _merge_edges_then_upsert(
|
|
496 |
keywords=keywords,
|
497 |
source_id=source_id,
|
498 |
file_path=file_path,
|
|
|
499 |
)
|
500 |
|
501 |
return edge_data
|
|
|
496 |
keywords=keywords,
|
497 |
source_id=source_id,
|
498 |
file_path=file_path,
|
499 |
+
created_at=int(time.time()),
|
500 |
)
|
501 |
|
502 |
return edge_data
|
lightrag/utils_graph.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from __future__ import annotations
|
2 |
|
|
|
3 |
import asyncio
|
4 |
from typing import Any, cast
|
5 |
|
@@ -479,7 +480,9 @@ async def acreate_entity(
|
|
479 |
"entity_id": entity_name,
|
480 |
"entity_type": entity_data.get("entity_type", "UNKNOWN"),
|
481 |
"description": entity_data.get("description", ""),
|
482 |
-
"source_id": entity_data.get("source_id", "
|
|
|
|
|
483 |
}
|
484 |
|
485 |
# Add entity to knowledge graph
|
@@ -575,8 +578,10 @@ async def acreate_relation(
|
|
575 |
edge_data = {
|
576 |
"description": relation_data.get("description", ""),
|
577 |
"keywords": relation_data.get("keywords", ""),
|
578 |
-
"source_id": relation_data.get("source_id", "
|
579 |
"weight": float(relation_data.get("weight", 1.0)),
|
|
|
|
|
580 |
}
|
581 |
|
582 |
# Add relation to knowledge graph
|
|
|
1 |
from __future__ import annotations
|
2 |
|
3 |
+
import time
|
4 |
import asyncio
|
5 |
from typing import Any, cast
|
6 |
|
|
|
480 |
"entity_id": entity_name,
|
481 |
"entity_type": entity_data.get("entity_type", "UNKNOWN"),
|
482 |
"description": entity_data.get("description", ""),
|
483 |
+
"source_id": entity_data.get("source_id", "manual_creation"),
|
484 |
+
"file_path": entity_data.get("file_path", "manual_creation"),
|
485 |
+
"created_at": int(time.time()),
|
486 |
}
|
487 |
|
488 |
# Add entity to knowledge graph
|
|
|
578 |
edge_data = {
|
579 |
"description": relation_data.get("description", ""),
|
580 |
"keywords": relation_data.get("keywords", ""),
|
581 |
+
"source_id": relation_data.get("source_id", "manual_creation"),
|
582 |
"weight": float(relation_data.get("weight", 1.0)),
|
583 |
+
"file_path": relation_data.get("file_path", "manual_creation"),
|
584 |
+
"created_at": int(time.time()),
|
585 |
}
|
586 |
|
587 |
# Add relation to knowledge graph
|