zrguo committed on
Commit
beb4bc2
·
1 Parent(s): a2e05db

Update insert_custom_kg

Browse files
README.md CHANGED
@@ -900,59 +900,66 @@ All operations are available in both synchronous and asynchronous versions. The
900
 
901
  ```python
902
  custom_kg = {
903
- "chunks": [
904
- {
905
- "content": "Alice and Bob are collaborating on quantum computing research.",
906
- "source_id": "doc-1"
907
- }
908
- ],
909
- "entities": [
910
- {
911
- "entity_name": "Alice",
912
- "entity_type": "person",
913
- "description": "Alice is a researcher specializing in quantum physics.",
914
- "source_id": "doc-1"
915
- },
916
- {
917
- "entity_name": "Bob",
918
- "entity_type": "person",
919
- "description": "Bob is a mathematician.",
920
- "source_id": "doc-1"
921
- },
922
- {
923
- "entity_name": "Quantum Computing",
924
- "entity_type": "technology",
925
- "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
926
- "source_id": "doc-1"
927
- }
928
- ],
929
- "relationships": [
930
- {
931
- "src_id": "Alice",
932
- "tgt_id": "Bob",
933
- "description": "Alice and Bob are research partners.",
934
- "keywords": "collaboration research",
935
- "weight": 1.0,
936
- "source_id": "doc-1"
937
- },
938
- {
939
- "src_id": "Alice",
940
- "tgt_id": "Quantum Computing",
941
- "description": "Alice conducts research on quantum computing.",
942
- "keywords": "research expertise",
943
- "weight": 1.0,
944
- "source_id": "doc-1"
945
- },
946
- {
947
- "src_id": "Bob",
948
- "tgt_id": "Quantum Computing",
949
- "description": "Bob researches quantum computing.",
950
- "keywords": "research application",
951
- "weight": 1.0,
952
- "source_id": "doc-1"
953
- }
954
- ]
955
- }
 
 
 
 
 
 
 
956
 
957
  rag.insert_custom_kg(custom_kg)
958
  ```
 
900
 
901
  ```python
902
  custom_kg = {
903
+ "chunks": [
904
+ {
905
+ "content": "Alice and Bob are collaborating on quantum computing research.",
906
+ "source_id": "doc-1",
907
+ "file_path": "test_file",
908
+ }
909
+ ],
910
+ "entities": [
911
+ {
912
+ "entity_name": "Alice",
913
+ "entity_type": "person",
914
+ "description": "Alice is a researcher specializing in quantum physics.",
915
+ "source_id": "doc-1",
916
+ "file_path": "test_file"
917
+ },
918
+ {
919
+ "entity_name": "Bob",
920
+ "entity_type": "person",
921
+ "description": "Bob is a mathematician.",
922
+ "source_id": "doc-1",
923
+ "file_path": "test_file"
924
+ },
925
+ {
926
+ "entity_name": "Quantum Computing",
927
+ "entity_type": "technology",
928
+ "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
929
+ "source_id": "doc-1",
930
+ "file_path": "test_file"
931
+ }
932
+ ],
933
+ "relationships": [
934
+ {
935
+ "src_id": "Alice",
936
+ "tgt_id": "Bob",
937
+ "description": "Alice and Bob are research partners.",
938
+ "keywords": "collaboration research",
939
+ "weight": 1.0,
940
+ "source_id": "doc-1",
941
+ "file_path": "test_file"
942
+ },
943
+ {
944
+ "src_id": "Alice",
945
+ "tgt_id": "Quantum Computing",
946
+ "description": "Alice conducts research on quantum computing.",
947
+ "keywords": "research expertise",
948
+ "weight": 1.0,
949
+ "source_id": "doc-1",
950
+ "file_path": "test_file"
951
+ },
952
+ {
953
+ "src_id": "Bob",
954
+ "tgt_id": "Quantum Computing",
955
+ "description": "Bob researches quantum computing.",
956
+ "keywords": "research application",
957
+ "weight": 1.0,
958
+ "source_id": "doc-1",
959
+ "file_path": "test_file"
960
+ }
961
+ ]
962
+ }
963
 
964
  rag.insert_custom_kg(custom_kg)
965
  ```
lightrag/lightrag.py CHANGED
@@ -4,6 +4,7 @@ import traceback
4
  import asyncio
5
  import configparser
6
  import os
 
7
  import warnings
8
  from dataclasses import asdict, dataclass, field
9
  from datetime import datetime, timezone
@@ -1235,7 +1236,6 @@ class LightRAG:
1235
  self,
1236
  custom_kg: dict[str, Any],
1237
  full_doc_id: str = None,
1238
- file_path: str = "custom_kg",
1239
  ) -> None:
1240
  update_storage = False
1241
  try:
@@ -1245,6 +1245,7 @@ class LightRAG:
1245
  for chunk_data in custom_kg.get("chunks", []):
1246
  chunk_content = clean_text(chunk_data["content"])
1247
  source_id = chunk_data["source_id"]
 
1248
  tokens = len(self.tokenizer.encode(chunk_content))
1249
  chunk_order_index = (
1250
  0
@@ -1261,7 +1262,7 @@ class LightRAG:
1261
  "full_doc_id": full_doc_id
1262
  if full_doc_id is not None
1263
  else source_id,
1264
- "file_path": file_path, # Add file path
1265
  "status": DocStatus.PROCESSED,
1266
  }
1267
  all_chunks_data[chunk_id] = chunk_entry
@@ -1282,6 +1283,7 @@ class LightRAG:
1282
  description = entity_data.get("description", "No description provided")
1283
  source_chunk_id = entity_data.get("source_id", "UNKNOWN")
1284
  source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
 
1285
 
1286
  # Log if source_id is UNKNOWN
1287
  if source_id == "UNKNOWN":
@@ -1296,6 +1298,7 @@ class LightRAG:
1296
  "description": description,
1297
  "source_id": source_id,
1298
  "file_path": file_path,
 
1299
  }
1300
  # Insert node data into the knowledge graph
1301
  await self.chunk_entity_relation_graph.upsert_node(
@@ -1315,6 +1318,7 @@ class LightRAG:
1315
  weight = relationship_data.get("weight", 1.0)
1316
  source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
1317
  source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
 
1318
 
1319
  # Log if source_id is UNKNOWN
1320
  if source_id == "UNKNOWN":
@@ -1334,6 +1338,8 @@ class LightRAG:
1334
  "source_id": source_id,
1335
  "description": "UNKNOWN",
1336
  "entity_type": "UNKNOWN",
 
 
1337
  },
1338
  )
1339
 
@@ -1346,8 +1352,11 @@ class LightRAG:
1346
  "description": description,
1347
  "keywords": keywords,
1348
  "source_id": source_id,
 
 
1349
  },
1350
  )
 
1351
  edge_data: dict[str, str] = {
1352
  "src_id": src_id,
1353
  "tgt_id": tgt_id,
@@ -1355,6 +1364,8 @@ class LightRAG:
1355
  "keywords": keywords,
1356
  "source_id": source_id,
1357
  "weight": weight,
 
 
1358
  }
1359
  all_relationships_data.append(edge_data)
1360
  update_storage = True
@@ -1367,7 +1378,7 @@ class LightRAG:
1367
  "source_id": dp["source_id"],
1368
  "description": dp["description"],
1369
  "entity_type": dp["entity_type"],
1370
- "file_path": file_path, # Add file path
1371
  }
1372
  for dp in all_entities_data
1373
  }
@@ -1383,7 +1394,7 @@ class LightRAG:
1383
  "keywords": dp["keywords"],
1384
  "description": dp["description"],
1385
  "weight": dp["weight"],
1386
- "file_path": file_path, # Add file path
1387
  }
1388
  for dp in all_relationships_data
1389
  }
 
4
  import asyncio
5
  import configparser
6
  import os
7
+ import time
8
  import warnings
9
  from dataclasses import asdict, dataclass, field
10
  from datetime import datetime, timezone
 
1236
  self,
1237
  custom_kg: dict[str, Any],
1238
  full_doc_id: str = None,
 
1239
  ) -> None:
1240
  update_storage = False
1241
  try:
 
1245
  for chunk_data in custom_kg.get("chunks", []):
1246
  chunk_content = clean_text(chunk_data["content"])
1247
  source_id = chunk_data["source_id"]
1248
+ file_path = chunk_data.get("file_path", "custom_kg")
1249
  tokens = len(self.tokenizer.encode(chunk_content))
1250
  chunk_order_index = (
1251
  0
 
1262
  "full_doc_id": full_doc_id
1263
  if full_doc_id is not None
1264
  else source_id,
1265
+ "file_path": file_path,
1266
  "status": DocStatus.PROCESSED,
1267
  }
1268
  all_chunks_data[chunk_id] = chunk_entry
 
1283
  description = entity_data.get("description", "No description provided")
1284
  source_chunk_id = entity_data.get("source_id", "UNKNOWN")
1285
  source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
1286
+ file_path = entity_data.get("file_path", "custom_kg")
1287
 
1288
  # Log if source_id is UNKNOWN
1289
  if source_id == "UNKNOWN":
 
1298
  "description": description,
1299
  "source_id": source_id,
1300
  "file_path": file_path,
1301
+ "created_at": int(time.time()),
1302
  }
1303
  # Insert node data into the knowledge graph
1304
  await self.chunk_entity_relation_graph.upsert_node(
 
1318
  weight = relationship_data.get("weight", 1.0)
1319
  source_chunk_id = relationship_data.get("source_id", "UNKNOWN")
1320
  source_id = chunk_to_source_map.get(source_chunk_id, "UNKNOWN")
1321
+ file_path = relationship_data.get("file_path", "custom_kg")
1322
 
1323
  # Log if source_id is UNKNOWN
1324
  if source_id == "UNKNOWN":
 
1338
  "source_id": source_id,
1339
  "description": "UNKNOWN",
1340
  "entity_type": "UNKNOWN",
1341
+ "file_path": file_path,
1342
+ "created_at": int(time.time()),
1343
  },
1344
  )
1345
 
 
1352
  "description": description,
1353
  "keywords": keywords,
1354
  "source_id": source_id,
1355
+ "file_path": file_path,
1356
+ "created_at": int(time.time()),
1357
  },
1358
  )
1359
+
1360
  edge_data: dict[str, str] = {
1361
  "src_id": src_id,
1362
  "tgt_id": tgt_id,
 
1364
  "keywords": keywords,
1365
  "source_id": source_id,
1366
  "weight": weight,
1367
+ "file_path": file_path,
1368
+ "created_at": int(time.time()),
1369
  }
1370
  all_relationships_data.append(edge_data)
1371
  update_storage = True
 
1378
  "source_id": dp["source_id"],
1379
  "description": dp["description"],
1380
  "entity_type": dp["entity_type"],
1381
+ "file_path": dp.get("file_path", "custom_kg"),
1382
  }
1383
  for dp in all_entities_data
1384
  }
 
1394
  "keywords": dp["keywords"],
1395
  "description": dp["description"],
1396
  "weight": dp["weight"],
1397
+ "file_path": dp.get("file_path", "custom_kg"),
1398
  }
1399
  for dp in all_relationships_data
1400
  }
lightrag/operate.py CHANGED
@@ -496,6 +496,7 @@ async def _merge_edges_then_upsert(
496
  keywords=keywords,
497
  source_id=source_id,
498
  file_path=file_path,
 
499
  )
500
 
501
  return edge_data
 
496
  keywords=keywords,
497
  source_id=source_id,
498
  file_path=file_path,
499
+ created_at=int(time.time()),
500
  )
501
 
502
  return edge_data
lightrag/utils_graph.py CHANGED
@@ -1,5 +1,6 @@
1
  from __future__ import annotations
2
 
 
3
  import asyncio
4
  from typing import Any, cast
5
 
@@ -479,7 +480,9 @@ async def acreate_entity(
479
  "entity_id": entity_name,
480
  "entity_type": entity_data.get("entity_type", "UNKNOWN"),
481
  "description": entity_data.get("description", ""),
482
- "source_id": entity_data.get("source_id", "manual"),
 
 
483
  }
484
 
485
  # Add entity to knowledge graph
@@ -575,8 +578,10 @@ async def acreate_relation(
575
  edge_data = {
576
  "description": relation_data.get("description", ""),
577
  "keywords": relation_data.get("keywords", ""),
578
- "source_id": relation_data.get("source_id", "manual"),
579
  "weight": float(relation_data.get("weight", 1.0)),
 
 
580
  }
581
 
582
  # Add relation to knowledge graph
 
1
  from __future__ import annotations
2
 
3
+ import time
4
  import asyncio
5
  from typing import Any, cast
6
 
 
480
  "entity_id": entity_name,
481
  "entity_type": entity_data.get("entity_type", "UNKNOWN"),
482
  "description": entity_data.get("description", ""),
483
+ "source_id": entity_data.get("source_id", "manual_creation"),
484
+ "file_path": entity_data.get("file_path", "manual_creation"),
485
+ "created_at": int(time.time()),
486
  }
487
 
488
  # Add entity to knowledge graph
 
578
  edge_data = {
579
  "description": relation_data.get("description", ""),
580
  "keywords": relation_data.get("keywords", ""),
581
+ "source_id": relation_data.get("source_id", "manual_creation"),
582
  "weight": float(relation_data.get("weight", 1.0)),
583
+ "file_path": relation_data.get("file_path", "manual_creation"),
584
+ "created_at": int(time.time()),
585
  }
586
 
587
  # Add relation to knowledge graph