LarFii committed on
Commit
39724db
·
1 Parent(s): 0ad6ed5

fix postgres support

Browse files
Files changed (2) hide show
  1. lightrag/kg/postgres_impl.py +22 -8
  2. lightrag/operate.py +6 -10
lightrag/kg/postgres_impl.py CHANGED
@@ -423,6 +423,7 @@ class PGVectorStorage(BaseVectorStorage):
423
  "full_doc_id": item["full_doc_id"],
424
  "content": item["content"],
425
  "content_vector": json.dumps(item["__vector__"].tolist()),
 
426
  }
427
  except Exception as e:
428
  logger.error(f"Error to prepare upsert,\nsql: {e}\nitem: {item}")
@@ -445,6 +446,7 @@ class PGVectorStorage(BaseVectorStorage):
445
  "content": item["content"],
446
  "content_vector": json.dumps(item["__vector__"].tolist()),
447
  "chunk_ids": chunk_ids,
 
448
  # TODO: add document_id
449
  }
450
  return upsert_sql, data
@@ -465,6 +467,7 @@ class PGVectorStorage(BaseVectorStorage):
465
  "content": item["content"],
466
  "content_vector": json.dumps(item["__vector__"].tolist()),
467
  "chunk_ids": chunk_ids,
 
468
  # TODO: add document_id
469
  }
470
  return upsert_sql, data
@@ -740,6 +743,7 @@ class PGDocStatusStorage(DocStatusStorage):
740
  chunks_count=result[0]["chunks_count"],
741
  created_at=result[0]["created_at"],
742
  updated_at=result[0]["updated_at"],
 
743
  )
744
 
745
  async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
@@ -774,6 +778,7 @@ class PGDocStatusStorage(DocStatusStorage):
774
  created_at=element["created_at"],
775
  updated_at=element["updated_at"],
776
  chunks_count=element["chunks_count"],
 
777
  )
778
  for element in result
779
  }
@@ -793,14 +798,15 @@ class PGDocStatusStorage(DocStatusStorage):
793
  if not data:
794
  return
795
 
796
- sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status)
797
- values($1,$2,$3,$4,$5,$6,$7)
798
  on conflict(id,workspace) do update set
799
  content = EXCLUDED.content,
800
  content_summary = EXCLUDED.content_summary,
801
  content_length = EXCLUDED.content_length,
802
  chunks_count = EXCLUDED.chunks_count,
803
  status = EXCLUDED.status,
 
804
  updated_at = CURRENT_TIMESTAMP"""
805
  for k, v in data.items():
806
  # chunks_count is optional
@@ -814,6 +820,7 @@ class PGDocStatusStorage(DocStatusStorage):
814
  "content_length": v["content_length"],
815
  "chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
816
  "status": v["status"],
 
817
  },
818
  )
819
 
@@ -1549,6 +1556,7 @@ TABLES = {
1549
  tokens INTEGER,
1550
  content TEXT,
1551
  content_vector VECTOR,
 
1552
  create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1553
  update_time TIMESTAMP,
1554
  CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
@@ -1564,6 +1572,7 @@ TABLES = {
1564
  create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1565
  update_time TIMESTAMP,
1566
  chunk_id TEXT NULL,
 
1567
  CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
1568
  )"""
1569
  },
@@ -1578,6 +1587,7 @@ TABLES = {
1578
  create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1579
  update_time TIMESTAMP,
1580
  chunk_id TEXT NULL,
 
1581
  CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
1582
  )"""
1583
  },
@@ -1602,6 +1612,7 @@ TABLES = {
1602
  content_length int4 NULL,
1603
  chunks_count int4 NULL,
1604
  status varchar(64) NULL,
 
1605
  created_at timestamp DEFAULT CURRENT_TIMESTAMP NULL,
1606
  updated_at timestamp DEFAULT CURRENT_TIMESTAMP NULL,
1607
  CONSTRAINT LIGHTRAG_DOC_STATUS_PK PRIMARY KEY (workspace, id)
@@ -1650,35 +1661,38 @@ SQL_TEMPLATES = {
1650
  update_time = CURRENT_TIMESTAMP
1651
  """,
1652
  "upsert_chunk": """INSERT INTO LIGHTRAG_DOC_CHUNKS (workspace, id, tokens,
1653
- chunk_order_index, full_doc_id, content, content_vector)
1654
- VALUES ($1, $2, $3, $4, $5, $6, $7)
1655
  ON CONFLICT (workspace,id) DO UPDATE
1656
  SET tokens=EXCLUDED.tokens,
1657
  chunk_order_index=EXCLUDED.chunk_order_index,
1658
  full_doc_id=EXCLUDED.full_doc_id,
1659
  content = EXCLUDED.content,
1660
  content_vector=EXCLUDED.content_vector,
 
1661
  update_time = CURRENT_TIMESTAMP
1662
  """,
1663
  "upsert_entity": """INSERT INTO LIGHTRAG_VDB_ENTITY (workspace, id, entity_name, content,
1664
- content_vector, chunk_ids)
1665
- VALUES ($1, $2, $3, $4, $5, $6::varchar[])
1666
  ON CONFLICT (workspace,id) DO UPDATE
1667
  SET entity_name=EXCLUDED.entity_name,
1668
  content=EXCLUDED.content,
1669
  content_vector=EXCLUDED.content_vector,
1670
  chunk_ids=EXCLUDED.chunk_ids,
 
1671
  update_time=CURRENT_TIMESTAMP
1672
  """,
1673
  "upsert_relationship": """INSERT INTO LIGHTRAG_VDB_RELATION (workspace, id, source_id,
1674
- target_id, content, content_vector, chunk_ids)
1675
- VALUES ($1, $2, $3, $4, $5, $6, $7::varchar[])
1676
  ON CONFLICT (workspace,id) DO UPDATE
1677
  SET source_id=EXCLUDED.source_id,
1678
  target_id=EXCLUDED.target_id,
1679
  content=EXCLUDED.content,
1680
  content_vector=EXCLUDED.content_vector,
1681
  chunk_ids=EXCLUDED.chunk_ids,
 
1682
  update_time = CURRENT_TIMESTAMP
1683
  """,
1684
  # SQL for VectorStorage
 
423
  "full_doc_id": item["full_doc_id"],
424
  "content": item["content"],
425
  "content_vector": json.dumps(item["__vector__"].tolist()),
426
+ "file_path": item["file_path"],
427
  }
428
  except Exception as e:
429
  logger.error(f"Error to prepare upsert,\nsql: {e}\nitem: {item}")
 
446
  "content": item["content"],
447
  "content_vector": json.dumps(item["__vector__"].tolist()),
448
  "chunk_ids": chunk_ids,
449
+ "file_path": item["file_path"],
450
  # TODO: add document_id
451
  }
452
  return upsert_sql, data
 
467
  "content": item["content"],
468
  "content_vector": json.dumps(item["__vector__"].tolist()),
469
  "chunk_ids": chunk_ids,
470
+ "file_path": item["file_path"],
471
  # TODO: add document_id
472
  }
473
  return upsert_sql, data
 
743
  chunks_count=result[0]["chunks_count"],
744
  created_at=result[0]["created_at"],
745
  updated_at=result[0]["updated_at"],
746
+ file_path=result[0]["file_path"],
747
  )
748
 
749
  async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
 
778
  created_at=element["created_at"],
779
  updated_at=element["updated_at"],
780
  chunks_count=element["chunks_count"],
781
+ file_path=element["file_path"],
782
  )
783
  for element in result
784
  }
 
798
  if not data:
799
  return
800
 
801
+ sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status,file_path)
802
+ values($1,$2,$3,$4,$5,$6,$7,$8)
803
  on conflict(id,workspace) do update set
804
  content = EXCLUDED.content,
805
  content_summary = EXCLUDED.content_summary,
806
  content_length = EXCLUDED.content_length,
807
  chunks_count = EXCLUDED.chunks_count,
808
  status = EXCLUDED.status,
809
+ file_path = EXCLUDED.file_path,
810
  updated_at = CURRENT_TIMESTAMP"""
811
  for k, v in data.items():
812
  # chunks_count is optional
 
820
  "content_length": v["content_length"],
821
  "chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
822
  "status": v["status"],
823
+ "file_path": v["file_path"],
824
  },
825
  )
826
 
 
1556
  tokens INTEGER,
1557
  content TEXT,
1558
  content_vector VECTOR,
1559
+ file_path VARCHAR(256),
1560
  create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1561
  update_time TIMESTAMP,
1562
  CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
 
1572
  create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1573
  update_time TIMESTAMP,
1574
  chunk_id TEXT NULL,
1575
+ file_path TEXT NULL,
1576
  CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
1577
  )"""
1578
  },
 
1587
  create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1588
  update_time TIMESTAMP,
1589
  chunk_id TEXT NULL,
1590
+ file_path TEXT NULL,
1591
  CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
1592
  )"""
1593
  },
 
1612
  content_length int4 NULL,
1613
  chunks_count int4 NULL,
1614
  status varchar(64) NULL,
1615
+ file_path TEXT NULL,
1616
  created_at timestamp DEFAULT CURRENT_TIMESTAMP NULL,
1617
  updated_at timestamp DEFAULT CURRENT_TIMESTAMP NULL,
1618
  CONSTRAINT LIGHTRAG_DOC_STATUS_PK PRIMARY KEY (workspace, id)
 
1661
  update_time = CURRENT_TIMESTAMP
1662
  """,
1663
  "upsert_chunk": """INSERT INTO LIGHTRAG_DOC_CHUNKS (workspace, id, tokens,
1664
+ chunk_order_index, full_doc_id, content, content_vector, file_path)
1665
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
1666
  ON CONFLICT (workspace,id) DO UPDATE
1667
  SET tokens=EXCLUDED.tokens,
1668
  chunk_order_index=EXCLUDED.chunk_order_index,
1669
  full_doc_id=EXCLUDED.full_doc_id,
1670
  content = EXCLUDED.content,
1671
  content_vector=EXCLUDED.content_vector,
1672
+ file_path=EXCLUDED.file_path,
1673
  update_time = CURRENT_TIMESTAMP
1674
  """,
1675
  "upsert_entity": """INSERT INTO LIGHTRAG_VDB_ENTITY (workspace, id, entity_name, content,
1676
+ content_vector, chunk_ids, file_path)
1677
+ VALUES ($1, $2, $3, $4, $5, $6::varchar[], $7::varchar[])
1678
  ON CONFLICT (workspace,id) DO UPDATE
1679
  SET entity_name=EXCLUDED.entity_name,
1680
  content=EXCLUDED.content,
1681
  content_vector=EXCLUDED.content_vector,
1682
  chunk_ids=EXCLUDED.chunk_ids,
1683
+ file_path=EXCLUDED.file_path,
1684
  update_time=CURRENT_TIMESTAMP
1685
  """,
1686
  "upsert_relationship": """INSERT INTO LIGHTRAG_VDB_RELATION (workspace, id, source_id,
1687
+ target_id, content, content_vector, chunk_ids, file_path)
1688
+ VALUES ($1, $2, $3, $4, $5, $6, $7::varchar[], $8::varchar[])
1689
  ON CONFLICT (workspace,id) DO UPDATE
1690
  SET source_id=EXCLUDED.source_id,
1691
  target_id=EXCLUDED.target_id,
1692
  content=EXCLUDED.content,
1693
  content_vector=EXCLUDED.content_vector,
1694
  chunk_ids=EXCLUDED.chunk_ids,
1695
+ file_path=EXCLUDED.file_path,
1696
  update_time = CURRENT_TIMESTAMP
1697
  """,
1698
  # SQL for VectorStorage
lightrag/operate.py CHANGED
@@ -677,12 +677,10 @@ async def extract_entities(
677
  "entity_type": dp["entity_type"],
678
  "content": f"{dp['entity_name']}\n{dp['description']}",
679
  "source_id": dp["source_id"],
680
- "file_path": dp.get("metadata", {}).get("file_path", "unknown_source"),
681
  "metadata": {
682
- "created_at": dp.get("metadata", {}).get("created_at", time.time()),
683
- "file_path": dp.get("metadata", {}).get(
684
- "file_path", "unknown_source"
685
- ),
686
  },
687
  }
688
  for dp in all_entities_data
@@ -697,12 +695,10 @@ async def extract_entities(
697
  "keywords": dp["keywords"],
698
  "content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
699
  "source_id": dp["source_id"],
700
- "file_path": dp.get("metadata", {}).get("file_path", "unknown_source"),
701
  "metadata": {
702
- "created_at": dp.get("metadata", {}).get("created_at", time.time()),
703
- "file_path": dp.get("metadata", {}).get(
704
- "file_path", "unknown_source"
705
- ),
706
  },
707
  }
708
  for dp in all_relationships_data
 
677
  "entity_type": dp["entity_type"],
678
  "content": f"{dp['entity_name']}\n{dp['description']}",
679
  "source_id": dp["source_id"],
680
+ "file_path": dp.get("file_path", "unknown_source"),
681
  "metadata": {
682
+ "created_at": dp.get("created_at", time.time()),
683
+ "file_path": dp.get("file_path", "unknown_source"),
 
 
684
  },
685
  }
686
  for dp in all_entities_data
 
695
  "keywords": dp["keywords"],
696
  "content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
697
  "source_id": dp["source_id"],
698
+ "file_path": dp.get("file_path", "unknown_source"),
699
  "metadata": {
700
+ "created_at": dp.get("created_at", time.time()),
701
+ "file_path": dp.get("file_path", "unknown_source"),
 
 
702
  },
703
  }
704
  for dp in all_relationships_data