fix postgres support
Changed files:
- lightrag/kg/postgres_impl.py  (+22, -8)
- lightrag/operate.py  (+6, -10)
lightrag/kg/postgres_impl.py  (CHANGED)

@@ -423,6 +423,7 @@ class PGVectorStorage(BaseVectorStorage):
                 "full_doc_id": item["full_doc_id"],
                 "content": item["content"],
                 "content_vector": json.dumps(item["__vector__"].tolist()),
+                "file_path": item["file_path"],
             }
         except Exception as e:
             logger.error(f"Error to prepare upsert,\nsql: {e}\nitem: {item}")
@@ -445,6 +446,7 @@ class PGVectorStorage(BaseVectorStorage):
             "content": item["content"],
             "content_vector": json.dumps(item["__vector__"].tolist()),
             "chunk_ids": chunk_ids,
+            "file_path": item["file_path"],
             # TODO: add document_id
         }
         return upsert_sql, data
@@ -465,6 +467,7 @@ class PGVectorStorage(BaseVectorStorage):
             "content": item["content"],
             "content_vector": json.dumps(item["__vector__"].tolist()),
             "chunk_ids": chunk_ids,
+            "file_path": item["file_path"],
             # TODO: add document_id
         }
         return upsert_sql, data
@@ -740,6 +743,7 @@ class PGDocStatusStorage(DocStatusStorage):
                 chunks_count=result[0]["chunks_count"],
                 created_at=result[0]["created_at"],
                 updated_at=result[0]["updated_at"],
+                file_path=result[0]["file_path"],
             )

     async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
@@ -774,6 +778,7 @@ class PGDocStatusStorage(DocStatusStorage):
                 created_at=element["created_at"],
                 updated_at=element["updated_at"],
                 chunks_count=element["chunks_count"],
+                file_path=element["file_path"],
             )
             for element in result
         }
@@ -793,14 +798,15 @@ class PGDocStatusStorage(DocStatusStorage):
         if not data:
             return

-        sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status)
-                 values($1,$2,$3,$4,$5,$6,$7)
+        sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status,file_path)
+                 values($1,$2,$3,$4,$5,$6,$7,$8)
                  on conflict(id,workspace) do update set
                  content = EXCLUDED.content,
                  content_summary = EXCLUDED.content_summary,
                  content_length = EXCLUDED.content_length,
                  chunks_count = EXCLUDED.chunks_count,
                  status = EXCLUDED.status,
+                 file_path = EXCLUDED.file_path,
                  updated_at = CURRENT_TIMESTAMP"""
         for k, v in data.items():
             # chunks_count is optional
@@ -814,6 +820,7 @@ class PGDocStatusStorage(DocStatusStorage):
                     "content_length": v["content_length"],
                     "chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
                     "status": v["status"],
+                    "file_path": v["file_path"],
                 },
             )

@@ -1549,6 +1556,7 @@ TABLES = {
                    tokens INTEGER,
                    content TEXT,
                    content_vector VECTOR,
+                   file_path VARCHAR(256),
                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    update_time TIMESTAMP,
                    CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
@@ -1564,6 +1572,7 @@ TABLES = {
                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    update_time TIMESTAMP,
                    chunk_id TEXT NULL,
+                   file_path TEXT NULL,
                    CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
                    )"""
    },
@@ -1578,6 +1587,7 @@ TABLES = {
                    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    update_time TIMESTAMP,
                    chunk_id TEXT NULL,
+                   file_path TEXT NULL,
                    CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
                    )"""
    },
@@ -1602,6 +1612,7 @@ TABLES = {
                    content_length int4 NULL,
                    chunks_count int4 NULL,
                    status varchar(64) NULL,
+                   file_path TEXT NULL,
                    created_at timestamp DEFAULT CURRENT_TIMESTAMP NULL,
                    updated_at timestamp DEFAULT CURRENT_TIMESTAMP NULL,
                    CONSTRAINT LIGHTRAG_DOC_STATUS_PK PRIMARY KEY (workspace, id)
@@ -1650,35 +1661,38 @@ SQL_TEMPLATES = {
                      update_time = CURRENT_TIMESTAMP
                     """,
    "upsert_chunk": """INSERT INTO LIGHTRAG_DOC_CHUNKS (workspace, id, tokens,
-                      chunk_order_index, full_doc_id, content, content_vector)
-                      VALUES ($1, $2, $3, $4, $5, $6, $7)
+                      chunk_order_index, full_doc_id, content, content_vector, file_path)
+                      VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
                       ON CONFLICT (workspace,id) DO UPDATE
                       SET tokens=EXCLUDED.tokens,
                       chunk_order_index=EXCLUDED.chunk_order_index,
                       full_doc_id=EXCLUDED.full_doc_id,
                       content = EXCLUDED.content,
                       content_vector=EXCLUDED.content_vector,
+                      file_path=EXCLUDED.file_path,
                       update_time = CURRENT_TIMESTAMP
                      """,
    "upsert_entity": """INSERT INTO LIGHTRAG_VDB_ENTITY (workspace, id, entity_name, content,
-                       content_vector, chunk_ids)
-                       VALUES ($1, $2, $3, $4, $5, $6::varchar[])
+                       content_vector, chunk_ids, file_path)
+                       VALUES ($1, $2, $3, $4, $5, $6::varchar[], $7::varchar[])
                        ON CONFLICT (workspace,id) DO UPDATE
                        SET entity_name=EXCLUDED.entity_name,
                        content=EXCLUDED.content,
                        content_vector=EXCLUDED.content_vector,
                        chunk_ids=EXCLUDED.chunk_ids,
+                       file_path=EXCLUDED.file_path,
                        update_time=CURRENT_TIMESTAMP
                       """,
    "upsert_relationship": """INSERT INTO LIGHTRAG_VDB_RELATION (workspace, id, source_id,
-                              target_id, content, content_vector, chunk_ids)
-                              VALUES ($1, $2, $3, $4, $5, $6, $7::varchar[])
+                              target_id, content, content_vector, chunk_ids, file_path)
+                              VALUES ($1, $2, $3, $4, $5, $6, $7::varchar[], $8::varchar[])
                               ON CONFLICT (workspace,id) DO UPDATE
                               SET source_id=EXCLUDED.source_id,
                               target_id=EXCLUDED.target_id,
                               content=EXCLUDED.content,
                               content_vector=EXCLUDED.content_vector,
                               chunk_ids=EXCLUDED.chunk_ids,
+                              file_path=EXCLUDED.file_path,
                               update_time = CURRENT_TIMESTAMP
                              """,
    # SQL for VectorStorage
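Note that the TABLES definitions above only run when a table is created for the first time, so an existing LightRAG Postgres workspace will not pick up the new file_path columns automatically. Below is a minimal migration sketch, not part of this commit, assuming asyncpg as the driver (which the $1-style placeholders above suggest) and the same column types as the DDL; the DSN is a placeholder.

# Migration sketch (assumption, not part of this commit): add the new
# file_path columns to an existing LightRAG Postgres schema, mirroring the
# column types used in the CREATE TABLE statements above.
import asyncio

import asyncpg

ALTER_STATEMENTS = [
    "ALTER TABLE LIGHTRAG_DOC_CHUNKS   ADD COLUMN IF NOT EXISTS file_path VARCHAR(256)",
    "ALTER TABLE LIGHTRAG_VDB_ENTITY   ADD COLUMN IF NOT EXISTS file_path TEXT NULL",
    "ALTER TABLE LIGHTRAG_VDB_RELATION ADD COLUMN IF NOT EXISTS file_path TEXT NULL",
    "ALTER TABLE LIGHTRAG_DOC_STATUS   ADD COLUMN IF NOT EXISTS file_path TEXT NULL",
]

async def migrate(dsn: str) -> None:
    conn = await asyncpg.connect(dsn)
    try:
        for stmt in ALTER_STATEMENTS:
            await conn.execute(stmt)  # IF NOT EXISTS keeps re-runs harmless
    finally:
        await conn.close()

if __name__ == "__main__":
    # Placeholder DSN; point it at the database LightRAG is configured to use.
    asyncio.run(migrate("postgres://user:password@localhost:5432/lightrag"))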
lightrag/operate.py  (CHANGED)

@@ -677,12 +677,10 @@ async def extract_entities(
                 "entity_type": dp["entity_type"],
                 "content": f"{dp['entity_name']}\n{dp['description']}",
                 "source_id": dp["source_id"],
-                "file_path": dp.get("
+                "file_path": dp.get("file_path", "unknown_source"),
                 "metadata": {
-                    "created_at": dp.get("
-                    "file_path": dp.get("
-                        "file_path", "unknown_source"
-                    ),
+                    "created_at": dp.get("created_at", time.time()),
+                    "file_path": dp.get("file_path", "unknown_source"),
                 },
             }
             for dp in all_entities_data
@@ -697,12 +695,10 @@ async def extract_entities(
                 "keywords": dp["keywords"],
                 "content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
                 "source_id": dp["source_id"],
-                "file_path": dp.get("
+                "file_path": dp.get("file_path", "unknown_source"),
                 "metadata": {
-                    "created_at": dp.get("
-                    "file_path": dp.get("
-                        "file_path", "unknown_source"
-                    ),
+                    "created_at": dp.get("created_at", time.time()),
+                    "file_path": dp.get("file_path", "unknown_source"),
                 },
             }
             for dp in all_relationships_data
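For reference, the restored payload construction relies on dict.get fallbacks, so extraction records that predate file_path tracking still upsert cleanly. A short illustration with a hypothetical dp record (the sample values here are made up, not taken from the repo):

# Illustration only: how the dp.get(...) fallbacks behave for an entity
# record extracted without file_path/created_at. Hypothetical sample data.
import time

dp = {
    "entity_name": "ExampleEntity",
    "entity_type": "organization",
    "description": "A sample description.",
    "source_id": "chunk-123",
}

payload = {
    "entity_type": dp["entity_type"],
    "content": f"{dp['entity_name']}\n{dp['description']}",
    "source_id": dp["source_id"],
    "file_path": dp.get("file_path", "unknown_source"),   # falls back to "unknown_source"
    "metadata": {
        "created_at": dp.get("created_at", time.time()),  # falls back to the current time
        "file_path": dp.get("file_path", "unknown_source"),
    },
}

print(payload["file_path"])               # unknown_source
print(payload["metadata"]["created_at"])  # current Unix timestamp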