Gurjot Singh
committed on
Commit
·
5b0dc1f
1
Parent(s):
e06a7a0
Fix linting errors
Browse files
- examples/test_faiss.py +6 -11
- lightrag/kg/faiss_impl.py +9 -4
examples/test_faiss.py
CHANGED
@@ -8,7 +8,6 @@ from sentence_transformers import SentenceTransformer
|
|
8 |
from openai import AzureOpenAI
|
9 |
from lightrag import LightRAG, QueryParam
|
10 |
from lightrag.utils import EmbeddingFunc
|
11 |
-
from lightrag.kg.faiss_impl import FaissVectorDBStorage
|
12 |
|
13 |
# Configure Logging
|
14 |
logging.basicConfig(level=logging.INFO)
|
@@ -20,14 +19,10 @@ AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
|
|
20 |
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
|
21 |
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
|
22 |
|
|
|
23 |
async def llm_model_func(
|
24 |
-
prompt,
|
25 |
-
system_prompt=None,
|
26 |
-
history_messages=[],
|
27 |
-
keyword_extraction=False,
|
28 |
-
**kwargs
|
29 |
) -> str:
|
30 |
-
|
31 |
# Create a client for AzureOpenAI
|
32 |
client = AzureOpenAI(
|
33 |
api_key=AZURE_OPENAI_API_KEY,
|
@@ -56,12 +51,12 @@ async def llm_model_func(
|
|
56 |
|
57 |
|
58 |
async def embedding_func(texts: list[str]) -> np.ndarray:
|
59 |
-
model = SentenceTransformer(
|
60 |
embeddings = model.encode(texts, convert_to_numpy=True)
|
61 |
return embeddings
|
62 |
|
|
|
63 |
def main():
|
64 |
-
|
65 |
WORKING_DIR = "./dickens"
|
66 |
|
67 |
# Initialize LightRAG with the LLM model function and embedding function
|
@@ -76,7 +71,7 @@ def main():
|
|
76 |
vector_storage="FaissVectorDBStorage",
|
77 |
vector_db_storage_cls_kwargs={
|
78 |
"cosine_better_than_threshold": 0.3 # Your desired threshold
|
79 |
-
}
|
80 |
)
|
81 |
|
82 |
# Insert the custom chunks into LightRAG
|
@@ -101,4 +96,4 @@ def main():
|
|
101 |
|
102 |
|
103 |
if __name__ == "__main__":
|
104 |
-
main()
|
|
|
8 |
from openai import AzureOpenAI
|
9 |
from lightrag import LightRAG, QueryParam
|
10 |
from lightrag.utils import EmbeddingFunc
|
|
|
11 |
|
12 |
# Configure Logging
|
13 |
logging.basicConfig(level=logging.INFO)
|
|
|
19 |
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
|
20 |
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
|
21 |
|
22 |
+
|
23 |
async def llm_model_func(
|
24 |
+
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
|
|
|
|
|
|
|
|
|
25 |
) -> str:
|
|
|
26 |
# Create a client for AzureOpenAI
|
27 |
client = AzureOpenAI(
|
28 |
api_key=AZURE_OPENAI_API_KEY,
|
|
|
51 |
|
52 |
|
53 |
async def embedding_func(texts: list[str]) -> np.ndarray:
|
54 |
+
model = SentenceTransformer("all-MiniLM-L6-v2")
|
55 |
embeddings = model.encode(texts, convert_to_numpy=True)
|
56 |
return embeddings
|
57 |
|
58 |
+
|
59 |
def main():
|
|
|
60 |
WORKING_DIR = "./dickens"
|
61 |
|
62 |
# Initialize LightRAG with the LLM model function and embedding function
|
|
|
71 |
vector_storage="FaissVectorDBStorage",
|
72 |
vector_db_storage_cls_kwargs={
|
73 |
"cosine_better_than_threshold": 0.3 # Your desired threshold
|
74 |
+
},
|
75 |
)
|
76 |
|
77 |
# Insert the custom chunks into LightRAG
|
|
|
96 |
|
97 |
|
98 |
if __name__ == "__main__":
|
99 |
+
main()
|
lightrag/kg/faiss_impl.py
CHANGED
@@ -22,6 +22,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
22 |
A Faiss-based Vector DB Storage for LightRAG.
|
23 |
Uses cosine similarity by storing normalized vectors in a Faiss index with inner product search.
|
24 |
"""
|
|
|
25 |
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
26 |
|
27 |
def __post_init__(self):
|
@@ -46,7 +47,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
46 |
# For demonstration, we use a simple IndexFlatIP.
|
47 |
self._index = faiss.IndexFlatIP(self._dim)
|
48 |
|
49 |
-
# Keep a local store for metadata, IDs, etc.
|
50 |
# Maps <int faiss_id> → metadata (including your original ID).
|
51 |
self._id_to_meta = {}
|
52 |
|
@@ -93,7 +94,9 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
93 |
for i in range(0, len(contents), self._max_batch_size)
|
94 |
]
|
95 |
|
96 |
-
pbar = tqdm_async(
|
|
|
|
|
97 |
|
98 |
async def wrapped_task(batch):
|
99 |
result = await self.embedding_func(batch)
|
@@ -200,7 +203,9 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
200 |
|
201 |
if to_remove:
|
202 |
self._remove_faiss_ids(to_remove)
|
203 |
-
logger.info(
|
|
|
|
|
204 |
|
205 |
async def delete_entity(self, entity_name: str):
|
206 |
"""
|
@@ -288,7 +293,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
288 |
|
289 |
def _load_faiss_index(self):
|
290 |
"""
|
291 |
-
Load the Faiss index + metadata from disk if it exists,
|
292 |
and rebuild in-memory structures so we can query.
|
293 |
"""
|
294 |
if not os.path.exists(self._faiss_index_file):
|
|
|
22 |
A Faiss-based Vector DB Storage for LightRAG.
|
23 |
Uses cosine similarity by storing normalized vectors in a Faiss index with inner product search.
|
24 |
"""
|
25 |
+
|
26 |
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
27 |
|
28 |
def __post_init__(self):
|
|
|
47 |
# For demonstration, we use a simple IndexFlatIP.
|
48 |
self._index = faiss.IndexFlatIP(self._dim)
|
49 |
|
50 |
+
# Keep a local store for metadata, IDs, etc.
|
51 |
# Maps <int faiss_id> → metadata (including your original ID).
|
52 |
self._id_to_meta = {}
|
53 |
|
|
|
94 |
for i in range(0, len(contents), self._max_batch_size)
|
95 |
]
|
96 |
|
97 |
+
pbar = tqdm_async(
|
98 |
+
total=len(batches), desc="Generating embeddings", unit="batch"
|
99 |
+
)
|
100 |
|
101 |
async def wrapped_task(batch):
|
102 |
result = await self.embedding_func(batch)
|
|
|
203 |
|
204 |
if to_remove:
|
205 |
self._remove_faiss_ids(to_remove)
|
206 |
+
logger.info(
|
207 |
+
f"Successfully deleted {len(to_remove)} vectors from {self.namespace}"
|
208 |
+
)
|
209 |
|
210 |
async def delete_entity(self, entity_name: str):
|
211 |
"""
|
|
|
293 |
|
294 |
def _load_faiss_index(self):
|
295 |
"""
|
296 |
+
Load the Faiss index + metadata from disk if it exists,
|
297 |
and rebuild in-memory structures so we can query.
|
298 |
"""
|
299 |
if not os.path.exists(self._faiss_index_file):
|