Gurjot Singh committed on
Commit
5b0dc1f
·
1 Parent(s): e06a7a0

Fix linting errors

Browse files
examples/test_faiss.py CHANGED
@@ -8,7 +8,6 @@ from sentence_transformers import SentenceTransformer
8
  from openai import AzureOpenAI
9
  from lightrag import LightRAG, QueryParam
10
  from lightrag.utils import EmbeddingFunc
11
- from lightrag.kg.faiss_impl import FaissVectorDBStorage
12
 
13
  # Configure Logging
14
  logging.basicConfig(level=logging.INFO)
@@ -20,14 +19,10 @@ AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
20
  AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
21
  AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
22
 
 
23
  async def llm_model_func(
24
- prompt,
25
- system_prompt=None,
26
- history_messages=[],
27
- keyword_extraction=False,
28
- **kwargs
29
  ) -> str:
30
-
31
  # Create a client for AzureOpenAI
32
  client = AzureOpenAI(
33
  api_key=AZURE_OPENAI_API_KEY,
@@ -56,12 +51,12 @@ async def llm_model_func(
56
 
57
 
58
  async def embedding_func(texts: list[str]) -> np.ndarray:
59
- model = SentenceTransformer('all-MiniLM-L6-v2')
60
  embeddings = model.encode(texts, convert_to_numpy=True)
61
  return embeddings
62
 
 
63
  def main():
64
-
65
  WORKING_DIR = "./dickens"
66
 
67
  # Initialize LightRAG with the LLM model function and embedding function
@@ -76,7 +71,7 @@ def main():
76
  vector_storage="FaissVectorDBStorage",
77
  vector_db_storage_cls_kwargs={
78
  "cosine_better_than_threshold": 0.3 # Your desired threshold
79
- }
80
  )
81
 
82
  # Insert the custom chunks into LightRAG
@@ -101,4 +96,4 @@ def main():
101
 
102
 
103
  if __name__ == "__main__":
104
- main()
 
8
  from openai import AzureOpenAI
9
  from lightrag import LightRAG, QueryParam
10
  from lightrag.utils import EmbeddingFunc
 
11
 
12
  # Configure Logging
13
  logging.basicConfig(level=logging.INFO)
 
19
  AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
20
  AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
21
 
22
+
23
  async def llm_model_func(
24
+ prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
 
 
 
 
25
  ) -> str:
 
26
  # Create a client for AzureOpenAI
27
  client = AzureOpenAI(
28
  api_key=AZURE_OPENAI_API_KEY,
 
51
 
52
 
53
  async def embedding_func(texts: list[str]) -> np.ndarray:
54
+ model = SentenceTransformer("all-MiniLM-L6-v2")
55
  embeddings = model.encode(texts, convert_to_numpy=True)
56
  return embeddings
57
 
58
+
59
  def main():
 
60
  WORKING_DIR = "./dickens"
61
 
62
  # Initialize LightRAG with the LLM model function and embedding function
 
71
  vector_storage="FaissVectorDBStorage",
72
  vector_db_storage_cls_kwargs={
73
  "cosine_better_than_threshold": 0.3 # Your desired threshold
74
+ },
75
  )
76
 
77
  # Insert the custom chunks into LightRAG
 
96
 
97
 
98
  if __name__ == "__main__":
99
+ main()
lightrag/kg/faiss_impl.py CHANGED
@@ -22,6 +22,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
22
  A Faiss-based Vector DB Storage for LightRAG.
23
  Uses cosine similarity by storing normalized vectors in a Faiss index with inner product search.
24
  """
 
25
  cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
26
 
27
  def __post_init__(self):
@@ -46,7 +47,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
46
  # For demonstration, we use a simple IndexFlatIP.
47
  self._index = faiss.IndexFlatIP(self._dim)
48
 
49
- # Keep a local store for metadata, IDs, etc.
50
  # Maps <int faiss_id> → metadata (including your original ID).
51
  self._id_to_meta = {}
52
 
@@ -93,7 +94,9 @@ class FaissVectorDBStorage(BaseVectorStorage):
93
  for i in range(0, len(contents), self._max_batch_size)
94
  ]
95
 
96
- pbar = tqdm_async(total=len(batches), desc="Generating embeddings", unit="batch")
 
 
97
 
98
  async def wrapped_task(batch):
99
  result = await self.embedding_func(batch)
@@ -200,7 +203,9 @@ class FaissVectorDBStorage(BaseVectorStorage):
200
 
201
  if to_remove:
202
  self._remove_faiss_ids(to_remove)
203
- logger.info(f"Successfully deleted {len(to_remove)} vectors from {self.namespace}")
 
 
204
 
205
  async def delete_entity(self, entity_name: str):
206
  """
@@ -288,7 +293,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
288
 
289
  def _load_faiss_index(self):
290
  """
291
- Load the Faiss index + metadata from disk if it exists,
292
  and rebuild in-memory structures so we can query.
293
  """
294
  if not os.path.exists(self._faiss_index_file):
 
22
  A Faiss-based Vector DB Storage for LightRAG.
23
  Uses cosine similarity by storing normalized vectors in a Faiss index with inner product search.
24
  """
25
+
26
  cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
27
 
28
  def __post_init__(self):
 
47
  # For demonstration, we use a simple IndexFlatIP.
48
  self._index = faiss.IndexFlatIP(self._dim)
49
 
50
+ # Keep a local store for metadata, IDs, etc.
51
  # Maps <int faiss_id> → metadata (including your original ID).
52
  self._id_to_meta = {}
53
 
 
94
  for i in range(0, len(contents), self._max_batch_size)
95
  ]
96
 
97
+ pbar = tqdm_async(
98
+ total=len(batches), desc="Generating embeddings", unit="batch"
99
+ )
100
 
101
  async def wrapped_task(batch):
102
  result = await self.embedding_func(batch)
 
203
 
204
  if to_remove:
205
  self._remove_faiss_ids(to_remove)
206
+ logger.info(
207
+ f"Successfully deleted {len(to_remove)} vectors from {self.namespace}"
208
+ )
209
 
210
  async def delete_entity(self, entity_name: str):
211
  """
 
293
 
294
  def _load_faiss_index(self):
295
  """
296
+ Load the Faiss index + metadata from disk if it exists,
297
  and rebuild in-memory structures so we can query.
298
  """
299
  if not os.path.exists(self._faiss_index_file):