Spaces:

rm-lht
/

lightrag

Configuration error

App Files Files Community

YanSte commited on Feb 18

Commit

d5e3b99

1 Parent(s): ef85051

remove tqdm and cleaned readme and ollama

Browse files

Files changed (16) hide show

README.md +0 -10
lightrag/api/requirements.txt +0 -1
lightrag/kg/faiss_impl.py +1 -11
lightrag/kg/milvus_impl.py +1 -10
lightrag/kg/mongo_impl.py +1 -11
lightrag/kg/nano_vector_db_impl.py +1 -10
lightrag/kg/postgres_impl.py +1 -10
lightrag/kg/qdrant_impl.py +1 -10
lightrag/kg/redis_impl.py +2 -2
lightrag/kg/tidb_impl.py +2 -10
lightrag/llm/ollama.py +3 -3
lightrag/operate.py +16 -46
lightrag/utils.py +0 -3
reproduce/Step_3.py +1 -2
reproduce/Step_3_openai_compatible.py +1 -2
requirements.txt +0 -1

README.md CHANGED Viewed

@@ -344,16 +344,6 @@ rag = LightRAG(
     ),
 )
 ```
-#### Fully functional example
-There fully functional example `examples/lightrag_ollama_demo.py` that utilizes `gemma2:2b` model, runs only 4 requests in parallel and set context size to 32k.
-#### Using "Thinking" Models (e.g., DeepSeek)
-To return only the model's response, you can pass `reasoning_tag` in `llm_model_kwargs`.
-For example, for DeepSeek models, `reasoning_tag` should be set to `think`.
 #### Low RAM GPUs
 In order to run this experiment on low RAM GPU you should select small model and tune context window (increasing context increase memory consumption). For example, running this ollama example on repurposed mining GPU with 6Gb of RAM required to set context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.

     ),
 )
 ```
 #### Low RAM GPUs
 In order to run this experiment on low RAM GPU you should select small model and tune context window (increasing context increase memory consumption). For example, running this ollama example on repurposed mining GPU with 6Gb of RAM required to set context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.

lightrag/api/requirements.txt CHANGED Viewed

@@ -7,5 +7,4 @@ python-multipart
 tenacity
 tiktoken
 torch
-tqdm
 uvicorn

 tenacity
 tiktoken
 torch
 uvicorn

lightrag/kg/faiss_impl.py CHANGED Viewed

@@ -22,7 +22,6 @@ if not pm.is_installed("faiss"):
 try:
     import faiss
-    from tqdm.asyncio import tqdm as tqdm_async
 except ImportError as e:
     raise ImportError(
         "`faiss` library is not installed. Please install it via pip: `pip install faiss`."
@@ -109,16 +108,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
-        pbar = tqdm_async(
-            total=len(batches), desc="Generating embeddings", unit="batch"
-        )
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)
         # Flatten the list of arrays

 try:
     import faiss
 except ImportError as e:
     raise ImportError(
         "`faiss` library is not installed. Please install it via pip: `pip install faiss`."
             for i in range(0, len(contents), self._max_batch_size)
         ]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)
         # Flatten the list of arrays

lightrag/kg/milvus_impl.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 from lightrag.utils import logger
@@ -94,15 +93,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)

 import asyncio
 import os
 from typing import Any, final
 from dataclasses import dataclass
 import numpy as np
 from lightrag.utils import logger
             for i in range(0, len(contents), self._max_batch_size)
         ]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)

lightrag/kg/mongo_impl.py CHANGED Viewed

@@ -2,7 +2,6 @@ import os
 from dataclasses import dataclass
 import numpy as np
 import configparser
-from tqdm.asyncio import tqdm as tqdm_async
 import asyncio
 from typing import Any, List, Union, final
@@ -854,17 +853,8 @@ class MongoVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)
         for i, d in enumerate(list_data):
             d["vector"] = np.array(embeddings[i], dtype=np.float32).tolist()

 from dataclasses import dataclass
 import numpy as np
 import configparser
 import asyncio
 from typing import Any, List, Union, final
             for i in range(0, len(contents), self._max_batch_size)
         ]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)
         for i, d in enumerate(list_data):
             d["vector"] = np.array(embeddings[i], dtype=np.float32).tolist()

lightrag/kg/nano_vector_db_impl.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
@@ -71,15 +70,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)

 import asyncio
 import os
 from typing import Any, final
 from dataclasses import dataclass
 import numpy as np
             for i in range(0, len(contents), self._max_batch_size)
         ]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)

lightrag/kg/postgres_impl.py CHANGED Viewed

@@ -41,7 +41,6 @@ if not pm.is_installed("asyncpg"):
 try:
     import asyncpg
-    from tqdm.asyncio import tqdm as tqdm_async
 except ImportError as e:
     raise ImportError(
@@ -380,15 +379,7 @@ class PGVectorStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)

 try:
     import asyncpg
 except ImportError as e:
     raise ImportError(
             for i in range(0, len(contents), self._max_batch_size)
         ]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)

lightrag/kg/qdrant_impl.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 import hashlib
@@ -110,15 +109,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)

 import asyncio
 import os
 from typing import Any, final
 from dataclasses import dataclass
 import numpy as np
 import hashlib
             for i in range(0, len(contents), self._max_batch_size)
         ]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)

lightrag/kg/redis_impl.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import pipmaster as pm
 import configparser
@@ -51,7 +50,8 @@ class RedisKVStorage(BaseKVStorage):
     async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
         pipe = self._redis.pipeline()
-        for k, v in tqdm_async(data.items(), desc="Upserting"):
             pipe.set(f"{self.namespace}:{k}", json.dumps(v))
         await pipe.execute()

 import os
 from typing import Any, final
 from dataclasses import dataclass
 import pipmaster as pm
 import configparser
     async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
         pipe = self._redis.pipeline()
+        for k, v in data.items():
             pipe.set(f"{self.namespace}:{k}", json.dumps(v))
         await pipe.execute()

lightrag/kg/tidb_impl.py CHANGED Viewed

@@ -7,7 +7,6 @@ import numpy as np
 from lightrag.types import KnowledgeGraph
-from tqdm import tqdm
 from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
 from ..namespace import NameSpace, is_namespace
@@ -270,15 +269,8 @@ class TiDBVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
         embedding_tasks = [self.embedding_func(batch) for batch in batches]
-        embeddings_list = []
-        for f in tqdm(
-            asyncio.as_completed(embedding_tasks),
-            total=len(embedding_tasks),
-            desc="Generating embeddings",
-            unit="batch",
-        ):
-            embeddings = await f
-            embeddings_list.append(embeddings)
         embeddings = np.concatenate(embeddings_list)
         for i, d in enumerate(list_data):
             d["content_vector"] = embeddings[i]

 from lightrag.types import KnowledgeGraph
 from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
 from ..namespace import NameSpace, is_namespace
             for i in range(0, len(contents), self._max_batch_size)
         ]
         embedding_tasks = [self.embedding_func(batch) for batch in batches]
+        embeddings_list = await asyncio.gather(*embedding_tasks)
         embeddings = np.concatenate(embeddings_list)
         for i, d in enumerate(list_data):
             d["content_vector"] = embeddings[i]

lightrag/llm/ollama.py CHANGED Viewed

@@ -4,7 +4,7 @@ if sys.version_info < (3, 9):
     from typing import AsyncIterator
 else:
     from collections.abc import AsyncIterator
 import pipmaster as pm  # Pipmaster for dynamic library install
 # install specific modules
@@ -48,7 +48,7 @@ async def _ollama_model_if_cache(
     **kwargs,
 ) -> Union[str, AsyncIterator[str]]:
     stream = True if kwargs.get("stream") else False
     kwargs.pop("max_tokens", None)
     # kwargs.pop("response_format", None) # allow json
     host = kwargs.pop("host", None)
@@ -129,4 +129,4 @@ async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
     kwargs["headers"] = headers
     ollama_client = ollama.Client(**kwargs)
     data = ollama_client.embed(model=embed_model, input=texts)
-    return data["embeddings"]

     from typing import AsyncIterator
 else:
     from collections.abc import AsyncIterator
 import pipmaster as pm  # Pipmaster for dynamic library install
 # install specific modules
     **kwargs,
 ) -> Union[str, AsyncIterator[str]]:
     stream = True if kwargs.get("stream") else False
     kwargs.pop("max_tokens", None)
     # kwargs.pop("response_format", None) # allow json
     host = kwargs.pop("host", None)
     kwargs["headers"] = headers
     ollama_client = ollama.Client(**kwargs)
     data = ollama_client.embed(model=embed_model, input=texts)
+    return data["embeddings"]

lightrag/operate.py CHANGED Viewed

@@ -3,7 +3,6 @@ from __future__ import annotations
 import asyncio
 import json
 import re
-from tqdm.asyncio import tqdm as tqdm_async
 from typing import Any, AsyncIterator
 from collections import Counter, defaultdict
 from .utils import (
@@ -500,16 +499,8 @@ async def extract_entities(
         )
         return dict(maybe_nodes), dict(maybe_edges)
-    results = []
-    for result in tqdm_async(
-        asyncio.as_completed([_process_single_content(c) for c in ordered_chunks]),
-        total=len(ordered_chunks),
-        desc="Level 2 - Extracting entities and relationships",
-        unit="chunk",
-        position=1,
-        leave=False,
-    ):
-        results.append(await result)
     maybe_nodes = defaultdict(list)
     maybe_edges = defaultdict(list)
@@ -518,41 +509,20 @@ async def extract_entities(
             maybe_nodes[k].extend(v)
         for k, v in m_edges.items():
             maybe_edges[tuple(sorted(k))].extend(v)
-    logger.debug("Inserting entities into storage...")
-    all_entities_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
-                for k, v in maybe_nodes.items()
-            ]
-        ),
-        total=len(maybe_nodes),
-        desc="Level 3 - Inserting entities",
-        unit="entity",
-        position=2,
-        leave=False,
-    ):
-        all_entities_data.append(await result)
-    logger.debug("Inserting relationships into storage...")
-    all_relationships_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_edges_then_upsert(
-                    k[0], k[1], v, knowledge_graph_inst, global_config
-                )
-                for k, v in maybe_edges.items()
-            ]
-        ),
-        total=len(maybe_edges),
-        desc="Level 3 - Inserting relationships",
-        unit="relationship",
-        position=3,
-        leave=False,
-    ):
-        all_relationships_data.append(await result)
     if not len(all_entities_data) and not len(all_relationships_data):
         logger.warning(

 import asyncio
 import json
 import re
 from typing import Any, AsyncIterator
 from collections import Counter, defaultdict
 from .utils import (
         )
         return dict(maybe_nodes), dict(maybe_edges)
+    tasks = [_process_single_content(c) for c in ordered_chunks]
+    results = await asyncio.gather(*tasks)
     maybe_nodes = defaultdict(list)
     maybe_edges = defaultdict(list)
             maybe_nodes[k].extend(v)
         for k, v in m_edges.items():
             maybe_edges[tuple(sorted(k))].extend(v)
+    all_entities_data = await asyncio.gather(
+        *[
+            _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
+            for k, v in maybe_nodes.items()
+        ]
+    )
+    all_relationships_data = await asyncio.gather(
+        *[
+            _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
+            for k, v in maybe_edges.items()
+        ]
+    )
     if not len(all_entities_data) and not len(all_relationships_data):
         logger.warning(

lightrag/utils.py CHANGED Viewed

@@ -19,7 +19,6 @@ import tiktoken
 from lightrag.prompt import PROMPTS
 VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
@@ -84,7 +83,6 @@ class EmbeddingFunc:
         return await self.func(*args, **kwargs)
 def locate_json_string_body_from_string(content: str) -> str | None:
     """Locate the JSON string body from a string"""
     try:
@@ -715,4 +713,3 @@ def get_conversation_turns(
         )
     return "\n".join(formatted_turns)

 from lightrag.prompt import PROMPTS
 VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
         return await self.func(*args, **kwargs)
 def locate_json_string_body_from_string(content: str) -> str | None:
     """Locate the JSON string body from a string"""
     try:
         )
     return "\n".join(formatted_turns)

reproduce/Step_3.py CHANGED Viewed

@@ -2,7 +2,6 @@ import re
 import json
 import asyncio
 from lightrag import LightRAG, QueryParam
-from tqdm import tqdm
 def extract_queries(file_path):
@@ -44,7 +43,7 @@ def run_queries_and_save_to_json(
         result_file.write("[\n")
         first_entry = True
-        for query_text in tqdm(queries, desc="Processing queries", unit="query"):
             result, error = loop.run_until_complete(
                 process_query(query_text, rag_instance, query_param)
             )

 import json
 import asyncio
 from lightrag import LightRAG, QueryParam
 def extract_queries(file_path):
         result_file.write("[\n")
         first_entry = True
+        for query_text in queries:
             result, error = loop.run_until_complete(
                 process_query(query_text, rag_instance, query_param)
             )

reproduce/Step_3_openai_compatible.py CHANGED Viewed

@@ -3,7 +3,6 @@ import re
 import json
 import asyncio
 from lightrag import LightRAG, QueryParam
-from tqdm import tqdm
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.utils import EmbeddingFunc
 import numpy as np
@@ -76,7 +75,7 @@ def run_queries_and_save_to_json(
         result_file.write("[\n")
         first_entry = True
-        for query_text in tqdm(queries, desc="Processing queries", unit="query"):
             result, error = loop.run_until_complete(
                 process_query(query_text, rag_instance, query_param)
             )

 import json
 import asyncio
 from lightrag import LightRAG, QueryParam
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.utils import EmbeddingFunc
 import numpy as np
         result_file.write("[\n")
         first_entry = True
+        for query_text in queries:
             result, error = loop.run_until_complete(
                 process_query(query_text, rag_instance, query_param)
             )

requirements.txt CHANGED Viewed

@@ -22,7 +22,6 @@ tenacity
 # LLM packages
 tiktoken
-tqdm
 xxhash
 # Extra libraries are installed when needed using pipmaster

 # LLM packages
 tiktoken
 xxhash
 # Extra libraries are installed when needed using pipmaster