YanSte committed on
Commit
d5e3b99
·
1 Parent(s): ef85051

remove tqdm and cleaned readme and ollama

Browse files
README.md CHANGED
@@ -344,16 +344,6 @@ rag = LightRAG(
344
  ),
345
  )
346
  ```
347
- #### Fully functional example
348
-
349
- There is a fully functional example, `examples/lightrag_ollama_demo.py`, that uses the `gemma2:2b` model, runs only 4 requests in parallel, and sets the context size to 32k.
350
-
351
- #### Using "Thinking" Models (e.g., DeepSeek)
352
-
353
- To return only the model's response, you can pass `reasoning_tag` in `llm_model_kwargs`.
354
-
355
- For example, for DeepSeek models, `reasoning_tag` should be set to `think`.
356
-
357
  #### Low RAM GPUs
358
 
359
  To run this experiment on a low-RAM GPU, you should select a small model and tune the context window (increasing the context increases memory consumption). For example, running this ollama example on a repurposed mining GPU with 6 GB of RAM required setting the context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
 
344
  ),
345
  )
346
  ```
 
 
 
 
 
 
 
 
 
 
347
  #### Low RAM GPUs
348
 
349
  To run this experiment on a low-RAM GPU, you should select a small model and tune the context window (increasing the context increases memory consumption). For example, running this ollama example on a repurposed mining GPU with 6 GB of RAM required setting the context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
lightrag/api/requirements.txt CHANGED
@@ -7,5 +7,4 @@ python-multipart
7
  tenacity
8
  tiktoken
9
  torch
10
- tqdm
11
  uvicorn
 
7
  tenacity
8
  tiktoken
9
  torch
 
10
  uvicorn
lightrag/kg/faiss_impl.py CHANGED
@@ -22,7 +22,6 @@ if not pm.is_installed("faiss"):
22
 
23
  try:
24
  import faiss
25
- from tqdm.asyncio import tqdm as tqdm_async
26
  except ImportError as e:
27
  raise ImportError(
28
  "`faiss` library is not installed. Please install it via pip: `pip install faiss`."
@@ -109,16 +108,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
109
  for i in range(0, len(contents), self._max_batch_size)
110
  ]
111
 
112
- pbar = tqdm_async(
113
- total=len(batches), desc="Generating embeddings", unit="batch"
114
- )
115
-
116
- async def wrapped_task(batch):
117
- result = await self.embedding_func(batch)
118
- pbar.update(1)
119
- return result
120
-
121
- embedding_tasks = [wrapped_task(batch) for batch in batches]
122
  embeddings_list = await asyncio.gather(*embedding_tasks)
123
 
124
  # Flatten the list of arrays
 
22
 
23
  try:
24
  import faiss
 
25
  except ImportError as e:
26
  raise ImportError(
27
  "`faiss` library is not installed. Please install it via pip: `pip install faiss`."
 
108
  for i in range(0, len(contents), self._max_batch_size)
109
  ]
110
 
111
+ embedding_tasks = [self.embedding_func(batch) for batch in batches]
 
 
 
 
 
 
 
 
 
112
  embeddings_list = await asyncio.gather(*embedding_tasks)
113
 
114
  # Flatten the list of arrays
lightrag/kg/milvus_impl.py CHANGED
@@ -1,7 +1,6 @@
1
  import asyncio
2
  import os
3
  from typing import Any, final
4
- from tqdm.asyncio import tqdm as tqdm_async
5
  from dataclasses import dataclass
6
  import numpy as np
7
  from lightrag.utils import logger
@@ -94,15 +93,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
94
  for i in range(0, len(contents), self._max_batch_size)
95
  ]
96
 
97
- async def wrapped_task(batch):
98
- result = await self.embedding_func(batch)
99
- pbar.update(1)
100
- return result
101
-
102
- embedding_tasks = [wrapped_task(batch) for batch in batches]
103
- pbar = tqdm_async(
104
- total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
105
- )
106
  embeddings_list = await asyncio.gather(*embedding_tasks)
107
 
108
  embeddings = np.concatenate(embeddings_list)
 
1
  import asyncio
2
  import os
3
  from typing import Any, final
 
4
  from dataclasses import dataclass
5
  import numpy as np
6
  from lightrag.utils import logger
 
93
  for i in range(0, len(contents), self._max_batch_size)
94
  ]
95
 
96
+ embedding_tasks = [self.embedding_func(batch) for batch in batches]
 
 
 
 
 
 
 
 
97
  embeddings_list = await asyncio.gather(*embedding_tasks)
98
 
99
  embeddings = np.concatenate(embeddings_list)
lightrag/kg/mongo_impl.py CHANGED
@@ -2,7 +2,6 @@ import os
2
  from dataclasses import dataclass
3
  import numpy as np
4
  import configparser
5
- from tqdm.asyncio import tqdm as tqdm_async
6
  import asyncio
7
 
8
  from typing import Any, List, Union, final
@@ -854,17 +853,8 @@ class MongoVectorDBStorage(BaseVectorStorage):
854
  for i in range(0, len(contents), self._max_batch_size)
855
  ]
856
 
857
- async def wrapped_task(batch):
858
- result = await self.embedding_func(batch)
859
- pbar.update(1)
860
- return result
861
-
862
- embedding_tasks = [wrapped_task(batch) for batch in batches]
863
- pbar = tqdm_async(
864
- total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
865
- )
866
  embeddings_list = await asyncio.gather(*embedding_tasks)
867
-
868
  embeddings = np.concatenate(embeddings_list)
869
  for i, d in enumerate(list_data):
870
  d["vector"] = np.array(embeddings[i], dtype=np.float32).tolist()
 
2
  from dataclasses import dataclass
3
  import numpy as np
4
  import configparser
 
5
  import asyncio
6
 
7
  from typing import Any, List, Union, final
 
853
  for i in range(0, len(contents), self._max_batch_size)
854
  ]
855
 
856
+ embedding_tasks = [self.embedding_func(batch) for batch in batches]
 
 
 
 
 
 
 
 
857
  embeddings_list = await asyncio.gather(*embedding_tasks)
 
858
  embeddings = np.concatenate(embeddings_list)
859
  for i, d in enumerate(list_data):
860
  d["vector"] = np.array(embeddings[i], dtype=np.float32).tolist()
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -1,7 +1,6 @@
1
  import asyncio
2
  import os
3
  from typing import Any, final
4
- from tqdm.asyncio import tqdm as tqdm_async
5
  from dataclasses import dataclass
6
  import numpy as np
7
 
@@ -71,15 +70,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
71
  for i in range(0, len(contents), self._max_batch_size)
72
  ]
73
 
74
- async def wrapped_task(batch):
75
- result = await self.embedding_func(batch)
76
- pbar.update(1)
77
- return result
78
-
79
- embedding_tasks = [wrapped_task(batch) for batch in batches]
80
- pbar = tqdm_async(
81
- total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
82
- )
83
  embeddings_list = await asyncio.gather(*embedding_tasks)
84
 
85
  embeddings = np.concatenate(embeddings_list)
 
1
  import asyncio
2
  import os
3
  from typing import Any, final
 
4
  from dataclasses import dataclass
5
  import numpy as np
6
 
 
70
  for i in range(0, len(contents), self._max_batch_size)
71
  ]
72
 
73
+ embedding_tasks = [self.embedding_func(batch) for batch in batches]
 
 
 
 
 
 
 
 
74
  embeddings_list = await asyncio.gather(*embedding_tasks)
75
 
76
  embeddings = np.concatenate(embeddings_list)
lightrag/kg/postgres_impl.py CHANGED
@@ -41,7 +41,6 @@ if not pm.is_installed("asyncpg"):
41
 
42
  try:
43
  import asyncpg
44
- from tqdm.asyncio import tqdm as tqdm_async
45
 
46
  except ImportError as e:
47
  raise ImportError(
@@ -380,15 +379,7 @@ class PGVectorStorage(BaseVectorStorage):
380
  for i in range(0, len(contents), self._max_batch_size)
381
  ]
382
 
383
- async def wrapped_task(batch):
384
- result = await self.embedding_func(batch)
385
- pbar.update(1)
386
- return result
387
-
388
- embedding_tasks = [wrapped_task(batch) for batch in batches]
389
- pbar = tqdm_async(
390
- total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
391
- )
392
  embeddings_list = await asyncio.gather(*embedding_tasks)
393
 
394
  embeddings = np.concatenate(embeddings_list)
 
41
 
42
  try:
43
  import asyncpg
 
44
 
45
  except ImportError as e:
46
  raise ImportError(
 
379
  for i in range(0, len(contents), self._max_batch_size)
380
  ]
381
 
382
+ embedding_tasks = [self.embedding_func(batch) for batch in batches]
 
 
 
 
 
 
 
 
383
  embeddings_list = await asyncio.gather(*embedding_tasks)
384
 
385
  embeddings = np.concatenate(embeddings_list)
lightrag/kg/qdrant_impl.py CHANGED
@@ -1,7 +1,6 @@
1
  import asyncio
2
  import os
3
  from typing import Any, final
4
- from tqdm.asyncio import tqdm as tqdm_async
5
  from dataclasses import dataclass
6
  import numpy as np
7
  import hashlib
@@ -110,15 +109,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
110
  for i in range(0, len(contents), self._max_batch_size)
111
  ]
112
 
113
- async def wrapped_task(batch):
114
- result = await self.embedding_func(batch)
115
- pbar.update(1)
116
- return result
117
-
118
- embedding_tasks = [wrapped_task(batch) for batch in batches]
119
- pbar = tqdm_async(
120
- total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
121
- )
122
  embeddings_list = await asyncio.gather(*embedding_tasks)
123
 
124
  embeddings = np.concatenate(embeddings_list)
 
1
  import asyncio
2
  import os
3
  from typing import Any, final
 
4
  from dataclasses import dataclass
5
  import numpy as np
6
  import hashlib
 
109
  for i in range(0, len(contents), self._max_batch_size)
110
  ]
111
 
112
+ embedding_tasks = [self.embedding_func(batch) for batch in batches]
 
 
 
 
 
 
 
 
113
  embeddings_list = await asyncio.gather(*embedding_tasks)
114
 
115
  embeddings = np.concatenate(embeddings_list)
lightrag/kg/redis_impl.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
  from typing import Any, final
3
- from tqdm.asyncio import tqdm as tqdm_async
4
  from dataclasses import dataclass
5
  import pipmaster as pm
6
  import configparser
@@ -51,7 +50,8 @@ class RedisKVStorage(BaseKVStorage):
51
 
52
  async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
53
  pipe = self._redis.pipeline()
54
- for k, v in tqdm_async(data.items(), desc="Upserting"):
 
55
  pipe.set(f"{self.namespace}:{k}", json.dumps(v))
56
  await pipe.execute()
57
 
 
1
  import os
2
  from typing import Any, final
 
3
  from dataclasses import dataclass
4
  import pipmaster as pm
5
  import configparser
 
50
 
51
  async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
52
  pipe = self._redis.pipeline()
53
+
54
+ for k, v in data.items():
55
  pipe.set(f"{self.namespace}:{k}", json.dumps(v))
56
  await pipe.execute()
57
 
lightrag/kg/tidb_impl.py CHANGED
@@ -7,7 +7,6 @@ import numpy as np
7
 
8
  from lightrag.types import KnowledgeGraph
9
 
10
- from tqdm import tqdm
11
 
12
  from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
13
  from ..namespace import NameSpace, is_namespace
@@ -270,15 +269,8 @@ class TiDBVectorDBStorage(BaseVectorStorage):
270
  for i in range(0, len(contents), self._max_batch_size)
271
  ]
272
  embedding_tasks = [self.embedding_func(batch) for batch in batches]
273
- embeddings_list = []
274
- for f in tqdm(
275
- asyncio.as_completed(embedding_tasks),
276
- total=len(embedding_tasks),
277
- desc="Generating embeddings",
278
- unit="batch",
279
- ):
280
- embeddings = await f
281
- embeddings_list.append(embeddings)
282
  embeddings = np.concatenate(embeddings_list)
283
  for i, d in enumerate(list_data):
284
  d["content_vector"] = embeddings[i]
 
7
 
8
  from lightrag.types import KnowledgeGraph
9
 
 
10
 
11
  from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
12
  from ..namespace import NameSpace, is_namespace
 
269
  for i in range(0, len(contents), self._max_batch_size)
270
  ]
271
  embedding_tasks = [self.embedding_func(batch) for batch in batches]
272
+ embeddings_list = await asyncio.gather(*embedding_tasks)
273
+
 
 
 
 
 
 
 
274
  embeddings = np.concatenate(embeddings_list)
275
  for i, d in enumerate(list_data):
276
  d["content_vector"] = embeddings[i]
lightrag/llm/ollama.py CHANGED
@@ -4,7 +4,7 @@ if sys.version_info < (3, 9):
4
  from typing import AsyncIterator
5
  else:
6
  from collections.abc import AsyncIterator
7
-
8
  import pipmaster as pm # Pipmaster for dynamic library install
9
 
10
  # install specific modules
@@ -48,7 +48,7 @@ async def _ollama_model_if_cache(
48
  **kwargs,
49
  ) -> Union[str, AsyncIterator[str]]:
50
  stream = True if kwargs.get("stream") else False
51
-
52
  kwargs.pop("max_tokens", None)
53
  # kwargs.pop("response_format", None) # allow json
54
  host = kwargs.pop("host", None)
@@ -129,4 +129,4 @@ async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
129
  kwargs["headers"] = headers
130
  ollama_client = ollama.Client(**kwargs)
131
  data = ollama_client.embed(model=embed_model, input=texts)
132
- return data["embeddings"]
 
4
  from typing import AsyncIterator
5
  else:
6
  from collections.abc import AsyncIterator
7
+
8
  import pipmaster as pm # Pipmaster for dynamic library install
9
 
10
  # install specific modules
 
48
  **kwargs,
49
  ) -> Union[str, AsyncIterator[str]]:
50
  stream = True if kwargs.get("stream") else False
51
+
52
  kwargs.pop("max_tokens", None)
53
  # kwargs.pop("response_format", None) # allow json
54
  host = kwargs.pop("host", None)
 
129
  kwargs["headers"] = headers
130
  ollama_client = ollama.Client(**kwargs)
131
  data = ollama_client.embed(model=embed_model, input=texts)
132
+ return data["embeddings"]
lightrag/operate.py CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
3
  import asyncio
4
  import json
5
  import re
6
- from tqdm.asyncio import tqdm as tqdm_async
7
  from typing import Any, AsyncIterator
8
  from collections import Counter, defaultdict
9
  from .utils import (
@@ -500,16 +499,8 @@ async def extract_entities(
500
  )
501
  return dict(maybe_nodes), dict(maybe_edges)
502
 
503
- results = []
504
- for result in tqdm_async(
505
- asyncio.as_completed([_process_single_content(c) for c in ordered_chunks]),
506
- total=len(ordered_chunks),
507
- desc="Level 2 - Extracting entities and relationships",
508
- unit="chunk",
509
- position=1,
510
- leave=False,
511
- ):
512
- results.append(await result)
513
 
514
  maybe_nodes = defaultdict(list)
515
  maybe_edges = defaultdict(list)
@@ -518,41 +509,20 @@ async def extract_entities(
518
  maybe_nodes[k].extend(v)
519
  for k, v in m_edges.items():
520
  maybe_edges[tuple(sorted(k))].extend(v)
521
- logger.debug("Inserting entities into storage...")
522
- all_entities_data = []
523
- for result in tqdm_async(
524
- asyncio.as_completed(
525
- [
526
- _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
527
- for k, v in maybe_nodes.items()
528
- ]
529
- ),
530
- total=len(maybe_nodes),
531
- desc="Level 3 - Inserting entities",
532
- unit="entity",
533
- position=2,
534
- leave=False,
535
- ):
536
- all_entities_data.append(await result)
537
-
538
- logger.debug("Inserting relationships into storage...")
539
- all_relationships_data = []
540
- for result in tqdm_async(
541
- asyncio.as_completed(
542
- [
543
- _merge_edges_then_upsert(
544
- k[0], k[1], v, knowledge_graph_inst, global_config
545
- )
546
- for k, v in maybe_edges.items()
547
- ]
548
- ),
549
- total=len(maybe_edges),
550
- desc="Level 3 - Inserting relationships",
551
- unit="relationship",
552
- position=3,
553
- leave=False,
554
- ):
555
- all_relationships_data.append(await result)
556
 
557
  if not len(all_entities_data) and not len(all_relationships_data):
558
  logger.warning(
 
3
  import asyncio
4
  import json
5
  import re
 
6
  from typing import Any, AsyncIterator
7
  from collections import Counter, defaultdict
8
  from .utils import (
 
499
  )
500
  return dict(maybe_nodes), dict(maybe_edges)
501
 
502
+ tasks = [_process_single_content(c) for c in ordered_chunks]
503
+ results = await asyncio.gather(*tasks)
 
 
 
 
 
 
 
 
504
 
505
  maybe_nodes = defaultdict(list)
506
  maybe_edges = defaultdict(list)
 
509
  maybe_nodes[k].extend(v)
510
  for k, v in m_edges.items():
511
  maybe_edges[tuple(sorted(k))].extend(v)
512
+
513
+ all_entities_data = await asyncio.gather(
514
+ *[
515
+ _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
516
+ for k, v in maybe_nodes.items()
517
+ ]
518
+ )
519
+
520
+ all_relationships_data = await asyncio.gather(
521
+ *[
522
+ _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
523
+ for k, v in maybe_edges.items()
524
+ ]
525
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
 
527
  if not len(all_entities_data) and not len(all_relationships_data):
528
  logger.warning(
lightrag/utils.py CHANGED
@@ -19,7 +19,6 @@ import tiktoken
19
  from lightrag.prompt import PROMPTS
20
 
21
 
22
-
23
  VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
24
 
25
 
@@ -84,7 +83,6 @@ class EmbeddingFunc:
84
  return await self.func(*args, **kwargs)
85
 
86
 
87
-
88
  def locate_json_string_body_from_string(content: str) -> str | None:
89
  """Locate the JSON string body from a string"""
90
  try:
@@ -715,4 +713,3 @@ def get_conversation_turns(
715
  )
716
 
717
  return "\n".join(formatted_turns)
718
-
 
19
  from lightrag.prompt import PROMPTS
20
 
21
 
 
22
  VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
23
 
24
 
 
83
  return await self.func(*args, **kwargs)
84
 
85
 
 
86
  def locate_json_string_body_from_string(content: str) -> str | None:
87
  """Locate the JSON string body from a string"""
88
  try:
 
713
  )
714
 
715
  return "\n".join(formatted_turns)
 
reproduce/Step_3.py CHANGED
@@ -2,7 +2,6 @@ import re
2
  import json
3
  import asyncio
4
  from lightrag import LightRAG, QueryParam
5
- from tqdm import tqdm
6
 
7
 
8
  def extract_queries(file_path):
@@ -44,7 +43,7 @@ def run_queries_and_save_to_json(
44
  result_file.write("[\n")
45
  first_entry = True
46
 
47
- for query_text in tqdm(queries, desc="Processing queries", unit="query"):
48
  result, error = loop.run_until_complete(
49
  process_query(query_text, rag_instance, query_param)
50
  )
 
2
  import json
3
  import asyncio
4
  from lightrag import LightRAG, QueryParam
 
5
 
6
 
7
  def extract_queries(file_path):
 
43
  result_file.write("[\n")
44
  first_entry = True
45
 
46
+ for query_text in queries:
47
  result, error = loop.run_until_complete(
48
  process_query(query_text, rag_instance, query_param)
49
  )
reproduce/Step_3_openai_compatible.py CHANGED
@@ -3,7 +3,6 @@ import re
3
  import json
4
  import asyncio
5
  from lightrag import LightRAG, QueryParam
6
- from tqdm import tqdm
7
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
8
  from lightrag.utils import EmbeddingFunc
9
  import numpy as np
@@ -76,7 +75,7 @@ def run_queries_and_save_to_json(
76
  result_file.write("[\n")
77
  first_entry = True
78
 
79
- for query_text in tqdm(queries, desc="Processing queries", unit="query"):
80
  result, error = loop.run_until_complete(
81
  process_query(query_text, rag_instance, query_param)
82
  )
 
3
  import json
4
  import asyncio
5
  from lightrag import LightRAG, QueryParam
 
6
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
7
  from lightrag.utils import EmbeddingFunc
8
  import numpy as np
 
75
  result_file.write("[\n")
76
  first_entry = True
77
 
78
+ for query_text in queries:
79
  result, error = loop.run_until_complete(
80
  process_query(query_text, rag_instance, query_param)
81
  )
requirements.txt CHANGED
@@ -22,7 +22,6 @@ tenacity
22
 
23
  # LLM packages
24
  tiktoken
25
- tqdm
26
  xxhash
27
 
28
  # Extra libraries are installed when needed using pipmaster
 
22
 
23
  # LLM packages
24
  tiktoken
 
25
  xxhash
26
 
27
  # Extra libraries are installed when needed using pipmaster