Commit a04f5c7 by DavIvek
Parents: be6f7dd 1a6ee7b

Merge remote-tracking branch 'upstream/memgraph' into add-Memgraph-graph-db

README-zh.md CHANGED
@@ -30,7 +30,7 @@
   <a href="https://github.com/HKUDS/LightRAG/issues/285"><img src="https://img.shields.io/badge/💬微信群-交流-07c160?style=for-the-badge&logo=wechat&logoColor=white&labelColor=1a1a2e"></a>
   </p>
   <p>
-  <a href="README_zh.md"><img src="https://img.shields.io/badge/🇨🇳中文版-1a1a2e?style=for-the-badge"></a>
+  <a href="README-zh.md"><img src="https://img.shields.io/badge/🇨🇳中文版-1a1a2e?style=for-the-badge"></a>
   <a href="README.md"><img src="https://img.shields.io/badge/🇺🇸English-1a1a2e?style=for-the-badge"></a>
   </p>
   </div>
env.example CHANGED
@@ -96,7 +96,7 @@ EMBEDDING_BINDING_API_KEY=your_api_key
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
 ### Num of chunks send to Embedding in single request
-# EMBEDDING_BATCH_NUM=32
+# EMBEDDING_BATCH_NUM=10
 ### Max concurrency requests for Embedding
 # EMBEDDING_FUNC_MAX_ASYNC=16
 ### Maximum tokens sent to Embedding for each chunk (no longer in use?)
lightrag/lightrag.py CHANGED
@@ -201,7 +201,7 @@ class LightRAG:
     embedding_func: EmbeddingFunc | None = field(default=None)
     """Function for computing text embeddings. Must be set before use."""

-    embedding_batch_num: int = field(default=int(os.getenv("EMBEDDING_BATCH_NUM", 32)))
+    embedding_batch_num: int = field(default=int(os.getenv("EMBEDDING_BATCH_NUM", 10)))
     """Batch size for embedding computations."""

     embedding_func_max_async: int = field(
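Both the env.example and lightrag.py changes lower the default embedding batch size from 32 to 10. Note that the dataclass default is evaluated from os.getenv when the class is defined, so EMBEDDING_BATCH_NUM only takes effect if it is set before lightrag is imported (or if embedding_batch_num is passed explicitly). A minimal sketch of the import-order caveat, assuming only what the hunk above shows:

import os

# EMBEDDING_BATCH_NUM is read when the LightRAG dataclass is defined,
# so set it before the import (or pass embedding_batch_num=... when
# constructing LightRAG instead).
os.environ["EMBEDDING_BATCH_NUM"] = "16"

from lightrag import LightRAG  # noqa: E402  (import after setting the env var)

print(LightRAG.__dataclass_fields__["embedding_batch_num"].default)  # -> 16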
lightrag/llm/openai.py CHANGED
@@ -210,9 +210,18 @@ async def openai_complete_if_cache(
         async def inner():
             # Track if we've started iterating
             iteration_started = False
+            final_chunk_usage = None
+
             try:
                 iteration_started = True
                 async for chunk in response:
+                    # Check if this chunk has usage information (final chunk)
+                    if hasattr(chunk, "usage") and chunk.usage:
+                        final_chunk_usage = chunk.usage
+                        logger.debug(
+                            f"Received usage info in streaming chunk: {chunk.usage}"
+                        )
+
                     # Check if choices exists and is not empty
                     if not hasattr(chunk, "choices") or not chunk.choices:
                         logger.warning(f"Received chunk without choices: {chunk}")
@@ -222,16 +231,31 @@
                     if not hasattr(chunk.choices[0], "delta") or not hasattr(
                         chunk.choices[0].delta, "content"
                     ):
-                        logger.warning(
-                            f"Received chunk without delta content: {chunk.choices[0]}"
-                        )
+                        # This might be the final chunk, continue to check for usage
                         continue
+
                     content = chunk.choices[0].delta.content
                     if content is None:
                         continue
                     if r"\u" in content:
                         content = safe_unicode_decode(content.encode("utf-8"))
+
                     yield content
+
+                # After streaming is complete, track token usage
+                if token_tracker and final_chunk_usage:
+                    # Use actual usage from the API
+                    token_counts = {
+                        "prompt_tokens": getattr(final_chunk_usage, "prompt_tokens", 0),
+                        "completion_tokens": getattr(
+                            final_chunk_usage, "completion_tokens", 0
+                        ),
+                        "total_tokens": getattr(final_chunk_usage, "total_tokens", 0),
+                    }
+                    token_tracker.add_usage(token_counts)
+                    logger.debug(f"Streaming token usage (from API): {token_counts}")
+                elif token_tracker:
+                    logger.debug("No usage information available in streaming response")
             except Exception as e:
                 logger.error(f"Error in stream response: {str(e)}")
                 # Try to clean up resources if possible
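The usage-bearing final chunk only appears when the stream is created with usage reporting enabled (with the official OpenAI client, typically stream_options={"include_usage": True}); that chunk arrives with empty choices, which is why the delta check above now falls through silently instead of warning. A minimal stand-in for the tracker consumed by add_usage(token_counts), based only on the dict shape used in this hunk (the real TokenTracker in LightRAG may differ):

class SimpleTokenTracker:
    """Minimal sketch of a tracker compatible with the streaming branch above."""

    def __init__(self) -> None:
        self.prompt_tokens = 0
        self.completion_tokens = 0
        self.total_tokens = 0

    def add_usage(self, token_counts: dict) -> None:
        # token_counts carries prompt_tokens / completion_tokens / total_tokens
        self.prompt_tokens += token_counts.get("prompt_tokens", 0)
        self.completion_tokens += token_counts.get("completion_tokens", 0)
        self.total_tokens += token_counts.get("total_tokens", 0)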
lightrag/operate.py CHANGED
@@ -26,6 +26,7 @@ from .utils import (
     get_conversation_turns,
     use_llm_func_with_cache,
     update_chunk_cache_list,
+    remove_think_tags,
 )
 from .base import (
     BaseGraphStorage,
@@ -1703,7 +1704,8 @@ async def extract_keywords_only(
     result = await use_model_func(kw_prompt, keyword_extraction=True)

     # 6. Parse out JSON from the LLM response
-    match = re.search(r"\{.*\}", result, re.DOTALL)
+    result = remove_think_tags(result)
+    match = re.search(r"\{.*?\}", result, re.DOTALL)
     if not match:
         logger.error("No JSON-like structure found in the LLM respond.")
         return [], []
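Besides stripping a leading think block, the pattern switches from greedy to non-greedy matching. That matters because the keyword JSON is flat (no nested braces): the lazy pattern stops at the object's own closing brace instead of running to the last "}" anywhere in the response. A small illustration with a made-up, already think-stripped model output:

import json
import re

# Made-up LLM output: the keyword JSON followed by trailing commentary
# that happens to contain another "{...}" pair.
result = (
    '{"high_level_keywords": ["economy"], "low_level_keywords": ["GDP"]}\n'
    "Note: keys follow the schema {high/low level keywords}."
)

old_match = re.search(r"\{.*\}", result, re.DOTALL).group(0)   # greedy: runs to the last "}"
new_match = re.search(r"\{.*?\}", result, re.DOTALL).group(0)  # lazy: stops at the first "}"

json.loads(new_match)    # parses the keyword object
# json.loads(old_match)  # would raise: the greedy span swallows the trailing note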
lightrag/utils.py CHANGED
@@ -1465,6 +1465,11 @@ async def update_chunk_cache_list(
     )


+def remove_think_tags(text: str) -> str:
+    """Remove <think> tags from the text"""
+    return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip()
+
+
 async def use_llm_func_with_cache(
     input_text: str,
     use_llm_func: callable,
@@ -1531,6 +1536,7 @@ async def use_llm_func_with_cache(
             kwargs["max_tokens"] = max_tokens

         res: str = await use_llm_func(input_text, **kwargs)
+        res = remove_think_tags(res)

         if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
             await save_to_cache(
@@ -1557,8 +1563,9 @@ async def use_llm_func_with_cache(
     if max_tokens is not None:
         kwargs["max_tokens"] = max_tokens

-    logger.info(f"Call LLM function with query text lenght: {len(input_text)}")
-    return await use_llm_func(input_text, **kwargs)
+    logger.info(f"Call LLM function with query text length: {len(input_text)}")
+    res = await use_llm_func(input_text, **kwargs)
+    return remove_think_tags(res)


 def get_content_summary(content: str, max_length: int = 250) -> str:
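A few edge cases of the new helper, using a copy of the function from the hunk above: the first alternative strips a complete leading <think>...</think> block, the bare <think> alternative only drops a dangling opening tag, and the ^ anchor leaves anything that does not start with a think tag untouched.

import re


def remove_think_tags(text: str) -> str:
    """Copy of the helper in the hunk above, for illustration only."""
    return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip()


print(remove_think_tags("<think>step 1...\nstep 2...</think>\nFinal answer"))
# -> "Final answer"                        (leading think block removed)
print(remove_think_tags("<think>response cut off before the closing tag"))
# -> "response cut off before the closing tag"  (only the dangling tag is dropped)
print(remove_think_tags("Answer <think>afterthought</think>"))
# -> unchanged: the ^ anchor only strips think content at the start of the text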