Merge branch 'main' into add-Memgraph-graph-db
Files changed:
- README-zh.md +1 -1
- env.example +1 -1
- lightrag/lightrag.py +1 -1
- lightrag/llm/openai.py +27 -3
- lightrag/operate.py +3 -1
- lightrag/utils.py +9 -2
README-zh.md
CHANGED
@@ -30,7 +30,7 @@
 <a href="https://github.com/HKUDS/LightRAG/issues/285"><img src="https://img.shields.io/badge/💬微信群-交流-07c160?style=for-the-badge&logo=wechat&logoColor=white&labelColor=1a1a2e"></a>
 </p>
 <p>
-<a href="
+<a href="README-zh.md"><img src="https://img.shields.io/badge/🇨🇳中文版-1a1a2e?style=for-the-badge"></a>
 <a href="README.md"><img src="https://img.shields.io/badge/🇺🇸English-1a1a2e?style=for-the-badge"></a>
 </p>
 </div>
env.example
CHANGED
@@ -96,7 +96,7 @@ EMBEDDING_BINDING_API_KEY=your_api_key
 # If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
 EMBEDDING_BINDING_HOST=http://localhost:11434
 ### Num of chunks send to Embedding in single request
-# EMBEDDING_BATCH_NUM=
+# EMBEDDING_BATCH_NUM=10
 ### Max concurrency requests for Embedding
 # EMBEDDING_FUNC_MAX_ASYNC=16
 ### Maximum tokens sent to Embedding for each chunk (no longer in use?)
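EMBEDDING_BATCH_NUM controls how many chunks are sent to the embedding service in a single request; the commented line now documents the default of 10 that the library uses (see the lightrag.py change below). As a rough sketch of what the setting does, assuming a caller-supplied async embedding function; the helper name is illustrative and not part of LightRAG's API:

import os
from typing import Awaitable, Callable

# Illustrative batching helper: splits texts into groups of EMBEDDING_BATCH_NUM
# and calls the embedding function once per group.
async def embed_in_batches(
    texts: list[str],
    embed: Callable[[list[str]], Awaitable[list[list[float]]]],
) -> list[list[float]]:
    batch_num = int(os.getenv("EMBEDDING_BATCH_NUM", 10))
    vectors: list[list[float]] = []
    for start in range(0, len(texts), batch_num):
        vectors.extend(await embed(texts[start : start + batch_num]))
    return vectors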
lightrag/lightrag.py
CHANGED
@@ -201,7 +201,7 @@ class LightRAG:
     embedding_func: EmbeddingFunc | None = field(default=None)
     """Function for computing text embeddings. Must be set before use."""

-    embedding_batch_num: int = field(default=int(os.getenv("EMBEDDING_BATCH_NUM",
+    embedding_batch_num: int = field(default=int(os.getenv("EMBEDDING_BATCH_NUM", 10)))
     """Batch size for embedding computations."""

     embedding_func_max_async: int = field(
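Because the field default is evaluated from the environment when the module is imported, EMBEDDING_BATCH_NUM must be set before LightRAG is imported; the value can also be overridden per instance. A minimal sketch, assuming the package's top-level LightRAG export (other constructor arguments such as embedding_func are omitted):

import os

# Set the variable before the import so the dataclass default picks it up.
os.environ["EMBEDDING_BATCH_NUM"] = "20"

from lightrag import LightRAG  # assumes the top-level export used elsewhere in the project

# The field default now reflects the environment variable.
print(LightRAG.__dataclass_fields__["embedding_batch_num"].default)  # 20

# Or override it explicitly per instance (remaining required arguments omitted):
# rag = LightRAG(working_dir="./rag_storage", embedding_batch_num=10, ...)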
lightrag/llm/openai.py
CHANGED
@@ -210,9 +210,18 @@ async def openai_complete_if_cache(
     async def inner():
         # Track if we've started iterating
         iteration_started = False
+        final_chunk_usage = None
+
        try:
            iteration_started = True
            async for chunk in response:
+                # Check if this chunk has usage information (final chunk)
+                if hasattr(chunk, "usage") and chunk.usage:
+                    final_chunk_usage = chunk.usage
+                    logger.debug(
+                        f"Received usage info in streaming chunk: {chunk.usage}"
+                    )
+
                # Check if choices exists and is not empty
                if not hasattr(chunk, "choices") or not chunk.choices:
                    logger.warning(f"Received chunk without choices: {chunk}")
@@ -222,16 +231,31 @@ async def openai_complete_if_cache(
                if not hasattr(chunk.choices[0], "delta") or not hasattr(
                    chunk.choices[0].delta, "content"
                ):
-                    logger.warning(
-                        f"Received chunk without delta content: {chunk.choices[0]}"
-                    )
+                    # This might be the final chunk, continue to check for usage
                    continue
+
                content = chunk.choices[0].delta.content
                if content is None:
                    continue
                if r"\u" in content:
                    content = safe_unicode_decode(content.encode("utf-8"))
+
                yield content
+
+            # After streaming is complete, track token usage
+            if token_tracker and final_chunk_usage:
+                # Use actual usage from the API
+                token_counts = {
+                    "prompt_tokens": getattr(final_chunk_usage, "prompt_tokens", 0),
+                    "completion_tokens": getattr(
+                        final_chunk_usage, "completion_tokens", 0
+                    ),
+                    "total_tokens": getattr(final_chunk_usage, "total_tokens", 0),
+                }
+                token_tracker.add_usage(token_counts)
+                logger.debug(f"Streaming token usage (from API): {token_counts}")
+            elif token_tracker:
+                logger.debug("No usage information available in streaming response")
        except Exception as e:
            logger.error(f"Error in stream response: {str(e)}")
            # Try to clean up resources if possible
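The streaming path now remembers the usage block that the API attaches to the final chunk of a stream (with the OpenAI Chat Completions API that chunk is only sent when the request opts in via stream_options={"include_usage": True}) and reports it once streaming finishes. Whatever object is passed as token_tracker only needs an add_usage() method that accepts the dict built in the diff; a minimal, purely illustrative stand-in:

# Minimal stand-in for a token tracker; it only mirrors the add_usage()
# contract used in the diff above, nothing else is assumed about LightRAG.
class SimpleTokenTracker:
    def __init__(self) -> None:
        self.prompt_tokens = 0
        self.completion_tokens = 0
        self.total_tokens = 0

    def add_usage(self, token_counts: dict) -> None:
        # Accumulate the counts reported by the final streaming chunk.
        self.prompt_tokens += token_counts.get("prompt_tokens", 0)
        self.completion_tokens += token_counts.get("completion_tokens", 0)
        self.total_tokens += token_counts.get("total_tokens", 0)

tracker = SimpleTokenTracker()
# e.g. pass token_tracker=tracker to openai_complete_if_cache(...) and read
# tracker.total_tokens after the stream has been fully consumed.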
lightrag/operate.py
CHANGED
@@ -26,6 +26,7 @@ from .utils import (
     get_conversation_turns,
     use_llm_func_with_cache,
     update_chunk_cache_list,
+    remove_think_tags,
 )
 from .base import (
     BaseGraphStorage,
@@ -1703,7 +1704,8 @@ async def extract_keywords_only(
     result = await use_model_func(kw_prompt, keyword_extraction=True)

     # 6. Parse out JSON from the LLM response
-
+    result = remove_think_tags(result)
+    match = re.search(r"\{.*?\}", result, re.DOTALL)
     if not match:
         logger.error("No JSON-like structure found in the LLM respond.")
         return [], []
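extract_keywords_only now strips any leading <think> block (emitted by reasoning models) before searching the response for the JSON keyword payload. A small end-to-end sketch of that parsing flow on a made-up response; remove_think_tags is the helper added in the utils.py hunk below and is repeated here only so the snippet is self-contained:

import json
import re

def remove_think_tags(text: str) -> str:
    """Remove <think> tags from the text"""
    return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip()

# Made-up output from a reasoning model
result = '<think>The user asks about graph storage...</think>{"high_level_keywords": ["graph databases"], "low_level_keywords": ["Memgraph"]}'

result = remove_think_tags(result)
match = re.search(r"\{.*?\}", result, re.DOTALL)
if not match:
    print("No JSON-like structure found in the LLM response.")
else:
    keywords = json.loads(match.group(0))
    print(keywords["high_level_keywords"], keywords["low_level_keywords"])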
lightrag/utils.py
CHANGED
@@ -1465,6 +1465,11 @@ async def update_chunk_cache_list(
     )


+def remove_think_tags(text: str) -> str:
+    """Remove <think> tags from the text"""
+    return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip()
+
+
 async def use_llm_func_with_cache(
     input_text: str,
     use_llm_func: callable,
@@ -1531,6 +1536,7 @@ async def use_llm_func_with_cache(
            kwargs["max_tokens"] = max_tokens

        res: str = await use_llm_func(input_text, **kwargs)
+        res = remove_think_tags(res)

        if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
            await save_to_cache(
@@ -1557,8 +1563,9 @@ async def use_llm_func_with_cache(
    if max_tokens is not None:
        kwargs["max_tokens"] = max_tokens

-    logger.info(f"Call LLM function with query text
-
+    logger.info(f"Call LLM function with query text length: {len(input_text)}")
+    res = await use_llm_func(input_text, **kwargs)
+    return remove_think_tags(res)


 def get_content_summary(content: str, max_length: int = 250) -> str:
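The regex handles both a fully closed leading <think>...</think> block and a dangling <think> that was never closed, and leaves text without any think block untouched; the trailing strip() drops leftover whitespace. A quick check with made-up strings:

import re

def remove_think_tags(text: str) -> str:
    """Remove <think> tags from the text"""
    return re.sub(r"^(<think>.*?</think>|<think>)", "", text, flags=re.DOTALL).strip()

print(remove_think_tags("<think>planning the answer...</think>\nFinal answer"))  # -> "Final answer"
print(remove_think_tags("<think>tag was never closed, answer follows"))          # -> "tag was never closed, answer follows"
print(remove_think_tags("no think block at all"))                                # -> "no think block at all"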