yangdx committed · Commit e5ea22e · 1 Parent(s): 8d61fcb
Refactor /api/generate: use llm_model_func directly

lightrag/api/lightrag_server.py CHANGED (+10 -12)
@@ -1272,23 +1272,17 @@ def create_app(args):
             # Estimate the number of input tokens
             prompt_tokens = estimate_tokens(cleaned_query)
 
-            #
-            query_param = QueryParam(
-                mode=mode,
-                stream=request.stream,
-                only_need_context=False
-            )
-
-            # If there is a system prompt, update rag's llm_model_kwargs
+            # Query using llm_model_func directly
             if request.system:
                 rag.llm_model_kwargs["system_prompt"] = request.system
 
             if request.stream:
                 from fastapi.responses import StreamingResponse
 
-                response = await rag.
-                    cleaned_query,
-
+                response = await rag.llm_model_func(
+                    cleaned_query,
+                    stream=True,
+                    **rag.llm_model_kwargs
                 )
 
             async def stream_generator():
@@ -1383,7 +1377,11 @@ def create_app(args):
                 )
             else:
                 first_chunk_time = time.time_ns()
-                response_text = await rag.
+                response_text = await rag.llm_model_func(
+                    cleaned_query,
+                    stream=False,
+                    **rag.llm_model_kwargs
+                )
                 last_chunk_time = time.time_ns()
 
                 if not response_text:
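
For context, a minimal sketch of the handler shape this commit moves toward. It assumes rag.llm_model_func returns a complete string when stream=False and an async iterator of text chunks when stream=True; the request fields, return shape, NDJSON framing, and the estimate_tokens stub are illustrative assumptions, not code from the repository.

    # Minimal sketch of the refactored /api/generate flow (assumptions above).
    import json
    import time

    from fastapi.responses import StreamingResponse


    def estimate_tokens(text: str) -> int:
        # Stand-in for the server's token estimator.
        return max(1, len(text) // 4)


    async def generate(request, rag):
        cleaned_query = request.prompt.strip()
        prompt_tokens = estimate_tokens(cleaned_query)

        # Forward an optional system prompt through llm_model_kwargs,
        # as the diff does.
        if request.system:
            rag.llm_model_kwargs["system_prompt"] = request.system

        if request.stream:
            # Assumed: stream=True yields an async iterator of text chunks.
            response = await rag.llm_model_func(
                cleaned_query,
                stream=True,
                **rag.llm_model_kwargs,
            )

            async def stream_generator():
                # Frame each chunk as one NDJSON line (framing is illustrative).
                async for chunk in response:
                    yield json.dumps({"response": chunk, "done": False}) + "\n"
                yield json.dumps({"done": True}) + "\n"

            return StreamingResponse(
                stream_generator(), media_type="application/x-ndjson"
            )

        # Non-streaming path: one awaited string, timed as in the diff.
        first_chunk_time = time.time_ns()
        response_text = await rag.llm_model_func(
            cleaned_query,
            stream=False,
            **rag.llm_model_kwargs,
        )
        last_chunk_time = time.time_ns()
        return {
            "response": response_text,
            "prompt_tokens": prompt_tokens,
            "duration_ns": last_chunk_time - first_chunk_time,
        }

The point of the refactor is visible in both branches: instead of building a QueryParam and routing through the RAG query path, /api/generate now calls the configured LLM function directly, toggling only the stream flag between the two branches.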