yangdx
committed on
Commit
·
f55e12d
1
Parent(s):
afa5198
增强聊天接口的调试和性能统计功能
Browse files- 添加原始请求日志记录
- 修改响应结构以包含性能统计
- 更新测试用例以展示性能数据
- 优化响应格式为字典结构
- 增加请求体解码功能
- lightrag/api/lightrag_ollama.py +22 -13
- test_lightrag_ollama_chat.py +20 -1
lightrag/api/lightrag_ollama.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException, File, UploadFile, Form
|
2 |
from pydantic import BaseModel
|
3 |
import logging
|
4 |
import argparse
|
@@ -673,7 +673,10 @@ def create_app(args):
|
|
673 |
return query, SearchMode.hybrid
|
674 |
|
675 |
@app.post("/api/chat")
|
676 |
-
async def chat(request: OllamaChatRequest):
|
|
|
|
|
|
|
677 |
"""Handle chat completion requests"""
|
678 |
try:
|
679 |
# 获取所有消息内容
|
@@ -776,17 +779,23 @@ def create_app(args):
|
|
776 |
if not response_text:
|
777 |
response_text = "No response generated"
|
778 |
|
779 |
-
#
|
780 |
-
return
|
781 |
-
model
|
782 |
-
created_at
|
783 |
-
message
|
784 |
-
role
|
785 |
-
content
|
786 |
-
images
|
787 |
-
|
788 |
-
done
|
789 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
790 |
except Exception as e:
|
791 |
raise HTTPException(status_code=500, detail=str(e))
|
792 |
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request
|
2 |
from pydantic import BaseModel
|
3 |
import logging
|
4 |
import argparse
|
|
|
673 |
return query, SearchMode.hybrid
|
674 |
|
675 |
@app.post("/api/chat")
|
676 |
+
async def chat(raw_request: Request, request: OllamaChatRequest):
|
677 |
+
# 打印原始请求数据
|
678 |
+
body = await raw_request.body()
|
679 |
+
logging.info(f"收到 /api/chat 原始请求: {body.decode('utf-8')}")
|
680 |
"""Handle chat completion requests"""
|
681 |
try:
|
682 |
# 获取所有消息内容
|
|
|
779 |
if not response_text:
|
780 |
response_text = "No response generated"
|
781 |
|
782 |
+
# 构造响应,包含性能统计信息
|
783 |
+
return {
|
784 |
+
"model": LIGHTRAG_MODEL,
|
785 |
+
"created_at": LIGHTRAG_CREATED_AT,
|
786 |
+
"message": {
|
787 |
+
"role": "assistant",
|
788 |
+
"content": str(response_text), # 确保转换为字符串
|
789 |
+
"images": None
|
790 |
+
},
|
791 |
+
"done": True,
|
792 |
+
"total_duration": 0, # 由于我们没有实际统计这些指标,暂时使用默认值
|
793 |
+
"load_duration": 0,
|
794 |
+
"prompt_eval_count": 0,
|
795 |
+
"prompt_eval_duration": 0,
|
796 |
+
"eval_count": 0,
|
797 |
+
"eval_duration": 0
|
798 |
+
}
|
799 |
except Exception as e:
|
800 |
raise HTTPException(status_code=500, detail=str(e))
|
801 |
|
test_lightrag_ollama_chat.py
CHANGED
@@ -23,7 +23,26 @@ def test_non_stream_chat():
|
|
23 |
|
24 |
# 打印响应
|
25 |
print("\n=== 非流式调用响应 ===")
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
def test_stream_chat():
|
29 |
"""测试流式调用 /api/chat 接口"""
|
|
|
23 |
|
24 |
# 打印响应
|
25 |
print("\n=== 非流式调用响应 ===")
|
26 |
+
response_json = response.json()
|
27 |
+
|
28 |
+
# 打印消息内容
|
29 |
+
print("=== 响应内容 ===")
|
30 |
+
print(json.dumps({
|
31 |
+
"model": response_json["model"],
|
32 |
+
"message": response_json["message"]
|
33 |
+
}, ensure_ascii=False, indent=2))
|
34 |
+
|
35 |
+
# 打印性能统计
|
36 |
+
print("\n=== 性能统计 ===")
|
37 |
+
stats = {
|
38 |
+
"total_duration": response_json["total_duration"],
|
39 |
+
"load_duration": response_json["load_duration"],
|
40 |
+
"prompt_eval_count": response_json["prompt_eval_count"],
|
41 |
+
"prompt_eval_duration": response_json["prompt_eval_duration"],
|
42 |
+
"eval_count": response_json["eval_count"],
|
43 |
+
"eval_duration": response_json["eval_duration"]
|
44 |
+
}
|
45 |
+
print(json.dumps(stats, ensure_ascii=False, indent=2))
|
46 |
|
47 |
def test_stream_chat():
|
48 |
"""测试流式调用 /api/chat 接口"""
|