yangdx committed
Commit · 031ec23
1 Parent(s): 7cba440

Translate comments to English

- lightrag/api/lightrag_ollama.py  +34 -47

lightrag/api/lightrag_ollama.py
CHANGED
@@ -541,13 +541,13 @@ def create_app(args):
 
         async def stream_generator():
             if isinstance(response, str):
-                #
+                # If it's a string, send it all at once
                 yield f"{json.dumps({'response': response})}\n"
             else:
-                #
+                # If it's an async generator, send chunks one by one
                 try:
                     async for chunk in response:
-                        if chunk:  #
+                        if chunk:  # Only send non-empty content
                             yield f"{json.dumps({'response': chunk})}\n"
                 except Exception as e:
                     logging.error(f"Streaming error: {str(e)}")
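
Note: the generator above emits newline-delimited JSON, one object per line. A minimal, self-contained sketch of the same pattern follows; fake_llm_stream and main are hypothetical stand-ins, not names from this file, and the try/except from the real code is omitted for brevity:

import asyncio
import json


async def fake_llm_stream():
    # Hypothetical stand-in for the rag streaming response.
    for token in ["Hello", ", ", "world"]:
        yield token
        await asyncio.sleep(0)


async def stream_generator(response):
    if isinstance(response, str):
        # A plain string goes out as a single NDJSON line.
        yield f"{json.dumps({'response': response})}\n"
    else:
        # An async generator is forwarded chunk by chunk.
        async for chunk in response:
            if chunk:  # only send non-empty content
                yield f"{json.dumps({'response': chunk})}\n"


async def main():
    async for line in stream_generator(fake_llm_stream()):
        print(line, end="")


asyncio.run(main())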
@@ -563,7 +563,7 @@ def create_app(args):
                     "Access-Control-Allow-Origin": "*",
                     "Access-Control-Allow-Methods": "POST, OPTIONS",
                     "Access-Control-Allow-Headers": "Content-Type",
-                    "X-Accel-Buffering": "no",  #
+                    "X-Accel-Buffering": "no",  # Disable Nginx buffering
                 },
             )
         except Exception as e:
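
Note: "X-Accel-Buffering: no" tells an Nginx reverse proxy not to buffer the response, so each NDJSON line reaches the client as soon as it is yielded. A hedged sketch of how such headers are typically attached to a FastAPI StreamingResponse (assumed wiring; the surrounding call site is not shown in this hunk):

from fastapi.responses import StreamingResponse


def make_ndjson_response(generator):
    # application/x-ndjson: one JSON object per line.
    return StreamingResponse(
        generator,
        media_type="application/x-ndjson",
        headers={
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Methods": "POST, OPTIONS",
            "Access-Control-Allow-Headers": "Content-Type",
            "X-Accel-Buffering": "no",  # disable Nginx proxy buffering
        },
    )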
@@ -760,7 +760,6 @@ def create_app(args):
                     last_chunk_time = first_chunk_time
                     total_response = response
 
-                    # 第一次发送查询内容
                     data = {
                         "model": LIGHTRAG_MODEL,
                         "created_at": LIGHTRAG_CREATED_AT,
@@ -773,41 +772,35 @@ def create_app(args):
                     }
                     yield f"{json.dumps(data, ensure_ascii=False)}\n"
 
-                    # 计算各项指标
                     completion_tokens = estimate_tokens(total_response)
-                    total_time = last_chunk_time - start_time
+                    total_time = last_chunk_time - start_time
                     prompt_eval_time = (
                         first_chunk_time - start_time
-                    )
+                    )
                     eval_time = (
                         last_chunk_time - first_chunk_time
-                    )
+                    )
 
-                    # 第二次发送统计信息
                     data = {
                         "model": LIGHTRAG_MODEL,
                         "created_at": LIGHTRAG_CREATED_AT,
                         "done": True,
-                        "total_duration": total_time,
-                        "load_duration": 0,
-                        "prompt_eval_count": prompt_tokens,
-                        "prompt_eval_duration": prompt_eval_time,
-                        "eval_count": completion_tokens,
-                        "eval_duration": eval_time,
+                        "total_duration": total_time,
+                        "load_duration": 0,
+                        "prompt_eval_count": prompt_tokens,
+                        "prompt_eval_duration": prompt_eval_time,
+                        "eval_count": completion_tokens,
+                        "eval_duration": eval_time,
                     }
                     yield f"{json.dumps(data, ensure_ascii=False)}\n"
                 else:
-                    # 流式响应
                     async for chunk in response:
-                        if chunk:
-                            # 记录第一个chunk的时间
+                        if chunk:
                             if first_chunk_time is None:
                                 first_chunk_time = time.time_ns()
 
-                            # 更新最后一个chunk的时间
                             last_chunk_time = time.time_ns()
 
-                            # 累积响应内容
                             total_response += chunk
                             data = {
                                 "model": LIGHTRAG_MODEL,
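
Note: all durations here are in nanoseconds, matching Ollama's response format, because time.time_ns() supplies the timestamps. A small self-contained illustration of the same arithmetic; estimate_tokens is stubbed here, since the real estimator is defined elsewhere in lightrag_ollama.py:

import time


def estimate_tokens(text: str) -> int:
    # Crude stand-in; the project defines its own estimator.
    return max(1, len(text) // 4)


start_time = time.time_ns()
first_chunk_time = start_time + 120_000_000  # pretend: first chunk after 120 ms
last_chunk_time = start_time + 900_000_000   # pretend: stream ends after 900 ms

total_time = last_chunk_time - start_time           # total duration (ns)
prompt_eval_time = first_chunk_time - start_time    # time to first chunk (ns)
eval_time = last_chunk_time - first_chunk_time      # generation time (ns)

completion_tokens = estimate_tokens("some generated text")
print(total_time, prompt_eval_time, eval_time, completion_tokens)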
@@ -821,30 +814,28 @@ def create_app(args):
                             }
                             yield f"{json.dumps(data, ensure_ascii=False)}\n"
 
-                    # 计算各项指标
                     completion_tokens = estimate_tokens(total_response)
-                    total_time = last_chunk_time - start_time
+                    total_time = last_chunk_time - start_time
                     prompt_eval_time = (
                         first_chunk_time - start_time
-                    )
+                    )
                     eval_time = (
                         last_chunk_time - first_chunk_time
-                    )
+                    )
 
-                    # 发送完成标记,包含性能统计信息
                     data = {
                         "model": LIGHTRAG_MODEL,
                         "created_at": LIGHTRAG_CREATED_AT,
                         "done": True,
-                        "total_duration": total_time,
-                        "load_duration": 0,
-                        "prompt_eval_count": prompt_tokens,
-                        "prompt_eval_duration": prompt_eval_time,
-                        "eval_count": completion_tokens,
-                        "eval_duration": eval_time,
+                        "total_duration": total_time,
+                        "load_duration": 0,
+                        "prompt_eval_count": prompt_tokens,
+                        "prompt_eval_duration": prompt_eval_time,
+                        "eval_count": completion_tokens,
+                        "eval_duration": eval_time,
                     }
                     yield f"{json.dumps(data, ensure_ascii=False)}\n"
-                    return  #
+                    return  # Ensure the generator ends immediately after sending the completion marker
             except Exception as e:
                 logging.error(f"Error in stream_generator: {str(e)}")
                 raise
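
Note: the last NDJSON line is an Ollama-style completion marker with "done": true, and the return right after it ends the generator so no further lines follow. An illustrative final line (all values are made up; LIGHTRAG_MODEL and LIGHTRAG_CREATED_AT are constants defined elsewhere in the file):

import json

final_marker = {
    "model": "lightrag:latest",            # assumed value of LIGHTRAG_MODEL
    "created_at": "2024-01-01T00:00:00Z",  # assumed value of LIGHTRAG_CREATED_AT
    "done": True,
    "total_duration": 900_000_000,
    "load_duration": 0,
    "prompt_eval_count": 12,
    "prompt_eval_duration": 120_000_000,
    "eval_count": 42,
    "eval_duration": 780_000_000,
}
print(json.dumps(final_marker, ensure_ascii=False))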
@@ -862,37 +853,33 @@ def create_app(args):
                 },
             )
         else:
-            # 非流式响应
             first_chunk_time = time.time_ns()
             response_text = await rag.aquery(cleaned_query, param=query_param)
             last_chunk_time = time.time_ns()
 
-            # 确保响应不为空
             if not response_text:
                 response_text = "No response generated"
 
-            # 计算各项指标
             completion_tokens = estimate_tokens(str(response_text))
-            total_time = last_chunk_time - start_time
-            prompt_eval_time = first_chunk_time - start_time
-            eval_time = last_chunk_time - first_chunk_time
+            total_time = last_chunk_time - start_time
+            prompt_eval_time = first_chunk_time - start_time
+            eval_time = last_chunk_time - first_chunk_time
 
-            # 构造响应,包含性能统计信息
             return {
                 "model": LIGHTRAG_MODEL,
                 "created_at": LIGHTRAG_CREATED_AT,
                 "message": {
                     "role": "assistant",
-                    "content": str(response_text),
+                    "content": str(response_text),
                     "images": None,
                 },
                 "done": True,
-                "total_duration": total_time,
-                "load_duration": 0,
-                "prompt_eval_count": prompt_tokens,
-                "prompt_eval_duration": prompt_eval_time,
-                "eval_count": completion_tokens,
-                "eval_duration": eval_time,
+                "total_duration": total_time,
+                "load_duration": 0,
+                "prompt_eval_count": prompt_tokens,
+                "prompt_eval_duration": prompt_eval_time,
+                "eval_count": completion_tokens,
+                "eval_duration": eval_time,
             }
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
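
Note: for completeness, a client consuming this Ollama-compatible endpoint would read the stream line by line. A sketch using httpx; the URL, port, model name, and chunk shape are assumptions for illustration, not taken from this diff:

import json
import httpx

# Hypothetical endpoint; adjust host/port/path to your deployment.
with httpx.stream(
    "POST",
    "http://localhost:9621/api/chat",
    json={
        "model": "lightrag:latest",
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": True,
    },
    timeout=None,
) as resp:
    for line in resp.iter_lines():
        if not line:
            continue
        data = json.loads(line)
        if data.get("done"):
            print("\ntotal_duration (ns):", data.get("total_duration"))
        else:
            # Assumed chunk shape: {"message": {"content": ...}}
            print(data.get("message", {}).get("content", ""), end="", flush=True)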