yangdx committed
Commit 031ec23 · 1 Parent(s): 7cba440

Translate comments to English

Files changed (1)
  1. lightrag/api/lightrag_ollama.py  +34 -47

lightrag/api/lightrag_ollama.py CHANGED

@@ -541,13 +541,13 @@ def create_app(args):
 
         async def stream_generator():
             if isinstance(response, str):
-                # 如果是字符串,一次性发送
+                # If it's a string, send it all at once
                 yield f"{json.dumps({'response': response})}\n"
             else:
-                # 如果是异步生成器,逐块发送
+                # If it's an async generator, send chunks one by one
                 try:
                     async for chunk in response:
-                        if chunk:  # 只发送非空内容
+                        if chunk:  # Only send non-empty content
                             yield f"{json.dumps({'response': chunk})}\n"
                 except Exception as e:
                     logging.error(f"Streaming error: {str(e)}")
@@ -563,7 +563,7 @@ def create_app(args):
                     "Access-Control-Allow-Origin": "*",
                     "Access-Control-Allow-Methods": "POST, OPTIONS",
                     "Access-Control-Allow-Headers": "Content-Type",
-                    "X-Accel-Buffering": "no",  # 禁用 Nginx 缓冲
+                    "X-Accel-Buffering": "no",  # Disable Nginx buffering
                 },
             )
         except Exception as e:
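
`X-Accel-Buffering: no` matters here because Nginx buffers proxied responses by default, which would hold back the chunks this endpoint tries to flush one by one. A hedged sketch of how such headers attach to a streaming endpoint, assuming FastAPI's `StreamingResponse` (this file already raises FastAPI's `HTTPException`); the `/stream` path is illustrative, not the real route:

```python
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


@app.post("/stream")  # illustrative path, not necessarily the one in this file
async def stream():
    async def gen():
        yield '{"response": "hi"}\n'

    return StreamingResponse(
        gen(),
        media_type="application/x-ndjson",  # assumed content type for NDJSON
        headers={
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Methods": "POST, OPTIONS",
            "Access-Control-Allow-Headers": "Content-Type",
            "X-Accel-Buffering": "no",  # opt this response out of Nginx buffering
        },
    )
```
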
@@ -760,7 +760,6 @@ def create_app(args):
                         last_chunk_time = first_chunk_time
                         total_response = response
 
-                        # 第一次发送查询内容
                         data = {
                             "model": LIGHTRAG_MODEL,
                             "created_at": LIGHTRAG_CREATED_AT,
@@ -773,41 +772,35 @@ def create_app(args):
                         }
                         yield f"{json.dumps(data, ensure_ascii=False)}\n"
 
-                        # 计算各项指标
                         completion_tokens = estimate_tokens(total_response)
-                        total_time = last_chunk_time - start_time  # 总时间
+                        total_time = last_chunk_time - start_time
                         prompt_eval_time = (
                             first_chunk_time - start_time
-                        )  # 首个响应之前的时间
+                        )
                         eval_time = (
                             last_chunk_time - first_chunk_time
-                        )  # 生成响应的时间
+                        )
 
-                        # 第二次发送统计信息
                         data = {
                             "model": LIGHTRAG_MODEL,
                             "created_at": LIGHTRAG_CREATED_AT,
                             "done": True,
-                            "total_duration": total_time,  # 总时间
-                            "load_duration": 0,  # 加载时间为0
-                            "prompt_eval_count": prompt_tokens,  # 输入token数
-                            "prompt_eval_duration": prompt_eval_time,  # 首个响应之前的时间
-                            "eval_count": completion_tokens,  # 输出token数
-                            "eval_duration": eval_time,  # 生成响应的时间
+                            "total_duration": total_time,
+                            "load_duration": 0,
+                            "prompt_eval_count": prompt_tokens,
+                            "prompt_eval_duration": prompt_eval_time,
+                            "eval_count": completion_tokens,
+                            "eval_duration": eval_time,
                         }
                         yield f"{json.dumps(data, ensure_ascii=False)}\n"
                     else:
-                        # 流式响应
                         async for chunk in response:
-                            if chunk:  # 只发送非空内容
-                                # 记录第一个chunk的时间
+                            if chunk:
                                 if first_chunk_time is None:
                                     first_chunk_time = time.time_ns()
 
-                                # 更新最后一个chunk的时间
                                 last_chunk_time = time.time_ns()
 
-                                # 累积响应内容
                                 total_response += chunk
                                 data = {
                                     "model": LIGHTRAG_MODEL,
@@ -821,30 +814,28 @@ def create_app(args):
                                 }
                                 yield f"{json.dumps(data, ensure_ascii=False)}\n"
 
-                        # 计算各项指标
                         completion_tokens = estimate_tokens(total_response)
-                        total_time = last_chunk_time - start_time  # 总时间
+                        total_time = last_chunk_time - start_time
                         prompt_eval_time = (
                             first_chunk_time - start_time
-                        )  # 首个响应之前的时间
+                        )
                         eval_time = (
                             last_chunk_time - first_chunk_time
-                        )  # 生成响应的时间
+                        )
 
-                        # 发送完成标记,包含性能统计信息
                         data = {
                             "model": LIGHTRAG_MODEL,
                             "created_at": LIGHTRAG_CREATED_AT,
                             "done": True,
-                            "total_duration": total_time,  # 总时间
-                            "load_duration": 0,  # 加载时间为0
-                            "prompt_eval_count": prompt_tokens,  # 输入token数
-                            "prompt_eval_duration": prompt_eval_time,  # 首个响应之前的时间
-                            "eval_count": completion_tokens,  # 输出token数
-                            "eval_duration": eval_time,  # 生成响应的时间
+                            "total_duration": total_time,
+                            "load_duration": 0,
+                            "prompt_eval_count": prompt_tokens,
+                            "prompt_eval_duration": prompt_eval_time,
+                            "eval_count": completion_tokens,
+                            "eval_duration": eval_time,
                         }
                         yield f"{json.dumps(data, ensure_ascii=False)}\n"
-                        return  # 确保生成器在发送完成标记后立即结束
+                        return  # Ensure the generator ends immediately after sending the completion marker
                 except Exception as e:
                     logging.error(f"Error in stream_generator: {str(e)}")
                     raise
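
The trailing `return` guarantees exactly one closing record with `"done": True` after the statistics, so a client can safely read until that marker. A hedged consumer sketch using `httpx`; the URL, port, and request payload are assumptions, not taken from this file:

```python
import asyncio
import json

import httpx


async def consume(url: str, payload: dict) -> None:
    # Read NDJSON lines until the single closing record with done=True,
    # which carries the nanosecond timing statistics.
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=payload) as resp:
            async for line in resp.aiter_lines():
                if not line.strip():
                    continue
                record = json.loads(line)
                if record.get("done"):
                    print("\nstats:", record)
                    break
                print(record)


# Hypothetical invocation (endpoint path and payload are assumptions):
# asyncio.run(consume(
#     "http://localhost:9621/api/chat",
#     {"model": "lightrag:latest", "stream": True,
#      "messages": [{"role": "user", "content": "hi"}]},
# ))
```
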
@@ -862,37 +853,33 @@ def create_app(args):
                     },
                 )
             else:
-                # 非流式响应
                 first_chunk_time = time.time_ns()
                 response_text = await rag.aquery(cleaned_query, param=query_param)
                 last_chunk_time = time.time_ns()
 
-                # 确保响应不为空
                 if not response_text:
                     response_text = "No response generated"
 
-                # 计算各项指标
                 completion_tokens = estimate_tokens(str(response_text))
-                total_time = last_chunk_time - start_time  # 总时间
-                prompt_eval_time = first_chunk_time - start_time  # 首个响应之前的时间
-                eval_time = last_chunk_time - first_chunk_time  # 生成响应的时间
+                total_time = last_chunk_time - start_time
+                prompt_eval_time = first_chunk_time - start_time
+                eval_time = last_chunk_time - first_chunk_time
 
-                # 构造响应,包含性能统计信息
                 return {
                     "model": LIGHTRAG_MODEL,
                     "created_at": LIGHTRAG_CREATED_AT,
                     "message": {
                         "role": "assistant",
-                        "content": str(response_text),  # 确保转换为字符串
+                        "content": str(response_text),
                        "images": None,
                     },
                     "done": True,
-                    "total_duration": total_time,  # 总时间
-                    "load_duration": 0,  # 加载时间为0
-                    "prompt_eval_count": prompt_tokens,  # 输入token数
-                    "prompt_eval_duration": prompt_eval_time,  # 首个响应之前的时间
-                    "eval_count": completion_tokens,  # 输出token数
-                    "eval_duration": eval_time,  # 生成响应的时间
+                    "total_duration": total_time,
+                    "load_duration": 0,
+                    "prompt_eval_count": prompt_tokens,
+                    "prompt_eval_duration": prompt_eval_time,
+                    "eval_count": completion_tokens,
+                    "eval_duration": eval_time,
                 }
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
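
The deleted comments here read "non-streaming response", "make sure the response is not empty", "compute the metrics", "build the response, including performance statistics", and "make sure it is converted to a string". Since every `*_duration` field is a nanosecond count, consumers usually rescale before display; a small hedged helper (the field names come from the hunk above, the function itself is illustrative):

```python
NS_PER_S = 1_000_000_000


def summarize(resp: dict) -> str:
    # Rescale the nanosecond durations from the response dict above to seconds.
    total_s = resp["total_duration"] / NS_PER_S
    prompt_s = resp["prompt_eval_duration"] / NS_PER_S
    eval_s = resp["eval_duration"] / NS_PER_S
    tok_per_s = resp["eval_count"] / eval_s if eval_s else 0.0
    return (
        f"total {total_s:.2f}s | first chunk after {prompt_s:.2f}s | "
        f"generation {eval_s:.2f}s (~{tok_per_s:.1f} tok/s, estimated)"
    )
```
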
 