yangdx committed on
Commit
b59560d
·
1 Parent(s): 6200fba

fix: improve timing accuracy and variable scoping in OllamaAPI

Browse files
Files changed (1) hide show
  1. lightrag/api/ollama_api.py +12 -12
lightrag/api/ollama_api.py CHANGED
@@ -203,15 +203,15 @@ class OllamaAPI:
203
  )
204
 
205
  async def stream_generator():
206
- first_chunk_time = None
207
- last_chunk_time = time.time_ns()
208
- total_response = ""
209
-
210
  try:
 
 
 
 
211
  # Ensure response is an async generator
212
  if isinstance(response, str):
213
  # If it's a string, send in two parts
214
- first_chunk_time = last_chunk_time
215
  last_chunk_time = time.time_ns()
216
  total_response = response
217
 
@@ -284,7 +284,7 @@ class OllamaAPI:
284
  yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
285
  return
286
  if first_chunk_time is None:
287
- first_chunk_time = last_chunk_time
288
  completion_tokens = estimate_tokens(total_response)
289
  total_time = last_chunk_time - start_time
290
  prompt_eval_time = first_chunk_time - start_time
@@ -409,15 +409,15 @@ class OllamaAPI:
409
  )
410
 
411
  async def stream_generator():
412
- first_chunk_time = None
413
- last_chunk_time = time.time_ns()
414
- total_response = ""
415
-
416
  try:
 
 
 
 
417
  # Ensure response is an async generator
418
  if isinstance(response, str):
419
  # If it's a string, send in two parts
420
- first_chunk_time = last_chunk_time
421
  last_chunk_time = time.time_ns()
422
  total_response = response
423
 
@@ -503,7 +503,7 @@ class OllamaAPI:
503
  return
504
 
505
  if first_chunk_time is None:
506
- first_chunk_time = last_chunk_time
507
  completion_tokens = estimate_tokens(total_response)
508
  total_time = last_chunk_time - start_time
509
  prompt_eval_time = first_chunk_time - start_time
 
203
  )
204
 
205
  async def stream_generator():
 
 
 
 
206
  try:
207
+ first_chunk_time = None
208
+ last_chunk_time = time.time_ns()
209
+ total_response = ""
210
+
211
  # Ensure response is an async generator
212
  if isinstance(response, str):
213
  # If it's a string, send in two parts
214
+ first_chunk_time = start_time
215
  last_chunk_time = time.time_ns()
216
  total_response = response
217
 
 
284
  yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
285
  return
286
  if first_chunk_time is None:
287
+ first_chunk_time = start_time
288
  completion_tokens = estimate_tokens(total_response)
289
  total_time = last_chunk_time - start_time
290
  prompt_eval_time = first_chunk_time - start_time
 
409
  )
410
 
411
  async def stream_generator():
 
 
 
 
412
  try:
413
+ first_chunk_time = None
414
+ last_chunk_time = time.time_ns()
415
+ total_response = ""
416
+
417
  # Ensure response is an async generator
418
  if isinstance(response, str):
419
  # If it's a string, send in two parts
420
+ first_chunk_time = start_time
421
  last_chunk_time = time.time_ns()
422
  total_response = response
423
 
 
503
  return
504
 
505
  if first_chunk_time is None:
506
+ first_chunk_time = start_time
507
  completion_tokens = estimate_tokens(total_response)
508
  total_time = last_chunk_time - start_time
509
  prompt_eval_time = first_chunk_time - start_time