yangdx
committed on
Commit
·
b59560d
1
Parent(s):
6200fba
fix: improve timing accuracy and variable scoping in OllamaAPI
Browse files — lightrag/api/ollama_api.py +12 -12
lightrag/api/ollama_api.py
CHANGED
@@ -203,15 +203,15 @@ class OllamaAPI:
|
|
203 |
)
|
204 |
|
205 |
async def stream_generator():
|
206 |
-
first_chunk_time = None
|
207 |
-
last_chunk_time = time.time_ns()
|
208 |
-
total_response = ""
|
209 |
-
|
210 |
try:
|
|
|
|
|
|
|
|
|
211 |
# Ensure response is an async generator
|
212 |
if isinstance(response, str):
|
213 |
# If it's a string, send in two parts
|
214 |
-
first_chunk_time =
|
215 |
last_chunk_time = time.time_ns()
|
216 |
total_response = response
|
217 |
|
@@ -284,7 +284,7 @@ class OllamaAPI:
|
|
284 |
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
|
285 |
return
|
286 |
if first_chunk_time is None:
|
287 |
-
first_chunk_time =
|
288 |
completion_tokens = estimate_tokens(total_response)
|
289 |
total_time = last_chunk_time - start_time
|
290 |
prompt_eval_time = first_chunk_time - start_time
|
@@ -409,15 +409,15 @@ class OllamaAPI:
|
|
409 |
)
|
410 |
|
411 |
async def stream_generator():
|
412 |
-
first_chunk_time = None
|
413 |
-
last_chunk_time = time.time_ns()
|
414 |
-
total_response = ""
|
415 |
-
|
416 |
try:
|
|
|
|
|
|
|
|
|
417 |
# Ensure response is an async generator
|
418 |
if isinstance(response, str):
|
419 |
# If it's a string, send in two parts
|
420 |
-
first_chunk_time =
|
421 |
last_chunk_time = time.time_ns()
|
422 |
total_response = response
|
423 |
|
@@ -503,7 +503,7 @@ class OllamaAPI:
|
|
503 |
return
|
504 |
|
505 |
if first_chunk_time is None:
|
506 |
-
first_chunk_time =
|
507 |
completion_tokens = estimate_tokens(total_response)
|
508 |
total_time = last_chunk_time - start_time
|
509 |
prompt_eval_time = first_chunk_time - start_time
|
|
|
203 |
)
|
204 |
|
205 |
async def stream_generator():
|
|
|
|
|
|
|
|
|
206 |
try:
|
207 |
+
first_chunk_time = None
|
208 |
+
last_chunk_time = time.time_ns()
|
209 |
+
total_response = ""
|
210 |
+
|
211 |
# Ensure response is an async generator
|
212 |
if isinstance(response, str):
|
213 |
# If it's a string, send in two parts
|
214 |
+
first_chunk_time = start_time
|
215 |
last_chunk_time = time.time_ns()
|
216 |
total_response = response
|
217 |
|
|
|
284 |
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
|
285 |
return
|
286 |
if first_chunk_time is None:
|
287 |
+
first_chunk_time = start_time
|
288 |
completion_tokens = estimate_tokens(total_response)
|
289 |
total_time = last_chunk_time - start_time
|
290 |
prompt_eval_time = first_chunk_time - start_time
|
|
|
409 |
)
|
410 |
|
411 |
async def stream_generator():
|
|
|
|
|
|
|
|
|
412 |
try:
|
413 |
+
first_chunk_time = None
|
414 |
+
last_chunk_time = time.time_ns()
|
415 |
+
total_response = ""
|
416 |
+
|
417 |
# Ensure response is an async generator
|
418 |
if isinstance(response, str):
|
419 |
# If it's a string, send in two parts
|
420 |
+
first_chunk_time = start_time
|
421 |
last_chunk_time = time.time_ns()
|
422 |
total_response = response
|
423 |
|
|
|
503 |
return
|
504 |
|
505 |
if first_chunk_time is None:
|
506 |
+
first_chunk_time = start_time
|
507 |
completion_tokens = estimate_tokens(total_response)
|
508 |
total_time = last_chunk_time - start_time
|
509 |
prompt_eval_time = first_chunk_time - start_time
|