yangdx
committed on
Commit
·
b59560d
1
Parent(s):
6200fba
fix: improve timing accuracy and variable scoping in OllamaAPI
Browse files — lightrag/api/ollama_api.py +12 -12
lightrag/api/ollama_api.py
CHANGED
@@ -203,15 +203,15 @@ class OllamaAPI:
|
|
203 |
)
|
204 |
|
205 |
async def stream_generator():
|
206 |
-
first_chunk_time = None
|
207 |
-
last_chunk_time = time.time_ns()
|
208 |
-
total_response = ""
|
209 |
-
|
210 |
try:
|
|
|
|
|
|
|
|
|
211 |
# Ensure response is an async generator
|
212 |
if isinstance(response, str):
|
213 |
# If it's a string, send in two parts
|
214 |
-
first_chunk_time =
|
215 |
last_chunk_time = time.time_ns()
|
216 |
total_response = response
|
217 |
|
@@ -284,7 +284,7 @@ class OllamaAPI:
|
|
284 |
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
|
285 |
return
|
286 |
if first_chunk_time is None:
|
287 |
-
first_chunk_time =
|
288 |
completion_tokens = estimate_tokens(total_response)
|
289 |
total_time = last_chunk_time - start_time
|
290 |
prompt_eval_time = first_chunk_time - start_time
|
@@ -409,15 +409,15 @@ class OllamaAPI:
|
|
409 |
)
|
410 |
|
411 |
async def stream_generator():
|
412 |
-
first_chunk_time = None
|
413 |
-
last_chunk_time = time.time_ns()
|
414 |
-
total_response = ""
|
415 |
-
|
416 |
try:
|
|
|
|
|
|
|
|
|
417 |
# Ensure response is an async generator
|
418 |
if isinstance(response, str):
|
419 |
# If it's a string, send in two parts
|
420 |
-
first_chunk_time =
|
421 |
last_chunk_time = time.time_ns()
|
422 |
total_response = response
|
423 |
|
@@ -503,7 +503,7 @@ class OllamaAPI:
|
|
503 |
return
|
504 |
|
505 |
if first_chunk_time is None:
|
506 |
-
first_chunk_time =
|
507 |
completion_tokens = estimate_tokens(total_response)
|
508 |
total_time = last_chunk_time - start_time
|
509 |
prompt_eval_time = first_chunk_time - start_time
|
|
|
203 |
)
|
204 |
|
205 |
async def stream_generator():
|
|
|
|
|
|
|
|
|
206 |
try:
|
207 |
+
first_chunk_time = None
|
208 |
+
last_chunk_time = time.time_ns()
|
209 |
+
total_response = ""
|
210 |
+
|
211 |
# Ensure response is an async generator
|
212 |
if isinstance(response, str):
|
213 |
# If it's a string, send in two parts
|
214 |
+
first_chunk_time = start_time
|
215 |
last_chunk_time = time.time_ns()
|
216 |
total_response = response
|
217 |
|
|
|
284 |
yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
|
285 |
return
|
286 |
if first_chunk_time is None:
|
287 |
+
first_chunk_time = start_time
|
288 |
completion_tokens = estimate_tokens(total_response)
|
289 |
total_time = last_chunk_time - start_time
|
290 |
prompt_eval_time = first_chunk_time - start_time
|
|
|
409 |
)
|
410 |
|
411 |
async def stream_generator():
|
|
|
|
|
|
|
|
|
412 |
try:
|
413 |
+
first_chunk_time = None
|
414 |
+
last_chunk_time = time.time_ns()
|
415 |
+
total_response = ""
|
416 |
+
|
417 |
# Ensure response is an async generator
|
418 |
if isinstance(response, str):
|
419 |
# If it's a string, send in two parts
|
420 |
+
first_chunk_time = start_time
|
421 |
last_chunk_time = time.time_ns()
|
422 |
total_response = response
|
423 |
|
|
|
503 |
return
|
504 |
|
505 |
if first_chunk_time is None:
|
506 |
+
first_chunk_time = start_time
|
507 |
completion_tokens = estimate_tokens(total_response)
|
508 |
total_time = last_chunk_time - start_time
|
509 |
prompt_eval_time = first_chunk_time - start_time
|