Spaces:

AxL95
/

medically

Runtime error

AxL95 committed on May 4

Commit

ab6a52d

verified ·

1 Parent(s): 032d01b

Update chat.py

Files changed (1) hide show

chat.py CHANGED Viewed

@@ -7,7 +7,7 @@ from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 import re
 import json
 from auth import get_current_user
 from database import get_db
 from config import HF_TOKEN, MAX_TOKENS, EMBEDDING_MODEL
@@ -343,16 +343,12 @@ async def chat(request: Request):
                 if chunk.choices and chunk.choices[0].delta.content:
                     content = chunk.choices[0].delta.content
                     collected_response += content
-                    chunk_buffer += content
-                    chunk_count += 1
-                    if chunk_count >= MAX_CHUNKS_BEFORE_SEND or '\n' in content:
-                        yield f"data: {json.dumps({'content': chunk_buffer})}\n\n"
-                        chunk_buffer = ""
-                        chunk_count = 0
-            if chunk_buffer:
-                yield f"data: {json.dumps({'content': chunk_buffer})}\n\n"
             if collected_response.endswith((".", "!", "?")) == False and len(collected_response) > 500:
                 suffix = "\n\n(Note: Ma réponse a été limitée par des contraintes de taille. N'hésitez pas à me demander de poursuivre si vous souhaitez plus d'informations.)"
@@ -401,5 +397,9 @@ async def chat(request: Request):
     return StreamingResponse(
         generate_stream(),
-        media_type="text/event-stream"
-    )

 from sklearn.metrics.pairwise import cosine_similarity
 import re
 import json
+import asyncio
 from auth import get_current_user
 from database import get_db
 from config import HF_TOKEN, MAX_TOKENS, EMBEDDING_MODEL
                 if chunk.choices and chunk.choices[0].delta.content:
                     content = chunk.choices[0].delta.content
                     collected_response += content
+                    # Envoyer chaque token individuellement sans buffering
+                    yield f"data: {json.dumps({'content': content})}\n\n"
+                    # Petit sleep pour éviter le buffering par le serveur ASGI
+                    await asyncio.sleep(0)
             if collected_response.endswith((".", "!", "?")) == False and len(collected_response) > 500:
                 suffix = "\n\n(Note: Ma réponse a été limitée par des contraintes de taille. N'hésitez pas à me demander de poursuivre si vous souhaitez plus d'informations.)"
     return StreamingResponse(
         generate_stream(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache, no-transform",
+            "X-Accel-Buffering": "no"  # Important pour Nginx
+        }
+)