Update chat.py
chat.py
CHANGED
@@ -332,24 +332,31 @@ async def chat(request: Request):
             completion_stream = hf_client.chat.completions.create(
                 model="mistralai/Mistral-7B-Instruct-v0.3",
                 messages=messages,
-                max_tokens=
+                max_tokens=900,
                 temperature=0.7,
                 stream=True
             )
             chunk_buffer = ""
             chunk_count = 0
-            MAX_CHUNKS_BEFORE_SEND =
+            MAX_CHUNKS_BEFORE_SEND = 1
             for chunk in completion_stream:
                 if chunk.choices and chunk.choices[0].delta.content:
                     content = chunk.choices[0].delta.content
+                    print(f"Token received: {content}")
                     collected_response += content
+                    chunk_buffer += content
+                    chunk_count += 1

-                    # Envoyer
-
-
-
-
-
+                    # Envoyer plus fréquemment
+                    if chunk_count >= MAX_CHUNKS_BEFORE_SEND or '\n' in content:
+                        print(f"Sending chunk: {chunk_buffer}")  # Debug
+                        yield f"data: {json.dumps({'content': chunk_buffer})}\n\n"
+                        chunk_buffer = ""
+                        chunk_count = 0
+
+            if chunk_buffer:
+                yield f"data: {json.dumps({'content': chunk_buffer})}\n\n"
+
             if collected_response.endswith((".", "!", "?")) == False and len(collected_response) > 500:
                 suffix = "\n\n(Note: Ma réponse a été limitée par des contraintes de taille. N'hésitez pas à me demander de poursuivre si vous souhaitez plus d'informations.)"
                 collected_response += suffix
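The hunk above batches streamed delta tokens into SSE data: events: with MAX_CHUNKS_BEFORE_SEND = 1 every token is flushed immediately, a flush is also forced whenever the token contains a newline, and a final flush after the loop sends whatever is still buffered. Below is a minimal standalone sketch of that buffering pattern, with a plain list of strings standing in for the completion stream; the function name sse_events and the sample tokens are illustrative only, not part of chat.py.

    import json

    def sse_events(tokens, max_chunks_before_send=1):
        # Batch incoming tokens and emit SSE "data:" events, mirroring the
        # buffering added in the hunk above.
        chunk_buffer = ""
        chunk_count = 0
        for content in tokens:
            chunk_buffer += content
            chunk_count += 1
            # Flush once the batch size is reached or a newline arrives.
            if chunk_count >= max_chunks_before_send or "\n" in content:
                yield f"data: {json.dumps({'content': chunk_buffer})}\n\n"
                chunk_buffer = ""
                chunk_count = 0
        # Flush anything still buffered once the stream ends.
        if chunk_buffer:
            yield f"data: {json.dumps({'content': chunk_buffer})}\n\n"

    if __name__ == "__main__":
        # Stand-in tokens; the real code reads chunk.choices[0].delta.content.
        for event in sse_events(["Bon", "jour", " !", "\n", "Ça ", "va ?"]):
            print(event, end="")
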
@@ -374,7 +381,7 @@ async def chat(request: Request):
             fallback = hf_client.text_generation(
                 model="mistralai/Mistral-7B-Instruct-v0.3",
                 prompt=f"<s>[INST] {system_prompt}\n\nQuestion: {user_message} [/INST]",
-
+                max_tokens=512,
                 temperature=0.7
             )
             yield f"data: {json.dumps({'content': fallback})}\n\n"
@@ -397,9 +404,5 @@ async def chat(request: Request):

     return StreamingResponse(
         generate_stream(),
-        media_type="text/event-stream"
-        headers={
-            "Cache-Control": "no-cache, no-transform",
-            "X-Accel-Buffering": "no"  # Important pour Nginx
-        }
+        media_type="text/event-stream"
     )
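The endpoint above streams these events as a text/event-stream StreamingResponse, so each flushed buffer reaches the client as one data: line followed by a blank line. The sketch below is one hedged way to exercise the stream end to end; the URL, the /chat route and the JSON body with a message field are assumptions, since the diff does not show how the route is declared or what payload the handler reads.

    import json
    import requests

    # Assumed base URL, route and request body; adjust to how the FastAPI app
    # is actually mounted and what the handler expects.
    with requests.post(
        "http://localhost:8000/chat",
        json={"message": "Bonjour"},
        stream=True,
    ) as resp:
        for line in resp.iter_lines(decode_unicode=True):
            # Events produced above look like: data: {"content": "..."}
            if line and line.startswith("data: "):
                payload = json.loads(line[len("data: "):])
                print(payload["content"], end="", flush=True)
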