Spaces:
Sleeping
Sleeping
Fix state and Add concurrency
Browse files
State: Fixed an issue where conversation history would persist across users and new chats
Concurrency: Added concurrency support using async
app.py
CHANGED
|
@@ -4,8 +4,6 @@ import os
|
|
| 4 |
import cohereAPI
|
| 5 |
|
| 6 |
|
| 7 |
-
# Conversation history storage
|
| 8 |
-
conversation_history = []
|
| 9 |
|
| 10 |
# Model configurations
|
| 11 |
COHERE_MODELS = [
|
|
@@ -33,9 +31,30 @@ def show_model_change_info(model_name):
|
|
| 33 |
return model_name
|
| 34 |
|
| 35 |
|
| 36 |
-
def respond(message, history, model_name="command-a-03-2025"):
|
| 37 |
"""Generate streaming response using Cohere API"""
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# Get API key from environment
|
| 41 |
api_key = os.getenv('COHERE_API_KEY')
|
|
@@ -49,9 +68,9 @@ def respond(message, history, model_name="command-a-03-2025"):
|
|
| 49 |
Do not repeat the user's question in your response. Do not exceed 50 words."""
|
| 50 |
|
| 51 |
try:
|
| 52 |
-
# Use streaming function
|
| 53 |
partial_message = ""
|
| 54 |
-
for chunk in cohereAPI.
|
| 55 |
system_message=system_message,
|
| 56 |
user_message=message,
|
| 57 |
conversation_history=conversation_history,
|
|
|
|
| 4 |
import cohereAPI
|
| 5 |
|
| 6 |
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Model configurations
|
| 9 |
COHERE_MODELS = [
|
|
|
|
| 31 |
return model_name
|
| 32 |
|
| 33 |
|
| 34 |
+
async def respond(message, history, model_name="command-a-03-2025"):
|
| 35 |
"""Generate streaming response using Cohere API"""
|
| 36 |
+
|
| 37 |
+
# Convert Gradio history format to API format
|
| 38 |
+
conversation_history = []
|
| 39 |
+
if history:
|
| 40 |
+
for entry in history:
|
| 41 |
+
if isinstance(entry, dict):
|
| 42 |
+
# Clean dict format - only keep role and content
|
| 43 |
+
if "role" in entry and "content" in entry:
|
| 44 |
+
conversation_history.append({
|
| 45 |
+
"role": entry["role"],
|
| 46 |
+
"content": entry["content"]
|
| 47 |
+
})
|
| 48 |
+
elif isinstance(entry, (list, tuple)) and len(entry) == 2:
|
| 49 |
+
# Old format: [user_msg, assistant_msg]
|
| 50 |
+
user_msg, assistant_msg = entry
|
| 51 |
+
if user_msg:
|
| 52 |
+
conversation_history.append({"role": "user", "content": str(user_msg)})
|
| 53 |
+
if assistant_msg:
|
| 54 |
+
conversation_history.append({"role": "assistant", "content": str(assistant_msg)})
|
| 55 |
+
else:
|
| 56 |
+
# Handle other formats gracefully
|
| 57 |
+
continue
|
| 58 |
|
| 59 |
# Get API key from environment
|
| 60 |
api_key = os.getenv('COHERE_API_KEY')
|
|
|
|
| 68 |
Do not repeat the user's question in your response. Do not exceed 50 words."""
|
| 69 |
|
| 70 |
try:
|
| 71 |
+
# Use async streaming function
|
| 72 |
partial_message = ""
|
| 73 |
+
async for chunk in cohereAPI.send_message_stream_async(
|
| 74 |
system_message=system_message,
|
| 75 |
user_message=message,
|
| 76 |
conversation_history=conversation_history,
|