|
|
"""
|
|
|
ElevenLabs Voice Interface - For $2K + AirPods Pro Prize
|
|
|
|
|
|
Voice-first enterprise AI interaction.
|
|
|
"""
|
|
|
|
|
|
import os
|
|
|
from typing import Optional, AsyncGenerator
|
|
|
import asyncio
|
|
|
|
|
|
try:
|
|
|
from elevenlabs import ElevenLabs, VoiceSettings
|
|
|
from elevenlabs.client import AsyncElevenLabs
|
|
|
ELEVENLABS_AVAILABLE = True
|
|
|
except ImportError:
|
|
|
ELEVENLABS_AVAILABLE = False
|
|
|
print("[WARNING] ElevenLabs not installed")
|
|
|
|
|
|
|
|
|
class VoiceInterface:
    """
    Voice-first interface for OmniMind using ElevenLabs.

    Prize Integration: ElevenLabs Category Award ($2K + AirPods Pro)
    - Natural conversational AI
    - Streaming voice responses
    - Enterprise-grade voice quality

    Text-to-speech goes through the ElevenLabs async client; speech-to-text
    goes through OpenAI Whisper. When the ElevenLabs package is missing or
    ``ELEVENLABS_API_KEY`` is unset, ``client`` is ``None`` and the
    ElevenLabs-backed methods degrade to empty results instead of raising.
    """

    def __init__(self):
        """Read the API key from the environment and build the client if possible."""
        self.api_key = os.getenv("ELEVENLABS_API_KEY")

        # Persona table and current selection are set up *before* the
        # "not configured" early return so that set_voice() and other
        # attribute readers keep working on an unconfigured instance.
        self.voices = {
            "professional": "ErXwobaYiN019PkySvjV",
            "friendly": "EXAVITQu4vr4xnSDxMaL",
            "executive": "VR6AewLTigWG4xSOukaG",
        }
        self.current_voice = "professional"

        if not ELEVENLABS_AVAILABLE or not self.api_key:
            self.client = None
            print("[WARNING] ElevenLabs not configured")
            return

        self.client = AsyncElevenLabs(api_key=self.api_key)

    async def text_to_speech(
        self,
        text: str,
        voice: str = "professional",
        stream: bool = True
    ) -> AsyncGenerator[bytes, None]:
        """
        Convert text to speech with streaming support.

        Args:
            text: Text to convert
            voice: Voice persona (professional, friendly, executive); an
                unknown persona falls back to "professional"
            stream: Stream audio chunks for real-time playback; when False
                the audio is yielded once as a single item

        Yields:
            Audio chunks (bytes). Nothing is yielded when no ElevenLabs
            client is configured.
        """
        if not self.client:
            return

        import inspect

        voice_id = self.voices.get(voice, self.voices["professional"])

        # Both endpoints take the same request, so build it once.
        request = {
            "text": text,
            "voice_id": voice_id,
            "model_id": "eleven_turbo_v2_5",
            "voice_settings": VoiceSettings(
                stability=0.5,
                similarity_boost=0.75,
                style=0.5,
                use_speaker_boost=True,
            ),
        }

        tts = self.client.text_to_speech
        result = tts.convert_as_stream(**request) if stream else tts.convert(**request)

        # NOTE(review): depending on the installed SDK version these async
        # client methods may be coroutines (awaitable) or async generators
        # (not awaitable). Awaiting an async generator raises TypeError, so
        # only await when the result really is awaitable. Confirm against
        # the pinned elevenlabs version.
        if inspect.isawaitable(result):
            result = await result

        if not hasattr(result, "__aiter__"):
            # Already a complete payload (e.g. bytes): hand it over as-is.
            yield result
            return

        if stream:
            async for chunk in result:
                yield chunk
        else:
            # Non-streaming callers expect a single item, so join the chunks.
            yield b"".join([chunk async for chunk in result])

    async def speech_to_text(self, audio_data: bytes) -> str:
        """
        Convert speech to text using OpenAI Whisper (not an ElevenLabs endpoint).

        The audio is written to a temporary ``.wav`` file, sent to the
        ``whisper-1`` transcription model, and the file is removed
        afterwards whether or not transcription succeeded.

        Args:
            audio_data: Audio bytes (WAV format)

        Returns:
            Transcribed text
        """
        # Imported lazily so the module still loads without these installed.
        import tempfile

        from openai import AsyncOpenAI

        # Uses the module-level ``os``. A function-local ``import os`` would
        # make ``os`` a local name for the whole function and turn this
        # lookup into an UnboundLocalError.
        openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        audio_path = tmp.name
        try:
            # Close the handle before reopening the path for reading.
            with tmp:
                tmp.write(audio_data)

            with open(audio_path, "rb") as audio_file:
                transcript = await openai_client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file
                )

            return transcript.text
        finally:
            # delete=False above means cleanup is our responsibility.
            os.unlink(audio_path)

    async def get_available_voices(self) -> dict:
        """
        Get list of available voices.

        Returns:
            ``{"status": "unavailable", "voices": []}`` when no client is
            configured; otherwise ``{"status": "success", "voices": [...]}``
            with one ``voice_id`` / ``name`` / ``category`` dict per voice
            returned by the client.
        """
        if not self.client:
            return {"status": "unavailable", "voices": []}

        voices = await self.client.voices.get_all()

        return {
            "status": "success",
            "voices": [
                {
                    "voice_id": voice.voice_id,
                    "name": voice.name,
                    "category": voice.category
                }
                for voice in voices.voices
            ]
        }

    def set_voice(self, voice_name: str) -> bool:
        """
        Set the current voice persona.

        Args:
            voice_name: Key of a persona in ``self.voices``

        Returns:
            True if the persona exists and was selected, False otherwise.
        """
        if voice_name in self.voices:
            self.current_voice = voice_name
            return True
        return False
|
|
|
|
|
|
|
|
|
|
|
|
# Shared module-level instance, constructed when this module is imported.
voice = VoiceInterface()
|
|
|
|