""" ElevenLabs Voice Interface - For $2K + AirPods Pro Prize Voice-first enterprise AI interaction. """ import os from typing import Optional, AsyncGenerator import asyncio try: from elevenlabs import ElevenLabs, VoiceSettings from elevenlabs.client import AsyncElevenLabs ELEVENLABS_AVAILABLE = True except ImportError: ELEVENLABS_AVAILABLE = False print("[WARNING] ElevenLabs not installed") class VoiceInterface: """ Voice-first interface for OmniMind using ElevenLabs. Prize Integration: ElevenLabs Category Award ($2K + AirPods Pro) - Natural conversational AI - Streaming voice responses - Enterprise-grade voice quality """ def __init__(self): self.api_key = os.getenv("ELEVENLABS_API_KEY") if not ELEVENLABS_AVAILABLE or not self.api_key: self.client = None print("[WARNING] ElevenLabs not configured") return self.client = AsyncElevenLabs(api_key=self.api_key) # Voice configurations for different personas self.voices = { "professional": "ErXwobaYiN019PkySvjV", # Antoni - professional male "friendly": "EXAVITQu4vr4xnSDxMaL", # Sarah - friendly female "executive": "VR6AewLTigWG4xSOukaG", # Arnold - authoritative male } self.current_voice = "professional" async def text_to_speech( self, text: str, voice: str = "professional", stream: bool = True ) -> AsyncGenerator[bytes, None]: """ Convert text to speech with streaming support. Args: text: Text to convert voice: Voice persona (professional, friendly, executive) stream: Stream audio chunks for real-time playback Yields: Audio chunks (bytes) """ if not self.client: # Return empty generator if not configured return yield voice_id = self.voices.get(voice, self.voices["professional"]) if stream: # Streaming for real-time responses audio_stream = await self.client.text_to_speech.convert_as_stream( text=text, voice_id=voice_id, model_id="eleven_turbo_v2_5", # Fastest model voice_settings=VoiceSettings( stability=0.5, similarity_boost=0.75, style=0.5, use_speaker_boost=True ) ) async for chunk in audio_stream: yield chunk else: # Non-streaming for complete audio audio = await self.client.text_to_speech.convert( text=text, voice_id=voice_id, model_id="eleven_turbo_v2_5", voice_settings=VoiceSettings( stability=0.5, similarity_boost=0.75, style=0.5, use_speaker_boost=True ) ) yield audio async def speech_to_text(self, audio_data: bytes) -> str: """ Convert speech to text (using OpenAI Whisper as ElevenLabs doesn't have STT). Args: audio_data: Audio bytes (WAV format) Returns: Transcribed text """ # ElevenLabs doesn't have STT, so we use OpenAI Whisper from openai import AsyncOpenAI openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) # Save audio temporarily import tempfile with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f: f.write(audio_data) audio_path = f.name try: with open(audio_path, "rb") as audio_file: transcript = await openai_client.audio.transcriptions.create( model="whisper-1", file=audio_file ) return transcript.text finally: # Cleanup import os os.unlink(audio_path) async def get_available_voices(self): """Get list of available voices""" if not self.client: return {"status": "unavailable", "voices": []} voices = await self.client.voices.get_all() return { "status": "success", "voices": [ { "voice_id": voice.voice_id, "name": voice.name, "category": voice.category } for voice in voices.voices ] } def set_voice(self, voice_name: str): """Set the current voice persona""" if voice_name in self.voices: self.current_voice = voice_name return True return False # Global voice interface voice = VoiceInterface()