"""Gradio front end for a text-to-speech HTTP API.

The API endpoints are supplied through the ``NSFW_API_URL_TEMPLATE`` and
``TTS_API_URL_TEMPLATE`` environment variables. Each is a ``str.format``
template: the NSFW template is filled with ``prompt``; the TTS template is
filled with ``prompt``, ``emotion``, ``voice`` and ``seed``.
"""

import os
import random
import tempfile
import urllib.parse

import gradio as gr
import requests

NSFW_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
TTS_URL_TEMPLATE = os.getenv("TTS_API_URL_TEMPLATE")

VOICES = [
    "alloy", "echo", "fable", "onyx", "nova", "shimmer", "coral",
    "verse", "ballad", "ash", "sage", "amuch", "dan",
]

# Seed shown in the "Specific Seed" field whenever it is (re)displayed.
DEFAULT_SEED = 12345


def check_nsfw(prompt: str) -> bool:
    """Return True when the NSFW endpoint answers "YES" for *prompt*.

    The check fails closed: any failure (unset template, network error,
    HTTP error status) is reported as NSFW.
    """
    try:
        encoded_prompt = urllib.parse.quote(prompt)
        url = NSFW_URL_TEMPLATE.format(prompt=encoded_prompt)
        response = requests.get(url, timeout=20)
        # Without this an HTTP error page would be read as "not YES" and the
        # prompt would be let through, contradicting the fail-closed intent.
        response.raise_for_status()
        return response.text.strip().upper() == "YES"
    except Exception:
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate.
        return True  # assume NSFW if check fails


def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes:
    """Request speech audio for *prompt* and return the raw response body.

    Raises:
        gr.Error: "API response is not audio." when the response content
            type does not contain "audio"; "Error generating audio." for any
            other failure (the underlying exception is printed and chained).
    """
    try:
        url = TTS_URL_TEMPLATE.format(
            prompt=urllib.parse.quote(prompt),
            emotion=urllib.parse.quote(emotion),
            voice=urllib.parse.quote(voice),
            seed=seed,
        )
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        if "audio" not in response.headers.get("content-type", "").lower():
            raise gr.Error("API response is not audio.")
        return response.content
    except gr.Error:
        # Keep the specific user-facing message instead of letting the
        # generic handler below replace it.
        raise
    except Exception as e:
        print("Error:", e)
        raise gr.Error("Error generating audio.") from e


def text_to_speech_app(prompt, voice, emotion, use_random_seed, specific_seed):
    """Validate the form inputs, synthesize audio and write it to a temp file.

    Returns:
        A ``(audio_path, status_message)`` pair; ``audio_path`` is ``None``
        when the prompt is flagged as inappropriate.

    Raises:
        gr.Error: on an empty prompt, a missing voice, an unusable seed, or
            a failure while generating the audio.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Prompt cannot be empty.")
    if not voice:
        raise gr.Error("Voice selection required.")
    if not emotion:
        emotion = "neutral"

    if use_random_seed:
        seed = random.randint(0, 2**32 - 1)
    else:
        try:
            seed = int(specific_seed)
        except (TypeError, ValueError, OverflowError) as e:
            # A cleared Number field arrives as None; report it clearly
            # instead of surfacing a raw TypeError.
            raise gr.Error(
                "Enter a numeric seed or enable the random seed."
            ) from e

    is_nsfw = False  # Set to check_nsfw(prompt) if needed
    if is_nsfw:
        return None, "⚠️ Inappropriate prompt detected."

    audio_bytes = generate_audio(prompt, voice, emotion, seed)

    # delete=False: the file must still exist after the `with` block closes
    # it, because the Audio output component is given its path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        tmp.write(audio_bytes)

    return (
        tmp.name,
        f"✅ Audio ready | Voice: {voice} | Emotion: {emotion} | Seed: {seed}",
    )


def toggle_seed_input(use_random_seed):
    """Show the seed field only when random seeding is off.

    The field value is reset to ``DEFAULT_SEED`` on every toggle.
    """
    return gr.update(visible=not use_random_seed, value=DEFAULT_SEED)


with gr.Blocks() as app:
    gr.Markdown("## 🎙️ DeepCAL Oracle Voice Engine")
    gr.Markdown(
        "Enter text, select a voice and emotion, then generate oracle-grade audio."
    )

    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(label="Prompt")
            emotion_input = gr.Textbox(
                label="Emotion", placeholder="e.g., sarcastic, serious"
            )
            voice_dropdown = gr.Dropdown(
                label="Voice", choices=VOICES, value="alloy"
            )
        with gr.Column(scale=1):
            random_seed_checkbox = gr.Checkbox(
                label="Use Random Seed", value=True
            )
            seed_input = gr.Number(
                label="Specific Seed", visible=False, value=DEFAULT_SEED
            )

    submit_button = gr.Button("🎧 Generate Audio")

    with gr.Row():
        audio_output = gr.Audio(label="Audio Output", type="filepath")
        status_output = gr.Textbox(label="Status")

    random_seed_checkbox.change(
        toggle_seed_input, [random_seed_checkbox], [seed_input]
    )

    submit_button.click(
        fn=text_to_speech_app,
        inputs=[
            prompt_input,
            voice_dropdown,
            emotion_input,
            random_seed_checkbox,
            seed_input,
        ],
        outputs=[audio_output, status_output],
        concurrency_limit=30,
    )

    gr.Examples(
        examples=[
            ["Welcome to DeepCAL Oracle.", "ballad", "wise, cosmic", True, 12345],
            [
                "Headshot logistics incoming.",
                "shimmer",
                "sarcastic and aggressive",
                True,
                5555,
            ],
        ],
        inputs=[
            prompt_input,
            voice_dropdown,
            emotion_input,
            random_seed_checkbox,
            seed_input,
        ],
        outputs=[audio_output, status_output],
        fn=text_to_speech_app,
        cache_examples=False,
    )


if __name__ == "__main__":
    if NSFW_URL_TEMPLATE and TTS_URL_TEMPLATE:
        app.launch()
    else:
        print("🚫 Missing required secrets. Please set NSFW_API_URL_TEMPLATE and TTS_API_URL_TEMPLATE.")