import os
import tempfile

import gradio as gr
import spaces
import torch
from TTS.api import TTS

# Agree to the Coqui TOS non-interactively; without this the model download
# prompts on stdin, which is not available in a hosted app.
os.environ["COQUI_TOS_AGREED"] = "1"

# Device the model is moved to at import time.
device = "cuda"

# Language codes offered in the UI.
SUPPORTED_LANGUAGES = ["en", "ar"]

# Load the XTTS v2 model once at startup and move it to the target device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)


@spaces.GPU(enable_queue=True)
def clone(text, audio, language):
    """Synthesize ``text`` in the voice of the reference recording.

    Args:
        text (str): The text to synthesize.
        audio (str): Path to the reference audio file for voice cloning.
        language (str): The language code ("en" or "ar").

    Returns:
        str: Path to the generated WAV file.

    Raises:
        gr.Error: If ``text`` is empty/blank or no reference audio was given.
    """
    # Fail early with a readable message instead of an opaque model error.
    if not text or not text.strip():
        raise gr.Error("Please enter the text you want to synthesize.")
    if not audio:
        raise gr.Error("Please provide a voice reference audio file.")

    # A fixed "./output.wav" is shared by every request, so overlapping calls
    # can overwrite each other's result. Reserve a unique file per call; it is
    # intentionally not deleted here because the caller reads it after return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        speaker_wav=audio,
        language=language,
        file_path=output_path,
    )
    return output_path


# Define the Gradio interface
iface = gr.Interface(
    fn=clone,
    inputs=[
        gr.Textbox(
            label='Text',
            lines=2,
            placeholder="Enter the text you want to synthesize...",
        ),
        gr.Audio(type='filepath', label='Voice Reference Audio File'),
        gr.Dropdown(
            label="Select Language",
            choices=SUPPORTED_LANGUAGES,
            value="en",  # Default value
        ),
    ],
    outputs=gr.Audio(type='filepath'),
    title='Voice Clone',
    description=""" Clone a voice by providing text and a reference audio file. """,
    theme=gr.themes.Base(
        primary_hue="teal",
        secondary_hue="teal",
        neutral_hue="slate",
    ),
    # Each row supplies a value for every input component (text, audio,
    # language) so examples can be run/cached without missing arguments.
    examples=[
        ["Hey! It's me Dorothy, from the Wizard of Oz.", "./audio/Wizard-of-Oz-Dorothy.wav", "en"],
        ["It's me Vito Corleone, from the Godfather.", "./audio/Godfather.wav", "en"],
        ["Hey, it's me Paris Hilton.", "./audio/Paris-Hilton.mp3", "en"],
        ["Hey, it's me Megan Fox from Transformers.", "./audio/Megan-Fox.mp3", "en"],
        ["Hey there, it's me Jeff Goldblum.", "./audio/Jeff-Goldblum.mp3", "en"],
        ["Hey there, it's me Heath Ledger as the Joker.", "./audio/Heath-Ledger.mp3", "en"],
    ],
    allow_flagging="never",  # Optional: Disable flagging if not needed
)

# Launch the interface
iface.launch()