"""Gradio demo that clones a voice with the Coqui XTTS v2 model.

The user uploads a short reference recording and enters Chinese text; the
app synthesizes that text in the reference speaker's voice.
"""
import os
import tempfile

import gradio as gr
from TTS.api import TTS

# Load the XTTS model once at start-up so every request reuses it.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")


def _materialize_speaker_audio(speaker_wav):
    """Return ``(path, is_temporary)`` for the reference audio.

    ``speaker_wav`` may be a filesystem path (what ``gr.Audio`` passes with
    ``type="filepath"``) or a file-like object exposing ``read()`` (the
    legacy ``type="file"`` behaviour). File-like input is copied into a new
    ``.wav`` temp file; ``is_temporary`` tells the caller it must delete it.
    """
    if isinstance(speaker_wav, (str, os.PathLike)):
        return os.fspath(speaker_wav), False

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        with tmp:
            tmp.write(speaker_wav.read())
    except BaseException:
        # Do not leave a half-written copy behind if reading the upload fails.
        os.unlink(tmp.name)
        raise
    return tmp.name, True


def clone_voice(text, speaker_wav):
    """Synthesize ``text`` in the voice of the uploaded reference audio.

    Args:
        text: The text to speak (synthesized with language code ``"zh"``).
        speaker_wav: Reference audio as a file path or a file-like object,
            or ``None`` when nothing was uploaded.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is ``None``
        when the input is rejected; otherwise it is the path of the
        generated ``.wav`` file.
    """
    if speaker_wav is None:
        return None, "Please upload a reference audio file."
    if not text or not text.strip():
        return None, "Please enter the text to synthesize."

    speaker_path, is_temporary = _materialize_speaker_audio(speaker_wav)

    # Reserve a unique output file per request so that concurrent users do
    # not overwrite each other's result.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out:
        output_path = out.name

    try:
        # Generate audio using XTTS
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_path,
            language="zh",
            file_path=output_path,
        )
    except BaseException:
        # Synthesis failed: the reserved output file is useless, remove it.
        if os.path.exists(output_path):
            os.unlink(output_path)
        raise
    finally:
        # Only delete the copy we created; never the caller's own file.
        if is_temporary and os.path.exists(speaker_path):
            os.unlink(speaker_path)

    return output_path, "Voice cloning completed successfully."


# Gradio UI
demo = gr.Interface(
    fn=clone_voice,
    inputs=[
        gr.Textbox(label="Enter Chinese Text"),
        # "filepath" passes the upload to the callback as a path string; the
        # older "file" value is deprecated and not accepted by newer Gradio
        # releases.
        gr.Audio(label="Upload Speaker Audio (.wav)", type="filepath"),
    ],
    outputs=[
        gr.Audio(label="Cloned Output Audio"),
        gr.Textbox(label="Status"),
    ],
    title="XTTS Voice Cloning Demo",
    description="Upload reference audio and enter Chinese text to generate speech in cloned voice.",
)

demo.launch()