Update app.py

app.py CHANGED
@@ -3,91 +3,69 @@ import gradio as gr
import torch
from TTS.api import TTS
import os
-import subprocess
-import uuid

# Agree to Coqui TOS
os.environ["COQUI_TOS_AGREED"] = "1"

-# Set device
-device = "cuda"
+# Set device to CUDA for GPU acceleration
+device = "cuda"

-# Initialize TTS model
+# Initialize TTS model and move to the specified device
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

-def cleanup_voice(speaker_wav):
-    """
-    Cleans up the voice reference audio using ffmpeg.
-    """
-    try:
-        # Generate a unique filename for the output
-        out_filename = f"{uuid.uuid4()}.wav"
-
-        # Define ffmpeg filters (adjust these as needed)
-        lowpass_highpass = "lowpass=f=3000, highpass=f=300"
-        trim_silence = "silenceremove=start_periods=1:start_duration=0.5:start_threshold=-40dB"
-
-        # Construct the ffmpeg command
-        shell_command = [
-            "ffmpeg",
-            "-y",
-            "-i", speaker_wav,
-            "-af", f"{lowpass_highpass},{trim_silence}",
-            out_filename
-        ]
-
-        # Execute the ffmpeg command
-        subprocess.run(shell_command, capture_output=True, text=True, check=True)
-
-        print("Filtered microphone input")
-        return out_filename
-
-    except subprocess.CalledProcessError:
-        # If ffmpeg fails, return the original file
-        print("Error: Failed filtering, using original microphone input")
-        return speaker_wav
-
@spaces.GPU(enable_queue=True)
def clone(text, audio, language):
    """
-    [removed docstring line not captured in this view]
+    Generates a cloned voice audio file based on input text and reference audio.
+
+    Args:
+        text (str): The text to synthesize.
+        audio (str): Path to the reference audio file for voice cloning.
+        language (str): The language code ("en" or "ar").
+
+    Returns:
+        str: Path to the generated audio file.
    """
-    # Cleanup the voice reference audio
-    cleaned_audio = cleanup_voice(audio)
-
-    # Generate the output audio file
    output_path = "./output.wav"
-    tts.tts_to_file(
-        [removed call arguments not captured in this view]
+    tts.tts_to_file(
+        text=text,
+        speaker_wav=audio,
+        language=language,
+        file_path=output_path
+    )
    return output_path

-# Define Gradio interface
+# Define the Gradio interface
iface = gr.Interface(
    fn=clone,
    inputs=[
        gr.Textbox(label='Text', lines=2, placeholder="Enter the text you want to synthesize..."),
        gr.Audio(type='filepath', label='Voice Reference Audio File'),
-        gr.Dropdown(
+        gr.Dropdown(
+            label="Select Language",
+            choices=["en", "ar"],
+            value="en"  # Default value
+        )
    ],
    outputs=gr.Audio(type='filepath'),
    title='Voice Clone',
    description="""
-    [removed description line not captured in this view]
-    Non-commercial use only. [Coqui Public Model License](https://coqui.ai/cpml)
+    Clone a voice by providing text and a reference audio file.
    """,
-    theme=gr.themes.Base(
+    theme=gr.themes.Base(
+        primary_hue="teal",
+        secondary_hue="teal",
+        neutral_hue="slate"
+    ),
    examples=[
-        ["Hey! It's me Dorothy, from the Wizard of Oz.
-        ["It's me Vito Corleone, from the Godfather.
-        ["Hey, it's me Paris Hilton.
-        ["Hey, it's me Megan Fox from Transformers.
-        ["Hey there, it's me Jeff Goldblum.
-        ["Hey there, it's me Heath Ledger as the Joker.
-    ]
+        ["Hey! It's me Dorothy, from the Wizard of Oz.", "./audio/Wizard-of-Oz-Dorothy.wav"],
+        ["It's me Vito Corleone, from the Godfather.", "./audio/Godfather.wav"],
+        ["Hey, it's me Paris Hilton.", "./audio/Paris-Hilton.mp3"],
+        ["Hey, it's me Megan Fox from Transformers.", "./audio/Megan-Fox.mp3"],
+        ["Hey there, it's me Jeff Goldblum.", "./audio/Jeff-Goldblum.mp3"],
+        ["Hey there, it's me Heath Ledger as the Joker.", "./audio/Heath-Ledger.mp3"],
+    ],
+    allow_flagging="never"  # Optional: Disable flagging if not needed
)

# Launch the interface
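The hunk ends at the "# Launch the interface" comment, so the launch call itself is not part of the diff above. For orientation only, here is a minimal sketch of how the tail of the updated app.py is typically completed on a Space and how the new clone() signature could be smoke-tested; the iface.launch() call and the sample text and reference clip below are assumptions, not something shown in this commit.

# Sketch only: assumes this sits at the bottom of the updated app.py,
# after clone() and iface are defined. The launch call and the sample
# inputs are assumptions; the commit's hunk ends before this point.
if __name__ == "__main__":
    # Optional local smoke test of the keyword-argument call path added above.
    wav_path = clone(
        text="Hello! This is a quick voice clone smoke test.",
        audio="./audio/Jeff-Goldblum.mp3",  # one of the example clips listed in the diff
        language="en",                      # "en" or "ar", matching the dropdown choices
    )
    print(f"Wrote synthesized audio to {wav_path}")

    # Start the Gradio app; this is the step the "# Launch the interface" comment precedes.
    iface.launch()

Since the new file hard-codes device = "cuda", running such a test on CPU-only hardware would also require guarding the device choice, for example with torch.cuda.is_available().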