nikkmitra committed on
Commit
0721077
·
verified ·
1 Parent(s): a452e88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -60
app.py CHANGED
@@ -3,91 +3,69 @@ import gradio as gr
3
  import torch
4
  from TTS.api import TTS
5
  import os
6
- import subprocess
7
- import uuid
8
 
9
  # Agree to Coqui TOS
10
  os.environ["COQUI_TOS_AGREED"] = "1"
11
 
12
- # Set device
13
- device = "cuda" if torch.cuda.is_available() else "cpu"
14
 
15
- # Initialize TTS model
16
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
17
 
18
- def cleanup_voice(speaker_wav):
19
- """
20
- Cleans up the voice reference audio using ffmpeg.
21
- """
22
- try:
23
- # Generate a unique filename for the output
24
- out_filename = f"{uuid.uuid4()}.wav"
25
-
26
- # Define ffmpeg filters (adjust these as needed)
27
- lowpass_highpass = "lowpass=f=3000, highpass=f=300"
28
- trim_silence = "silenceremove=start_periods=1:start_duration=0.5:start_threshold=-40dB"
29
-
30
- # Construct the ffmpeg command
31
- shell_command = [
32
- "ffmpeg",
33
- "-y",
34
- "-i", speaker_wav,
35
- "-af", f"{lowpass_highpass},{trim_silence}",
36
- out_filename
37
- ]
38
-
39
- # Execute the ffmpeg command
40
- subprocess.run(shell_command, capture_output=True, text=True, check=True)
41
-
42
- print("Filtered microphone input")
43
- return out_filename
44
-
45
- except subprocess.CalledProcessError:
46
- # If ffmpeg fails, return the original file
47
- print("Error: Failed filtering, using original microphone input")
48
- return speaker_wav
49
-
50
  @spaces.GPU(enable_queue=True)
51
  def clone(text, audio, language):
52
  """
53
- Clones the voice based on the input text and reference audio.
 
 
 
 
 
 
 
 
54
  """
55
- # Cleanup the voice reference audio
56
- cleaned_audio = cleanup_voice(audio)
57
-
58
- # Generate the output audio file
59
  output_path = "./output.wav"
60
- tts.tts_to_file(text=text, speaker_wav=cleaned_audio, language=language, file_path=output_path)
61
-
62
- # Optionally, remove the cleaned audio file if it's a temporary file
63
- if cleaned_audio != audio:
64
- os.remove(cleaned_audio)
65
-
66
  return output_path
67
 
68
- # Define Gradio interface
69
  iface = gr.Interface(
70
  fn=clone,
71
  inputs=[
72
  gr.Textbox(label='Text', lines=2, placeholder="Enter the text you want to synthesize..."),
73
  gr.Audio(type='filepath', label='Voice Reference Audio File'),
74
- gr.Dropdown(choices=["en", "ar"], value="en", label="Select Language")
 
 
 
 
75
  ],
76
  outputs=gr.Audio(type='filepath'),
77
  title='Voice Clone',
78
  description="""
79
- This application allows you to clone voices using the xtts_v2 model.
80
- Non-commercial use only. [Coqui Public Model License](https://coqui.ai/cpml)
81
  """,
82
- theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
 
 
 
 
83
  examples=[
84
- ["Hey! It's me Dorothy, from the Wizard of Oz. Type in whatever you'd like me to say.", "./audio/Wizard-of-Oz-Dorothy.wav"],
85
- ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.", "./audio/Godfather.wav"],
86
- ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.", "./audio/Paris-Hilton.mp3"],
87
- ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.", "./audio/Megan-Fox.mp3"],
88
- ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.", "./audio/Jeff-Goldblum.mp3"],
89
- ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.", "./audio/Heath-Ledger.mp3"],
90
- ]
 
91
  )
92
 
93
  # Launch the interface
 
3
  import torch
4
  from TTS.api import TTS
5
  import os
 
 
6
 
# Record agreement to the Coqui terms of service in the environment
# before the model is constructed below.
os.environ["COQUI_TOS_AGREED"] = "1"

# Target device for the model; fixed to CUDA (GPU).
device = "cuda"

# Build the xtts_v2 model first, then move it onto the chosen device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
tts = tts.to(device)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@spaces.GPU(enable_queue=True)
def clone(text, audio, language):
    """
    Generate a cloned-voice audio file from input text and a reference clip.

    Args:
        text (str): The text to synthesize.
        audio (str): Path to the reference audio file for voice cloning.
        language (str): The language code ("en" or "ar").

    Returns:
        str: Path to the generated WAV file (a fresh file for every call).

    Raises:
        gr.Error: If the text is empty or no reference audio was provided.
    """
    # Local import: keeps this block self-contained without touching the
    # file's top-level import section.
    import tempfile

    # Fail early with a user-visible message instead of handing unusable
    # inputs to the model. `audio` is presumably None when no clip was
    # uploaded — confirm against the gr.Audio(type='filepath') input.
    if not text or not text.strip():
        raise gr.Error("Please enter the text you want to synthesize.")
    if not audio:
        raise gr.Error("Please provide a voice reference audio file.")

    # A fixed "./output.wav" is shared by every request, so two requests
    # running at the same time would overwrite each other's result.
    # Reserve a unique path per call; the file is kept (delete=False) so it
    # can still be read after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        speaker_wav=audio,
        language=language,
        file_path=output_path,
    )
    return output_path
37
 
# Gradio UI: text, a reference clip and a language go in; synthesized audio
# comes out.
# NOTE(review): each example row supplies only text and audio, so the
# language dropdown is not covered by the examples — confirm this is intended.
iface = gr.Interface(
    fn=clone,
    inputs=[
        gr.Textbox(label='Text', lines=2, placeholder="Enter the text you want to synthesize..."),
        gr.Audio(type='filepath', label='Voice Reference Audio File'),
        gr.Dropdown(label="Select Language", choices=["en", "ar"], value="en"),  # "en" is the default
    ],
    outputs=gr.Audio(type='filepath'),
    title='Voice Clone',
    description="""
    Clone a voice by providing text and a reference audio file.
    """,
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
    # Rows are [text, reference clip]; every clip lives under ./audio/.
    examples=[
        [line, f"./audio/{clip}"]
        for line, clip in (
            ("Hey! It's me Dorothy, from the Wizard of Oz.", "Wizard-of-Oz-Dorothy.wav"),
            ("It's me Vito Corleone, from the Godfather.", "Godfather.wav"),
            ("Hey, it's me Paris Hilton.", "Paris-Hilton.mp3"),
            ("Hey, it's me Megan Fox from Transformers.", "Megan-Fox.mp3"),
            ("Hey there, it's me Jeff Goldblum.", "Jeff-Goldblum.mp3"),
            ("Hey there, it's me Heath Ledger as the Joker.", "Heath-Ledger.mp3"),
        )
    ],
    allow_flagging="never",  # Optional: flagging is disabled
)
70
 
71
  # Launch the interface