fantos committed on
Commit
e2d9e5d
·
verified ·
1 Parent(s): 2406db3

Delete app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +0 -117
app-backup.py DELETED
@@ -1,117 +0,0 @@
1
- import os
2
- import gradio as gr
3
- import outetts
4
- from outetts.version.v2.interface import _DEFAULT_SPEAKERS
5
- import torch
6
- import spaces
7
-
8
- def get_available_speakers():
9
- speakers = list(_DEFAULT_SPEAKERS.keys())
10
- return speakers
11
-
12
- @spaces.GPU
13
- def generate_tts(text, temperature, repetition_penalty, speaker_selection, reference_audio):
14
- model_config = outetts.HFModelConfig_v2(
15
- model_path="OuteAI/OuteTTS-0.3-1B",
16
- tokenizer_path="OuteAI/OuteTTS-0.3-1B",
17
- dtype=torch.bfloat16,
18
- device="cuda"
19
- )
20
- interface = outetts.InterfaceHF(model_version="0.3", cfg=model_config)
21
-
22
- try:
23
- if reference_audio:
24
- speaker = interface.create_speaker(reference_audio)
25
- elif speaker_selection and speaker_selection != "None":
26
- speaker = interface.load_default_speaker(speaker_selection)
27
- else:
28
- speaker = None
29
-
30
- gen_cfg = outetts.GenerationConfig(
31
- text=text,
32
- temperature=temperature,
33
- repetition_penalty=repetition_penalty,
34
- max_length=4096,
35
- speaker=speaker,
36
- )
37
- output = interface.generate(config=gen_cfg)
38
-
39
- if output.audio is None:
40
- raise ValueError("Model failed to generate audio. This may be due to input length constraints or early EOS token.")
41
-
42
- output_path = "output.wav"
43
- output.save(output_path)
44
- return output_path, None
45
- except Exception as e:
46
- return None, str(e)
47
-
48
- with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as demo:
49
- gr.Markdown("# Voice Clone Multilingual TTS")
50
-
51
- error_box = gr.Textbox(label="Error Messages", visible=False)
52
-
53
- with gr.Row():
54
- with gr.Column(scale=1):
55
- text_input = gr.Textbox(
56
- label="Text to Synthesize",
57
- placeholder="Enter text here...",
58
- lines=8
59
- )
60
-
61
- submit_button = gr.Button("Generate Speech")
62
-
63
- with gr.Column(scale=1):
64
- audio_output = gr.Audio(
65
- label="Generated Audio",
66
- type="filepath"
67
- )
68
-
69
- with gr.Group():
70
- speaker_dropdown = gr.Dropdown(
71
- choices=get_available_speakers(),
72
- value="en_male_1",
73
- label="Speaker Selection"
74
- )
75
-
76
- temperature = gr.Slider(
77
- 0.1, 1.0,
78
- value=0.1,
79
- label="Temperature (lower = more stable tone, higher = more expressive)"
80
- )
81
-
82
- repetition_penalty = gr.Slider(
83
- 0.5, 2.0,
84
- value=1.1,
85
- label="Repetition Penalty"
86
- )
87
-
88
- reference_audio = gr.Audio(
89
- label="Reference Audio (for voice cloning)",
90
- type="filepath"
91
- )
92
-
93
- gr.Markdown("""
94
- ### Voice Cloning Guidelines:
95
- - Use around 7-10 seconds of clear, noise-free audio
96
- - For transcription interface will use Whisper turbo to transcribe the audio file
97
- - Longer audio clips will reduce maximum output length
98
- - Custom speaker overrides speaker selection
99
- """)
100
-
101
- submit_button.click(
102
- fn=generate_tts,
103
- inputs=[
104
- text_input,
105
- temperature,
106
- repetition_penalty,
107
- speaker_dropdown,
108
- reference_audio,
109
- ],
110
- outputs=[audio_output, error_box]
111
- ).then(
112
- fn=lambda x: gr.update(visible=bool(x)),
113
- inputs=[error_box],
114
- outputs=[error_box]
115
- )
116
-
117
- demo.launch()