# HuggingFace Spaces demo: VITS Japanese text-to-speech with a Gradio UI.
# (The Space page was showing "Runtime error" — see the Gradio component fix below.)
| import gradio as gr | |
| import base64 | |
| import torch | |
| import io | |
| import scipy.io.wavfile as wavfile | |
| from PIL import Image | |
| import numpy as np | |
| import commons | |
| import utils | |
| from models import SynthesizerTrn | |
| from text.symbols import symbols | |
| from text import text_to_sequence | |
| import subprocess | |
| import os | |
| import tempfile | |
def get_text(text, hps):
    """Convert raw text into a LongTensor of symbol ids ready for inference.

    Args:
        text: input string to synthesize.
        hps: hyperparameter namespace; reads ``data.text_cleaners`` and
            ``data.add_blank``.

    Returns:
        torch.LongTensor of symbol ids, optionally interspersed with blank
        tokens (id 0) when the config requests it.
    """
    sequence = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        # Insert a blank token (0) between every symbol, as the model expects.
        sequence = commons.intersperse(sequence, 0)
    return torch.LongTensor(sequence)
def text_to_speech(text):
    """Synthesize speech for *text* and write it to a temporary WAV file.

    Uses the module-level ``net_g`` model and ``hps`` config.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False``, so the caller (Gradio) is responsible for cleanup.
    """
    tokens = get_text(text, hps)
    with torch.no_grad():
        batch = tokens.unsqueeze(0)
        lengths = torch.LongTensor([tokens.size(0)])
        # infer() returns a tuple; [0][0, 0] selects the raw waveform.
        audio = net_g.infer(
            batch,
            lengths,
            noise_scale=0.667,
            noise_scale_w=0.8,
            length_scale=1.2,
        )[0][0, 0].data.float().numpy()
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wavfile.write(tmp.name, hps.data.sampling_rate, audio)
        out_path = tmp.name
    return out_path
# --- Load the trained model --------------------------------------------------
hps = utils.get_hparams_from_file("./configs/jp_base.json")
hps.model_dir = './logs/jp_base'
pretrained_model = f'{hps.model_dir}/model.pth'

net_g = SynthesizerTrn(
    len(symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    **hps.model)
# Inference only: disable dropout / batch-norm updates.
_ = net_g.eval()

if not os.path.isfile(pretrained_model):
    # Checkpoint missing: run the setup script first (presumably it downloads
    # the model -- TODO confirm what startup.sh does). An argument list with
    # shell=False avoids shell-injection issues, and check=True fails fast
    # instead of silently continuing to a doomed load_checkpoint call.
    subprocess.run(['./startup.sh'], check=True)
# Both branches of the original if/else loaded the checkpoint; do it once.
_ = utils.load_checkpoint(pretrained_model, net_g, None)
def generate_speech(text):
    """Gradio callback: synthesize *text* and return the generated WAV path."""
    # Delegate to text_to_speech, which returns a filepath suitable for the
    # Audio output component.
    return text_to_speech(text)
# --- Define and launch the Gradio UI -----------------------------------------
# NOTE: the gr.inputs / gr.outputs namespaces were deprecated and removed in
# Gradio 3.x -- using them raises AttributeError at startup (the Space's
# "Runtime error"). Construct the components directly instead.
ui = gr.Interface(
    fn=generate_speech,
    inputs=gr.Textbox(label='Enter Text Here'),
    outputs=gr.Audio(label='Speech', type='filepath'),
    title='Text-to-Speech Demo',
    description='Generate speech from text using a text-to-speech model.'
)
# share=True exposes a temporary public URL in addition to the local server.
ui.launch(share=True)