Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline | |
| import scipy.io.wavfile | |
| import numpy as np | |
# Hugging Face Hub checkpoint used for Tibetan (bod) MMS-TTS synthesis.
# Replace with your own fine-tuned model if necessary.
model_id = "ganga4364/mms-tts-bod-finetune-sherab"

# Build the text-to-speech pipeline once, at import time.
# Add device=0 to the call if you want to run on a GPU.
synthesiser = pipeline(task="text-to-speech", model=model_id)
| # Function to perform TTS inference and save audio to a file | |
def generate_audio(input_text):
    """Synthesise speech for ``input_text`` and return the path of a WAV file.

    Args:
        input_text: Text to convert to speech.

    Returns:
        Path to a newly created ``.wav`` file containing the synthesised audio.

    Raises:
        gr.Error: If ``input_text`` is empty or contains only whitespace.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    # Reject blank input up front so the user sees a readable message in the
    # UI instead of whatever the model raises on an empty sequence.
    if not input_text or not input_text.strip():
        raise gr.Error("Please enter some text to synthesise.")

    # Perform TTS inference.
    speech = synthesiser(input_text)

    # Use a unique file per call: a single fixed filename would let concurrent
    # requests overwrite each other's audio before it is served.
    # NOTE: these files are not removed here; they live in the system temp dir.
    fd, file_path = tempfile.mkstemp(prefix="finetuned_output_", suffix=".wav")
    os.close(fd)  # scipy reopens the file by path, so release the raw descriptor

    # Take the first entry of the returned audio as the waveform
    # (assumes a leading batch/channel axis of size 1 -- TODO confirm).
    scipy.io.wavfile.write(
        file_path,
        rate=speech["sampling_rate"],
        data=speech["audio"][0],
    )

    # Return the path to the audio file.
    return file_path
# Wire the synthesis function into a simple Gradio UI:
# one text box in, one audio player out.
iface = gr.Interface(
    generate_audio,
    "text",
    "audio",
    title="Tibetan Text-to-Speech (MMS-TTS)",
    description="Enter Tibetan text and generate speech using MMS-TTS.",
)

# Start serving the interface.
iface.launch()