# twi-ewe-mss-tss / app.py
# ik's picture
# Update app.py
# 2333629
import gradio as gr
import speech_recognition as sr
from ttsmms import TTS
from deep_translator import GoogleTranslator
# Build one TTS model per supported language from its model path
ewe = TTS("data/ewe")
twi = TTS("data/aka")

# Registry of supported languages: each entry pairs the language key shown in
# the UI ("lang") with the TTS model used to synthesize it ("tts")
langs = [
    {"lang": key, "tts": model}
    for key, model in (("ewe", ewe), ("twi", twi))
]
# Function to convert speech to text using Google's speech recognition API
def speech_to_text(audio_file):
    """Transcribe an audio file with Google's speech recognition service.

    Args:
        audio_file: Audio file to transcribe; passed straight to
            ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when recognition raised
        ``sr.UnknownValueError`` or ``sr.RequestError``.
    """
    recognizer = sr.Recognizer()

    # Read the whole file into memory before sending it for recognition
    with sr.AudioFile(audio_file) as audio_source:
        recording = recognizer.record(audio_source)

    try:
        transcript = recognizer.recognize_google(recording)
    except sr.UnknownValueError:
        return None
    except sr.RequestError:
        print("Speech recognition service unavailable.")
        return None
    return transcript
# Function to convert text to speech
def text_to_speech(text, lang):
    """Translate *text* into *lang* and synthesize the translation to a WAV file.

    Args:
        text: Text to translate; the source language is auto-detected by the
            translator.
        lang: Language key; must equal the "lang" value of an entry in
            ``langs``. It is also used as the translation target.

    Returns:
        A ``(wav_path, translated)`` tuple: the path of the written WAV file
        and the translated text.

    Raises:
        ValueError: If *lang* has no entry in ``langs``.
    """
    # Look up the TTS model registered for the requested language
    for entry in langs:
        if entry["lang"] == lang:
            tts_model = entry["tts"]
            break
    else:
        raise ValueError(f"Language '{lang}' is not supported.")

    translation = GoogleTranslator(source='auto', target=lang).translate(text)

    # NOTE: every call writes to this same path, so a later call overwrites
    # the audio produced by an earlier one.
    output_path = "output.wav"
    tts_model.synthesis(translation, wav_path=output_path)
    return output_path, translation
# Function to handle the speech to text app
def speech_to_text_app(audio_file):
    """Return the transcription of *audio_file*, or a fallback message.

    Args:
        audio_file: Audio file forwarded to ``speech_to_text``.

    Returns:
        The transcribed text, or "Unable to transcribe audio." when
        ``speech_to_text`` yields nothing (``None`` or an empty string).
    """
    transcript = speech_to_text(audio_file)
    if not transcript:
        return "Unable to transcribe audio."
    return transcript
# Function to handle the text to speech output
def text_to_speech_output(text, lang):
    """Run ``text_to_speech`` and hand back its result unchanged.

    Args:
        text: Text to translate and synthesize.
        lang: Target language key, forwarded to ``text_to_speech``.

    Returns:
        A ``(wav_path, translated)`` tuple, exactly as produced by
        ``text_to_speech``.
    """
    audio_path, translation = text_to_speech(text, lang)
    return audio_path, translation
# Function to handle the speech to text and text to speech app
def speech_to_text_and_tts_app(lang_input, audio_file, text_input):
    """Gradio handler: translate spoken or typed input and synthesize it.

    A recording takes precedence over typed text: when *audio_file* is given
    it is transcribed first and *text_input* is ignored.

    Args:
        lang_input: Target language key, forwarded to
            ``text_to_speech_output``.
        audio_file: Recorded audio file, or a falsy value when nothing was
            recorded.
        text_input: Text typed by the user; used only when there is no audio.

    Returns:
        A ``(text, wav_path)`` tuple. On success ``text`` is the translated
        text and ``wav_path`` the synthesized audio file. When the audio
        cannot be transcribed, or no usable input was supplied, ``text`` is
        an explanatory message and ``wav_path`` is ``None``.
    """
    if audio_file:
        print("Converting audio to text:", audio_file)
        text = speech_to_text(audio_file)
        if not text:
            # speech_to_text returns None on failure; passing that on to the
            # translator would raise instead of telling the user what happened.
            return "Unable to transcribe audio.", None
    else:
        text = text_input
        if not text or not text.strip():
            # Nothing to translate: neither a recording nor any typed text.
            return "Please record audio or enter text.", None

    wav_path, translated = text_to_speech_output(text, lang_input)
    return translated, wav_path
# Gradio input components: microphone recording, free text, and target language
audio_input = gr.inputs.Audio(
    label="Record Audio",
    source="microphone",
    type="filepath",
)
text_input = gr.inputs.Textbox(label="Enter your text here")
lang_input = gr.inputs.Dropdown(
    label="Language",
    # Offer exactly the language keys registered in ``langs``
    choices=[entry["lang"] for entry in langs],
)

# Gradio output components: the text result and the synthesized audio file
output_text = gr.outputs.Textbox(label="Transcription")
output_audio = gr.outputs.Audio(type='filepath', label="Text-to-Speech Audio")
# Wire the handler and components together. The order of ``inputs`` matches
# the handler's parameters (lang_input, audio_file, text_input) and the order
# of ``outputs`` matches its returned (text, wav_path) pair.
interface = gr.Interface(
    title="English to Twi - Ewe Speech Generator(MMS TTS)",
    description="Translate English to Twi and Ewe Language(from Ghana)",
    fn=speech_to_text_and_tts_app,
    inputs=[lang_input, audio_input, text_input],
    outputs=[output_text, output_audio],
)

# Start serving the interface
interface.launch()