Spaces:

ik
/

twi-ewe-mss-tss

Sleeping

twi-ewe-mss-tss / app.py

Update app.py

2333629 over 2 years ago

3.12 kB

	import gradio as gr
	import speech_recognition as sr
	from ttsmms import TTS
	from deep_translator import GoogleTranslator

	# Load one TTS model per target language from its local data directory.
	# The Twi model is read from "data/aka".
	ewe = TTS("data/ewe")
	twi = TTS("data/aka")

	# Registry pairing each language key offered in the UI dropdown with its TTS model.
	langs = [
	    dict(lang='ewe', tts=ewe),
	    dict(lang='twi', tts=twi),
	]


	# Function to convert speech to text using Google's speech recognition API
	def speech_to_text(audio_file):
	    """Transcribe an audio file with the recognizer's Google backend.

	    Args:
	        audio_file: Path (or file-like object) handed to ``sr.AudioFile``.

	    Returns:
	        The recognized text, or ``None`` when the recognizer raises
	        ``sr.UnknownValueError`` or ``sr.RequestError``.
	    """
	    recognizer = sr.Recognizer()
	    # Read the whole clip into memory before sending it for recognition.
	    with sr.AudioFile(audio_file) as clip:
	        recorded = recognizer.record(clip)

	    try:
	        transcript = recognizer.recognize_google(recorded)
	    except sr.UnknownValueError:
	        transcript = None
	    except sr.RequestError:
	        print("Speech recognition service unavailable.")
	        transcript = None
	    return transcript


	# Function to convert text to speech
	def text_to_speech(text, lang):
	    """Translate ``text`` into ``lang`` and synthesize the translation as a WAV file.

	    Args:
	        text: Text to translate; the translator is asked to auto-detect its
	            source language.
	        lang: Target language key; must match a ``"lang"`` entry in ``langs``.

	    Returns:
	        A ``(wav_path, translated)`` tuple: the path of the generated WAV file
	        and the translated text that was synthesized.

	    Raises:
	        ValueError: If ``text`` is empty/blank/``None`` or ``lang`` is not one
	            of the supported languages.
	    """
	    import tempfile

	    # Reject empty input up front instead of letting the translator fail with
	    # an opaque library error (e.g. when an earlier transcription returned None).
	    if text is None or not str(text).strip():
	        raise ValueError("No text was provided to translate.")

	    # Find the selected language in the list of supported languages
	    selected_lang = next((item for item in langs if item["lang"] == lang), None)
	    if selected_lang is None:
	        raise ValueError(f"Language '{lang}' is not supported.")
	    selected_tts = selected_lang["tts"]

	    # Translate the text to the selected language using Google Translator
	    translated = GoogleTranslator(source='auto', target=lang).translate(text)

	    # Use a unique file per call: a fixed "output.wav" would be overwritten by
	    # overlapping requests. The file is left on disk for the caller to serve;
	    # this function does not delete it.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        wav_path = tmp.name

	    # Generate speech synthesis and save it as a WAV file
	    selected_tts.synthesis(translated, wav_path=wav_path)
	    return wav_path, translated


	# Function to handle the speech to text app
	def speech_to_text_app(audio_file):
	    """Transcribe ``audio_file`` and return text suitable for display.

	    Returns the transcription from ``speech_to_text``, or the message
	    "Unable to transcribe audio." when that result is empty or ``None``.
	    """
	    transcript = speech_to_text(audio_file)
	    if not transcript:
	        return "Unable to transcribe audio."
	    return transcript


	# Function to handle the text to speech output
	def text_to_speech_output(text, lang):
	    """Run ``text_to_speech`` and pass its result through unchanged.

	    Returns:
	        The ``(wav_path, translated)`` pair produced by ``text_to_speech``.
	    """
	    audio_path, translation = text_to_speech(text, lang)
	    return (audio_path, translation)


	# Function to handle the speech to text and text to speech app
	def speech_to_text_and_tts_app(lang_input, audio_file, text_input):
	    """Gradio handler: translate recorded or typed input and synthesize it.

	    A recording takes precedence when supplied; if it is missing or cannot be
	    transcribed, the typed text is used instead.

	    Args:
	        lang_input: Target language key chosen in the dropdown.
	        audio_file: Path of the recorded audio, or a falsy value when nothing
	            was recorded.
	        text_input: Text typed by the user; may be empty or ``None``.

	    Returns:
	        A ``(text, wav_path)`` tuple in the order of the interface outputs:
	        the translated text and the path of the synthesized WAV file. When
	        there is nothing to translate, an explanatory message and ``None``
	        (no audio) are returned instead.
	    """
	    text = None
	    if audio_file:
	        print("Converting audio to text:", audio_file)
	        text = speech_to_text(audio_file)

	    # speech_to_text returns None on failure; fall back to the typed text
	    # rather than passing None on to the translator.
	    if not text:
	        text = text_input

	    if not text or not text.strip():
	        return "No speech could be transcribed and no text was entered.", None

	    wav_path, translated = text_to_speech_output(text, lang_input)
	    return translated, wav_path


	# Define the Gradio interface inputs and outputs
	# NOTE(review): `gr.inputs` / `gr.outputs` look like the legacy Gradio component
	# namespaces — confirm the pinned Gradio version still provides them before upgrading.
	audio_input = gr.inputs.Audio(source="microphone", type="filepath", label="Record Audio")
	text_input = gr.inputs.Textbox(label="Enter your text here")
	lang_input = gr.inputs.Dropdown(choices=[lang["lang"] for lang in langs], label="Language")
	# The handler returns the translated text here (not the raw transcription),
	# so the label says "Translation".
	output_text = gr.outputs.Textbox(label="Translation")
	output_audio = gr.outputs.Audio(label="Text-to-Speech Audio", type='filepath')

	# Create the Gradio interface. The input order matches the parameters of
	# speech_to_text_and_tts_app; the output order matches its return tuple.
	interface = gr.Interface(
	    fn=speech_to_text_and_tts_app,
	    inputs=[lang_input, audio_input, text_input],
	    outputs=[output_text, output_audio],
	    title="English to Twi - Ewe Speech Generator(MMS TTS)",
	    description="Translate English to Twi and Ewe Language(from Ghana)"
	)

	# Launch the interface
	interface.launch()