text-to-speech / app.py
Eldeeb's picture
Update app.py
456fa97 verified
import streamlit as st
from transformers import pipeline
import io
import numpy as np
import requests
# Build the text-to-speech pipeline behind Streamlit's resource cache
@st.cache_resource
def load_tts_pipeline():
    """Create the SpeechT5 text-to-speech pipeline.

    The function is wrapped in ``st.cache_resource`` so that the pipeline
    object is built by the first call and handed back by later calls instead
    of being constructed again.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"text-to-speech"`` task with the ``microsoft/speecht5_tts`` model.
    """
    tts_model = pipeline(task="text-to-speech", model="microsoft/speecht5_tts")
    return tts_model
# Obtain the (cached) text-to-speech pipeline used by the rest of the script
tts_pipe = load_tts_pipeline()

# Seed session-state entries with their defaults the first time the script runs;
# values that already exist are left untouched.
for _state_key, _state_default in (("conversation_history", ""), ("tts_audio", None)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
def get_speaker_embeddings():
    """Download the speaker x-vector file and load it as a NumPy array.

    The file is fetched over HTTP from the Hugging Face dataset URL below and
    parsed in memory with ``np.load``. Every failure path reports a message
    through ``st.error`` and returns ``None`` instead of raising, so callers
    only need to check for ``None``.

    Returns:
        The array produced by ``np.load`` for the downloaded file, or ``None``
        when the download fails, the response is not served as
        ``application/octet-stream``, or the payload cannot be parsed.
    """
    # Local import: only needed to name the exception np.load raises when the
    # payload is neither a valid .npy file nor a valid pickle.
    import pickle

    url = "https://huggingface.co/datasets/Matthijs/cmu-arctic-xvectors/resolve/main/xvectors.npy"

    try:
        # A timeout keeps the Streamlit script from hanging forever on a
        # stalled connection; raise_for_status turns 4xx/5xx into an error.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        st.error(f"Failed to download speaker embeddings: {e}")
        return None

    # .get() avoids a KeyError when the server omits the Content-Type header.
    content_type = response.headers.get('Content-Type', '')
    if 'application/octet-stream' not in content_type:
        st.error("Unexpected file type.")
        return None

    try:
        # SECURITY NOTE(review): allow_pickle=True lets np.load unpickle
        # arbitrary objects from downloaded data, which can execute code.
        # If the file is a plain numeric array, switch to allow_pickle=False.
        with io.BytesIO(response.content) as buffer:
            return np.load(buffer, allow_pickle=True)
    except (ValueError, OSError, EOFError, pickle.UnpicklingError):
        st.error("Failed to load speaker embeddings.")
        return None
def convert_text_to_speech(text, speaker_index=0):
    """Synthesize speech for ``text`` with the module-level ``tts_pipe``.

    Args:
        text: The text to speak.
        speaker_index: Row of the downloaded embedding table to use as the
            voice when the table is two-dimensional. Defaults to the first row.

    Returns:
        Whatever ``tts_pipe`` returns for the request, or ``None`` when the
        speaker embeddings are unavailable or synthesis raises (the error is
        shown with ``st.error``).
    """
    speaker_embeddings = get_speaker_embeddings()  # Obtain speaker embeddings
    if speaker_embeddings is None:
        return None

    try:
        # Local import: torch is only needed here, to hand the model a tensor.
        import torch

        embedding = torch.as_tensor(np.asarray(speaker_embeddings), dtype=torch.float32)
        # NOTE(review): assumes the file holds either one x-vector (1-D) or a
        # table with one x-vector per row (2-D) -- confirm against the dataset.
        if embedding.ndim == 2:
            embedding = embedding[speaker_index]
        # The model expects a leading batch dimension: (1, embedding_dim).
        embedding = embedding.reshape(1, -1)

        # Model arguments must travel through ``forward_params``; the pipeline
        # rejects unknown top-level keyword arguments such as
        # ``speaker_embeddings=...``.
        audio = tts_pipe(text, forward_params={"speaker_embeddings": embedding})
        return audio
    except Exception as e:
        # Top-level UI boundary: surface any synthesis failure to the user
        # instead of crashing the Streamlit script.
        st.error(f"Error generating speech: {e}")
        return None
def convert_audio_to_bytes(audio):
    """Return an in-memory WAV file for a synthesized audio result.

    Args:
        audio: Mapping with an ``'audio'`` entry holding the samples and,
            optionally, a ``'sampling_rate'`` entry in Hz. Float samples are
            treated as mono in the range [-1.0, 1.0]. If ``'audio'`` is
            already bytes-like it is assumed to be encoded audio and is
            wrapped unchanged.

    Returns:
        An ``io.BytesIO`` positioned at offset 0. For sample arrays it holds a
        16-bit PCM mono WAV file; for bytes-like input it holds those bytes.
    """
    # Local import: stdlib WAV writer, used only by this function.
    import wave

    samples = audio['audio']
    if isinstance(samples, (bytes, bytearray, memoryview)):
        # Already-encoded data: keep the previous pass-through behaviour.
        return io.BytesIO(samples)

    # Wrapping a float array directly in BytesIO would expose its raw memory,
    # which is not a playable WAV file. Convert to 16-bit PCM instead.
    pcm = np.asarray(samples, dtype=np.float32).reshape(-1)
    pcm = np.clip(np.nan_to_num(pcm), -1.0, 1.0)
    pcm = (pcm * 32767.0).astype('<i2')  # little-endian int16, as WAV requires

    # NOTE(review): 16000 Hz fallback is an assumption for results that carry
    # no 'sampling_rate' -- confirm against the model in use.
    sampling_rate = int(audio.get('sampling_rate', 16000))

    audio_buffer = io.BytesIO()
    # wave does not close a file object it did not open, so the buffer stays
    # usable after the with-block has finalized the header.
    with wave.open(audio_buffer, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sampling_rate)
        wav_file.writeframes(pcm.tobytes())
    audio_buffer.seek(0)
    return audio_buffer
# Sidebar options
st.sidebar.title("App Settings")
feature = st.sidebar.radio("Choose Feature:", ("Text-to-Speech", "Other Options"))

if feature == "Text-to-Speech":
    # The title previously contained a mis-decoded emoji; this is the intended
    # "speaking head" character (U+1F5E3).
    st.title("🗣 Text-to-Speech Converter")
    st.subheader("Convert your text to speech using a TTS model!")
    user_message = st.text_area("Enter text to convert to speech:")

    if st.button("Convert"):
        # Treat whitespace-only input as empty so it is never sent to the model.
        if user_message.strip():
            tts_audio = convert_text_to_speech(user_message)
            # convert_text_to_speech signals failure with None.
            if tts_audio is not None:
                audio_bytes = convert_audio_to_bytes(tts_audio)
                st.audio(audio_bytes, format='audio/wav')
                st.success("Conversion successful!")
            else:
                st.error("Conversion failed.")
        else:
            st.warning("Please enter text before converting.")

# Static "about" section and sidebar footer
st.markdown("---")
st.markdown("### About this App")
st.info("This app uses a text-to-speech model from the Hugging Face Transformers library. Enter text to hear it spoken out loud.")
st.sidebar.markdown("---")
st.sidebar.write("Created by [Your Name](https://github.com/yourprofile)")