# tts-openai / app.py
# Commit d9a62b3 (matdmiller): added text chunking for text over 4,000 chars
# File size: 6.28 kB
# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.
# %% auto 0
# Names made available by a star-import of this module. The header of this file says it is
# generated from app.ipynb, so changes belong in the notebook rather than in this list.
# NOTE(review): tts_models, GRADIO_PASSWORD and app are defined in this file but not listed here.
__all__ = ['secret_import_failed', 'tts_voices', 'launch_kwargs', 'split_text', 'concatenate_mp3', 'create_speech',
'get_input_text_len']
# %% app.ipynb 1
# Secrets are read from environment variables. When either one is missing, the local module
# tts_openai_secrets is imported as a fallback; it is expected to set them, for example:
#     import os
#     os.environ['OPENAI_API_KEY'] = 'sk-XXXXXXXXXXXXXXXXXXXXXX'
import os

# Becomes True as soon as one of the required environment variables is missing.
secret_import_failed = False

# A missing key in os.environ raises KeyError; catch only that so unrelated errors still surface.
try:
    os.environ['OPENAI_API_KEY']
except KeyError:
    print('OPENAI_API_KEY environment variable was not found.')
    secret_import_failed = True
else:
    print('OPENAI_API_KEY environment variable was found.')

try:
    GRADIO_PASSWORD = os.environ['GRADIO_PASSWORD']
except KeyError:
    print('GRADIO_PASSWORD environment variable was not found.')
    secret_import_failed = True
else:
    print('GRADIO_PASSWORD environment variable was found.')

if secret_import_failed:
    # Importing the module runs it; afterwards the password is read from the environment again.
    import tts_openai_secrets
    GRADIO_PASSWORD = os.environ['GRADIO_PASSWORD']
    print('import tts_openai_secrets succeeded')
# %% app.ipynb 3
import gradio as gr
import openai
from pydub import AudioSegment
import io
# %% app.ipynb 4
# Ask the API for its model list and keep the ids that contain 'tts'.
# If the lookup fails, or yields no matching model, fall back to 'tts-1' so that the
# model dropdown always has at least one choice.
try:
    tts_models = [o.id for o in openai.models.list().data if 'tts' in o.id]
    print('successfully got tts model list:', tts_models)
except Exception as err:  # best effort: the app should still start without the list
    print('could not get tts model list, falling back to tts-1:', err)
    tts_models = ['tts-1']
if not tts_models:
    tts_models = ['tts-1']
# %% app.ipynb 5
# Voice names offered in the voice dropdown.
tts_voices = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
# %% app.ipynb 6
def split_text(input_text, max_length=4000, lookback=1000):
    """Split text into consecutive chunks of at most ``max_length`` characters.

    Each cut is placed, in order of preference, after the last newline found in the final
    ``lookback`` characters of the chunk, after the last ``'. '`` found there, or exactly at
    ``max_length`` when neither exists. Joining the returned chunks reproduces the input.

    Args:
        input_text: The text to split.
        max_length: Maximum number of characters per chunk.
        lookback: How many characters before the hard limit are searched for a boundary.
            Values larger than ``max_length`` search the whole chunk.

    Returns:
        A list of chunks; ``[input_text]`` when the text already fits.

    Raises:
        ValueError: If the text has to be split and ``max_length`` is not positive.
    """
    if len(input_text) <= max_length:
        return [input_text]
    if max_length < 1:
        # A zero-length chunk would never consume any text.
        raise ValueError("max_length must be a positive integer")

    window = max(lookback, 0)
    total = len(input_text)
    chunks = []
    start = 0  # offset of the first character not yet emitted
    while total - start > max_length:
        hard_end = start + max_length
        # Clamp to the chunk start: a negative index would be counted from the end of the
        # string and silently disable the boundary search.
        search_from = max(start, hard_end - window)
        cut = input_text.rfind('\n', search_from, hard_end)
        if cut != -1:
            end = cut + 1  # keep the newline in the current chunk
        else:
            cut = input_text.rfind('. ', search_from, hard_end)
            end = cut + 2 if cut != -1 else hard_end  # cut after the space, else hard cut
        chunks.append(input_text[start:end])
        start = end
    # Whatever is left fits into one chunk and is never empty.
    chunks.append(input_text[start:])
    return chunks
# %% app.ipynb 7
def concatenate_mp3(mp3_files):
    """Join several MP3 byte strings into one MP3 byte string.

    Args:
        mp3_files: List of MP3-encoded ``bytes`` objects, in playback order.

    Returns:
        ``b''`` for an empty list; the single element unchanged (no decode/re-encode) when
        there is exactly one; otherwise the MP3 bytes of all segments appended in order.
    """
    if not mp3_files:
        # Nothing to join; avoid encoding an empty segment.
        return b''
    if len(mp3_files) == 1:
        return mp3_files[0]

    combined = AudioSegment.empty()
    for mp3_data in mp3_files:
        # Decode each byte string from memory and append it to the running result.
        combined += AudioSegment.from_file(io.BytesIO(mp3_data), format="mp3")

    # Encode into an in-memory buffer; getvalue() returns the whole buffer
    # regardless of the current stream position.
    with io.BytesIO() as combined_mp3:
        combined.export(combined_mp3, format="mp3")
        return combined_mp3.getvalue()
# %% app.ipynb 8
def create_speech(input_text, model='tts-1', voice='alloy', progress=gr.Progress()):
    """Turn text into speech with the OpenAI API and return the audio bytes.

    The text is split with ``split_text``, each chunk is sent to the speech endpoint, and the
    per-chunk audio is joined with ``concatenate_mp3``.

    Args:
        input_text: Text to convert.
        model: Model id passed to the speech endpoint.
        voice: Voice name passed to the speech endpoint.
        progress: Progress tracker, called with a fraction and a description. The
            ``gr.Progress()`` default is the Gradio convention for receiving a tracker.

    Returns:
        The combined audio as returned by ``concatenate_mp3``.

    Raises:
        gr.Error: If ``input_text`` is empty or contains only whitespace.
    """
    if not input_text or not input_text.strip():
        # Fail early with a message for the UI instead of sending an empty request.
        raise gr.Error("Please enter some text to convert to speech.")

    chunks = split_text(input_text)
    total = len(chunks)
    progress(0, desc="Starting TTS processing...")

    audio_data = []
    client = openai.OpenAI()
    try:
        for i, chunk in enumerate(chunks, start=1):
            response = client.audio.speech.create(
                model=model,
                voice=voice,
                input=chunk,
                speed=1.0,
            )
            audio_data.append(response.content)
            progress(i / total, desc=f"Processing chunk {i} of {total}")
    finally:
        # Release the client even when a request fails part-way through.
        client.close()

    combined_audio = concatenate_mp3(audio_data)
    progress(1, desc="Processing completed")
    return combined_audio
# %% app.ipynb 9
def get_input_text_len(input_text):
    """Return the number of characters in ``input_text``; ``None`` counts as 0."""
    return len(input_text or '')
# %% app.ipynb 10
# Gradio UI: a text box, model/voice selectors, a live character counter, the audio output,
# and Go / Clear buttons.
# NOTE(review): the nesting of components inside the two rows is reconstructed; confirm the
# intended layout against app.ipynb.
# NOTE(review): `head` is given plain text here; confirm that is the intended value.
with gr.Blocks(title='OpenAI TTS', head='OpenAI TTS') as app:
    gr.Markdown("# OpenAI TTS")
    # create_speech splits long input via split_text, so there is no hard 4,000-character limit.
    gr.Markdown("Start typing below and then click **Go** to create the speech from your text. Text longer than 4,000 characters is split into chunks and the audio is joined automatically.")
    with gr.Row():
        input_text = gr.Textbox(max_lines=100, label="Enter text here")
    with gr.Row():
        tts_model_dropdown = gr.Dropdown(value='tts-1', choices=tts_models, label='Model')
        tts_voice_dropdown = gr.Dropdown(value='alloy', choices=tts_voices, label='Voice')
        input_text_length = gr.Label(label="Number of characters")
    output_audio = gr.Audio()
    # Update the character counter while the user types.
    input_text.input(fn=get_input_text_len, inputs=input_text, outputs=input_text_length)
    go_btn = gr.Button("Go")
    go_btn.click(fn=create_speech, inputs=[input_text, tts_model_dropdown, tts_voice_dropdown], outputs=[output_audio])
    clear_btn = gr.Button('Clear')
    # Reset the counter together with the text: the `input` event above only fires on user
    # typing, so clearing the box alone would leave the old count on screen.
    clear_btn.click(fn=lambda: ('', 0), outputs=[input_text, input_text_length])
# %% app.ipynb 11
# Keyword arguments for app.launch(): login with the fixed user name 'username' and the
# password held in GRADIO_PASSWORD, plus the message shown on the login page.
launch_kwargs = dict(
    auth=('username', GRADIO_PASSWORD),
    auth_message='Please log in to Mat\'s TTS App with username: username and password.',
)
# %% app.ipynb 13
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    app.launch(**launch_kwargs)