# Spaces: matdmiller/tts-openai (status: Runtime error; file size: 6,281 Bytes)

# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.

# %% auto 0
__all__ = ['secret_import_failed', 'tts_voices', 'launch_kwargs', 'split_text', 'concatenate_mp3', 'create_speech',
           'get_input_text_len']

# %% app.ipynb 1
#tts_openai_secrets.py content:
#import os
#os.environ['OPENAI_API_KEY'] = 'sk-XXXXXXXXXXXXXXXXXXXXXX'
import os

# The app needs two secrets, normally supplied as environment variables:
#   OPENAI_API_KEY  - read from the environment by the OpenAI client
#   GRADIO_PASSWORD - password for the Gradio login configured at launch
# If either one is missing, fall back to importing a local
# `tts_openai_secrets` module (see the commented example above), which is
# expected to set the variables in os.environ as an import side effect.
secret_import_failed = False

try:
    _ = os.environ['OPENAI_API_KEY']
except KeyError:
    # Only a missing key means "not configured"; any other error should surface.
    print('OPENAI_API_KEY environment variable was not found.')
    secret_import_failed = True
else:
    print('OPENAI_API_KEY environment variable was found.')

try:
    GRADIO_PASSWORD = os.environ['GRADIO_PASSWORD']
except KeyError:
    print('GRADIO_PASSWORD environment variable was not found.')
    secret_import_failed = True
else:
    print('GRADIO_PASSWORD environment variable was found.')

if secret_import_failed:
    # Imported purely for its side effect of populating os.environ.
    import tts_openai_secrets
    # Raises KeyError if the secrets module did not provide the password either.
    GRADIO_PASSWORD = os.environ['GRADIO_PASSWORD']
    print('import tts_openai_secrets succeeded')

# %% app.ipynb 3
import gradio as gr
import openai
from pydub import AudioSegment
import io

# %% app.ipynb 4
# Ask the API which models are available and keep those whose id contains
# 'tts'. The lookup is best-effort: on any ordinary failure fall back to the
# default model so the UI can still be built, but report why it failed.
try:
    tts_models = [o.id for o in openai.models.list().data if 'tts' in o.id]
    print('successfully got tts model list:', tts_models)
except Exception as err:  # not a bare except: let KeyboardInterrupt/SystemExit through
    print('failed to get tts model list, falling back to tts-1:', err)
    tts_models = ['tts-1']

# %% app.ipynb 5
# Voice names offered in the "Voice" dropdown of the UI.
tts_voices = [
    'alloy',
    'echo',
    'fable',
    'onyx',
    'nova',
    'shimmer',
]

# %% app.ipynb 6
def split_text(input_text, max_length=4000, lookback=1000):
    """Split text into chunks of at most ``max_length`` characters.

    Each chunk boundary is chosen, in order of preference, from the last
    ``lookback`` characters of the current ``max_length`` window:

    1. just after the last newline,
    2. just after the last ``'. '`` (period followed by a space),
    3. otherwise a hard cut at ``max_length``.

    Concatenating the returned chunks reproduces ``input_text`` exactly.

    Args:
        input_text: The text to split.
        max_length: Maximum number of characters per chunk.
        lookback: How many characters before ``max_length`` to search for a
            natural break. Values larger than ``max_length`` search the
            whole window.

    Returns:
        A list of chunks. Text that already fits is returned as a
        single-element list (including ``['']`` for empty input).

    Raises:
        ValueError: If the text needs splitting but ``max_length`` is not
            positive (no progress could ever be made).
    """
    # If the text is shorter than the max_length, return it as is
    if len(input_text) <= max_length:
        return [input_text]

    if max_length < 1:
        raise ValueError('max_length must be a positive integer')

    # Clamp at 0: a negative start would be interpreted relative to the END
    # of the string by str.rfind and silently skip valid break points.
    window_start = max(0, max_length - lookback)

    chunks = []
    text_len = len(input_text)
    pos = 0  # Offset of the first character not yet assigned to a chunk
    while text_len - pos > max_length:
        lo = pos + window_start
        hi = pos + max_length

        # Prefer breaking after a newline (kept in the current chunk)
        newline_index = input_text.rfind('\n', lo, hi)
        if newline_index != -1:
            split_point = newline_index + 1
        else:
            # Otherwise break after the last '. '; failing that, cut hard
            period_index = input_text.rfind('. ', lo, hi)
            split_point = period_index + 2 if period_index != -1 else hi

        chunks.append(input_text[pos:split_point])
        pos = split_point

    # Whatever remains fits in a single chunk (and is never empty)
    chunks.append(input_text[pos:])
    return chunks

# %% app.ipynb 7
def concatenate_mp3(mp3_files):
    """Join a list of MP3 byte strings into one MP3 byte string.

    A single-element list is returned as-is (its only element, untouched);
    otherwise every element is decoded, the segments are appended in order,
    and the result is re-encoded as MP3 in memory.
    """
    # Nothing to merge: hand back the only item without decoding it
    if len(mp3_files) == 1:
        return mp3_files[0]

    # Start from an empty segment and append each decoded clip in order
    merged = AudioSegment.empty()
    for raw_bytes in mp3_files:
        clip = AudioSegment.from_file(io.BytesIO(raw_bytes), format="mp3")
        merged = merged + clip

    # Encode into an in-memory buffer rather than a file on disk
    out_buffer = io.BytesIO()
    merged.export(out_buffer, format="mp3")

    # Rewind and read the whole encoded result back as bytes
    out_buffer.seek(0)
    return out_buffer.read()

# %% app.ipynb 8
def create_speech(input_text, model='tts-1', voice='alloy', progress=gr.Progress()):
    """Convert text to speech with the OpenAI audio API.

    The text is split with ``split_text`` (default limits), each chunk is
    sent as a separate speech request, and the per-chunk audio is merged
    with ``concatenate_mp3``.

    Args:
        input_text: Text to synthesize.
        model: Model id passed to the speech endpoint.
        voice: Voice name passed to the speech endpoint.
        progress: Gradio progress tracker; the ``gr.Progress()`` default is
            how Gradio injects the tracker for event handlers.

    Returns:
        The combined audio as returned by ``concatenate_mp3``.

    Raises:
        gr.Error: If ``input_text`` is empty, so the problem is reported
            before any API request is made.
    """
    # Reject empty input up front instead of sending an empty request
    if not input_text:
        raise gr.Error('Please enter some text to convert to speech.')

    # Split the input text into chunks
    chunks = split_text(input_text)
    total = len(chunks)

    # Initialize the progress bar
    progress(0, desc="Starting TTS processing...")

    # Audio content of each chunk, in order
    audio_data = []

    # The context manager closes the client even if a request raises
    with openai.OpenAI() as client:
        for i, chunk in enumerate(chunks, start=1):
            response = client.audio.speech.create(
                model=model,
                voice=voice,
                input=chunk,
                speed=1.0
            )
            # Append the audio content of the response to the list
            audio_data.append(response.content)

            # Update the progress bar
            progress(i / total, desc=f"Processing chunk {i} of {total}")

    # Concatenate the audio data from all chunks
    combined_audio = concatenate_mp3(audio_data)

    # Final update to the progress bar
    progress(1, desc="Processing completed")

    return combined_audio


# %% app.ipynb 9
def get_input_text_len(input_text):
    """Return the number of characters in ``input_text``."""
    char_count = len(input_text)
    return char_count

# %% app.ipynb 10
# Build the Gradio UI. Components are created in the order written here,
# so the statement order below should not be rearranged casually.
# NOTE(review): `head` is given plain text here; confirm that is what
# gr.Blocks expects for this parameter.
with gr.Blocks(title='OpenAI TTS', head='OpenAI TTS') as app:
    gr.Markdown("# OpenAI TTS")
    # NOTE(review): the text below states a 4,000 character limit, while
    # create_speech passes the input through split_text — confirm the wording.
    gr.Markdown("Start typing below and then click **Go** to create the speech from your text. The current limit is 4,000 characters.")
    with gr.Row():
        input_text = gr.Textbox(max_lines=100, label="Enter text here")
    with gr.Row():
        # Model choices come from the API lookup (or its fallback); voices are fixed
        tts_model_dropdown = gr.Dropdown(value='tts-1',choices=tts_models, label='Model')
        tts_voice_dropdown = gr.Dropdown(value='alloy',choices=tts_voices,label='Voice')
        input_text_length = gr.Label(label="Number of characters")
        output_audio = gr.Audio()
    # Show the current character count whenever the textbox fires its input event
    input_text.input(fn=get_input_text_len, inputs=input_text, outputs=input_text_length)
    go_btn = gr.Button("Go")
    # Run text-to-speech on click and send the result to the audio component
    go_btn.click(fn=create_speech, inputs=[input_text, tts_model_dropdown, tts_voice_dropdown], outputs=[output_audio])
    clear_btn = gr.Button('Clear')
    # Reset the textbox to an empty string
    clear_btn.click(fn=lambda: '', outputs=input_text)
    

# %% app.ipynb 11
# Keyword arguments unpacked into `app.launch`: a fixed username paired with
# the configured password, plus the message shown with the login prompt.
launch_kwargs = dict(
    auth=('username', GRADIO_PASSWORD),
    auth_message="Please log in to Mat's TTS App with username: username and password.",
)

# %% app.ipynb 13
#.py launch
if __name__ == "__main__":
    # Launch the app only when this file is executed as a script,
    # unpacking launch_kwargs as the launch arguments.
    app.launch(**launch_kwargs)