# Spaces: Sleeping
import hashlib
import io

import streamlit as st
from gtts import gTTS
from PIL import Image
from transformers import pipeline
# Page header: the app title followed by a one-line description of what the
# demo does with an uploaded image.
PAGE_TITLE = "🖼️ → 📖 Image-to-Story Demo"
PAGE_INTRO = "Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!"

st.title(PAGE_TITLE)
st.write(PAGE_INTRO)
@st.cache_resource
def load_captioner():
    """Build the image-captioning pipeline, cached across script reruns.

    Streamlit re-executes this script from the top on every widget
    interaction. ``st.cache_resource`` keeps a single pipeline instance
    alive so the model is not reloaded on each rerun.

    Returns:
        The ``transformers`` ``image-to-text`` pipeline for the
        ``unography/blip-large-long-cap`` model.
    """
    return pipeline("image-to-text", model="unography/blip-large-long-cap")
@st.cache_resource
def load_story_gen():
    """Build the story-generation pipeline, cached across script reruns.

    Streamlit re-executes this script from the top on every widget
    interaction. ``st.cache_resource`` keeps a single pipeline instance
    alive so the model is not reloaded on each rerun.

    Returns:
        The ``transformers`` ``text-generation`` pipeline using the
        ``gpt2`` model and tokenizer.
    """
    return pipeline("text-generation", model="gpt2", tokenizer="gpt2")
# Main page flow: upload -> caption -> story -> audio.
captioner = load_captioner()
story_gen = load_story_gen()

# 1) Upload (key='image' gives us st.session_state.image)
uploaded = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"], key="image")

if uploaded:
    # Fingerprint the uploaded bytes so a *different* image invalidates the
    # caption/story/audio kept in session state. Without this reset the
    # "not in st.session_state" guards below would keep showing the results
    # computed for the first image. getvalue() does not move the read
    # position, so Image.open() below still sees the full file.
    upload_digest = hashlib.sha256(uploaded.getvalue()).hexdigest()
    if st.session_state.get("upload_digest") != upload_digest:
        for stale_key in ("caption", "story", "audio_buffer"):
            if stale_key in st.session_state:
                del st.session_state[stale_key]
        st.session_state.upload_digest = upload_digest

    img = Image.open(uploaded)
    st.image(img, use_column_width=True)

    # 2) Caption (once per upload)
    if "caption" not in st.session_state:
        with st.spinner("Generating caption…"):
            st.session_state.caption = captioner(img)[0]["generated_text"]
    st.write("**Caption:**", st.session_state.caption)

    # 3) Story (once per upload); the caption is the generation prompt
    if "story" not in st.session_state:
        with st.spinner("Spinning up a story…"):
            out = story_gen(
                st.session_state.caption,
                max_length=200,
                num_return_sequences=1,
                do_sample=True,
                top_p=0.9,
            )
            st.session_state.story = out[0]["generated_text"]
    st.write("**Story:**", st.session_state.story)

    # 4) Pre-generate audio bytes (once per upload) so the button below
    #    only has to play them
    if "audio_buffer" not in st.session_state:
        with st.spinner("Generating audio…"):
            tts = gTTS(text=st.session_state.story, lang="en")
            buf = io.BytesIO()
            tts.write_to_fp(buf)
            st.session_state.audio_buffer = buf.getvalue()

    # 5) Play on demand
    if st.button("🔊 Play Story Audio"):
        st.audio(st.session_state.audio_buffer, format="audio/mp3")