# NOTE(review): the lines below were non-code residue from the Hugging Face
# Spaces file viewer (status text, file size, commit hashes, line-number
# gutter) that made the file unparseable; preserved here as a comment.
#   Spaces: Sleeping
#   File size: 1,991 Bytes
import streamlit as st
from transformers import pipeline
from PIL import Image
import io
from gtts import gTTS
st.title("🖼️ → 📖 Image-to-Story Demo")
st.write("Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!")
@st.cache_resource
def load_captioner():
return pipeline("image-to-text", model="unography/blip-large-long-cap")
@st.cache_resource
def load_story_gen():
return pipeline("text-generation", model="gpt2", tokenizer="gpt2")
captioner = load_captioner()
story_gen = load_story_gen()
# 1) Upload (key='image' gives us st.session_state.image)
uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"], key="image")
if uploaded:
img = Image.open(uploaded)
st.image(img, use_column_width=True)
# 2) Caption (once per upload)
if "caption" not in st.session_state:
with st.spinner("Generating caption…"):
st.session_state.caption = captioner(img)[0]["generated_text"]
st.write("**Caption:**", st.session_state.caption)
# 3) Story (once per upload)
if "story" not in st.session_state:
with st.spinner("Spinning up a story…"):
out = story_gen(
st.session_state.caption,
max_length=200,
num_return_sequences=1,
do_sample=True,
top_p=0.9
)
st.session_state.story = out[0]["generated_text"]
st.write("**Story:**", st.session_state.story)
# 4) Pre-generate audio buffer (once per upload)
if "audio_buffer" not in st.session_state:
with st.spinner("Generating audio…"):
tts = gTTS(text=st.session_state.story, lang="en")
buf = io.BytesIO()
tts.write_to_fp(buf)
buf.seek(0)
st.session_state.audio_buffer = buf.read()
# 5) Play on demand
if st.button("🔊 Play Story Audio"):
st.audio(st.session_state.audio_buffer, format="audio/mp3")
|