Spaces:

hskwon7
/

classification_test

Sleeping

File size: 2,116 Bytes

57d546d
 
adb445d
 
 
3c49d37
adb445d
 
 
 
 
 
2a4516f
adb445d
 
 
9f8fd3c
adb445d
 
 
 
9f8fd3c
adb445d
 
 
 
3c49d37
9f8fd3c
 
3c49d37
 
9f8fd3c
 
3c49d37
9f8fd3c
 
 
 
 
 
 
 
 
 
 
 
3c49d37
cdc9632
adb445d
9f8fd3c
adb445d
 
cdc9632
57d546d
3c49d37
9f8fd3c
3c49d37

import streamlit as st
from transformers import pipeline
from PIL import Image
import io
from gtts import gTTS
import tempfile

# Page header: the app title followed by a one-sentence description of what
# the demo does with an uploaded image.
page_title = "🖼️ → 📖 Image-to-Story Demo"
page_intro = "Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!"
st.title(page_title)
st.write(page_intro)

@st.cache_resource
def load_captioner():
    """Create the image-captioning pipeline.

    Builds a ``transformers`` "image-to-text" pipeline backed by the
    ``unography/blip-large-long-cap`` checkpoint. The function is wrapped in
    ``st.cache_resource`` so the pipeline object is shared across script
    reruns instead of being rebuilt each time.

    Returns:
        The pipeline object returned by ``transformers.pipeline``.
    """
    captioning_pipeline = pipeline(
        "image-to-text",
        model="unography/blip-large-long-cap",
    )
    return captioning_pipeline

@st.cache_resource
def load_story_gen():
    """Create the story-writing text-generation pipeline.

    Builds a ``transformers`` "text-generation" pipeline that uses the
    ``gpt2`` checkpoint for both model and tokenizer. The function is wrapped
    in ``st.cache_resource`` so the pipeline object is shared across script
    reruns instead of being rebuilt each time.

    Returns:
        The pipeline object returned by ``transformers.pipeline``.
    """
    checkpoint = "gpt2"
    story_pipeline = pipeline(
        "text-generation",
        model=checkpoint,
        tokenizer=checkpoint,
    )
    return story_pipeline

# Build both models up front. The loaders are decorated with
# st.cache_resource, so reruns of this script reuse the same pipeline objects.
captioner = load_captioner()
story_gen = load_story_gen()

uploaded = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"], key="image")
if uploaded:
    # Caption, story and audio are memoised in session_state so that reruns
    # triggered by widget interaction (e.g. the play button) do not recompute
    # them. They must be dropped when a *different* file is uploaded, otherwise
    # the results of the first image would be shown for every later one.
    # (name, size) is a cheap identity for the upload; two different files that
    # share both values would still be treated as the same image.
    image_signature = (uploaded.name, uploaded.size)
    if (
        "image_signature" not in st.session_state
        or st.session_state["image_signature"] != image_signature
    ):
        for stale_key in ("caption", "story", "audio_bytes"):
            if stale_key in st.session_state:
                del st.session_state[stale_key]
        st.session_state["image_signature"] = image_signature

    img = Image.open(uploaded)
    st.image(img, use_column_width=True)

    # Caption
    if "caption" not in st.session_state:
        with st.spinner("Generating caption…"):
            caps = captioner(img)
            first = caps[0] if isinstance(caps, list) else caps
            # The image-to-text pipeline yields dicts of the form
            # {"generated_text": "..."}; store the plain string so it can be
            # displayed and used directly as the story prompt.
            if isinstance(first, dict):
                first = first.get("generated_text", "")
            st.session_state.caption = str(first)
    st.write("**Caption:**", st.session_state.caption)

    # Story: the caption is the prompt, so the generated text begins with it.
    if "story" not in st.session_state:
        with st.spinner("Spinning up a story…"):
            out = story_gen(
                st.session_state.caption,
                max_length=200,
                num_return_sequences=1,
                do_sample=True,
                top_p=0.9,
            )
            st.session_state.story = out[0]["generated_text"]
    st.write("**Story:**", st.session_state.story)

    # Prepare audio bytes once per image; gTTS writes MP3 data into the buffer.
    if "audio_bytes" not in st.session_state:
        with st.spinner("Generating audio…"):
            tts = gTTS(text=st.session_state.story, lang="en")
            buf = io.BytesIO()
            tts.write_to_fp(buf)
            st.session_state.audio_bytes = buf.getvalue()

    # Play button: hand the in-memory MP3 bytes straight to st.audio. Writing
    # them to a NamedTemporaryFile(delete=False) first left one orphaned file
    # on disk per click.
    if st.button("🔊 Play Story Audio"):
        st.audio(st.session_state.audio_bytes, format="audio/mp3")