File size: 2,116 Bytes
57d546d
 
adb445d
 
 
3c49d37
adb445d
 
 
 
 
 
2a4516f
adb445d
 
 
9f8fd3c
adb445d
 
 
 
9f8fd3c
adb445d
 
 
 
3c49d37
9f8fd3c
 
3c49d37
 
9f8fd3c
 
3c49d37
9f8fd3c
 
 
 
 
 
 
 
 
 
 
 
3c49d37
cdc9632
adb445d
9f8fd3c
adb445d
 
cdc9632
57d546d
3c49d37
9f8fd3c
3c49d37
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import hashlib
import io
import tempfile

import streamlit as st
from gtts import gTTS
from PIL import Image
from transformers import pipeline

# Page header: app title followed by a one-line description of what happens
# to an uploaded image (caption -> story -> speech).
st.title("🖼️ → 📖 Image-to-Story Demo")
st.write(
    "Upload an image and watch as it’s captioned, "
    "turned into a short story, and even read aloud!"
)

@st.cache_resource
def load_captioner():
    """Build and return the image-captioning pipeline.

    The function is wrapped in ``st.cache_resource`` so the model is
    constructed once and reused on subsequent script reruns instead of
    being reloaded every time.

    Returns:
        A Hugging Face ``image-to-text`` pipeline backed by the
        ``unography/blip-large-long-cap`` checkpoint.
    """
    captioning_pipeline = pipeline(
        "image-to-text",
        model="unography/blip-large-long-cap",
    )
    return captioning_pipeline

@st.cache_resource
def load_story_gen():
    """Build and return the text-generation pipeline used for stories.

    The function is wrapped in ``st.cache_resource`` so the model is
    constructed once and reused on subsequent script reruns.

    Returns:
        A Hugging Face ``text-generation`` pipeline using the ``gpt2``
        model together with the ``gpt2`` tokenizer.
    """
    story_pipeline = pipeline(
        "text-generation",
        model="gpt2",
        tokenizer="gpt2",
    )
    return story_pipeline

# Instantiate both models up front; the loaders are decorated with
# st.cache_resource, so this is cheap on every rerun after the first.
captioner = load_captioner()
story_gen = load_story_gen()

uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"], key="image")
if uploaded:
    # Read the raw bytes once: they feed both the image decoder and the
    # fingerprint below, and decoding from a fresh BytesIO does not depend on
    # the upload's current read position.
    image_bytes = uploaded.getvalue()
    img = Image.open(io.BytesIO(image_bytes))
    st.image(img, use_column_width=True)

    # The caption, story and audio are cached in session state so that button
    # clicks (which rerun the script) do not recompute them. They must be
    # dropped when a *different* image is uploaded, otherwise the previous
    # image's results would be shown for the new one.
    image_digest = hashlib.sha256(image_bytes).hexdigest()
    if (
        "image_digest" not in st.session_state
        or st.session_state.image_digest != image_digest
    ):
        for stale_key in ("caption", "story", "audio_bytes"):
            if stale_key in st.session_state:
                del st.session_state[stale_key]
        st.session_state.image_digest = image_digest

    # Caption
    if "caption" not in st.session_state:
        with st.spinner("Generating caption…"):
            caps = captioner(img)
            first = caps[0] if isinstance(caps, list) else caps
            # The image-to-text pipeline yields dicts of the form
            # {"generated_text": "..."}; keep only the text so it can be
            # displayed cleanly and used as a prompt for the story model.
            if isinstance(first, dict):
                first = first["generated_text"]
            st.session_state.caption = first
    st.write("**Caption:**", st.session_state.caption)

    # Story
    if "story" not in st.session_state:
        with st.spinner("Spinning up a story…"):
            out = story_gen(
                st.session_state.caption,
                max_length=200,
                num_return_sequences=1,
                do_sample=True,
                top_p=0.9
            )
            st.session_state.story = out[0]["generated_text"]
    st.write("**Story:**", st.session_state.story)

    # Prepare audio bytes once
    if "audio_bytes" not in st.session_state:
        with st.spinner("Generating audio…"):
            tts = gTTS(text=st.session_state.story, lang="en")
            buf = io.BytesIO()
            tts.write_to_fp(buf)
            st.session_state.audio_bytes = buf.getvalue()

    # Play button
    if st.button("🔊 Play Story Audio"):
        # st.audio accepts raw bytes, so there is no need to spill the MP3 to
        # a temp file (which was previously created with delete=False and
        # never removed, leaking one file per click).
        st.audio(st.session_state.audio_bytes, format="audio/mp3")