Spaces:

hskwon7
/

classification_test

Sleeping

App Files Community

hskwon7 committed on 20 days ago

Commit

3c49d37

verified ·

1 Parent(s): 2a4516f

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -8

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from transformers import pipeline
 from PIL import Image
 import io
 from gtts import gTTS
 st.title("🖼️ → 📖 Image-to-Story Demo")
 st.write("Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!")
@@ -18,19 +19,19 @@ def load_story_gen():
 captioner = load_captioner()
 story_gen = load_story_gen()
-# 1) Upload
 uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"], key="image")
 if uploaded:
     img = Image.open(uploaded)
     st.image(img, use_column_width=True)
-    # 2) Caption
     if "caption" not in st.session_state:
         with st.spinner("Generating caption…"):
-            st.session_state.caption = captioner(img)[0]["generated_text"]
     st.write("**Caption:**", st.session_state.caption)
-    # 3) Story
     if "story" not in st.session_state:
         with st.spinner("Spinning up a story…"):
             out = story_gen(
@@ -43,7 +44,7 @@ if uploaded:
             st.session_state.story = out[0]["generated_text"]
     st.write("**Story:**", st.session_state.story)
-    # 4) Pre-generate raw MP3 bytes
     if "audio_bytes" not in st.session_state:
         with st.spinner("Generating audio…"):
             tts = gTTS(text=st.session_state.story, lang="en")
@@ -51,7 +52,13 @@ if uploaded:
             tts.write_to_fp(buf)
             st.session_state.audio_bytes = buf.getvalue()
-    # 5) Play on demand
     if st.button("🔊 Play Story Audio"):
-        audio_buffer = io.BytesIO(st.session_state.audio_bytes)
-        st.audio(audio_buffer, format="audio/mp3")

 from PIL import Image
 import io
 from gtts import gTTS
+import tempfile
 st.title("🖼️ → 📖 Image-to-Story Demo")
 st.write("Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!")
 captioner = load_captioner()
 story_gen = load_story_gen()
 uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"], key="image")
 if uploaded:
     img = Image.open(uploaded)
     st.image(img, use_column_width=True)
+    # Caption
     if "caption" not in st.session_state:
         with st.spinner("Generating caption…"):
+            caps = captioner(img)
+            st.session_state.caption = caps[0] if isinstance(caps, list) else caps
     st.write("**Caption:**", st.session_state.caption)
+    # Story
     if "story" not in st.session_state:
         with st.spinner("Spinning up a story…"):
             out = story_gen(
             st.session_state.story = out[0]["generated_text"]
     st.write("**Story:**", st.session_state.story)
+    # Prepare audio bytes once
     if "audio_bytes" not in st.session_state:
         with st.spinner("Generating audio…"):
             tts = gTTS(text=st.session_state.story, lang="en")
             tts.write_to_fp(buf)
             st.session_state.audio_bytes = buf.getvalue()
+    # Play button
     if st.button("🔊 Play Story Audio"):
+        # Write to a temp file
+        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+        tmp.write(st.session_state.audio_bytes)
+        tmp.flush()
+        tmp_path = tmp.name
+        tmp.close()
+        # Stream it
+        st.audio(tmp_path, format="audio/mp3")