Spaces:

hskwon7
/

classification_test

Sleeping

App Files Files Community

classification_test / app.py

hskwon7

Update app.py

3c49d37 verified 20 days ago

raw

history blame contribute delete

2.12 kB

	import streamlit as st
	from transformers import pipeline
	from PIL import Image
	import io
	from gtts import gTTS
	import tempfile

	# Page header: title plus a one-line description of what the demo does.
	st.title("🖼️ → 📖 Image-to-Story Demo")
	st.write(
	    "Upload an image and watch as it’s captioned, "
	    "turned into a short story, and even read aloud!"
	)

	@st.cache_resource
	def load_captioner():
	    """Build the image-to-text pipeline used to caption uploaded images.

	    Decorated with ``st.cache_resource`` so the pipeline object is
	    constructed once and reused instead of being rebuilt on every call.
	    """
	    captioning_pipeline = pipeline(
	        "image-to-text",
	        model="unography/blip-large-long-cap",
	    )
	    return captioning_pipeline

	@st.cache_resource
	def load_story_gen():
	    """Build the GPT-2 text-generation pipeline used to write the story.

	    Decorated with ``st.cache_resource`` so the pipeline object is
	    constructed once and reused instead of being rebuilt on every call.
	    """
	    story_pipeline = pipeline(
	        "text-generation",
	        model="gpt2",
	        tokenizer="gpt2",
	    )
	    return story_pipeline

	# Both loaders are wrapped in st.cache_resource, so the models are built once.
	captioner = load_captioner()
	story_gen = load_story_gen()

	uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"], key="image")
	if uploaded:
	    # Caption, story and audio are kept in session_state so that reruns
	    # (e.g. pressing the play button) do not invoke the models again.
	    # Tie them to the current upload: otherwise a second image would keep
	    # showing the caption/story/audio produced for the first one.
	    upload_id = (uploaded.name, hash(uploaded.getvalue()))
	    if (
	        "upload_id" not in st.session_state
	        or st.session_state.upload_id != upload_id
	    ):
	        for stale_key in ("caption", "story", "audio_bytes"):
	            if stale_key in st.session_state:
	                del st.session_state[stale_key]
	        st.session_state.upload_id = upload_id

	    img = Image.open(uploaded)
	    # NOTE(review): use_column_width is deprecated in newer Streamlit
	    # releases — confirm the pinned version before switching parameters.
	    st.image(img, use_column_width=True)

	    # Caption
	    if "caption" not in st.session_state:
	        with st.spinner("Generating caption…"):
	            caps = captioner(img)
	        first = caps[0] if isinstance(caps, list) else caps
	        # The image-to-text pipeline returns dicts of the form
	        # {"generated_text": ...}; the story generator and gTTS below need
	        # the plain string, so unwrap it here.
	        if isinstance(first, dict):
	            first = first["generated_text"]
	        st.session_state.caption = first
	    st.write("Caption:", st.session_state.caption)

	    # Story
	    if "story" not in st.session_state:
	        with st.spinner("Spinning up a story…"):
	            out = story_gen(
	                st.session_state.caption,
	                max_length=200,
	                num_return_sequences=1,
	                do_sample=True,
	                top_p=0.9,
	            )
	        st.session_state.story = out[0]["generated_text"]
	    st.write("Story:", st.session_state.story)

	    # Prepare audio bytes once per upload
	    if "audio_bytes" not in st.session_state:
	        with st.spinner("Generating audio…"):
	            tts = gTTS(text=st.session_state.story, lang="en")
	            buf = io.BytesIO()
	            tts.write_to_fp(buf)
	        st.session_state.audio_bytes = buf.getvalue()

	    # Play button
	    if st.button("🔊 Play Story Audio"):
	        # st.audio accepts raw bytes, so the MP3 is streamed straight from
	        # memory. The previous version wrote a delete=False temp file on
	        # every click and never removed it.
	        st.audio(st.session_state.audio_bytes, format="audio/mp3")