# Page-scrape residue (not part of the program), kept verbatim as comments:
# ISOM5240gr10's picture
# Update app.py
# c5da07b verified
import os
import tempfile

import streamlit as st
from transformers import pipeline
def save_upload_to_tempfile(data, original_name=""):
    """Write uploaded bytes to a fresh temporary file and return its path.

    The client-supplied ``original_name`` is never used as the destination:
    writing to it directly would let an upload overwrite files in the working
    directory (for example the app script itself). Only its extension is
    reused, and only when that extension is purely alphanumeric.

    Args:
        data: Raw bytes of the uploaded file.
        original_name: File name reported by the client; used only to derive
            the temporary file's suffix.

    Returns:
        Path of the newly created temporary file. The caller is responsible
        for deleting it.
    """
    extension = os.path.splitext(os.path.basename(original_name))[1]
    if not extension[1:].isalnum():
        extension = ""
    # delete=False so the file survives the ``with`` and can be reopened by path.
    with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as handle:
        handle.write(data)
    return handle.name


def top_prediction(predictions):
    """Return the entry with the highest ``'score'``, or ``None`` if empty.

    Args:
        predictions: Iterable of mappings that each carry a ``'score'`` key.

    Returns:
        The mapping with the largest score, or ``None`` when ``predictions``
        contains no entries.
    """
    return max(predictions, key=lambda entry: entry["score"], default=None)


def load_pipeline(task, model):
    """Build a transformers pipeline for ``task`` using the checkpoint ``model``."""
    return pipeline(task, model=model)


def main():
    """Render the page: upload an image, classify its emotion, speak the label."""
    st.set_page_config(page_title="Image Emotion Classification",
                       page_icon="🦜")
    st.header("Turn Your Image to Emotion Audio")

    uploaded_file = st.file_uploader("Select an Image...")
    if uploaded_file is None:
        return

    # Streamlit re-executes the script on every widget interaction (including
    # the "Play Audio" click), so cache the loaded models instead of
    # rebuilding them on each rerun.
    cached_load_pipeline = st.cache_resource(load_pipeline)

    st.image(uploaded_file, caption="Uploaded Image",
             use_column_width=True)

    # Stage 1: Image to Emotion Text
    classifier = cached_load_pipeline(
        "image-classification", "ISOM5240gr10/facial_emotion_finetune")
    st.text('Image Emotion Classification Result...')
    image_path = save_upload_to_tempfile(uploaded_file.getvalue(),
                                         uploaded_file.name)
    try:
        predictions = classifier(image_path)
    finally:
        # The temporary copy is only needed while the classifier reads it.
        os.remove(image_path)

    best = top_prediction(predictions)
    if best is None:
        st.warning("No emotion could be detected in this image.")
        return
    st.write(best['label'])

    # Stage 2: Emotion Text to Audio data
    st.text('Generating Audio Data...')
    synthesizer = cached_load_pipeline(
        "text-to-speech", "Baghdad99/english_voice_tts")
    audio_data = synthesizer(best['label'])

    # Play button
    if st.button("Play Audio"):
        st.audio(audio_data['audio'],
                 format="audio/wav",
                 start_time=0,
                 sample_rate=audio_data['sampling_rate'])


# Streamlit runs the script with __name__ set to "__main__", so the app still
# starts under `streamlit run`, while importing this module has no side effects.
if __name__ == "__main__":
    main()