# my_news_podcast/client.py
import streamlit as st
import requests
import base64
from io import BytesIO
import pandas as pd
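# Local usage (assuming streamlit, requests, and pandas are installed):
#   streamlit run client.py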
# Set page config
st.set_page_config(
    page_title="Nigerian Text-to-Speech",
    page_icon="🎙️",
    layout="wide"
)
# Define the available voices and languages
AVAILABLE_VOICES = {
    "Female": ["zainab", "idera", "regina", "chinenye", "joke", "remi"],
    "Male": ["jude", "tayo", "umar", "osagie", "onye", "emma"]
}
AVAILABLE_LANGUAGES = ["english", "yoruba", "igbo", "hausa"]
# IMPORTANT: enter the ngrok URL shown in your Colab notebook into the field below
# Example: https://a1b2-34-56-78-90.ngrok.io
API_BASE_URL = st.text_input(
    "Enter the ngrok URL from Colab (e.g., https://a1b2-34-56-78-90.ngrok.io)",
    value="",
    key="api_url"
)
# Normalise the base URL (tolerate stray whitespace and trailing slashes)
# and derive the TTS endpoint from it
API_BASE_URL = API_BASE_URL.strip().rstrip("/")
if API_BASE_URL:
    API_TTS_ENDPOINT = f"{API_BASE_URL}/tts"

    # Test connection to backend
    try:
        health_check = requests.get(API_BASE_URL, timeout=10)
        if health_check.status_code == 200:
            st.success("✅ Connected to backend API successfully!")
        else:
            st.warning(f"⚠️ Backend API returned status code {health_check.status_code}")
    except Exception as e:
        st.error(f"❌ Cannot connect to backend API: {str(e)}")
else:
    st.warning("⚠️ Please enter the ngrok URL from your Colab notebook to continue")
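# Backend contract assumed by this client: POST {API_BASE_URL}/tts with a JSON body
# such as {"text": "...", "language": "english", "voice": "idera"}. A successful
# response is expected to contain either base64-encoded WAV audio
# ({"audio_base64": "..."}) or, from older backend versions, a relative file path
# ({"audio_url": "/..."}) that is resolved against API_BASE_URL further down.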
# App title and description
st.title("Nigerian Text-to-Speech")
st.markdown("""
Convert text to speech with authentic Nigerian accents. This app uses YarnGPT, a text-to-speech model
that generates natural Nigerian-accented speech in English, Yoruba, Igbo, and Hausa.
""")
# Create tabs for different functions
tab1, tab2, tab3 = st.tabs(["Basic TTS", "Batch Processing", "About"])
# Tab 1: Basic TTS
with tab1:
    col1, col2 = st.columns([3, 1])

    with col1:
        # Text input
        text_input = st.text_area(
            "Enter text to convert to speech",
            "Welcome to Nigeria, the giant of Africa. Our diverse cultures and languages make us unique.",
            height=150
        )
        # Generate button
        generate_button = st.button("Generate Audio", type="primary", disabled=not API_BASE_URL)

    with col2:
        # Options
        language = st.selectbox("Language", AVAILABLE_LANGUAGES)
        gender = st.radio("Gender", ["Female", "Male"])
        voice = st.selectbox("Voice", AVAILABLE_VOICES[gender])
        st.info(f"Selected voice: **{voice}** ({gender.lower()})")

    # Generate audio when the button is clicked
    if generate_button and text_input and API_BASE_URL:
        with st.spinner("Generating audio... (this may take a minute as the audio is processed through Colab)"):
            try:
                # Call the API with an increased timeout
                response = requests.post(
                    API_TTS_ENDPOINT,
                    json={"text": text_input, "language": language, "voice": voice},
                    timeout=120  # two-minute timeout; Colab inference can be slow
                )
                if response.status_code == 200:
                    # Get response data
                    audio_data = response.json()

                    # Save info in session state
                    st.session_state.last_text = text_input
                    st.session_state.last_voice = voice
                    st.session_state.last_language = language

                    # Display success and audio player
                    st.success("Audio generated successfully!")
                    st.markdown(f"Voice: **{voice}** | Language: **{language}**")

                    # Handle base64-encoded audio
                    if "audio_base64" in audio_data:
                        audio_bytes = base64.b64decode(audio_data["audio_base64"])
                        audio_stream = BytesIO(audio_bytes)
                        # Play audio directly from the in-memory stream
                        st.audio(audio_stream, format="audio/wav")
                    else:
                        # Fall back to URL method (legacy support)
                        audio_url = f"{API_BASE_URL}{audio_data['audio_url']}"
                        st.warning("Using legacy URL-based audio (may not work)")
                        st.code(audio_url, language="text")
                        st.audio(audio_url, format="audio/wav")
                else:
                    st.error(f"Error: {response.status_code} - {response.text}")
            except Exception as e:
                st.error(f"Error generating audio: {str(e)}")
                st.info(f"Make sure the backend API is running and accessible at {API_BASE_URL}")
# Tab 2: Batch Processing
with tab2:
    st.header("Batch Text-to-Speech Conversion")
    st.markdown("""
Process multiple text entries at once. Upload a CSV file with the following columns:
- `text`: The text to convert to speech
- `language` (optional): Language for the text (english, yoruba, igbo, hausa)
- `voice` (optional): Voice name to use
""")
    # File uploader
    uploaded_file = st.file_uploader("Upload CSV file", type="csv")

    if uploaded_file and API_BASE_URL:
        # Process the file
        try:
            df = pd.read_csv(uploaded_file)
            if "text" not in df.columns:
                st.error("CSV file must contain a 'text' column")
            else:
                st.dataframe(df.head())

                # Default values used when a row omits the optional columns
                default_language = st.selectbox("Default language", AVAILABLE_LANGUAGES)
                default_voice = st.selectbox("Default voice", AVAILABLE_VOICES["Female"] + AVAILABLE_VOICES["Male"])

                if st.button("Process Batch", disabled=not API_BASE_URL):
                    # Create a container for audio files
                    audio_container = st.container()
                    progress_bar = st.progress(0)
                    status_text = st.empty()

                    # Process each row
                    results = []
                    audio_files = []  # Store audio data for playback

                    for i, row in enumerate(df.itertuples()):
                        # Update progress
                        progress = int((i + 1) / len(df) * 100)
                        progress_bar.progress(progress)
                        status_text.text(f"Processing item {i+1} of {len(df)}...")

                        # Get text and parameters, falling back to the defaults
                        text = row.text
                        lang = getattr(row, "language", default_language)
                        voice_name = getattr(row, "voice", default_voice)

                        try:
                            # Make the API call with an increased timeout
                            response = requests.post(
                                API_TTS_ENDPOINT,
                                json={"text": text, "language": lang, "voice": voice_name},
                                timeout=120  # two-minute timeout per item
                            )
                            if response.status_code == 200:
                                audio_data = response.json()

                                # Handle base64-encoded audio
                                if "audio_base64" in audio_data:
                                    audio_bytes = base64.b64decode(audio_data["audio_base64"])
                                    audio_files.append({
                                        "index": i,
                                        "bytes": audio_bytes,
                                        "text": text,
                                        "voice": voice_name,
                                        "language": lang
                                    })
                                    status = "Success"
                                else:
                                    # Fall back to URL method (legacy support)
                                    audio_url = f"{API_BASE_URL}{audio_data['audio_url']}"
                                    status = "Success (URL mode)"

                                # Add to results
                                results.append({
                                    "text": text[:50] + "..." if len(text) > 50 else text,
                                    "language": lang,
                                    "voice": voice_name,
                                    "status": status
                                })
                            else:
                                results.append({
                                    "text": text[:50] + "..." if len(text) > 50 else text,
                                    "language": lang,
                                    "voice": voice_name,
                                    "status": f"Error: {response.status_code}"
                                })
                        except Exception as e:
                            results.append({
                                "text": text[:50] + "..." if len(text) > 50 else text,
                                "language": lang,
                                "voice": voice_name,
                                "status": f"Error: {str(e)}"
                            })

                    # Show results
                    st.success("Batch processing completed!")
                    results_df = pd.DataFrame(results)
                    st.dataframe(results_df)

                    # Display audio players for successful generations
                    with audio_container:
                        st.subheader("Generated Audio Files")
                        for audio_item in audio_files:
                            st.markdown(
                                f"**{audio_item['index']+1}. {audio_item['text'][:50]}...** "
                                f"({audio_item['voice']}, {audio_item['language']})"
                            )
                            audio_stream = BytesIO(audio_item["bytes"])
                            st.audio(audio_stream, format="audio/wav")
                            st.markdown("---")
        except Exception as e:
            st.error(f"Error processing file: {str(e)}")
    elif not API_BASE_URL:
        st.warning("Please enter the ngrok URL first to enable batch processing")
# Tab 3: About
with tab3:
    st.header("About YarnGPT")

    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("""
### Features
- 🗣️ 12 preset voices (6 male, 6 female)
- 🎯 Trained on 2,000+ hours of Nigerian audio
- 🔊 24 kHz high-quality audio output
- 📝 Support for long-form text

### Model Details
- Base: HuggingFaceTB/SmolLM2-360M
- Training: 5 epochs on an A100 GPU
- Data: Nigerian movies, podcasts, and open-source audio
""")

    with col2:
        st.markdown("""
### Available Voices
- **Female**: zainab, idera, regina, chinenye, joke, remi
- **Male**: jude, tayo, umar, osagie, onye, emma

### Limitations
- Primarily English to Nigerian-accented English
- May not capture every Nigerian accent variation
- Training data includes auto-generated content
""")

    st.markdown("""
### Credits
- YarnGPT was created by Saheed Abdulrahman, a University of Lagos (Unilag) student
- The model is available as open source on [GitHub](https://github.com/saheedniyi02/yarngpt)
- Web demo: [https://yarngpt.co/](https://yarngpt.co/)
""")
# Footer
st.markdown("---")
st.markdown("Developed for a Nigerian News App Podcaster API | Powered by YarnGPT")