# Streamlit front-end (Hugging Face Spaces) for the YarnGPT Nigerian TTS demo.
import streamlit as st
import requests
import base64
from io import BytesIO
import pandas as pd

# Page configuration -- must be the first Streamlit call in the script.
st.set_page_config(
    page_title="Nigerian Text-to-Speech",
    page_icon="ποΈ",
    layout="wide",
)

# Preset YarnGPT voices, grouped by gender.
AVAILABLE_VOICES = {
    "Female": ["zainab", "idera", "regina", "chinenye", "joke", "remi"],
    "Male": ["jude", "tayo", "umar", "osagie", "onye", "emma"],
}

# Languages the backend model accepts.
AVAILABLE_LANGUAGES = ["english", "yoruba", "igbo", "hausa"]
# IMPORTANT: Replace this with the ngrok URL shown in your Colab notebook
# Example: API_BASE_URL = "https://a1b2-34-56-78-90.ngrok.io"
API_BASE_URL = st.text_input(
    "Enter the ngrok URL from Colab (e.g., https://a1b2-34-56-78-90.ngrok.io)",
    value="",
    key="api_url"
)

# Derive the TTS endpoint from the base URL
if API_BASE_URL:
    # Normalize: drop a trailing slash so f"{base}/tts" and the legacy
    # f"{base}{audio_url}" joins never produce a double slash.
    API_BASE_URL = API_BASE_URL.rstrip("/")
    API_TTS_ENDPOINT = f"{API_BASE_URL}/tts"
    # Probe the backend root so the user gets immediate connectivity feedback.
    try:
        # A bounded timeout keeps the UI responsive when the tunnel is dead
        # (previously no timeout: a stalled connection hung the app).
        health_check = requests.get(API_BASE_URL, timeout=10)
        if health_check.status_code == 200:
            st.success(f"β Connected to backend API successfully!")
        else:
            st.warning(f"β οΈ Backend API returned status code {health_check.status_code}")
    except Exception as e:
        st.error(f"β Cannot connect to backend API: {str(e)}")
else:
    st.warning("β οΈ Please enter the ngrok URL from your Colab notebook to continue")
# Page header: title, short model blurb, and the three main tabs.
st.title("Nigerian Text-to-Speech")
st.markdown("""
Convert text to speech with authentic Nigerian accents. This app uses YarnGPT, a text-to-speech model
that generates natural Nigerian-accented speech in English, Yoruba, Igbo, and Hausa.
""")

# tab1/tab2/tab3 are referenced by the sections below -- keep these names.
tab1, tab2, tab3 = st.tabs(["Basic TTS", "Batch Processing", "About"])
# Tab 1: single-text TTS generation
with tab1:
    col1, col2 = st.columns([3, 1])
    with col1:
        # Text input
        text_input = st.text_area(
            "Enter text to convert to speech",
            "Welcome to Nigeria, the giant of Africa. Our diverse cultures and languages make us unique.",
            height=150
        )
        # Generate button -- disabled until a backend URL has been entered.
        generate_button = st.button("Generate Audio", type="primary", disabled=not API_BASE_URL)
    with col2:
        # Voice/language options; the voice list depends on the chosen gender.
        language = st.selectbox("Language", AVAILABLE_LANGUAGES)
        gender = st.radio("Gender", ["Female", "Male"])
        voice = st.selectbox("Voice", AVAILABLE_VOICES[gender])
        st.info(f"Selected voice: **{voice}** ({gender.lower()})")

    # Generate audio when button is clicked
    if generate_button and text_input and API_BASE_URL:
        with st.spinner("Generating audio... (This may take a minute as the audio is processed through Colab)"):
            try:
                # 120 s really is the advertised 2 minutes and matches the
                # batch path (the previous value, 100000 s, was ~28 hours and
                # could block the UI indefinitely).
                response = requests.post(
                    API_TTS_ENDPOINT,
                    json={"text": text_input, "language": language, "voice": voice},
                    timeout=120
                )
                if response.status_code == 200:
                    audio_data = response.json()
                    # Remember the last request in session state so it
                    # survives Streamlit reruns.
                    st.session_state.last_text = text_input
                    st.session_state.last_voice = voice
                    st.session_state.last_language = language
                    st.success("Audio generated successfully!")
                    st.markdown(f"Voice: **{voice}** | Language: **{language}**")
                    if "audio_base64" in audio_data:
                        # Preferred path: the backend returns the WAV inline
                        # as base64, so no second HTTP fetch is needed.
                        audio_bytes = base64.b64decode(audio_data["audio_base64"])
                        audio_stream = BytesIO(audio_bytes)
                        st.audio(audio_stream, format="audio/wav")
                    else:
                        # Fall back to URL method (legacy support)
                        audio_url = f"{API_BASE_URL}{audio_data['audio_url']}"
                        st.warning("Using legacy URL-based audio (may not work)")
                        st.code(audio_url, language="text")
                        st.audio(audio_url, format="audio/wav")
                else:
                    st.error(f"Error: {response.status_code} - {response.text}")
            except Exception as e:
                st.error(f"Error generating audio: {str(e)}")
                st.info(f"Make sure the backend API is running and accessible at {API_BASE_URL}")
# Tab 2: batch CSV-driven TTS conversion
with tab2:
    st.header("Batch Text-to-Speech Conversion")
    st.markdown("""
Process multiple text entries at once. Upload a CSV file with the following columns:
- `text`: The text to convert to speech
- `language` (optional): Language for the text (english, yoruba, igbo, hausa)
- `voice` (optional): Voice name to use
""")

    # File uploader
    uploaded_file = st.file_uploader("Upload CSV file", type="csv")

    if uploaded_file and API_BASE_URL:
        try:
            df = pd.read_csv(uploaded_file)
            if "text" not in df.columns:
                st.error("CSV file must contain a 'text' column")
            else:
                st.dataframe(df.head())
                # Defaults used for rows whose optional columns are missing or blank.
                default_language = st.selectbox("Default language", AVAILABLE_LANGUAGES)
                default_voice = st.selectbox("Default voice", AVAILABLE_VOICES["Female"] + AVAILABLE_VOICES["Male"])

                if st.button("Process Batch", disabled=not API_BASE_URL):
                    # Container reserves a spot above the results table for players.
                    audio_container = st.container()
                    progress_bar = st.progress(0)
                    status_text = st.empty()

                    results = []
                    audio_files = []  # in-memory WAV bytes for playback below

                    for i, row in enumerate(df.itertuples()):
                        # Update progress
                        progress_bar.progress(int((i + 1) / len(df) * 100))
                        status_text.text(f"Processing item {i+1} of {len(df)}...")

                        # pandas turns empty CSV cells into NaN, so the column
                        # exists on every row even when a value is missing --
                        # a plain getattr default is not enough. Treat
                        # NaN/blank the same as "column absent".
                        text = "" if pd.isna(row.text) else str(row.text)
                        lang = getattr(row, 'language', None)
                        if lang is None or pd.isna(lang) or str(lang).strip() == "":
                            lang = default_language
                        voice_name = getattr(row, 'voice', None)
                        if voice_name is None or pd.isna(voice_name) or str(voice_name).strip() == "":
                            voice_name = default_voice

                        # Short preview of the text for the results table.
                        preview = text[:50] + "..." if len(text) > 50 else text

                        try:
                            # Per-item call with a 2-minute timeout.
                            response = requests.post(
                                API_TTS_ENDPOINT,
                                json={"text": text, "language": lang, "voice": voice_name},
                                timeout=120
                            )
                            if response.status_code == 200:
                                audio_data = response.json()
                                if "audio_base64" in audio_data:
                                    # Inline base64 audio: keep bytes for playback.
                                    audio_files.append({
                                        "index": i,
                                        "bytes": base64.b64decode(audio_data["audio_base64"]),
                                        "text": text,
                                        "voice": voice_name,
                                        "language": lang
                                    })
                                    status = "Success"
                                else:
                                    # Fall back to URL method (legacy support)
                                    audio_url = f"{API_BASE_URL}{audio_data['audio_url']}"
                                    status = "Success (URL mode)"
                                results.append({
                                    "text": preview,
                                    "language": lang,
                                    "voice": voice_name,
                                    "status": status
                                })
                            else:
                                results.append({
                                    "text": preview,
                                    "language": lang,
                                    "voice": voice_name,
                                    "status": f"Error: {response.status_code}"
                                })
                        except Exception as e:
                            results.append({
                                "text": preview,
                                "language": lang,
                                "voice": voice_name,
                                "status": f"Error: {str(e)}"
                            })

                    # Show results
                    st.success("Batch processing completed!")
                    results_df = pd.DataFrame(results)
                    st.dataframe(results_df)

                    # Display audio players for successful generations
                    with audio_container:
                        st.subheader("Generated Audio Files")
                        for audio_item in audio_files:
                            st.markdown(f"**{audio_item['index']+1}. {audio_item['text'][:50]}...** ({audio_item['voice']}, {audio_item['language']})")
                            audio_stream = BytesIO(audio_item["bytes"])
                            st.audio(audio_stream, format="audio/wav")
                            st.markdown("---")
        except Exception as e:
            st.error(f"Error processing file: {str(e)}")
    elif not API_BASE_URL:
        st.warning("Please enter the ngrok URL first to enable batch processing")
# Tab 3: static "About" page describing the model, plus the page footer.
with tab3:
    st.header("About YarnGPT")
    left_col, right_col = st.columns([1, 1])
    with left_col:
        st.markdown("""
### Features
- π£οΈ 12 preset voices (6 male, 6 female)
- π― Trained on 2000+ hours of Nigerian audio
- π 24kHz high-quality audio output
- π Support for long-form text
### Model Details
- Base: HuggingFaceTB/SmolLM2-360M
- Training: 5 epochs on A100 GPU
- Data: Nigerian movies, podcasts, and open-source audio
""")
    with right_col:
        st.markdown("""
### Available Voices
- **Female**: zainab, idera, regina, chinenye, joke, remi
- **Male**: jude, tayo, umar, osagie, onye, emma
### Limitations
- English to Nigerian-accented English primarily
- May not capture all Nigerian accent variations
- Training data includes auto-generated content
""")
    st.markdown("""
### Credits
- YarnGPT was created by Saheed Abdulrahman, a Unilag student
- Model is available as open source on [GitHub](https://github.com/saheedniyi02/yarngpt)
- Web demo: [https://yarngpt.co/](https://yarngpt.co/)
""")

# Footer
st.markdown("---")
st.markdown("Developed for a Nigerian News App Podcaster API | Powered by YarnGPT")