Hameed13 committed on
Commit 97d03bb · 1 Parent(s): d985a8f

first commit

Files changed (4)
  1. .gitignore +55 -0
  2. client.py +280 -0
  3. main.py +220 -0
  4. requirements.txt +13 -0
.gitignore ADDED
@@ -0,0 +1,55 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Model files (these will be downloaded at runtime)
models/*.ckpt
models/*.yaml
models/*.pt
models/*.bin

# Audio files (generated content)
audio_files/
*.wav

# Distribution / packaging
dist/
build/
*.egg-info/

# Virtual environments
venv/
env/
ENV/
.env
.venv

# IDE specific files
.idea/
.vscode/
*.swp
*.swo

# Jupyter Notebook
.ipynb_checkpoints

# OS specific files
.DS_Store
Thumbs.db

# Logs
*.log
logs/

# Local configuration
config.local.py
.env.local

# Temporary files
tmp/
temp/

# Cache directories
.cache/
.pytest_cache/
client.py ADDED
@@ -0,0 +1,280 @@
import streamlit as st
import requests
import base64
from io import BytesIO
import pandas as pd

# Set page config
st.set_page_config(
    page_title="Nigerian Text-to-Speech",
    page_icon="🎙️",
    layout="wide"
)

# Define the available voices and languages
AVAILABLE_VOICES = {
    "Female": ["zainab", "idera", "regina", "chinenye", "joke", "remi"],
    "Male": ["jude", "tayo", "umar", "osagie", "onye", "emma"]
}
AVAILABLE_LANGUAGES = ["english", "yoruba", "igbo", "hausa"]

# IMPORTANT: Replace this with the ngrok URL shown in your Colab notebook
# Example: API_BASE_URL = "https://a1b2-34-56-78-90.ngrok.io"
API_BASE_URL = st.text_input(
    "Enter the ngrok URL from Colab (e.g., https://a1b2-34-56-78-90.ngrok.io)",
    value="",
    key="api_url"
)
# Derive the TTS endpoint from the base URL
if API_BASE_URL:
    API_TTS_ENDPOINT = f"{API_BASE_URL}/tts"

    # Test connection to backend
    try:
        health_check = requests.get(API_BASE_URL)
        if health_check.status_code == 200:
            st.success("✅ Connected to backend API successfully!")
        else:
            st.warning(f"⚠️ Backend API returned status code {health_check.status_code}")
    except Exception as e:
        st.error(f"❌ Cannot connect to backend API: {str(e)}")
else:
    st.warning("⚠️ Please enter the ngrok URL from your Colab notebook to continue")
# App title and description
st.title("Nigerian Text-to-Speech")
st.markdown("""
Convert text to speech with authentic Nigerian accents. This app uses YarnGPT, a text-to-speech model
that generates natural Nigerian-accented speech in English, Yoruba, Igbo, and Hausa.
""")

# Create tabs for different functions
tab1, tab2, tab3 = st.tabs(["Basic TTS", "Batch Processing", "About"])

# Tab 1: Basic TTS
with tab1:
    col1, col2 = st.columns([3, 1])

    with col1:
        # Text input
        text_input = st.text_area(
            "Enter text to convert to speech",
            "Welcome to Nigeria, the giant of Africa. Our diverse cultures and languages make us unique.",
            height=150
        )

        # Generate button
        generate_button = st.button("Generate Audio", type="primary", disabled=not API_BASE_URL)

    with col2:
        # Options
        language = st.selectbox("Language", AVAILABLE_LANGUAGES)

        gender = st.radio("Gender", ["Female", "Male"])
        voice = st.selectbox("Voice", AVAILABLE_VOICES[gender])

        st.info(f"Selected voice: **{voice}** ({gender.lower()})")
    # Generate audio when button is clicked
    if generate_button and text_input and API_BASE_URL:
        with st.spinner("Generating audio... (This may take a minute as the audio is processed through Colab)"):
            try:
                # Call the API with a generous timeout (generation on Colab can be slow)
                response = requests.post(
                    API_TTS_ENDPOINT,
                    json={"text": text_input, "language": language, "voice": voice},
                    timeout=120  # 2-minute timeout
                )

                if response.status_code == 200:
                    # Get response data
                    audio_data = response.json()

                    # Save info in session state
                    st.session_state.last_text = text_input
                    st.session_state.last_voice = voice
                    st.session_state.last_language = language

                    # Display success and audio player
                    st.success("Audio generated successfully!")
                    st.markdown(f"Voice: **{voice}** | Language: **{language}**")

                    # Handle base64-encoded audio
                    if "audio_base64" in audio_data:
                        audio_bytes = base64.b64decode(audio_data["audio_base64"])
                        audio_stream = BytesIO(audio_bytes)

                        # Play audio directly from the stream
                        st.audio(audio_stream, format="audio/wav")
                    else:
                        # Fall back to URL method (legacy support)
                        audio_url = f"{API_BASE_URL}{audio_data['audio_url']}"
                        st.warning("Using legacy URL-based audio (may not work)")
                        st.code(audio_url, language="text")
                        st.audio(audio_url, format="audio/wav")
                else:
                    st.error(f"Error: {response.status_code} - {response.text}")
            except Exception as e:
                st.error(f"Error generating audio: {str(e)}")
                st.info(f"Make sure the backend API is running and accessible at {API_BASE_URL}")
# Tab 2: Batch Processing
with tab2:
    st.header("Batch Text-to-Speech Conversion")
    st.markdown("""
Process multiple text entries at once. Upload a CSV file with the following columns:
- `text`: The text to convert to speech
- `language` (optional): Language for the text (english, yoruba, igbo, hausa)
- `voice` (optional): Voice name to use

A sample CSV is sketched in the comment below.
    """)
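    # A sample CSV for this tab might look like the following (illustrative
    # values; only the `text` column is required):
    #
    #   text,language,voice
    #   "Welcome to the evening news.",english,idera
    #   "Bawo ni?",yoruba,tayo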
    # File uploader
    uploaded_file = st.file_uploader("Upload CSV file", type="csv")

    if uploaded_file and API_BASE_URL:
        # Process the file
        try:
            df = pd.read_csv(uploaded_file)
            if "text" not in df.columns:
                st.error("CSV file must contain a 'text' column")
            else:
                st.dataframe(df.head())

                # Default values
                default_language = st.selectbox("Default language", AVAILABLE_LANGUAGES)
                default_voice = st.selectbox("Default voice", AVAILABLE_VOICES["Female"] + AVAILABLE_VOICES["Male"])

                if st.button("Process Batch", disabled=not API_BASE_URL):
                    # Create a container for audio files
                    audio_container = st.container()

                    progress_bar = st.progress(0)
                    status_text = st.empty()

                    # Process each row
                    results = []
                    audio_files = []  # Store audio data for playback

                    for i, row in enumerate(df.itertuples()):
                        # Update progress
                        progress = int((i + 1) / len(df) * 100)
                        progress_bar.progress(progress)
                        status_text.text(f"Processing item {i+1} of {len(df)}...")

                        # Get text and parameters (getattr already falls back
                        # to the default when the column is absent)
                        text = row.text
                        lang = getattr(row, 'language', default_language)
                        voice_name = getattr(row, 'voice', default_voice)
                        try:
                            # Make API call with a 2-minute timeout
                            response = requests.post(
                                API_TTS_ENDPOINT,
                                json={"text": text, "language": lang, "voice": voice_name},
                                timeout=120
                            )

                            if response.status_code == 200:
                                audio_data = response.json()

                                # Handle base64-encoded audio
                                if "audio_base64" in audio_data:
                                    audio_bytes = base64.b64decode(audio_data["audio_base64"])
                                    audio_files.append({
                                        "index": i,
                                        "bytes": audio_bytes,
                                        "text": text,
                                        "voice": voice_name,
                                        "language": lang
                                    })

                                    status = "Success"
                                else:
                                    # Fall back to URL method (legacy support)
                                    audio_url = f"{API_BASE_URL}{audio_data['audio_url']}"
                                    status = "Success (URL mode)"

                                # Add to results
                                results.append({
                                    "text": text[:50] + "..." if len(text) > 50 else text,
                                    "language": lang,
                                    "voice": voice_name,
                                    "status": status
                                })
                            else:
                                results.append({
                                    "text": text[:50] + "..." if len(text) > 50 else text,
                                    "language": lang,
                                    "voice": voice_name,
                                    "status": f"Error: {response.status_code}"
                                })
                        except Exception as e:
                            results.append({
                                "text": text[:50] + "..." if len(text) > 50 else text,
                                "language": lang,
                                "voice": voice_name,
                                "status": f"Error: {str(e)}"
                            })
                    # Show results
                    st.success("Batch processing completed!")
                    results_df = pd.DataFrame(results)
                    st.dataframe(results_df)

                    # Display audio players for successful generations
                    with audio_container:
                        st.subheader("Generated Audio Files")
                        for audio_item in audio_files:
                            st.markdown(f"**{audio_item['index']+1}. {audio_item['text'][:50]}...** ({audio_item['voice']}, {audio_item['language']})")
                            audio_stream = BytesIO(audio_item["bytes"])
                            st.audio(audio_stream, format="audio/wav")
                            st.markdown("---")

        except Exception as e:
            st.error(f"Error processing file: {str(e)}")
    elif not API_BASE_URL:
        st.warning("Please enter the ngrok URL first to enable batch processing")

# Tab 3: About
with tab3:
    st.header("About YarnGPT")

    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("""
### Features
- 🗣️ 12 preset voices (6 male, 6 female)
- 🎯 Trained on 2000+ hours of Nigerian audio
- 🔊 24kHz high-quality audio output
- 📝 Support for long-form text

### Model Details
- Base: HuggingFaceTB/SmolLM2-360M
- Training: 5 epochs on A100 GPU
- Data: Nigerian movies, podcasts, and open-source audio
        """)

    with col2:
        st.markdown("""
### Available Voices
- **Female**: zainab, idera, regina, chinenye, joke, remi
- **Male**: jude, tayo, umar, osagie, onye, emma

### Limitations
- Primarily English to Nigerian-accented English
- May not capture all Nigerian accent variations
- Training data includes auto-generated content
        """)

    st.markdown("""
### Credits
- YarnGPT was created by Saheed Abdulrahman, a Unilag student
- The model is available as open source on [GitHub](https://github.com/saheedniyi02/yarngpt)
- Web demo: [https://yarngpt.co/](https://yarngpt.co/)
    """)

# Footer
st.markdown("---")
st.markdown("Developed for a Nigerian News App Podcaster API | Powered by YarnGPT")
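A typical workflow for trying the app end to end: start the backend from main.py first (locally or on Colab behind ngrok), then launch this client with `streamlit run client.py` (with `streamlit`, `requests`, and `pandas` installed) and paste the ngrok base URL into the text box at the top of the page.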
main.py ADDED
@@ -0,0 +1,220 @@
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import os
import uuid
import torch
import torchaudio
import base64
from io import BytesIO
from transformers import AutoModelForCausalLM
import sys
import subprocess
from datetime import datetime, timedelta

app = FastAPI(title="Nigerian TTS API")

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, set this to your Next.js domain
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize necessary directories
os.makedirs("audio_files", exist_ok=True)
os.makedirs("models", exist_ok=True)

# Check if YarnGPT is installed; if not, install it
try:
    import yarngpt
    from yarngpt.audiotokenizer import AudioTokenizerV2
except ImportError:
    print("Installing YarnGPT and dependencies...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/saheedniyi02/yarngpt.git"])
    subprocess.check_call([sys.executable, "-m", "pip", "install", "outetts", "uroman", "transformers", "torchaudio"])
    from yarngpt.audiotokenizer import AudioTokenizerV2

# Model configuration
tokenizer_path = "saheedniyi/YarnGPT2"

# Check if model files exist; if not, download them
wav_tokenizer_config_path = "./models/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
wav_tokenizer_model_path = "./models/wavtokenizer_large_speech_320_24k.ckpt"
if not os.path.exists(wav_tokenizer_config_path):
    print("Downloading model config file...")
    subprocess.check_call([
        "wget", "-O", wav_tokenizer_config_path,
        "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
    ])

if not os.path.exists(wav_tokenizer_model_path):
    print("Downloading model checkpoint file...")
    # Use gdown (already in requirements.txt) for the Google Drive link:
    # for files this large, plain wget fetches Drive's virus-scan
    # confirmation page instead of the checkpoint itself.
    subprocess.check_call([
        "gdown", "https://drive.google.com/uc?id=1-ASeEkrn4HY49yZWHTASgfGFNXdVnLTt",
        "-O", wav_tokenizer_model_path
    ])
print("Loading YarnGPT model and tokenizer...")
audio_tokenizer = AudioTokenizerV2(
    tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path
)
model = AutoModelForCausalLM.from_pretrained(tokenizer_path, torch_dtype="auto").to(audio_tokenizer.device)
print("Model loaded successfully!")

# Available voices and languages
AVAILABLE_VOICES = {
    "female": ["zainab", "idera", "regina", "chinenye", "joke", "remi"],
    "male": ["jude", "tayo", "umar", "osagie", "onye", "emma"]
}
AVAILABLE_LANGUAGES = ["english", "yoruba", "igbo", "hausa"]

# Input validation model
class TTSRequest(BaseModel):
    text: str
    language: str = "english"
    voice: str = "idera"

# Output model with base64-encoded audio
class TTSResponse(BaseModel):
    audio_base64: str  # Base64-encoded audio data
    audio_url: str  # Keep for backward compatibility
    text: str
    voice: str
    language: str
+
90
+ @app.get("/")
91
+ async def root():
92
+ """API health check and info"""
93
+ return {
94
+ "status": "ok",
95
+ "message": "Nigerian TTS API is running",
96
+ "available_languages": AVAILABLE_LANGUAGES,
97
+ "available_voices": AVAILABLE_VOICES
98
+ }
99
+
100
+
101
+ @app.post("/tts", response_model=TTSResponse)
102
+ async def text_to_speech(request: TTSRequest, background_tasks: BackgroundTasks):
103
+ """Convert text to Nigerian-accented speech"""
104
+
105
+ # Validate inputs
106
+ if request.language not in AVAILABLE_LANGUAGES:
107
+ raise HTTPException(status_code=400, detail=f"Language must be one of {AVAILABLE_LANGUAGES}")
108
+
109
+ all_voices = AVAILABLE_VOICES["female"] + AVAILABLE_VOICES["male"]
110
+ if request.voice not in all_voices:
111
+ raise HTTPException(status_code=400, detail=f"Voice must be one of {all_voices}")
112
+
113
+ # Generate unique filename
114
+ audio_id = str(uuid.uuid4())
115
+ output_path = f"audio_files/{audio_id}.wav"
116
+
117
+ try:
118
+ # Create prompt and generate audio
119
+ prompt = audio_tokenizer.create_prompt(request.text, lang=request.language, speaker_name=request.voice)
120
+ input_ids = audio_tokenizer.tokenize_prompt(prompt)
121
+
122
+ output = model.generate(
123
+ input_ids=input_ids,
124
+ temperature=0.1,
125
+ repetition_penalty=1.1,
126
+ max_length=4000,
127
+ )
128
+
129
+ codes = audio_tokenizer.get_codes(output)
130
+ audio = audio_tokenizer.get_audio(codes)
131
+
132
+ # Save audio file
133
+ torchaudio.save(output_path, audio, sample_rate=24000)
134
+
135
+ # Read the file and encode as base64
136
+ with open(output_path, "rb") as audio_file:
137
+ audio_bytes = audio_file.read()
138
+ audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
139
+
140
+ # Clean up old files after a while
141
+ background_tasks.add_task(cleanup_old_files)
142
+
143
+ return TTSResponse(
144
+ audio_base64=audio_base64,
145
+ audio_url=f"/audio/{audio_id}.wav", # Keep for compatibility
146
+ text=request.text,
147
+ voice=request.voice,
148
+ language=request.language
149
+ )
150
+
151
+ except Exception as e:
152
+ raise HTTPException(status_code=500, detail=f"Error generating audio: {str(e)}")
153
+
154
+ # File serving endpoint for direct audio access
155
+ @app.get("/audio/{filename}")
156
+ async def get_audio(filename: str):
157
+ file_path = f"audio_files/{filename}"
158
+ if not os.path.exists(file_path):
159
+ raise HTTPException(status_code=404, detail="Audio file not found")
160
+
161
+ def iterfile():
162
+ with open(file_path, "rb") as audio_file:
163
+ yield from audio_file
164
+
165
+ return StreamingResponse(iterfile(), media_type="audio/wav")
# Endpoint to stream generated audio directly (useful for debugging)
@app.post("/stream-audio")
async def stream_audio(request: TTSRequest):
    """Stream audio directly without saving to disk"""
    try:
        # Create prompt and generate audio
        prompt = audio_tokenizer.create_prompt(request.text, lang=request.language, speaker_name=request.voice)
        input_ids = audio_tokenizer.tokenize_prompt(prompt)

        output = model.generate(
            input_ids=input_ids,
            temperature=0.1,
            repetition_penalty=1.1,
            max_length=4000,
        )

        codes = audio_tokenizer.get_codes(output)
        audio = audio_tokenizer.get_audio(codes)

        # Write the WAV into an in-memory buffer and stream it back
        buffer = BytesIO()
        torchaudio.save(buffer, audio, sample_rate=24000, format="wav")
        buffer.seek(0)

        return StreamingResponse(buffer, media_type="audio/wav")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating audio: {str(e)}")

# Cleanup function to remove old files
def cleanup_old_files():
    """Delete audio files older than 6 hours to manage disk space"""
    try:
        now = datetime.now()
        audio_dir = "audio_files"

        for filename in os.listdir(audio_dir):
            if not filename.endswith(".wav"):
                continue

            file_path = os.path.join(audio_dir, filename)
            file_mod_time = datetime.fromtimestamp(os.path.getmtime(file_path))

            # Delete files older than 6 hours
            if now - file_mod_time > timedelta(hours=6):
                os.remove(file_path)
                print(f"Deleted old audio file: {filename}")
    except Exception as e:
        print(f"Error cleaning up old files: {e}")

# For running locally with uvicorn
if __name__ == "__main__":
    import uvicorn
    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
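A minimal sketch of exercising the /tts endpoint from Python once the server is up (the base URL is an assumption; substitute your ngrok address):

    import base64
    import requests

    # Ask the API to synthesize a short sentence with one of the preset voices.
    resp = requests.post(
        "http://localhost:8000/tts",
        json={"text": "Welcome to Nigeria.", "language": "english", "voice": "idera"},
        timeout=120,
    )
    resp.raise_for_status()

    # The WAV bytes come back base64-encoded in the JSON body.
    with open("output.wav", "wb") as f:
        f.write(base64.b64decode(resp.json()["audio_base64"]))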
requirements.txt ADDED
@@ -0,0 +1,13 @@
fastapi==0.104.1
uvicorn==0.24.0
torch==2.1.0
torchaudio==2.1.0
transformers==4.35.0
pydantic==2.4.2
python-multipart==0.0.6
wget
gdown
numpy>=1.20.0
requests>=2.27.1
outetts
uroman
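Note: these pins cover the FastAPI backend only; client.py additionally needs `streamlit` and `pandas`. A typical setup is `pip install -r requirements.txt` followed by `python main.py`, which serves the API on port 8000 (or `$PORT`).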