Lesterchia174 committed on
Commit
6a6baf6
·
verified ·
1 Parent(s): 5de3c7d

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +283 -0
  2. apt.txt +1 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1hDCBaCrOX0FZx8VUT9_cUfWWg7y97yrx
8
+ """
9
+
10
+ import gradio as gr
11
+ import os
12
+ import tempfile
13
+ import whisper
14
+ import re
15
+ from groq import Groq
16
+ from gtts import gTTS
17
+
18
# Load the local Whisper model once, at import time, for speech-to-text.
whisper_model = whisper.load_model("base")

# Instantiate the Groq client. The key is read from the GROQ_API_KEY
# environment variable only: credentials must never be embedded in source,
# and any key that was previously committed should be treated as compromised
# and rotated.
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
23
+
24
# Display name -> language code handed to gTTS when synthesizing speech.
LANGUAGE_CODES = {
    "English": "en",
    "Chinese": "zh",
    "Thai": "th",
    "Malay": "ms",
    "Korean": "ko",
    "Japanese": "ja",
    "Spanish": "es",
    "German": "de",
    "Hindi": "hi",
    "French": "fr",
    "Russian": "ru",
    "Tagalog": "tl",
    "Arabic": "ar",
}

# Supported languages, in the order they are offered in the UI
# (dicts preserve insertion order, so this mirrors the mapping above).
SUPPORTED_LANGUAGES = list(LANGUAGE_CODES)
36
+
37
def transcribe_audio_locally(audio):
    """Run the local Whisper model on an audio input and return its text.

    ``audio`` is either the value to hand to Whisper directly or a dict that
    carries that value under the ``"name"`` key. ``None`` yields an empty
    string. Failures are not raised: they are printed and reported back as an
    ``"Error transcribing audio: ..."`` string.
    """
    if audio is None:
        return ""

    try:
        # Dict-shaped inputs keep the file location under "name".
        if isinstance(audio, dict) and "name" in audio:
            source = audio["name"]
        else:
            source = audio
        return whisper_model.transcribe(source)["text"]
    except Exception as err:
        print(f"Error transcribing audio locally: {err}")
        return f"Error transcribing audio: {str(err)}"
49
+
50
def translate_text(input_text, input_lang, output_langs):
    """Translate text into several languages with one Groq chat completion.

    Args:
        input_text: Text to translate.
        input_lang: Name of the language ``input_text`` is written in.
        output_langs: Names of the target languages.

    Returns:
        The model's reply as a string with reasoning output removed; the
        model is asked for one ``<Language name>: <translation>`` line per
        target language. Returns ``""`` when there is nothing to translate
        and an ``"Error: ..."`` string when the request fails (the error is
        also printed).
    """
    if not input_text or not output_langs:
        # Always hand back a string so callers deal with a single type.
        return ""

    try:
        # The system prompt must agree with the user prompt below: the reply
        # is parsed by language-name prefix, so prefixes are requested rather
        # than forbidden.
        system_prompt = (
            "You are a translation assistant that provides direct, accurate translations.\n"
            "Do NOT include any thinking, reasoning, or explanations in your response.\n"
            "Do NOT add introductions such as 'In [language]:' or 'Translation:'.\n"
            "Respond with exactly one line per requested language, formatted as "
            "'<Language name>: <translation>', and nothing else."
        )

        user_prompt = f"Translate this {input_lang} text: '{input_text}' into the following languages: {', '.join(output_langs)}. Provide each translation on a separate line with the language name as a prefix."

        response = groq_client.chat.completions.create(
            model="deepseek-r1-distill-Qwen-32b",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )

        # Guard against a missing message body before any string handling.
        translation_text = response.choices[0].message.content or ""

        # Remove complete <think>...</think> blocks.
        translation_text = re.sub(r'<think>.*?</think>', '', translation_text, flags=re.DOTALL)
        # Defensive: a closing tag that survives has no opening tag, so
        # everything before it is treated as reasoning.
        translation_text = re.sub(r'^.*</think>', '', translation_text, flags=re.DOTALL)
        # Defensive: an opening tag that survives was never closed, so the
        # remainder of the reply is treated as reasoning.
        translation_text = re.sub(r'<think>.*', '', translation_text, flags=re.DOTALL)

        # Remove any line starting with common thinking patterns.
        thinking_patterns = [
            r'^\s*Let me think.*$',
            r'^\s*I need to.*$',
            r'^\s*First,.*$',
            r'^\s*Okay, so.*$',
            r'^\s*Hmm,.*$',
            r'^\s*Let\'s break this down.*$',
        ]
        for pattern in thinking_patterns:
            translation_text = re.sub(pattern, '', translation_text, flags=re.MULTILINE)

        # Strip last, so whitespace left behind by the removals goes too.
        return translation_text.strip()
    except Exception as e:
        print(f"Error translating text: {e}")
        return f"Error: {str(e)}"
95
+
96
def synthesize_speech(text, lang):
    """Generate an MP3 of ``text`` spoken in ``lang`` using gTTS.

    Args:
        text: Text to speak; falsy values produce no audio.
        lang: Language display name; looked up in ``LANGUAGE_CODES`` and
            falling back to ``"en"`` when it is not listed.

    Returns:
        Path of a newly created ``.mp3`` temp file, or ``None`` when ``text``
        is empty or synthesis fails (the error is printed). The file is
        created with ``delete=False`` so it outlives this call.
    """
    if not text:
        return None

    mp3_path = None
    try:
        lang_code = LANGUAGE_CODES.get(lang, "en")
        tts = gTTS(text=text, lang=lang_code)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            mp3_path = fp.name
            # Write through the open handle: reopening a NamedTemporaryFile
            # by name while it is still open is not portable (it fails on
            # Windows).
            tts.write_to_fp(fp)
        return mp3_path
    except Exception as e:
        print(f"Error synthesizing speech: {e}")
        # Do not leave a partial temp file behind when synthesis fails.
        if mp3_path is not None:
            try:
                os.remove(mp3_path)
            except OSError:
                pass
        return None
111
+
112
def clear_memory():
    """Return reset values for the seven output widgets.

    The tuple holds four empty strings (transcription plus three
    translations) followed by three ``None`` values (the audio players).
    """
    blank_texts = ("",) * 4
    blank_audio = (None,) * 3
    return blank_texts + blank_audio
115
+
116
def process_speech_to_text(audio):
    """Transcribe ``audio`` and return the text; falsy input gives ``""``."""
    return transcribe_audio_locally(audio) if audio else ""
123
+
124
def clean_translation_output(text):
    """Strip reasoning-style content from raw model output.

    ``<think>...</think>`` blocks are removed first. The remaining text is
    then filtered line by line: lines opening with a typical reasoning phrase
    are dropped, lines containing a colon and a supported language name are
    kept, and any other line is kept only if it is non-blank and free of
    meta vocabulary. Falsy input returns ``""``.
    """
    if not text:
        return ""

    without_think = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

    def _keep(candidate):
        # Lines that open with a reasoning phrase are always discarded.
        if re.search(r'(^I need to|^Let me|^First|^Okay|^Hmm|^I will|^I am thinking|^I should)', candidate, re.IGNORECASE):
            return False
        # "<language> ...: ..." lines are kept as labelled translations.
        if ':' in candidate:
            lowered = candidate.lower()
            if any(name.lower() in lowered for name in SUPPORTED_LANGUAGES):
                return True
        # Everything else must be non-blank and must not look like commentary.
        if not candidate.strip():
            return False
        return re.search(r'(thinking|translating|understand|process)', candidate, re.IGNORECASE) is None

    return '\n'.join(filter(_keep, without_think.split('\n')))
149
+
150
def extract_translations(translations_text, output_langs):
    """Extract up to three clean translations from the model output.

    Args:
        translations_text: Raw reply from the translation model.
        output_langs: Requested language names, in display order.

    Returns:
        A list of exactly three strings. When the reply contains
        ``<language>: <text>`` labels, slot ``i`` holds the translation for
        ``output_langs[i]`` (``""`` if that language is missing), so results
        stay aligned with the requested languages. When no label matches,
        the reply is split into lines (any ``prefix:`` removed) or, for a
        single line, used whole. Unused slots are ``""``.
    """
    if not translations_text or not output_langs:
        return [""] * 3

    # Clean the translations text first.
    clean_text = clean_translation_output(translations_text)

    # One slot per requested language; None marks "label not found" so that a
    # missing language cannot shift later translations into the wrong slot.
    labelled = []
    for lang in output_langs:
        # Escape the name so it is always matched literally.
        pattern = rf'{re.escape(lang)}[\s]*:[\s]*(.*?)(?=\n\s*[A-Z]|$)'
        match = re.search(pattern, clean_text, re.IGNORECASE | re.DOTALL)
        labelled.append(match.group(1).strip() if match else None)

    if any(found is not None for found in labelled):
        translation_results = [found or "" for found in labelled]
    elif '\n' in clean_text:
        # No recognizable labels: fall back to one translation per line.
        translation_results = []
        for line in clean_text.split('\n'):
            line = line.strip()
            if not line:
                continue
            _, separator, remainder = line.partition(':')
            # Drop an unrecognized "prefix:" if present, else keep the line.
            translation_results.append(remainder.strip() if separator else line)
    else:
        # Single line without labels: use the whole text.
        translation_results = [clean_text]

    # The UI has exactly three translation slots: pad, then truncate.
    translation_results.extend([""] * (3 - len(translation_results)))
    return translation_results[:3]
190
+
191
def perform_translation(audio, typed_text, input_lang, output_langs):
    """Translate the user's input and synthesize speech for each result.

    Typed text takes priority; audio is transcribed only when no text was
    typed. At most the first three entries of ``output_langs`` are used.

    Returns seven values for the output widgets: the input text, three
    translations, and three audio file paths (``None`` where there is no
    audio). With no output languages the typed text is echoed back with
    empty results; with no usable input every value is blank.
    """
    if not output_langs:
        return typed_text, "", "", "", None, None, None

    # Only three translation slots exist in the UI.
    targets = output_langs[:3]

    source_text = typed_text
    if not source_text and audio:
        source_text = transcribe_audio_locally(audio)
    if not source_text:
        return "", "", "", "", None, None, None

    raw_reply = translate_text(source_text, input_lang, targets)
    texts = extract_translations(raw_reply, targets)

    # One audio clip per non-empty translation, paired with its language.
    speech_files = [
        synthesize_speech(piece, target) if piece else None
        for piece, target in zip(texts, targets)
    ]
    # Pad so there is always one entry per audio widget.
    speech_files.extend([None] * (3 - len(speech_files)))

    return [source_text] + texts + speech_files
229
+
230
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Multilingual Translator with Speech Support")

    # Language selection.
    with gr.Row():
        input_lang = gr.Dropdown(choices=SUPPORTED_LANGUAGES, value="English", label="Input Language")
        output_langs = gr.CheckboxGroup(choices=SUPPORTED_LANGUAGES, label="Output Languages (select up to 3)")

    # Input: spoken or typed.
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Speak Your Input (upload or record)")
        text_input = gr.Textbox(label="Or Type Text", elem_id="text_input")

    # Outputs: transcription, three translations, three audio players.
    transcribed_text = gr.Textbox(label="Transcribed Text (from audio)", interactive=False)
    translated_outputs = [gr.Textbox(label=f"Translation {i+1}", interactive=False) for i in range(3)]
    audio_outputs = [gr.Audio(label=f"Speech Output {i+1}") for i in range(3)]

    with gr.Row():
        translate_btn = gr.Button("Translate", elem_id="translate_btn")
        clear_btn = gr.Button("Clear Memory")

    def on_audio_change(audio):
        """Transcribe newly supplied audio; a cleared input yields ""."""
        return "" if audio is None else process_speech_to_text(audio)

    # Widget groups shared by the handlers below.
    translation_inputs = [audio_input, text_input, input_lang, output_langs]
    result_widgets = [transcribed_text] + translated_outputs + audio_outputs

    # Show the transcription as soon as the audio input changes.
    audio_input.change(on_audio_change, inputs=[audio_input], outputs=[transcribed_text])

    # Pressing Enter in the text box and clicking "Translate" run the same pipeline.
    for register in (text_input.submit, translate_btn.click):
        register(perform_translation, inputs=translation_inputs, outputs=result_widgets)

    clear_btn.click(clear_memory, inputs=[], outputs=result_widgets)

demo.launch()
apt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ espeak
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ groq
4
+ soundfile
5
+ transformers
6
+ openai-whisper
7
+ gTTS