Lesterchia174 committed on
Commit 6f6d4d2 (verified) · Parent: 39beb03

Create app2.py

Files changed (1): app2.py (+291, -0)
app2.py ADDED
@@ -0,0 +1,291 @@
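+ # app2.py: multilingual translator. Local Whisper handles speech-to-text,
+ # Groq-hosted LLMs do the translation, gTTS produces spoken output, and
+ # Gradio provides the UI.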
+ import gradio as gr
+ import os
+ import tempfile
+ import whisper
+ import re
+ from groq import Groq
+ from gtts import gTTS
+
+ # Load the local Whisper model for speech-to-text
+ whisper_model = whisper.load_model("base")
+
+ # Instantiate Groq client; the API key is read from the environment so that
+ # no secret is committed to the repository
+ groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+
+ # Supported languages
+ SUPPORTED_LANGUAGES = [
+     "English", "Chinese", "Thai", "Malay", "Korean",
+     "Japanese", "Spanish", "German", "Hindi",
+     "French", "Russian", "Tagalog", "Arabic",
+     "Myanmar", "Vietnamese"
+ ]
+
+ LANGUAGE_CODES = {
+     "English": "en", "Chinese": "zh", "Thai": "th", "Malay": "ms", "Korean": "ko",
+     "Japanese": "ja", "Spanish": "es", "German": "de", "Hindi": "hi",
+     "French": "fr", "Russian": "ru", "Tagalog": "tl", "Arabic": "ar",
+     "Myanmar": "my", "Vietnamese": "vi"
+ }
+
+ # Available LLM models
+ AVAILABLE_MODELS = {
+     "DeepSeek Qwen 32B": "deepseek-r1-distill-qwen-32b",
+     "Llama-4 Maverick 17B": "meta-llama/llama-4-maverick-17b-128e-instruct"
+ }
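+ # NOTE: these IDs are assumed to match Groq's catalog exactly; model IDs are
+ # case-sensitive, so verify them against the console's model list.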
+
+ def transcribe_audio_locally(audio):
+     """Transcribe audio using local Whisper model"""
+     if audio is None:
+         return ""
+
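+     # Depending on the Gradio version, the audio component may pass either a
+     # plain filepath string or a dict with a "name" key; handle both below.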
+     try:
+         audio_path = audio["name"] if isinstance(audio, dict) and "name" in audio else audio
+         result = whisper_model.transcribe(audio_path)
+         return result["text"]
+     except Exception as e:
+         print(f"Error transcribing audio locally: {e}")
+         return f"Error transcribing audio: {str(e)}"
+
+ def translate_text(input_text, input_lang, output_langs, model_name):
+     """Translate text using Groq's API with the selected model"""
+     if not input_text or not output_langs:
+         return ""
+
+     try:
+         # Get the actual model ID from our dictionary
+         model_id = AVAILABLE_MODELS.get(model_name, "deepseek-r1-distill-qwen-32b")
+
+         # Use a direct instruction to avoid exposing the model's thinking process
+         system_prompt = """You are a translation assistant that provides direct, accurate translations.
+ Do NOT include any thinking, reasoning, or explanations in your response.
+ Do NOT use phrases like 'In [language]:', 'Translation:' or similar prefixes.
+ Always respond with ONLY the exact translation text itself."""
+
+         user_prompt = f"Translate this {input_lang} text: '{input_text}' into the following languages: {', '.join(output_langs)}. Provide each translation on a separate line with the language name as a prefix."
+
+         response = groq_client.chat.completions.create(
+             model=model_id,
+             messages=[
+                 {"role": "system", "content": system_prompt},
+                 {"role": "user", "content": user_prompt}
+             ]
+         )
+
+         translation_text = response.choices[0].message.content.strip()
+
+         # Remove any "thinking" patterns or chain-of-thought that leaked through:
+         # first, strip text between <think> tags if they exist
+         translation_text = re.sub(r'<think>.*?</think>', '', translation_text, flags=re.DOTALL)
+
+         # Remove any line starting with common thinking patterns
+         thinking_patterns = [
+             r'^\s*Let me think.*$',
+             r'^\s*I need to.*$',
+             r'^\s*First,.*$',
+             r'^\s*Okay, so.*$',
+             r'^\s*Hmm,.*$',
+             r'^\s*Let\'s break this down.*$'
+         ]
+
+         for pattern in thinking_patterns:
+             translation_text = re.sub(pattern, '', translation_text, flags=re.MULTILINE)
+
+         return translation_text
+     except Exception as e:
+         print(f"Error translating text: {e}")
+         return f"Error: {str(e)}"
+
+ def synthesize_speech(text, lang):
+     """Generate speech from text"""
+     if not text:
+         return None
+
+     try:
+         lang_code = LANGUAGE_CODES.get(lang, "en")
+         tts = gTTS(text=text, lang=lang_code)
+
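+         # delete=False keeps the temp file on disk after the handle closes,
+         # so Gradio can read it back when rendering the audio player.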
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+             tts.save(fp.name)
+             return fp.name
+     except Exception as e:
+         print(f"Error synthesizing speech: {e}")
+         return None
+
+ def clear_memory():
+     """Clear all fields (transcribed text, three translations, three audio outputs)"""
+     return "", "", "", "", None, None, None
+
+ def process_speech_to_text(audio):
+     """Process audio and return the transcribed text"""
+     if not audio:
+         return ""
+
+     transcribed_text = transcribe_audio_locally(audio)
+     return transcribed_text
+
+ def clean_translation_output(text):
+     """Clean translation output to remove any thinking or processing text"""
+     if not text:
+         return ""
+
+     # Remove any meta-content or thinking
+     text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
+
+     # Remove lines that appear to be thinking/reasoning
+     lines = text.split('\n')
+     cleaned_lines = []
+
+     for line in lines:
+         # Skip lines that look like thinking
+         if re.search(r'(^I need to|^Let me|^First|^Okay|^Hmm|^I will|^I am thinking|^I should)', line, re.IGNORECASE):
+             continue
+
+         # Keep translations with language names
+         if ':' in line and any(lang.lower() in line.lower() for lang in SUPPORTED_LANGUAGES):
+             cleaned_lines.append(line)
+         # Or keep direct translations without prefixes if they don't look like thinking
+         elif line.strip() and not re.search(r'(thinking|translating|understand|process)', line, re.IGNORECASE):
+             cleaned_lines.append(line)
+
+     return '\n'.join(cleaned_lines)
+
+ def extract_translations(translations_text, output_langs):
+     """Extract clean translations from the model output"""
+     if not translations_text or not output_langs:
+         return [""] * 3
+
+     # Clean the translations text first
+     clean_text = clean_translation_output(translations_text)
+
+     # Try to match language patterns
+     translation_results = []
+
+     # First, try to find language-labeled translations
+     for lang in output_langs:
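+         # Capture everything after "<Language>:" up to the next line that
+         # starts with a capital letter (assumed to be the next label) or the
+         # end of the text.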
+         pattern = rf'{lang}[\s]*:[\s]*(.*?)(?=\n\s*[A-Z]|$)'
+         match = re.search(pattern, clean_text, re.IGNORECASE | re.DOTALL)
+         if match:
+             translation_results.append(match.group(1).strip())
+
+     # If we couldn't find labeled translations, just split by lines
+     if not translation_results and '\n' in clean_text:
+         lines = [line.strip() for line in clean_text.split('\n') if line.strip()]
+
+         for line in lines:
+             # Check if this line has a language prefix
+             if ':' in line:
+                 parts = line.split(':', 1)
+                 if len(parts) == 2:
+                     translation_results.append(parts[1].strip())
+             else:
+                 # Just add the line as-is if it seems like a translation
+                 translation_results.append(line)
+     elif not translation_results:
+         # If there are no newlines, just use the whole text
+         translation_results.append(clean_text)
+
+     # Ensure we have exactly 3 results
+     while len(translation_results) < 3:
+         translation_results.append("")
+
+     return translation_results[:3]
+
+ def perform_translation(audio, typed_text, input_lang, output_langs, model_name):
+     """Main function to handle the translation process"""
+     # Check if we have valid inputs
+     if not output_langs:
+         return typed_text, "", "", "", None, None, None
+
+     # Limit to 3 output languages (the UI exposes three translation slots)
+     selected_langs = output_langs[:3]
+
+     # Get the input text either from typed text or by transcribing audio
+     input_text = typed_text
+     if not input_text and audio:
+         input_text = transcribe_audio_locally(audio)
+
+     if not input_text:
+         return "", "", "", "", None, None, None
+
+     # Get translations using the selected model
+     translations_text = translate_text(input_text, input_lang, selected_langs, model_name)
+
+     # Extract clean translations
+     translation_results = extract_translations(translations_text, selected_langs)
+
+     # Generate speech for each valid translation
+     audio_paths = []
+     for trans, lang in zip(translation_results, selected_langs):
+         if trans:
+             audio_paths.append(synthesize_speech(trans, lang))
+         else:
+             audio_paths.append(None)
+
+     # Ensure we have exactly 3 audio paths
+     while len(audio_paths) < 3:
+         audio_paths.append(None)
+
+     # Return in the expected order: transcribed text, 3 translations, 3 audio paths
+     return [input_text] + translation_results + audio_paths
+
+ with gr.Blocks() as demo:
+     gr.Markdown("## 🌍 Multilingual Translator with Speech Support")
+
+     with gr.Row():
+         input_lang = gr.Dropdown(choices=SUPPORTED_LANGUAGES, value="English", label="Input Language")
+         output_langs = gr.CheckboxGroup(choices=SUPPORTED_LANGUAGES, label="Output Languages (select up to 3)")
+
+     with gr.Row():
+         model_selector = gr.Dropdown(
+             choices=list(AVAILABLE_MODELS.keys()),
+             value="DeepSeek Qwen 32B",
+             label="Translation Model"
+         )
+
+     with gr.Row():
+         audio_input = gr.Audio(type="filepath", label="Speak Your Input (upload or record)")
+         text_input = gr.Textbox(label="Or Type Text", elem_id="text_input")
+
+     transcribed_text = gr.Textbox(label="Transcribed Text (from audio)", interactive=False)
+     translated_outputs = [gr.Textbox(label=f"Translation {i+1}", interactive=False) for i in range(3)]
+     audio_outputs = [gr.Audio(label=f"Speech Output {i+1}") for i in range(3)]
+
+     with gr.Row():
+         translate_btn = gr.Button("Translate", elem_id="translate_btn")
+         clear_btn = gr.Button("Clear Memory")
+
+     # Handle audio input separately
+     def on_audio_change(audio):
+         if audio is None:
+             return ""
+         transcribed = process_speech_to_text(audio)
+         return transcribed
+
+     # Put the transcription into the editable text box so the user can
+     # correct it before translating
+     audio_input.change(
+         on_audio_change,
+         inputs=[audio_input],
+         outputs=[text_input]
+     )
+
+     # Enable Enter key to submit
+     text_input.submit(
+         perform_translation,
+         inputs=[audio_input, text_input, input_lang, output_langs, model_selector],
+         outputs=[transcribed_text] + translated_outputs + audio_outputs
+     )
+
+     translate_btn.click(
+         perform_translation,
+         inputs=[audio_input, text_input, input_lang, output_langs, model_selector],
+         outputs=[transcribed_text] + translated_outputs + audio_outputs
+     )
+
+     clear_btn.click(
+         clear_memory,
+         inputs=[],
+         outputs=[transcribed_text] + translated_outputs + audio_outputs
+     )
+
+ demo.launch()
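+ # NOTE: set GROQ_API_KEY in the environment before launching (e.g. via
+ # `export GROQ_API_KEY=...`); the Groq client cannot authenticate without it.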