Luigi committed on
Commit
d236f33
·
1 Parent(s): b824b83

Also use file mode to send audio from the mic to Whisper

Browse files
Files changed (1) hide show
  1. app.py +9 -16
app.py CHANGED
@@ -122,13 +122,11 @@ def transcribe_audio(audio_input):
122
  if isinstance(audio_input, str):
123
  result = whisper_pipe(audio_input)
124
  return result["text"]
125
- # For microphone input, Gradio returns a tuple (sample_rate, audio_array).
126
  elif isinstance(audio_input, tuple):
127
- audio_array = audio_input[1]
128
- if audio_array.ndim > 1:
129
- audio_array = np.mean(audio_array, axis=-1)
130
- result = whisper_pipe(audio_array)
131
- return result["text"]
132
  else:
133
  return ""
134
 
@@ -137,6 +135,7 @@ def transcribe_audio(audio_input):
137
  def classify_intent(mode, mic_audio, text_input, file_audio, model_choice):
138
  # Determine input based on selected mode.
139
  if mode == "Microphone" and mic_audio is not None:
 
140
  transcription = transcribe_audio(mic_audio)
141
  elif mode == "Text" and text_input:
142
  transcription = text_input
@@ -161,28 +160,22 @@ with gr.Blocks() as demo:
161
  gr.Markdown("錄音、上傳語音檔案或輸入文字,自動判斷是否具有訂位意圖。")
162
 
163
  with gr.Row():
164
- # Input Mode Selector with three options.
165
  mode = gr.Radio(choices=["Microphone", "Text", "File"], label="選擇輸入模式", value="Microphone")
166
 
167
  with gr.Row():
168
- # Three input components: microphone, text, and file upload.
169
- mic_audio = gr.Audio(sources=["microphone"], type="numpy", label="語音輸入 (點擊錄音)")
170
  text_input = gr.Textbox(lines=2, placeholder="請輸入文字", label="文字輸入")
171
- # For file input, use 'filepath' so Whisper pipeline handles conversion.
172
  file_audio = gr.Audio(sources=["upload"], type="filepath", label="上傳語音檔案")
173
 
174
- # Initially, only the microphone input is visible.
175
- text_input.visible = False
176
- file_audio.visible = False
177
-
178
- # Change event for mode selection to toggle visibility.
179
  def update_visibility(selected_mode):
180
  if selected_mode == "Microphone":
181
  return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
182
  elif selected_mode == "Text":
183
  return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
184
  else: # File
185
- return gr.update(visible=False), gr.update(visible(False)), gr.update(visible=True)
186
  mode.change(fn=update_visibility, inputs=mode, outputs=[mic_audio, text_input, file_audio])
187
 
188
  with gr.Row():
 
122
  if isinstance(audio_input, str):
123
  result = whisper_pipe(audio_input)
124
  return result["text"]
125
+ # For microphone input, we now also use file_path.
126
  elif isinstance(audio_input, tuple):
127
+ # In our updated configuration, microphone input should be provided as a file path,
128
+ # so this branch may not be reached.
129
+ return ""
 
 
130
  else:
131
  return ""
132
 
 
135
  def classify_intent(mode, mic_audio, text_input, file_audio, model_choice):
136
  # Determine input based on selected mode.
137
  if mode == "Microphone" and mic_audio is not None:
138
+ # mic_audio is a file path.
139
  transcription = transcribe_audio(mic_audio)
140
  elif mode == "Text" and text_input:
141
  transcription = text_input
 
160
  gr.Markdown("錄音、上傳語音檔案或輸入文字,自動判斷是否具有訂位意圖。")
161
 
162
  with gr.Row():
 
163
  mode = gr.Radio(choices=["Microphone", "Text", "File"], label="選擇輸入模式", value="Microphone")
164
 
165
  with gr.Row():
166
+ # For microphone input, set type="filepath" so that we always get a file path.
167
+ mic_audio = gr.Audio(sources=["microphone"], type="filepath", label="語音輸入 (點擊錄音)")
168
  text_input = gr.Textbox(lines=2, placeholder="請輸入文字", label="文字輸入")
 
169
  file_audio = gr.Audio(sources=["upload"], type="filepath", label="上傳語音檔案")
170
 
171
+ # Set visibility based on selected mode.
 
 
 
 
172
  def update_visibility(selected_mode):
173
  if selected_mode == "Microphone":
174
  return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
175
  elif selected_mode == "Text":
176
  return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
177
  else: # File
178
+ return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
179
  mode.change(fn=update_visibility, inputs=mode, outputs=[mic_audio, text_input, file_audio])
180
 
181
  with gr.Row():