Spaces:
Sleeping
Sleeping
Also use file mode to send audio from the mic to Whisper
Browse files
app.py
CHANGED
@@ -122,13 +122,11 @@ def transcribe_audio(audio_input):
|
|
122 |
if isinstance(audio_input, str):
|
123 |
result = whisper_pipe(audio_input)
|
124 |
return result["text"]
|
125 |
-
# For microphone input,
|
126 |
elif isinstance(audio_input, tuple):
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
result = whisper_pipe(audio_array)
|
131 |
-
return result["text"]
|
132 |
else:
|
133 |
return ""
|
134 |
|
@@ -137,6 +135,7 @@ def transcribe_audio(audio_input):
|
|
137 |
def classify_intent(mode, mic_audio, text_input, file_audio, model_choice):
|
138 |
# Determine input based on selected mode.
|
139 |
if mode == "Microphone" and mic_audio is not None:
|
|
|
140 |
transcription = transcribe_audio(mic_audio)
|
141 |
elif mode == "Text" and text_input:
|
142 |
transcription = text_input
|
@@ -161,28 +160,22 @@ with gr.Blocks() as demo:
|
|
161 |
gr.Markdown("錄音、上傳語音檔案或輸入文字,自動判斷是否具有訂位意圖。")
|
162 |
|
163 |
with gr.Row():
|
164 |
-
# Input Mode Selector with three options.
|
165 |
mode = gr.Radio(choices=["Microphone", "Text", "File"], label="選擇輸入模式", value="Microphone")
|
166 |
|
167 |
with gr.Row():
|
168 |
-
#
|
169 |
-
mic_audio = gr.Audio(sources=["microphone"], type="
|
170 |
text_input = gr.Textbox(lines=2, placeholder="請輸入文字", label="文字輸入")
|
171 |
-
# For file input, use 'filepath' so Whisper pipeline handles conversion.
|
172 |
file_audio = gr.Audio(sources=["upload"], type="filepath", label="上傳語音檔案")
|
173 |
|
174 |
-
#
|
175 |
-
text_input.visible = False
|
176 |
-
file_audio.visible = False
|
177 |
-
|
178 |
-
# Change event for mode selection to toggle visibility.
|
179 |
def update_visibility(selected_mode):
|
180 |
if selected_mode == "Microphone":
|
181 |
return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
|
182 |
elif selected_mode == "Text":
|
183 |
return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
|
184 |
else: # File
|
185 |
-
return gr.update(visible=False), gr.update(visible
|
186 |
mode.change(fn=update_visibility, inputs=mode, outputs=[mic_audio, text_input, file_audio])
|
187 |
|
188 |
with gr.Row():
|
|
|
122 |
if isinstance(audio_input, str):
|
123 |
result = whisper_pipe(audio_input)
|
124 |
return result["text"]
|
125 |
+
# For microphone input, we now also use a file path.
|
126 |
elif isinstance(audio_input, tuple):
|
127 |
+
# In our updated configuration, microphone input should be provided as a file path,
|
128 |
+
# so this branch may not be reached.
|
129 |
+
return ""
|
|
|
|
|
130 |
else:
|
131 |
return ""
|
132 |
|
|
|
135 |
def classify_intent(mode, mic_audio, text_input, file_audio, model_choice):
|
136 |
# Determine input based on selected mode.
|
137 |
if mode == "Microphone" and mic_audio is not None:
|
138 |
+
# mic_audio is a file path.
|
139 |
transcription = transcribe_audio(mic_audio)
|
140 |
elif mode == "Text" and text_input:
|
141 |
transcription = text_input
|
|
|
160 |
gr.Markdown("錄音、上傳語音檔案或輸入文字,自動判斷是否具有訂位意圖。")
|
161 |
|
162 |
with gr.Row():
|
|
|
163 |
mode = gr.Radio(choices=["Microphone", "Text", "File"], label="選擇輸入模式", value="Microphone")
|
164 |
|
165 |
with gr.Row():
|
166 |
+
# For microphone input, set type="filepath" so that we always get a file path.
|
167 |
+
mic_audio = gr.Audio(sources=["microphone"], type="filepath", label="語音輸入 (點擊錄音)")
|
168 |
text_input = gr.Textbox(lines=2, placeholder="請輸入文字", label="文字輸入")
|
|
|
169 |
file_audio = gr.Audio(sources=["upload"], type="filepath", label="上傳語音檔案")
|
170 |
|
171 |
+
# Set visibility based on selected mode.
|
|
|
|
|
|
|
|
|
172 |
def update_visibility(selected_mode):
|
173 |
if selected_mode == "Microphone":
|
174 |
return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
|
175 |
elif selected_mode == "Text":
|
176 |
return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
|
177 |
else: # File
|
178 |
+
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
|
179 |
mode.change(fn=update_visibility, inputs=mode, outputs=[mic_audio, text_input, file_audio])
|
180 |
|
181 |
with gr.Row():
|