Update app.py
Browse files
app.py
CHANGED
@@ -16,7 +16,7 @@ import time
|
|
16 |
import re
|
17 |
|
18 |
# Constants
|
19 |
-
WHISPER_FINETUNED = "wy0909/
|
20 |
WHISPER_PRETRAINED = "openai/whisper-medium"
|
21 |
WAV2VEC_MODEL = "mesolitica/wav2vec2-xls-r-300m-mixed"
|
22 |
MAX_RECORDING_SECONDS = 12
|
@@ -193,11 +193,15 @@ else:
|
|
193 |
|
194 |
@st.cache_resource
|
195 |
def load_finetuned_model_and_processor():
|
196 |
-
model = WhisperForConditionalGeneration.from_pretrained(WHISPER_FINETUNED)
|
|
|
197 |
processor = WhisperProcessor.from_pretrained(WHISPER_FINETUNED)
|
198 |
model.config.forced_decoder_ids = None
|
199 |
model.generation_config.forced_decoder_ids = None
|
|
|
200 |
model.config.suppress_tokens = []
|
|
|
|
|
201 |
return model, processor
|
202 |
|
203 |
@st.cache_resource
|
@@ -206,6 +210,7 @@ else:
|
|
206 |
processor = WhisperProcessor.from_pretrained(WHISPER_PRETRAINED)
|
207 |
model.config.forced_decoder_ids = None
|
208 |
model.generation_config.forced_decoder_ids = None
|
|
|
209 |
model.config.suppress_tokens = []
|
210 |
return model, processor
|
211 |
|
@@ -256,7 +261,7 @@ if st.button("๐ Transcribe"):
|
|
256 |
st.session_state.predicted_text.lower()
|
257 |
)
|
258 |
st.markdown("### ๐งฎ Word Error Rate (WER)")
|
259 |
-
st.write(f"WER: `{st.session_state.wer_value:.2f}
|
260 |
|
261 |
except Exception as e:
|
262 |
st.error(f"โ Transcription failed: {str(e)}")
|
@@ -274,4 +279,4 @@ if st.button("๐ Transcribe"):
|
|
274 |
|
275 |
end_time = time.time()
|
276 |
duration = end_time - start_time
|
277 |
-
st.caption(f"๐ Time taken: {duration:.2f}s")
|
|
|
16 |
import re
|
17 |
|
18 |
# Constants
|
19 |
+
WHISPER_FINETUNED = "wy0909/whisper-medium_mixedLanguageModel"
|
20 |
WHISPER_PRETRAINED = "openai/whisper-medium"
|
21 |
WAV2VEC_MODEL = "mesolitica/wav2vec2-xls-r-300m-mixed"
|
22 |
MAX_RECORDING_SECONDS = 12
|
|
|
193 |
|
194 |
@st.cache_resource
def load_finetuned_model_and_processor():
    """Load the fine-tuned Whisper model and processor (cached by Streamlit).

    Returns:
        tuple: ``(model, processor)`` — a ``WhisperForConditionalGeneration``
        and its ``WhisperProcessor`` for ``WHISPER_FINETUNED``. On a CUDA
        machine the model is loaded in float16 and moved to the GPU;
        otherwise it stays in float32 on CPU.
    """
    import importlib.util  # local import: only needed for the flash-attn probe

    use_cuda = torch.cuda.is_available()
    # flash_attention_2 requires the optional `flash-attn` package; selecting
    # it on CUDA availability alone raises ImportError at load time on GPU
    # hosts without that wheel. Probe for the module and fall back to the
    # default attention implementation when it is absent.
    flash_ok = use_cuda and importlib.util.find_spec("flash_attn") is not None
    model = WhisperForConditionalGeneration.from_pretrained(
        WHISPER_FINETUNED,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
        attn_implementation="flash_attention_2" if flash_ok else None,
    )
    processor = WhisperProcessor.from_pretrained(WHISPER_FINETUNED)
    # Clear forced decoder ids so generation is not pinned to a fixed
    # language/task pair baked into the checkpoint.
    model.config.forced_decoder_ids = None
    model.generation_config.forced_decoder_ids = None
    # NOTE(review): None reverts use_cache to the library default rather than
    # disabling the KV cache — confirm whether False was intended here.
    model.config.use_cache = None
    model.config.suppress_tokens = []
    if use_cuda:
        model = model.to("cuda")
    return model, processor
|
206 |
|
207 |
@st.cache_resource
|
|
|
210 |
processor = WhisperProcessor.from_pretrained(WHISPER_PRETRAINED)
|
211 |
model.config.forced_decoder_ids = None
|
212 |
model.generation_config.forced_decoder_ids = None
|
213 |
+
model.config.use_cache = None
|
214 |
model.config.suppress_tokens = []
|
215 |
return model, processor
|
216 |
|
|
|
261 |
st.session_state.predicted_text.lower()
|
262 |
)
|
263 |
st.markdown("### ๐งฎ Word Error Rate (WER)")
|
264 |
+
st.write(f"WER: `{st.session_state.wer_value * 100:.2f}%`")
|
265 |
|
266 |
except Exception as e:
|
267 |
st.error(f"โ Transcription failed: {str(e)}")
|
|
|
279 |
|
280 |
end_time = time.time()
|
281 |
duration = end_time - start_time
|
282 |
+
st.caption(f"๐ Time taken: {duration:.2f}s")
|