birgermoell committed on
Commit
1baa979
·
verified ·
1 Parent(s): 9b1b90d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
 
2
  # Setup model
3
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -19,3 +25,90 @@ def load_model():
19
  torch_dtype=torch_dtype,
20
  device=device,
21
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import base64
4
+ import tempfile
5
+ import os
6
+ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
7
 
8
  # Setup model
9
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
25
  torch_dtype=torch_dtype,
26
  device=device,
27
  )
28
+
29
+ asr_pipeline = load_model()
30
+
31
+ st.title("Swedish Speech-to-Text Demo")
32
+
33
+ # Audio Upload Option
34
+ uploaded_file = st.file_uploader("Ladda upp en ljudfil", type=["wav", "mp3", "flac"])
35
+
36
+ # JavaScript for recording audio
37
+ audio_recorder_js = """
38
+ <script>
39
+ let mediaRecorder;
40
+ let audioChunks = [];
41
+ let isRecording = false;
42
+
43
+ function startRecording() {
44
+ if (!isRecording) {
45
+ isRecording = true;
46
+ navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
47
+ mediaRecorder = new MediaRecorder(stream);
48
+ audioChunks = [];
49
+ mediaRecorder.ondataavailable = event => {
50
+ audioChunks.push(event.data);
51
+ };
52
+ mediaRecorder.onstop = () => {
53
+ const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
54
+ const reader = new FileReader();
55
+ reader.readAsDataURL(audioBlob);
56
+ reader.onloadend = () => {
57
+ const base64Audio = reader.result.split(',')[1];
58
+ fetch('/save_audio', {
59
+ method: 'POST',
60
+ headers: { 'Content-Type': 'application/json' },
61
+ body: JSON.stringify({ audio: base64Audio })
62
+ }).then(response => response.json()).then(data => {
63
+ console.log(data);
64
+ window.location.reload();
65
+ });
66
+ };
67
+ };
68
+ mediaRecorder.start();
69
+ });
70
+ }
71
+ }
72
+
73
+ function stopRecording() {
74
+ if (isRecording) {
75
+ isRecording = false;
76
+ mediaRecorder.stop();
77
+ }
78
+ }
79
+ </script>
80
+
81
+ <button onclick="startRecording()">🎤 Starta inspelning</button>
82
+ <button onclick="stopRecording()">⏹️ Stoppa inspelning</button>
83
+ """
84
+
85
+ st.components.v1.html(audio_recorder_js)
86
+
87
+ # Processing audio input (uploaded file or recorded)
88
+ audio_path = None
89
+
90
+ if uploaded_file is not None:
91
+ # Save uploaded file to a temp location
92
+ with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[-1]) as temp_audio:
93
+ temp_audio.write(uploaded_file.read())
94
+ audio_path = temp_audio.name
95
+
96
+ elif "audio_data" in st.session_state and st.session_state["audio_data"]:
97
+ # Decode base64 audio from JavaScript recording
98
+ audio_bytes = base64.b64decode(st.session_state["audio_data"])
99
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
100
+ temp_audio.write(audio_bytes)
101
+ audio_path = temp_audio.name
102
+
103
+ # Transcribe if we have audio
104
+ if audio_path:
105
+ st.audio(audio_path, format="audio/wav")
106
+
107
+ with st.spinner("Transkriberar..."):
108
+ transcription = asr_pipeline(audio_path)["text"]
109
+
110
+ st.subheader("📜 Transkription:")
111
+ st.write(transcription)
112
+
113
+ # Cleanup temp file
114
+ os.remove(audio_path)