dofbi commited on
Commit
6728136
·
0 Parent(s):

♻️ refactor (model): add new model selection app, model registry and Space configuration

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +110 -0
  4. model_list.json +59 -0
  5. requirements.txt +4 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Wolof Asr
3
+ emoji: 📚
4
+ colorFrom: pink
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.6.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: Bienvenue sur **Wolof-ASR**, une application de reconnaissance vocale
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import torch
3
+ import gradio as gr
4
+ import librosa
5
+ import numpy as np
6
+ import json
7
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
8
+ from scipy.signal import butter, lfilter
9
+
10
# Load the model registry from a JSON file.
def load_model_list(file_path="model_list.json"):
    """Return the model registry parsed from *file_path*.

    The registry maps a display name to a dict with at least a
    ``model_path`` key (see model_list.json).

    Args:
        file_path: path to the JSON registry file.

    Raises:
        ValueError: if the file is missing or not valid JSON; the original
            exception is chained so the root cause stays visible.
    """
    try:
        # Explicit encoding: names/datasets in the registry may contain accents.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        raise ValueError(f"Erreur lors du chargement de la liste des modèles : {str(e)}") from e
17
+
18
+ # Charger les modèles depuis le fichier JSON
19
+ MODEL_LIST = load_model_list()
20
+
21
# Load (and cache) the processor/model pair for a registry entry.
def load_model_and_processor(model_name):
    """Return ``(processor, model)`` for *model_name* from MODEL_LIST.

    Whisper checkpoints are large, so each pair is loaded once and memoized
    on the function itself; repeated transcription requests with the same
    model reuse the already-loaded weights instead of re-reading them from
    disk on every click.

    Raises:
        ValueError: if *model_name* is not present in MODEL_LIST.
    """
    # Self-contained memoization (no module-level state, no extra imports).
    cache = load_model_and_processor.__dict__.setdefault("_cache", {})
    if model_name in cache:
        return cache[model_name]

    model_info = MODEL_LIST.get(model_name)
    if not model_info:
        raise ValueError("Modèle non trouvé dans la liste.")
    model_path = model_info["model_path"]
    processor = WhisperProcessor.from_pretrained(model_path)
    model = WhisperForConditionalGeneration.from_pretrained(model_path)
    model.eval()  # inference only: disable dropout etc.

    cache[model_name] = (processor, model)
    return processor, model
31
+
32
# Clean up and normalise an audio file for the Whisper feature extractor.
def preprocess_audio(audio, sr=16000, cutoff=0.1):
    """Load *audio*, low-pass filter it and peak-normalise it.

    Args:
        audio: path to the audio file (as supplied by the Gradio widget).
        sr: target sampling rate; Whisper expects 16 kHz.
        cutoff: normalised low-pass cutoff as a fraction of the Nyquist
            frequency. The historical default 0.1 means 0.1 * (sr/2) =
            800 Hz at sr=16000. NOTE(review): that removes most speech
            formants and seems far too aggressive for ASR — consider a
            higher value (e.g. 0.45 ≈ 3.6 kHz) after checking WER impact.

    Returns:
        A 1-D float array, peak-normalised to [-1, 1].
    """
    # Decode and resample to the model's expected rate.
    audio_data, _ = librosa.load(audio, sr=sr)
    # 6th-order Butterworth low-pass to attenuate high-frequency noise.
    b, a = butter(6, cutoff, btype="low", analog=False)
    audio_data = lfilter(b, a, audio_data)
    # Peak-normalise so recording loudness doesn't affect the features.
    audio_data = librosa.util.normalize(audio_data)
    return audio_data
42
+
43
# Transcribe an audio file with the selected Whisper checkpoint.
@spaces.GPU(duration=120)
def transcribe_audio(audio, model_name):
    """Return the transcription of *audio* using *model_name*.

    Any failure is reported as a user-facing string rather than raised, so
    the Gradio textbox always receives something to display.

    Args:
        audio: filepath from the Gradio Audio component.
        model_name: key into MODEL_LIST, from the dropdown.
    """
    try:
        # Resolve the checkpoint chosen in the dropdown.
        processor, model = load_model_and_processor(model_name)

        # Clean and normalise the raw audio.
        audio_input = preprocess_audio(audio)

        # Extract log-mel features; Whisper models expect 16 kHz input.
        inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt")
        # NOTE(review): the previous code built an "attention_mask" shaped
        # like the (batch, mels, frames) feature tensor and never passed it
        # to generate(); it was dead code with the wrong shape, so it has
        # been removed.

        # Greedy decoding; no gradients needed at inference time.
        with torch.no_grad():
            predicted_ids = model.generate(
                inputs["input_features"],
                forced_decoder_ids=None,  # avoid clashing with language/task kwargs
                # NOTE(review): "fr" is a proxy token — Wolof is not in
                # Whisper's language set; confirm this matches how each
                # checkpoint was fine-tuned.
                language="fr",
                task="transcribe",
            )

        # Decode token ids back to text, dropping special tokens.
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

        return transcription[0]

    except Exception as e:
        return f"Erreur de transcription : {str(e)}"
73
+
74
# Build the static model-info table once at import time (shown below the UI).
MODEL_TABLE = []
for _name, _details in MODEL_LIST.items():
    _perf = _details.get("performance", {})
    MODEL_TABLE.append([
        _name,
        _details.get("dataset", "Non spécifié"),
        _perf.get("WER", "Non spécifié"),
        _perf.get("CER", "Non spécifié"),
    ])
79
+
80
# Gradio interface: inputs on the left, transcription output on the right,
# and a static model-comparison table underneath.
with gr.Blocks() as app:
    # Main section
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("## Téléchargez ou enregistrez un fichier audio")
            # filepath type: transcribe_audio expects a path it can load with librosa.
            audio_input = gr.Audio(type="filepath", label="Audio (télécharger ou enregistrer)")
            # NOTE(review): the default value must match a key of model_list.json —
            # confirm "Wolof ASR - dofbi" stays in sync with that file.
            model_dropdown = gr.Dropdown(choices=list(MODEL_LIST.keys()), label="Sélectionnez un modèle", value="Wolof ASR - dofbi")
            submit_button = gr.Button("Transcrire")
        with gr.Column(scale=3):
            transcription_output = gr.Textbox(label="Transcription", lines=6)

    # Static table at the bottom (MODEL_TABLE is computed once at import time).
    gr.Markdown("## Informations sur les modèles disponibles")
    gr.Dataframe(
        headers=["Nom du modèle", "Dataset utilisé", "WER", "CER"],
        value=MODEL_TABLE,
        interactive=False,
        label="Informations sur les modèles"
    )

    # Wire the button: run transcription on the uploaded/recorded audio
    # with the selected model, and show the result in the textbox.
    submit_button.click(
        fn=transcribe_audio,
        inputs=[audio_input, model_dropdown],
        outputs=transcription_output
    )

# Launch the app only when executed as a script (not on import).
if __name__ == "__main__":
    app.launch()
model_list.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Wolof ASR - dofbi": {
3
+ "model_path": "dofbi/wolof-asr",
4
+ "dataset": "galsenai/wolof_tts",
5
+ "performance": {
6
+ "WER": 0.12,
7
+ "CER": null
8
+ }
9
+ },
10
+ "Whisper Small Wolof - serge-wilson": {
11
+ "model_path": "serge-wilson/whisper-small-wolof",
12
+ "dataset": "",
13
+ "performance": {
14
+ "WER": 0.153788,
15
+ "CER": null
16
+ }
17
+ },
18
+ "Whisper Small Wolof - elmamounedieye": {
19
+ "model_path": "M9and2M/whisper-small-wolof",
20
+ "dataset": "M9and2M/Wolof_ASR_dataset",
21
+ "performance": {
22
+ "WER": 0.17,
23
+ "CER": null
24
+ }
25
+ },
26
+ "Whisper Small Wolof mix hum mach data - speechbrain": {
27
+ "model_path": "M9and2M/whisper_small_wolof_mix_hum_mach_data",
28
+ "dataset": "M9and2M/Wolof_ASR_dataset",
29
+ "performance": {
30
+ "WER": 0.16,
31
+ "CER": null
32
+ }
33
+ },
34
+ "Whisper Small Wolof - ngia": {
35
+ "model_path": "ngia/whisper-small-wolof",
36
+ "dataset": "IndabaxSenegal/asr-wolof-dataset",
37
+ "performance": {
38
+ "WER": 0.435071,
39
+ "CER": null
40
+ }
41
+ },
42
+ "Whisper Wolof - cibfaye": {
43
+ "model_path": "cibfaye/whisper-wolof",
44
+ "dataset": "google/fleurs",
45
+ "performance": {
46
+ "WER": 0.439413,
47
+ "CER": null
48
+ }
49
+ },
50
+ "Whisper Small Wolof - alfaDF9": {
51
+ "model_path": "alfaDF9/whisper-small-wolof",
52
+ "dataset": "IndabaxSenegal/asr-wolof-dataset",
53
+ "performance": {
54
+ "WER": 0.511557,
55
+ "CER": null
56
+ }
57
+ }
58
+
59
+ }
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
+ torch
+ gradio
+ librosa
+ scipy