Commit 6728136 · 0 parent(s)

♻️ refactor (model): add new
Files changed:
- .gitattributes +35 -0
- README.md +14 -0
- app.py +110 -0
- model_list.json +59 -0
- requirements.txt +7 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
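These are the Hub's stock Git LFS rules: any file matching one of the patterns above is stored through LFS instead of in regular Git history. As a rough illustration (a hypothetical helper; Python's fnmatch only approximates gitattributes glob semantics, e.g. it has no special handling for "**"), this sketch lists which local files such rules would catch:

# Hypothetical helper: approximate which files the LFS rules above would match.
# fnmatch is close to, but not identical to, .gitattributes glob matching.
import fnmatch
import pathlib

LFS_PATTERNS = ["*.bin", "*.ckpt", "*.h5", "*.onnx", "*.pt", "*.pth",
                "*.safetensors", "*.zip"]  # a subset of the 35 rules above

def lfs_candidates(root="."):
    for path in sorted(pathlib.Path(root).rglob("*")):
        if path.is_file() and any(fnmatch.fnmatch(path.name, pat) for pat in LFS_PATTERNS):
            yield path

if __name__ == "__main__":
    for candidate in lfs_candidates():
        print(candidate)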
README.md
ADDED
@@ -0,0 +1,14 @@
+---
+title: Wolof Asr
+emoji: 📚
+colorFrom: pink
+colorTo: indigo
+sdk: gradio
+sdk_version: 5.6.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Bienvenue sur **Wolof-ASR**, une application de reconnaissan
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
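The block between the two "---" markers is the Space's YAML front-matter, which the Hub reads for metadata such as the SDK, SDK version, and entry file (see the configuration reference linked above). A minimal sketch of parsing it locally, assuming PyYAML is installed:

# Minimal sketch: read the YAML front-matter of README.md locally.
# Assumes PyYAML is available; the Hub parses this server-side.
import yaml

with open("README.md", "r", encoding="utf-8") as f:
    text = f.read()

# The front-matter sits between the first two "---" lines.
_, front_matter, _ = text.split("---", 2)
config = yaml.safe_load(front_matter)
print(config["sdk"], config["sdk_version"], config["app_file"])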
app.py
ADDED
@@ -0,0 +1,110 @@
+import spaces
+import torch
+import gradio as gr
+import librosa
+import numpy as np
+import json
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+from scipy.signal import butter, lfilter
+
+# Load the model list from a JSON file
+def load_model_list(file_path="model_list.json"):
+    try:
+        with open(file_path, "r") as f:
+            return json.load(f)
+    except Exception as e:
+        raise ValueError(f"Error loading the model list: {str(e)}")
+
+# Load the models from the JSON file
+MODEL_LIST = load_model_list()
+
+# Load the model and processor for a given entry
+def load_model_and_processor(model_name):
+    model_info = MODEL_LIST.get(model_name)
+    if not model_info:
+        raise ValueError("Model not found in the list.")
+    model_path = model_info["model_path"]
+    processor = WhisperProcessor.from_pretrained(model_path)
+    model = WhisperForConditionalGeneration.from_pretrained(model_path)
+    model.eval()
+    return processor, model
+
+# Clean and normalize the audio
+def preprocess_audio(audio, sr=16000):
+    # Load the audio
+    audio_data, _ = librosa.load(audio, sr=sr)
+    # Low-pass filter to reduce high-frequency noise; cutoff given in Hz (the original normalized cutoff of 0.1, i.e. ~800 Hz, would have removed most of the speech band)
+    b, a = butter(6, 7500, btype="low", fs=sr)
+    audio_data = lfilter(b, a, audio_data)
+    # Peak-normalize the audio
+    audio_data = librosa.util.normalize(audio_data)
+    return audio_data
+
+# Transcribe the audio
+@spaces.GPU(duration=120)
+def transcribe_audio(audio, model_name):
+    try:
+        # Load the model and processor for the selected entry
+        processor, model = load_model_and_processor(model_name)
+
+        # Clean and normalize the audio
+        audio_input = preprocess_audio(audio)
+
+        # Preprocess the audio; let the feature extractor build the attention mask rather than hand-rolling one with the wrong shape
+        inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt", return_attention_mask=True)
+
+        # Run inference
+        with torch.no_grad():
+            predicted_ids = model.generate(
+                inputs["input_features"],
+                attention_mask=inputs.get("attention_mask"),
+                forced_decoder_ids=None,  # avoid conflicting with language/task below
+                language="fr",  # Whisper has no Wolof token; adjust to your target language
+                task="transcribe"
+            )
+
+        # Decode the predicted IDs into text
+        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+
+        return transcription[0]
+
+    except Exception as e:
+        return f"Erreur de transcription : {str(e)}"
+
+# Build the (static) model table once
+MODEL_TABLE = [
+    [name, details.get("dataset", "Non spécifié"), details.get("performance", {}).get("WER", "Non spécifié"), details.get("performance", {}).get("CER", "Non spécifié")]
+    for name, details in MODEL_LIST.items()
+]
+
+# Gradio interface
+with gr.Blocks() as app:
+    # Main section
+    with gr.Row():
+        with gr.Column(scale=2):
+            gr.Markdown("## Téléchargez ou enregistrez un fichier audio")
+            audio_input = gr.Audio(type="filepath", label="Audio (télécharger ou enregistrer)")
+            model_dropdown = gr.Dropdown(choices=list(MODEL_LIST.keys()), label="Sélectionnez un modèle", value="Wolof ASR - dofbi")
+            submit_button = gr.Button("Transcrire")
+        with gr.Column(scale=3):
+            transcription_output = gr.Textbox(label="Transcription", lines=6)
+
+    # Static table at the bottom
+    gr.Markdown("## Informations sur les modèles disponibles")
+    gr.Dataframe(
+        headers=["Nom du modèle", "Dataset utilisé", "WER", "CER"],
+        value=MODEL_TABLE,
+        interactive=False,
+        label="Informations sur les modèles"
+    )
+
+    # Button action
+    submit_button.click(
+        fn=transcribe_audio,
+        inputs=[audio_input, model_dropdown],
+        outputs=transcription_output
+    )
+
+# Launch the application
+if __name__ == "__main__":
+    app.launch()
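For quick experiments outside the Gradio UI, any checkpoint listed in model_list.json can be driven directly with transformers. A minimal sketch, assuming a local 16 kHz mono file named sample.wav (a placeholder) and the first entry's checkpoint:

# Minimal sketch: run one of the listed checkpoints without the Gradio app.
# "sample.wav" is a placeholder; the model id is taken from model_list.json.
import librosa
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration

model_id = "dofbi/wolof-asr"
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id).eval()

audio, _ = librosa.load("sample.wav", sr=16000)
inputs = processor(audio, sampling_rate=16000, return_tensors="pt")
with torch.no_grad():
    predicted_ids = model.generate(inputs["input_features"])
print(processor.batch_decode(predicted_ids, skip_special_tokens=True)[0])

Unlike transcribe_audio above, this skips the low-pass/normalize step, which makes it useful for judging whether the preprocessing actually helps a given recording.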
model_list.json
ADDED
@@ -0,0 +1,59 @@
+{
+    "Wolof ASR - dofbi": {
+        "model_path": "dofbi/wolof-asr",
+        "dataset": "galsenai/wolof_tts",
+        "performance": {
+            "WER": 0.12,
+            "CER": null
+        }
+    },
+    "Whisper Small Wolof - serge-wilson": {
+        "model_path": "serge-wilson/whisper-small-wolof",
+        "dataset": "",
+        "performance": {
+            "WER": 0.153788,
+            "CER": null
+        }
+    },
+    "Whisper Small Wolof - elmamounedieye": {
+        "model_path": "M9and2M/whisper-small-wolof",
+        "dataset": "M9and2M/Wolof_ASR_dataset",
+        "performance": {
+            "WER": 0.17,
+            "CER": null
+        }
+    },
+    "Whisper Small Wolof mix hum mach data - speechbrain": {
+        "model_path": "M9and2M/whisper_small_wolof_mix_hum_mach_data",
+        "dataset": "M9and2M/Wolof_ASR_dataset",
+        "performance": {
+            "WER": 0.16,
+            "CER": null
+        }
+    },
+    "Whisper Small Wolof - ngia": {
+        "model_path": "ngia/whisper-small-wolof",
+        "dataset": "IndabaxSenegal/asr-wolof-dataset",
+        "performance": {
+            "WER": 0.435071,
+            "CER": null
+        }
+    },
+    "Whisper Wolof - cibfaye": {
+        "model_path": "cibfaye/whisper-wolof",
+        "dataset": "google/fleurs",
+        "performance": {
+            "WER": 0.439413,
+            "CER": null
+        }
+    },
+    "Whisper Small Wolof - alfaDF9": {
+        "model_path": "alfaDF9/whisper-small-wolof",
+        "dataset": "IndabaxSenegal/asr-wolof-dataset",
+        "performance": {
+            "WER": 0.511557,
+            "CER": null
+        }
+    }
+
+}
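app.py looks entries up by display name and reads model_path plus the nested performance block, so a schema slip (a missing key, a WER stored as a string) only surfaces at runtime inside the transcription handler. A small validation sketch for the structure used above:

# Small validation sketch for model_list.json: every entry needs a non-empty
# model_path, and WER/CER must be numeric or null, matching what app.py reads.
import json

with open("model_list.json", "r") as f:
    models = json.load(f)

for name, details in models.items():
    assert isinstance(details.get("model_path"), str) and details["model_path"], name
    performance = details.get("performance", {})
    for metric in ("WER", "CER"):
        value = performance.get(metric)
        assert value is None or isinstance(value, (int, float)), (name, metric)

print(f"{len(models)} entries OK")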
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+transformers
+torch
+gradio
+librosa
+scipy
+numpy
+spaces
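Since a missing package only shows up when the Space boots, a pre-flight check that every top-level import in app.py resolves can save a failed build. A hypothetical helper (on Hugging Face Spaces the spaces package is preinstalled for ZeroGPU hardware, so locally it may be the one that fails):

# Hypothetical pre-flight check: confirm app.py's top-level imports resolve
# in the current environment before launching.
import importlib.util

REQUIRED = ["spaces", "torch", "gradio", "librosa", "numpy", "transformers", "scipy"]

missing = [mod for mod in REQUIRED if importlib.util.find_spec(mod) is None]
if missing:
    raise SystemExit(f"Missing dependencies: {', '.join(missing)}")
print("All imports resolvable.")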