Spaces:

ales
/

wav2vec2-cv-be-lm

Sleeping

App Files Files Community

wav2vec2-cv-be-lm / app.py

ales

upd to latest gradio version; format

adca0d8 3 months ago

raw

history blame contribute delete

3.11 kB

	from pprint import pformat

	import gradio as gr
	import librosa
	from huggingface_hub import hf_hub_download

	from pipeline import PreTrainedPipeline

	# Hub repository used both as the model path and as the source of the language model file.
	HF_HUB_URL = "ales/wav2vec2-cv-be"
	# Location of the language model file inside that repository.
	LM_HUB_FP = "language_model/cv8be_5gram.bin"
	# Sampling rate requested from librosa when audio is loaded for the model.
	MODEL_SAMPLING_RATE = 16_000  # 16 kHz

	# Fetch the language model file from the Hub; the result is handed to the
	# pipeline as `language_model_fp`.
	lm_fp = hf_hub_download(
	    repo_id=HF_HUB_URL,
	    filename=LM_HUB_FP,
	)

	# Build the recognition pipeline once at module load so that every call to
	# `main` reuses the same instance.
	pipeline = PreTrainedPipeline(
	    model_path=HF_HUB_URL,
	    language_model_fp=lm_fp,
	)


	def main(recorded_audio_fp: str \| None, uploaded_audio_fp: str \| None):
	audio_fp = None
	if recorded_audio_fp is not None:
	audio_fp = recorded_audio_fp
	used_audiofile = "recorded"
	elif uploaded_audio_fp is not None:
	audio_fp = uploaded_audio_fp
	used_audiofile = "uploaded"
	else:
	return (
	"Памылка! Вы мусіце альбо запісаць, альбо запампаваць аўдыяфайл.",
	"Error! You have to either record or upload an audiofile.",
	)

	# read audio file
	inputs = librosa.load(audio_fp, sr=MODEL_SAMPLING_RATE, mono=True)[0]

	# recognize speech
	pipeline_res = pipeline(inputs=inputs)
	text = pipeline_res["text"][0] # unpack batch of size 1

	# add technical information to the output
	tech_data = pipeline_res
	del tech_data["text"]
	tech_data["used_audiofile"] = used_audiofile
	tech_data["recorded_file_present"] = recorded_audio_fp is not None
	tech_data["uploaded_file_present"] = uploaded_audio_fp is not None
	tech_data["audiofile_path"] = audio_fp
	tech_data["model_sampling_rate"] = MODEL_SAMPLING_RATE
	tech_data["inputs_shape"] = inputs.shape
	tech_data["inputs_max"] = inputs.max().item()
	tech_data["inputs_min"] = inputs.min().item()

	tech_data_str = pformat(tech_data)

	return text, tech_data_str


	# Markdown passed to the interface as its `article` text.
	article = """
	The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)

	![Page Visits](https://visitor-badge.glitch.me/badge?page_id=huggingface.co/spaces/ales/wav2vec2-cv-be-lm&left_color=darkgray&right_color=crimson&left_text=Page%20Visits)
	"""

	# Two audio inputs, in the order `main` expects them: the microphone
	# recording first, the uploaded file second. Both deliver a file path.
	_recorded_audio = gr.Audio(
	    sources=["microphone"],
	    type="filepath",
	    label="Запішыце аўдыяфайл, каб распазнаць маўленьне",
	)
	_uploaded_audio = gr.Audio(
	    sources=["upload"],
	    type="filepath",
	    label="Альбо загрузіце ўжо запісаны аўдыяфайл сюды",
	)

	# Two text outputs matching the pair returned by `main`.
	_recognized_text = gr.Textbox(label="Распазнаны тэкст")
	_tech_info = gr.Textbox(label="Тэхнічная інфармацыя")

	iface = gr.Interface(
	    fn=main,
	    inputs=[_recorded_audio, _uploaded_audio],
	    outputs=[_recognized_text, _tech_info],
	    title="wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model",
	    description=(
	        "Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n"
	        "Акустычная мадэль + моўная мадэль."
	    ),
	    article=article,
	)

	# Start the web app when the module is executed.
	iface.launch()