Spaces:

DangoMachoo
/

Bot-and-life_speech-to-Text

Sleeping

App Files Files Community

Bot-and-life_speech-to-Text / app.py

DangoMachoo

latest ver

f89e973 8 days ago

raw

history blame contribute delete

4.9 kB

	import gradio as gr
	import torch
	from transformers import pipeline
	import os
	import tempfile
	import shutil
	from docx import Document
	import time

	# Startup check: refuse to continue when no "ffmpeg" executable is found on PATH.
	if shutil.which("ffmpeg") is None:
	    raise EnvironmentError("ffmpeg not found. Please install ffmpeg and ensure it's in PATH.")

	# The ffmpeg PATH entry below was only needed on a local machine; it stays
	# disabled because Spaces already has ffmpeg installed.
	# os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg-master-latest-win64-gpl\bin"

	# Load the "small" model into a module-level speech-recognition pipeline.
	MODEL_NAME = "biodatlab/whisper-th-small-combined"

	# Device index 0 when CUDA is available, otherwise the CPU.
	if torch.cuda.is_available():
	    device = 0
	else:
	    device = "cpu"

	pipe = pipeline(
	    model=MODEL_NAME,
	    task="automatic-speech-recognition",
	    device=device,
	    chunk_length_s=30,
	)

	# Speech-to-text handler (returns the text and a processing-time message).
	def transcribe_audio(audio):
	    """Transcribe *audio* with the module-level ``pipe`` pipeline.

	    Args:
	        audio: Value received from the audio input component. Any falsy
	            value (``None``, empty string) is treated as "nothing uploaded".

	    Returns:
	        A ``(text, status)`` tuple of two strings. On success ``text`` is
	        ``result["text"]`` from the pipeline and ``status`` reports the
	        elapsed seconds. When no audio was supplied, or when the pipeline
	        raises, both entries are user-facing messages instead.
	    """
	    if not audio:
	        return "กรุณาอัปโหลดไฟล์เสียงก่อน", "ไม่ได้ประมวลผล"

	    # perf_counter is a monotonic clock, so the measured duration cannot be
	    # skewed by system clock adjustments the way time.time() can.
	    started = time.perf_counter()
	    try:
	        result = pipe(
	            audio,
	            # The language token must be exactly "<|th|>"; the previous
	            # "<\|th\|>" spelling carried literal backslashes (and invalid
	            # escape sequences), so it did not name a Whisper language.
	            generate_kwargs={"language": "<|th|>", "task": "transcribe"},
	            batch_size=14,
	        )
	        text = result["text"]
	    except Exception as e:
	        # UI boundary: report the failure in the output boxes instead of
	        # letting the exception escape the event handler.
	        return f"เกิดข้อผิดพลาด: {str(e)}", "เกิดข้อผิดพลาด"

	    processing_time = time.perf_counter() - started
	    return text, f"ใช้เวลา: {processing_time:.2f} วินาที"

	# Build the file offered for download (.txt or .docx).
	def create_download_file(text, file_format):
	    """Write *text* to a new temporary file and return the file's path.

	    Args:
	        text: Text to export. Empty text, and text starting with the
	            placeholder/error prefixes "กรุณา" or "เกิดข้อผิดพลาด", is not
	            exported.
	        file_format: ``"Text (.txt)"`` writes UTF-8 plain text; any other
	            value writes a Word document (.docx).

	    Returns:
	        The path of the created file, or ``None`` when there is nothing to
	        export or the file could not be written. The file is created with
	        ``delete=False`` and is not removed by this function.
	    """
	    import logging

	    if not text or text.startswith(("กรุณา", "เกิดข้อผิดพลาด")):
	        return None

	    try:
	        if file_format == "Text (.txt)":
	            with tempfile.NamedTemporaryFile(
	                suffix=".txt", delete=False, mode="w", encoding="utf-8"
	            ) as f:
	                f.write(text)
	            return f.name

	        # Word (.docx)
	        doc = Document()
	        doc.add_paragraph(text)
	        with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as f:
	            # Write through the already-open handle rather than re-opening
	            # the same file by name while it is still open.
	            doc.save(f)
	        return f.name
	    except Exception:
	        # Keep the "no file" contract for the UI, but leave a traceback in
	        # the logs instead of discarding the failure silently.
	        logging.getLogger(__name__).exception("Could not create the download file")
	        return None

	# ✅ CSS that centers Markdown content and gives the "#transcribed-text"
	# textarea a fixed 250px height with a vertical scrollbar and vertical resize.
	custom_css = """
	.markdown {
	text-align: center !important;
	}
	#transcribed-text textarea {
	height: 250px !important;
	overflow-y: auto !important;
	resize: vertical !important;
	}
	"""

	# ✅ UI Layout
	# Builds the Blocks app: a title, an audio input with a format dropdown and a
	# transcribe button, two textboxes for the transcript and the timing message,
	# and copy/download buttons, followed by the three click handlers.
	# NOTE(review): the nesting of the `with` blocks cannot be read off the
	# flattened indentation shown here — confirm it against the deployed file.
	with gr.Blocks(css=custom_css) as demo:
	gr.Markdown("""
	<div style="text-align: center;">
	<h2> แปลงเสียงพูดภาษาไทยเป็นข้อความ </h2>
	</div>
	""")
	with gr.Row():
	with gr.Column(scale=1):
	# type="filepath" is requested for the audio value passed to the handler.
	audio_input = gr.Audio(label="🎵 อัปโหลดไฟล์เสียง (MP3, WAV, M4A)", type="filepath")
	# The choice strings are compared verbatim in create_download_file.
	download_format = gr.Dropdown(
	choices=["Text (.txt)", "Word (.docx)"],
	label="📄 เลือกฟอร์แมตไฟล์",
	value="Text (.txt)"
	)
	transcribe_btn = gr.Button("🔄 แปลงเสียงเป็นข้อความ")
	with gr.Column(scale=2):
	# elem_id matches the "#transcribed-text" selector in custom_css.
	transcribed_text = gr.Textbox(label="📜 ข้อความที่แปลงแล้ว", elem_id="transcribed-text")
	processing_time_display = gr.Textbox(label="⏱️ เวลาที่ใช้", interactive=False)
	with gr.Row():
	copy_button = gr.Button("📋 คัดลอกข้อความ")
	download_button = gr.DownloadButton(label="⬇️ ดาวน์โหลดไฟล์")

	# Action
	# Transcribe: the two values returned by transcribe_audio fill the transcript
	# textbox and the processing-time textbox, in that order.
	transcribe_btn.click(
	fn=transcribe_audio,
	inputs=audio_input,
	outputs=[transcribed_text, processing_time_display],
	show_progress=True
	)

	# Copy action (uses JavaScript)
	# No Python function is attached (fn=None); only the `js` snippet is supplied.
	# NOTE(review): the snippet calls gr.Info, which is a name from the Python
	# gradio module — confirm that `gr` is defined in the browser context.
	copy_button.click(
	fn=None,
	inputs=transcribed_text,
	outputs=None,
	js="function(text) {navigator.clipboard.writeText(text); gr.Info('คัดลอกข้อความแล้ว!'); return []}"
	)

	# Download action
	# The value returned by create_download_file (a file path or None) is sent
	# back to the download button itself.
	download_button.click(
	fn=create_download_file,
	inputs=[transcribed_text, download_format],
	outputs=download_button
	)

	# Run in Hugging Face Spaces: start serving the Blocks app defined above.
	demo.launch()