import os

import cv2
import gradio as gr
from PIL import Image
from huggingface_hub import snapshot_download
from ultralytics import YOLO
# Model loading: fetch the exported OpenVINO model from the Hugging Face Hub
def load_model(repo_id):
    print("Loading model, please wait...")
    download_dir = snapshot_download(repo_id)
    path = os.path.join(download_dir, "best_int8_openvino_model")
    detection_model = YOLO(path, task='detect')
    return detection_model
def process_image(pilimg):
    # Fixed thresholds for this demo
    conf = 0.7  # Confidence threshold
    iou = 0.7   # IOU threshold

    # Run the YOLOv8 model for object detection
    result = detection_model.predict(pilimg, conf=conf, iou=iou)

    # Plot the results; plot() returns the annotated frame as a BGR array
    img_bgr = result[0].plot()
    out_pilimg = Image.fromarray(img_bgr[..., ::-1])  # BGR -> RGB for PIL

    # Collect labels and confidences for the detected objects
    labels = [detection_model.names[int(det.cls)] for det in result[0].boxes]
    confidences = [f"{float(det.conf):.2f}" for det in result[0].boxes]
    detection_details = "\n".join(f"{label}: {c}" for label, c in zip(labels, confidences))

    return out_pilimg, None, detection_details  # No video output for image inputs
def process_video(video_path):
    # Fixed thresholds for this demo
    conf = 0.7  # Confidence threshold
    iou = 0.7   # IOU threshold

    cap = cv2.VideoCapture(video_path)

    # Get video details
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 25  # Fall back to 25 fps if unreported

    # Output video settings
    out_path = "output_video.mp4"
    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the BGR frame to an RGB PIL image for the model
        pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Run the YOLOv8 model for object detection
        result = detection_model.predict(pil_img, conf=conf, iou=iou)

        # plot() already returns a BGR array, which is exactly what VideoWriter
        # expects, so no further color conversion is needed
        annotated_bgr = result[0].plot()
        out.write(annotated_bgr)

    cap.release()
    out.release()

    return None, out_path, None  # Only the video output is populated for video inputs
# Model Repository
REPO_ID = "crimson78/pokemon_ash_pikachu_yolov8"
detection_model = load_model(REPO_ID)
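
# A minimal sanity check: the Ultralytics YOLO model exposes its class-name
# mapping via `names`, which the handlers above also rely on
print(f"Model loaded with classes: {detection_model.names}")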
# Route the upload to the video or image pipeline based on its file extension
def detect_objects(input_file):
    if input_file.lower().endswith(('.mp4', '.mov', '.avi', '.mkv')):
        # Process as video; the image and text outputs stay empty
        return process_video(input_file)
    else:
        # Process as image; the video output stays empty
        pil_img = Image.open(input_file)
        return process_image(pil_img)
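
# Note: routing by file extension is a heuristic. A sketch of an alternative,
# assuming the standard-library mimetypes module recognizes the formats used here:
# import mimetypes
# kind, _ = mimetypes.guess_type(input_file)
# is_video = (kind or "").startswith("video/")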
# Gradio Interface
gui = gr.Interface(
    fn=detect_objects,
    inputs=[
        gr.File(label="Upload an Image or Video", type="filepath"),  # Accepts both images and videos
    ],
    outputs=[
        gr.Image(type="pil", label="Detection Results"),  # Annotated image (image inputs)
        gr.Video(label="Detection Results"),              # Annotated video (video inputs)
        gr.Textbox(label="Detected Objects"),             # Labels and confidences (image inputs)
    ],
    title="YOLO Object Detection",
    description="Upload an image or video, and the model will automatically detect objects using the YOLO model. The confidence and IOU thresholds are fixed and cannot be adjusted.",
    flagging_mode='never'  # Disable flagging
)
# Launch the Gradio app
gui.launch()
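
# Optional: a share tunnel gives a temporary public URL when running outside
# a hosted Space (an assumption about the deployment target, so left commented out):
# gui.launch(share=True)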