from ultralytics import YOLO
import cv2
import gradio as gr
from PIL import Image
from huggingface_hub import snapshot_download
import os
# Model loading function: download the OpenVINO export from the Hub and load it
def load_model(repo_id):
    print("Loading model, please wait...")
    download_dir = snapshot_download(repo_id)
    path = os.path.join(download_dir, "best_int8_openvino_model")
    detection_model = YOLO(path, task='detect')
    return detection_model
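
# A minimal sketch of loading the same export from a local directory instead of
# the Hub (the path is an assumption; point it at wherever the OpenVINO export
# actually lives):
# detection_model = YOLO("best_int8_openvino_model", task='detect')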
def process_image(pilimg):
    # Fixed thresholds (the UI does not expose them)
    conf = 0.7  # Confidence threshold
    iou = 0.7   # IoU threshold

    # Run the YOLOv8 model for object detection
    result = detection_model.predict(pilimg, conf=conf, iou=iou)

    # plot() returns a BGR numpy array with the bounding boxes drawn
    img_bgr = result[0].plot()
    out_pilimg = Image.fromarray(img_bgr[..., ::-1])  # Convert to RGB-order PIL image

    # Collect labels and confidences for the detected objects
    labels = [detection_model.names[int(det.cls)] for det in result[0].boxes]
    confidences = [f"{float(det.conf):.2f}" for det in result[0].boxes]
    detection_details = "\n".join(f"{label}: {c}" for label, c in zip(labels, confidences))

    # Image path fills the image and text outputs; the video output stays empty
    return out_pilimg, None, detection_details
def process_video(video_path):
    # Fixed thresholds (the UI does not expose them)
    conf = 0.7  # Confidence threshold
    iou = 0.7   # IoU threshold

    cap = cv2.VideoCapture(video_path)

    # Get video details
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Output video settings
    out_path = "output_video.mp4"
    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the BGR frame to an RGB PIL image for the model
        pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Run the YOLOv8 model for object detection
        result = detection_model.predict(pil_img, conf=conf, iou=iou)

        # plot() already returns a BGR numpy array, so write it out directly
        annotated_bgr = result[0].plot()
        out.write(annotated_bgr)

    cap.release()
    out.release()

    # Video path fills the video output; image and text outputs stay empty
    return None, out_path, ""
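
# Note: browsers do not always decode 'mp4v' streams, so the result video may
# not play inline. Re-encoding with an H.264 fourcc is a common workaround,
# though codec availability depends on the local OpenCV build:
# out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'avc1'), fps, (frame_width, frame_height))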
# Model repository
REPO_ID = "crimson78/pokemon_ash_pikachu_yolov8"
detection_model = load_model(REPO_ID)
# Dispatch based on the uploaded file type
def detect_objects(input_file):
    if input_file.lower().endswith(('.mp4', '.mov', '.avi', '.mkv')):
        # Process as video; image and text outputs stay empty
        return process_video(input_file)
    else:
        # Process as image; video output stays empty
        pil_img = Image.open(input_file)
        return process_image(pil_img)
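
# Quick local smoke test (hypothetical file name, assuming it exists on disk):
# annotated_img, _, details = detect_objects("sample.jpg")
# print(details)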
# Gradio interface
gui = gr.Interface(
    fn=detect_objects,
    inputs=[
        gr.File(label="Upload an Image or Video", type="filepath"),  # Accepts both images and videos
    ],
    outputs=[
        gr.Image(type="pil", label="Detection Results"),  # Used for image inputs
        gr.Video(label="Detection Results"),              # Used for video inputs
        gr.Textbox(label="Detections"),                   # Per-object "label: confidence" pairs
    ],
    title="YOLO Object Detection",
    description="Upload an image or video, and the model will automatically detect objects using the YOLO model. The confidence and IoU thresholds are fixed and cannot be adjusted.",
    flagging_mode='never'  # Disable flagging
)
# Launch the Gradio app
gui.launch()
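
# To get a temporary public link when running outside a Space, launch with
# sharing enabled instead:
# gui.launch(share=True)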