from ultralytics import YOLO
import cv2
import gradio as gr
from PIL import Image
from huggingface_hub import snapshot_download
import os


def load_model(repo_id):
    """Download the model snapshot from the Hugging Face Hub and load it with YOLO."""
    print("Loading model, please wait...")
    download_dir = snapshot_download(repo_id)
    path = os.path.join(download_dir, "best_int8_openvino_model")
    detection_model = YOLO(path, task='detect')
    return detection_model


def process_image(pilimg):
    # Fixed thresholds for detection
    conf = 0.7  # Confidence threshold
    iou = 0.7   # IOU threshold

    # Apply the YOLOv8 model for object detection
    result = detection_model.predict(pilimg, conf=conf, iou=iou)

    # plot() draws the bounding boxes and returns a BGR numpy array
    img_bgr = result[0].plot()
    out_pilimg = Image.fromarray(img_bgr[..., ::-1])  # BGR -> RGB for PIL

    # Collect labels and confidences for the detected objects
    labels = [detection_model.names[int(det.cls)] for det in result[0].boxes]
    confidences = [f"{float(det.conf):.2f}" for det in result[0].boxes]
    detection_details = "\n".join(f"{label}: {conf}" for label, conf in zip(labels, confidences))

    return out_pilimg, detection_details


def process_video(video_path):
    # Fixed thresholds for detection
    conf = 0.7  # Confidence threshold
    iou = 0.7   # IOU threshold

    cap = cv2.VideoCapture(video_path)

    # Video properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Output video settings
    out_path = "output_video.mp4"
    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the BGR frame to an RGB PIL image for YOLO
        pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Apply the YOLOv8 model for object detection
        result = detection_model.predict(pil_img, conf=conf, iou=iou)

        # plot() already returns a BGR array, which is what VideoWriter expects,
        # so write it directly without any further color conversion
        annotated_bgr = result[0].plot()
        out.write(annotated_bgr)

    cap.release()
    out.release()

    return None, out_path  # No image output; return the path to the annotated video


# Model repository on the Hugging Face Hub
REPO_ID = "crimson78/pokemon_ash_pikachu_yolov8"
detection_model = load_model(REPO_ID)


def detect_objects(input_file):
    # Route by file extension: known video formats go to process_video,
    # everything else is treated as an image
    if input_file.lower().endswith(('.mp4', '.mov', '.avi', '.mkv')):
        out_img, out_video = process_video(input_file)
        return out_img, out_video, ""
    else:
        pil_img = Image.open(input_file)
        out_img, details = process_image(pil_img)
        return out_img, None, details


# Gradio interface
gui = gr.Interface(
    fn=detect_objects,
    inputs=[
        gr.File(label="Upload an Image or Video", type="filepath"),  # Accepts both images and videos
    ],
    outputs=[
        gr.Image(type="pil", label="Detection Results"),  # Annotated image (for image inputs)
        gr.Video(label="Detection Results"),              # Annotated video (for video inputs)
        gr.Textbox(label="Detections"),                   # Per-detection labels and confidences
    ],
    title="YOLO Object Detection",
    description="Upload an image or video, and the model will automatically detect objects using the YOLO model. The confidence and IOU thresholds are fixed and cannot be adjusted.",
    flagging_mode='never'  # Disable flagging
)

# Launch the Gradio app
gui.launch()