"""Gradio demo that runs a YOLO (OpenVINO INT8) detector on images and videos.

The model is fetched from the Hugging Face Hub at import time and shared by
both prediction callbacks through the module-level ``detection_model``.
"""

import os

from ultralytics import YOLO
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
import cv2
import numpy as np

# Folder (inside the downloaded snapshot) that holds the exported model.
model_path = "best_int8_openvino_model"

# Detection thresholds shared by the image and the video pipeline.
CONF_THRESHOLD = 0.5
IOU_THRESHOLD = 0.6

# Frame rate used only when the source video does not report a usable one.
_FALLBACK_FPS = 30


def load_model(repo_id):
    """Download the snapshot of ``repo_id`` and load the detector it contains.

    Args:
        repo_id: Hugging Face Hub repository id passed to
            ``snapshot_download``.

    Returns:
        A ``YOLO`` instance created with ``task='detect'`` from the
        ``model_path`` folder of the downloaded snapshot.
    """
    download_dir = snapshot_download(repo_id)
    print(download_dir)
    path = os.path.join(download_dir, model_path)
    print(path)
    return YOLO(path, task='detect')


# Image prediction function
def predict_image(input_image):
    """Run detection on one image and return the annotated picture.

    Args:
        input_image: The uploaded image as delivered by the
            ``gr.Image(type="pil")`` input component.

    Returns:
        A ``PIL.Image.Image`` with the detections drawn on it.

    Raises:
        gr.Error: If no image was supplied.
    """
    if input_image is None:
        # Without this guard the predictor would be called with no source.
        raise gr.Error("Please upload an image first.")

    result = detection_model.predict(
        input_image, conf=CONF_THRESHOLD, iou=IOU_THRESHOLD
    )
    img_bgr = result[0].plot()
    # The plotted array is treated as BGR, so swap channels before handing
    # it to PIL.
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img_rgb)


# Video prediction function
def predict_video(input_video):
    """Run detection on every frame of a video and write an annotated copy.

    Frames are written to the output file as soon as they are annotated, so
    memory use does not grow with the length of the video.

    Args:
        input_video: The uploaded file from the ``gr.File`` input. Either an
            object exposing the file path as ``.name`` or the path itself as
            a string is accepted.

    Returns:
        Path of the written video: the upload's base name with
        ``_detected.mp4`` appended, relative to the working directory.

    Raises:
        gr.Error: If nothing was uploaded, the file cannot be opened as a
            video, or it yields no frames.
    """
    if input_video is None:
        raise gr.Error("Please upload a video first.")

    # NOTE(review): which form gr.File passes depends on the Gradio version
    # and component settings; support both the wrapper object and a bare path.
    video_path = getattr(input_video, "name", input_video)

    # Name the result after the upload: "<name>_detected.mp4".
    base_filename, _ = os.path.splitext(os.path.basename(video_path))
    output_video = base_filename + "_detected.mp4"

    video_capture = cv2.VideoCapture(video_path)
    writer = None
    try:
        if not video_capture.isOpened():
            raise gr.Error("Could not open the uploaded file as a video.")

        # Keep the source frame rate so playback speed is unchanged; the
        # comparison is also False for NaN, which triggers the fallback.
        fps = video_capture.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = _FALLBACK_FPS

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4

        while True:
            ret, frame = video_capture.read()
            if not ret:
                break

            result = detection_model.predict(
                frame, conf=CONF_THRESHOLD, iou=IOU_THRESHOLD
            )
            annotated = result[0].plot()  # Frame with detections drawn (BGR)

            if writer is None:
                # The writer is created lazily because the output size is
                # taken from the first annotated frame.
                height, width = annotated.shape[:2]
                writer = cv2.VideoWriter(
                    output_video, fourcc, fps, (width, height)
                )
            writer.write(annotated)
    finally:
        # Release native handles even when prediction raises mid-video.
        video_capture.release()
        if writer is not None:
            writer.release()

    if writer is None:
        raise gr.Error("No frames could be read from the uploaded video.")

    return output_video  # Return the path to the output video


REPO_ID = "AI-Pagoda/4673483T"
detection_model = load_model(REPO_ID)

# Create Gradio interface with tabs
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.Tab("Image Detection"):
            gr.Interface(
                fn=predict_image,
                inputs=gr.Image(type="pil", label="Upload Image"),
                outputs=gr.Image(type="pil", label="Download Image"),
                title="Image Object Detection",
                description="Upload an image to detect Snake and Lizard.",
            )
        with gr.Tab("Video Detection"):
            gr.Interface(
                fn=predict_video,
                inputs=gr.File(label="Upload Video"),
                outputs=gr.File(label="Download Video"),
                title="Video Object Detection",
                description="Upload a video to detect Snake and Lizard.",
            )

app.launch()