from ultralytics import YOLO
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
import os
import cv2
import tempfile


def load_model(repo_id):
    # Download the model snapshot from the Hugging Face Hub and load the weights
    download_dir = snapshot_download(repo_id)
    path = os.path.join(download_dir, "best.pt")
    detection_model = YOLO(path, task='detect')
    return detection_model


def predict(pilimg):
    if pilimg is None:
        return None
    result = detection_model.predict(pilimg, conf=0.5, iou=0.6)
    img_bgr = result[0].plot()  # annotated frame in BGR channel order
    out_pilimg = Image.fromarray(img_bgr[..., ::-1])  # convert BGR to RGB for PIL
    return out_pilimg


def predict_video(video):
    if video is None:
        return None  # no video was uploaded

    # gr.Video passes the upload as a file path string, so it can go straight to OpenCV
    cap = cv2.VideoCapture(video)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # NamedTemporaryFile avoids the race condition of the deprecated tempfile.mktemp
    temp_output_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(temp_output_path, fourcc, fps, (frame_width, frame_height))

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        result = detection_model.predict(frame, conf=0.5, iou=0.6)
        # plot() returns a BGR frame, which is exactly what cv2.VideoWriter expects,
        # so write it as-is; converting to RGB here would swap the colors on disk
        out.write(result[0].plot())

    cap.release()
    out.release()

    # Return the path to the processed video
    return temp_output_path


def enable_button(image_input, video_input):
    # Helper to toggle a submit button when either input is present;
    # not currently wired into the interfaces below (see sketch at the end)
    if image_input is None and video_input is None:
        return gr.update(interactive=False)
    return gr.update(interactive=True)


REPO_ID = "dexpyw/model"
detection_model = load_model(REPO_ID)

image_interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Image(type="pil", label="Predicted Image"),
    live=False
)

video_interface = gr.Interface(
    fn=predict_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Video(label="Predicted Video"),
    live=False
)

gr.TabbedInterface([image_interface, video_interface], ["Image", "Video"]).launch(share=True)

# image_interface.launch(share=True)
# video_interface.launch(share=True)
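
# A minimal sketch (not part of the original app) of how enable_button could be
# wired up with gr.Blocks, using .change() events so the submit button only
# activates once an image or video is provided. The component and button names
# here are hypothetical.
#
# with gr.Blocks() as demo:
#     image_in = gr.Image(type="pil", label="Upload Image")
#     video_in = gr.Video(label="Upload Video")
#     submit = gr.Button("Submit", interactive=False)
#     image_in.change(enable_button, [image_in, video_in], submit)
#     video_in.change(enable_button, [image_in, video_in], submit)
# demo.launch(share=True)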