"""Gradio demo that runs a YOLO detector on uploaded images and videos.

The ONNX weights are fetched from a Hugging Face repository at start-up and
the app exposes two tabs: one for single images and one for video files.
"""

import os
import tempfile

import cv2
import gradio as gr
from huggingface_hub import snapshot_download
from PIL import Image
from ultralytics import YOLO

# File name of the exported ONNX weights inside the downloaded repository.
model_path = "best.onnx"

# Minimum confidence a detection needs in order to be kept by ``predict``.
CONFIDENCE_THRESHOLD = 0.5

# Frame rate used when OpenCV reports 0 FPS for the input, which would
# otherwise produce an unusable writer.
FALLBACK_FPS = 30.0


def load_model(repo_id):
    """Download ``repo_id`` from the Hugging Face Hub and load its detector.

    Args:
        repo_id: Hub repository identifier that contains ``model_path``.

    Returns:
        A ``YOLO`` model configured for the detection task.
    """
    download_dir = snapshot_download(repo_id)
    print(download_dir)
    path = os.path.join(download_dir, model_path)
    print(path)
    return YOLO(path, task="detect")


def process_image(pilimg):
    """Run detection on one image and return the annotated picture.

    Args:
        pilimg: PIL image supplied by the Gradio image component, or ``None``
            when the user submitted without uploading anything.

    Returns:
        A PIL image with the detections drawn on it.

    Raises:
        gr.Error: If no image was provided.
    """
    if pilimg is None:
        raise gr.Error("Please upload an image first.")

    result = detection_model.predict(pilimg, conf=CONFIDENCE_THRESHOLD)
    img_bgr = result[0].plot()
    # Reverse the channel axis so the BGR plot becomes an RGB-order PIL image.
    return Image.fromarray(img_bgr[..., ::-1])


def process_video(video):
    """Annotate every frame of a video and return the path of the result.

    Args:
        video: Path of the uploaded video file, or ``None`` when nothing was
            uploaded.

    Returns:
        Path of the annotated ``output.mp4`` inside a fresh temporary
        directory. The directory is left in place because the caller still
        has to read the file after this function returns.

    Raises:
        gr.Error: If no video was provided, the input cannot be opened, or
            the output file cannot be created.
    """
    if not video:
        raise gr.Error("Please upload a video first.")

    cap = cv2.VideoCapture(video)
    output = None
    try:
        if not cap.isOpened():
            raise gr.Error("Could not open the uploaded video.")

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS) or FALLBACK_FPS

        temp_dir = tempfile.mkdtemp()
        output_path = os.path.join(temp_dir, "output.mp4")
        # Lowercase tag: the MP4 container does not accept 'MP4V'.
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        output = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not output.isOpened():
            raise gr.Error("Could not create the annotated output video.")

        while True:
            success, frame = cap.read()
            if not success:
                # End of stream (or a decode failure): stop instead of
                # spinning, since isOpened() stays true after the last frame.
                break
            results = detection_model.predict(frame, conf=CONFIDENCE_THRESHOLD)
            output.write(results[0].plot())
    finally:
        # Release both handles even when inference or encoding fails.
        cap.release()
        if output is not None:
            output.release()

    return output_path


REPO_ID = "1657866Y/grocery"
detection_model = load_model(REPO_ID)

# Interface for image upload.
image_interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
)

# Interface for video upload.
video_interface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload a Video"),
    outputs="video",
)

# Arrange the header and the two tabs in a single Blocks app.
with gr.Blocks() as app:
    gr.Markdown("# Grocery? No wait!")
    gr.Markdown("Choose whether to upload an image or a video below!")
    gr.TabbedInterface(
        [image_interface, video_interface],
        tab_names=["Image Upload", "Video Upload"],
    )

app.launch()