from ultralytics import YOLO
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
import os
import cv2
import tempfile


def load_model(repo_id):
    # Download the model snapshot from the Hugging Face Hub and load the weights
    download_dir = snapshot_download(repo_id)
    path = os.path.join(download_dir, "best.pt")
    detection_model = YOLO(path, task='detect')
    return detection_model


def predict(pilimg):
    if pilimg is None:
        return None
    result = detection_model.predict(pilimg, conf=0.5, iou=0.6)
    img_bgr = result[0].plot()  # annotated frame in BGR channel order
    out_pilimg = Image.fromarray(img_bgr[..., ::-1])  # convert BGR to RGB for PIL
    return out_pilimg


def predict_video(video):
    if video is None:
        return None  # no video was uploaded

    # gr.Video passes the upload as a file path string, so it can go straight to OpenCV
    cap = cv2.VideoCapture(video)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # NamedTemporaryFile avoids the race condition of the deprecated tempfile.mktemp
    temp_output_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(temp_output_path, fourcc, fps, (frame_width, frame_height))

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        result = detection_model.predict(frame, conf=0.5, iou=0.6)
        # plot() returns a BGR frame, which is exactly what cv2.VideoWriter expects,
        # so write it as-is; converting to RGB here would swap the colors on disk
        out.write(result[0].plot())

    cap.release()
    out.release()

    # Return the path to the processed video
    return temp_output_path


def enable_button(image_input, video_input):
    # Helper to toggle a submit button when either input is present;
    # not currently wired into the interfaces below (see sketch at the end)
    if image_input is None and video_input is None:
        return gr.update(interactive=False)
    return gr.update(interactive=True)


REPO_ID = "dexpyw/model"
detection_model = load_model(REPO_ID)

image_interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Image(type="pil", label="Predicted Image"),
    live=False
)

video_interface = gr.Interface(
    fn=predict_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Video(label="Predicted Video"),
    live=False
)

gr.TabbedInterface([image_interface, video_interface], ["Image", "Video"]).launch(share=True)

# image_interface.launch(share=True)
# video_interface.launch(share=True)
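
# A minimal sketch (not part of the original app) of how enable_button could be
# wired up with gr.Blocks, using .change() events so the submit button only
# activates once an image or video is provided. The component and button names
# here are hypothetical.
#
# with gr.Blocks() as demo:
#     image_in = gr.Image(type="pil", label="Upload Image")
#     video_in = gr.Video(label="Upload Video")
#     submit = gr.Button("Submit", interactive=False)
#     image_in.change(enable_button, [image_in, video_in], submit)
#     video_in.change(enable_button, [image_in, video_in], submit)
# demo.launch(share=True)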