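"""Gradio demo for a RetinaNet-based accept/reject image classifier.

Expects model.py (defining the RetinaNet class imported below) and
retinanet_best_model.pth (its trained weights) to sit alongside this script.
"""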
import gradio as gr
from PIL import Image
import torch
import torchvision.transforms as transforms
from model import RetinaNet  # Import your RetinaNet model definition
import cv2
import numpy as np

# Define the image transformation pipeline
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RetinaNet(num_classes=2).to(device)
model.load_state_dict(torch.load("retinanet_best_model.pth", map_location=device))
model.eval()
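
# Note: the custom RetinaNet here is assumed to behave as a two-class classifier
# whose forward pass returns a [batch, 2] score tensor (indexed as prediction[0][0]
# and prediction[0][1] below); a stock torchvision RetinaNet returns detection
# dictionaries instead and would need different handling.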

# Prediction function
def predict_image(image, is_frame):
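    """Classify an uploaded image or video frame as Accepted or Rejected.

    Args:
        image: NumPy array supplied by the Gradio Image component.
        is_frame: "Yes" if the input should be treated as a raw BGR video frame
            (converted to RGB and scaled to [0, 1]), "No" for a still image
            (resized and ImageNet-normalized).

    Returns:
        A string reporting the predicted label and its confidence score.
    """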

    if is_frame == "No":
        # Preprocess the image
        img = Image.fromarray(image).convert('RGB')  # Convert Gradio input to PIL Image
        input_tensor = image_transform(img).unsqueeze(0).to(device)

        # Perform inference
        with torch.no_grad():
            prediction = model(input_tensor.float())
            p_true = abs(prediction[0][0])   # score used for the "Accepted" decision
            p_false = abs(prediction[0][1])  # score used for the "Rejected" decision

        # Interpret the prediction
        if p_true > 0.7:
            result = "Accepted"
            confidence = float(p_true)
        else:
            result = "Rejected"
            confidence = float(p_false)
    else:
        # Treat the input as a BGR video frame and preprocess it with OpenCV
        frame = image
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized_frame = cv2.resize(rgb_frame, (224, 224))
        normalized_frame = resized_frame / 255.0  # scale to [0, 1]; this branch skips the ImageNet normalization used above
        input_frame = np.expand_dims(normalized_frame, axis=0)

        # Convert to PyTorch tensor and move to device
        input_frame = torch.from_numpy(input_frame).to(device).float()

        # Permute dimensions to [batch_size, channels, height, width]
        input_frame = input_frame.permute(0, 3, 1, 2)

        # Predict using the best model
        with torch.no_grad():
            prediction = model(input_frame)
            p_true = abs(prediction[0][0])
            p_false = abs(prediction[0][1])

        # For frames, accept when the score is below 0.4 (cf. the > 0.7 rule for still images)
        if p_true < 0.4:  # alternative decision rule: if p_true > p_false:
            result = "Accepted"
            confidence = float(p_true)
        else:
            result = "Rejected"
            confidence = float(p_false)

    return f"Result: {result}, Confidence: {confidence:.2f}"

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# RetinaNet Model Prediction")
    with gr.Row():
        image_input = gr.Image(label="Upload Image", type="numpy")
        output_text = gr.Textbox(label="Prediction Result")
    is_frame_radio = gr.Radio(
        choices=["Yes", "No"],                  # options for the radio button
        label="Is this a frame from a video?",  # label for the radio button
        value="No"                              # default option; must be one of the choices
    )
    predict_button = gr.Button("Predict")
    predict_button.click(predict_image, inputs=[image_input, is_frame_radio], outputs=output_text)

# Launch the app
demo.launch()