WeldPrediction / app.py
DennyW's picture
Update app.py
5dfa2ed verified
import gradio as gr
from PIL import Image
import torch
import torchvision.transforms as transforms
from model import RetinaNet # Import your RetinaNet model definition
import cv2
import numpy as np
# Preprocessing applied to still images before inference: resize to the
# fixed input size, convert to a tensor, then normalise each channel.
# The mean/std values are the per-channel statistics torchvision documents
# for its ImageNet-pretrained models.
_INPUT_SIZE = (224, 224)
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

image_transform = transforms.Compose(
    [
        transforms.Resize(_INPUT_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ]
)
# Pick the GPU when CUDA is available (CPU otherwise), build the two-class
# network on that device, restore the saved weights, and switch the model
# to evaluation mode.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RetinaNet(num_classes=2)
model = model.to(device)
_state_dict = torch.load("retinanet_best_model.pth", map_location=device)
model.load_state_dict(_state_dict)
model.eval()
# Prediction function
def predict_image(image, is_frame):
    """Classify an uploaded image as "Accepted" or "Rejected".

    Two preprocessing pipelines are supported, selected by ``is_frame``:

    * ``"No"`` -- the image is converted to a PIL RGB image and passed
      through ``image_transform``; it is accepted when the magnitude of the
      first model output exceeds 0.7.
    * any other value -- the image goes through the OpenCV pipeline
      (BGR->RGB channel swap, resize to 224x224, divide by 255); it is
      accepted when the magnitude of the first model output is below 0.4.

    Args:
        image: Image as a NumPy array (the Gradio image input), or ``None``
            when no image has been supplied.
        is_frame: Radio selection. Only the exact value ``"No"`` selects the
            still-image pipeline.

    Returns:
        ``"Result: <label>, Confidence: <value>"`` with the confidence
        formatted to two decimals, or a short prompt asking for an image
        when ``image`` is ``None``.
    """
    # Clicking "Predict" without uploading anything delivers None; answer
    # with a prompt instead of failing inside the preprocessing code.
    if image is None:
        return "Please upload an image before requesting a prediction."

    use_still_pipeline = is_frame == "No"

    if use_still_pipeline:
        pil_image = Image.fromarray(image).convert("RGB")
        batch = image_transform(pil_image).unsqueeze(0).to(device).float()
    else:
        # NOTE(review): Gradio's numpy images are documented as RGB, so this
        # swap presumably hands the model BGR data -- confirm against how
        # the model was trained before changing it.
        rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        resized_frame = cv2.resize(rgb_frame, (224, 224))
        scaled_frame = resized_frame / 255.0
        batch = torch.from_numpy(np.expand_dims(scaled_frame, axis=0))
        # Reorder [batch, height, width, channels] -> [batch, channels,
        # height, width] as expected by the network.
        batch = batch.to(device).float().permute(0, 3, 1, 2)

    with torch.no_grad():
        prediction = model(batch)

    # Assumes the model returns at least two scores per sample -- TODO
    # confirm against the RetinaNet definition in model.py.
    p_true = abs(prediction[0][0])
    p_false = abs(prediction[0][1])

    if use_still_pipeline:
        accepted = p_true > 0.7
    else:
        # NOTE(review): the frame pipeline accepts on a LOW first score
        # (and then reports that low score as the confidence). Kept as in
        # the original; verify this is the intended decision rule.
        accepted = p_true < 0.4

    if accepted:
        result, confidence = "Accepted", float(p_true)
    else:
        result, confidence = "Rejected", float(p_false)

    return f"Result: {result}, Confidence: {confidence:.2f}"
# Create the Gradio interface: an image upload next to the result textbox,
# a radio button choosing the preprocessing pipeline, and a Predict button.
with gr.Blocks() as demo:
    gr.Markdown("# RetinaNet Model Prediction")
    with gr.Row():
        image_input = gr.Image(label="Upload Image", type="numpy")
        output_text = gr.Textbox(label="Prediction Result")
    is_frame_radio = gr.Radio(
        choices=["Yes", "No"],  # Options for the radio button
        label="Is this a frame from a video?",  # Label for the radio button
        # The default must be one of ``choices``. "No" is the value that
        # predict_image treats as "not a video frame".
        value="No",
    )
    predict_button = gr.Button("Predict")
    # Run predict_image on the uploaded image and the radio selection, and
    # show the returned string in the result textbox.
    predict_button.click(
        predict_image,
        inputs=[image_input, is_frame_radio],
        outputs=output_text,
    )

# Launch the app
demo.launch()