"""
Interface for HuggingFace deployment
"""

import gradio as gr
import numpy as np
from src.model import AffordanceModel
from src.utils.argument_utils import get_yaml_config
import cv2

print("Loading config...")
config = get_yaml_config("checkpoints/gemini/config.yaml")
print("Building model...")
model = AffordanceModel(config)
print("Model built successfully!")

def predict(image, text):
    """
    Gradio inference function
    Args:
        image: PIL Image (Gradio's default image input format)
        text: str
    Returns:
        visualization of the heatmap
    """
    # Convert the PIL image to an RGB numpy array (dropping any alpha channel)
    image = np.array(image.convert("RGB"))
    
    # Run model inference
    heatmap = model.inference(image, text)  # Returns (H, W) array
    
    # Visualize the heatmap: scale to 0-255 and apply a colormap
    # (assumes the model returns values in [0, 1]; clip to be safe)
    heatmap_vis = (np.clip(heatmap, 0.0, 1.0) * 255).astype(np.uint8)
    heatmap_colored = cv2.applyColorMap(heatmap_vis, cv2.COLORMAP_JET)
    heatmap_colored = cv2.cvtColor(heatmap_colored, cv2.COLOR_BGR2RGB)
    
    return heatmap_colored

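# Optional helper (not wired into the interface below): a minimal sketch of how the
# colored heatmap could be blended over the input image for easier inspection.
# The alpha weighting and the RGB-only slice are illustrative assumptions.
def overlay_heatmap(image, heatmap, alpha=0.5):
    """Blend a (H, W) heatmap in [0, 1] onto an RGB uint8 image."""
    heatmap_vis = (np.clip(heatmap, 0.0, 1.0) * 255).astype(np.uint8)
    heatmap_colored = cv2.applyColorMap(heatmap_vis, cv2.COLORMAP_JET)
    heatmap_colored = cv2.cvtColor(heatmap_colored, cv2.COLOR_BGR2RGB)
    # Resize in case the heatmap resolution differs from the image resolution
    heatmap_colored = cv2.resize(heatmap_colored, (image.shape[1], image.shape[0]))
    return cv2.addWeighted(image[:, :, :3], 1.0 - alpha, heatmap_colored, alpha, 0.0)
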
# Create Gradio interface
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", label="Input Image"),  # Accepts uploaded images
        gr.Textbox(label="Text Query", placeholder="Enter text description...")
    ],
    outputs=gr.Image(label="Affordance Heatmap"),
    title="Affordance Detection",
    description="Upload an image and provide a text query to detect affordances.",
    examples=[
        ["test.png", "rim"]  # Add your test image and query
    ]
)

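# Direct-call sketch (assumed usage, bypassing the web UI); the image path and query
# mirror the `examples` entry above:
#
#   from PIL import Image
#   heatmap_rgb = predict(Image.open("test.png"), "rim")  # -> (H, W, 3) uint8 RGB
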
if __name__ == "__main__":
    demo.launch()