Spaces:

mostlycached
/

aspect-ratio-adjuster

Sleeping

App Files Files Community

mostlycached committed 7 days ago

Commit

b344378

verified ·

1 Parent(s): 28889af

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -80

app.py CHANGED Viewed

@@ -2,11 +2,10 @@ import gradio as gr
 import torch
 import numpy as np
 import cv2
-from PIL import Image
 from transformers import SamModel, SamProcessor
 from diffusers import StableDiffusionInpaintPipeline
-import requests
-from io import BytesIO
 # Set up device
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -24,27 +23,35 @@ inpaint_model = StableDiffusionInpaintPipeline.from_pretrained(
     torch_dtype=torch.float16 if device == "cuda" else torch.float32
 ).to(device)
-def get_sam_mask(image, points=None):
-    """Get segmentation mask using SAM model"""
-    if points is None:
-        # If no points provided, use center point
-        height, width = image.shape[:2]
-        points = [[[width // 2, height // 2]]]
-    # Convert to PIL if needed
-    if not isinstance(image, Image.Image):
-        image_pil = Image.fromarray(image)
     else:
-        image_pil = image
-    # Process the image and point prompts
     inputs = sam_processor(
-        images=image_pil,
         input_points=points,
         return_tensors="pt"
     ).to(device)
-    # Generate mask
     with torch.no_grad():
         outputs = sam_model(**inputs)
         masks = sam_processor.image_processor.post_process_masks(
@@ -53,86 +60,123 @@ def get_sam_mask(image, points=None):
             inputs["reshaped_input_sizes"].cpu()
         )
-    # Get the mask
-    mask = masks[0][0].numpy()
-    return mask
-def adjust_aspect_ratio(image, mask, target_ratio, prompt=""):
     """Adjust image to target aspect ratio while preserving important content"""
     # Convert PIL to numpy if needed
     if isinstance(image, Image.Image):
         image_np = np.array(image)
     else:
         image_np = image
     h, w = image_np.shape[:2]
     current_ratio = w / h
     target_ratio_value = eval(target_ratio.replace(':', '/'))
-    # Determine if we need to add width or height
     if current_ratio < target_ratio_value:
         # Need to add width (outpaint left/right)
         new_width = int(h * target_ratio_value)
         new_height = h
-        # Calculate padding
-        pad_width = new_width - w
-        pad_left = pad_width // 2
-        pad_right = pad_width - pad_left
-        # Create canvas with padding
-        result = np.zeros((new_height, new_width, 3), dtype=np.uint8)
-        # Place original image in the center
-        result[:, pad_left:pad_left+w, :] = image_np
-        # Create mask for inpainting
-        inpaint_mask = np.ones((new_height, new_width), dtype=np.uint8) * 255
-        inpaint_mask[:, pad_left:pad_left+w] = 0
-        # Perform outpainting using Stable Diffusion
-        result = outpaint_regions(result, inpaint_mask, prompt)
     else:
         # Need to add height (outpaint top/bottom)
         new_width = w
         new_height = int(w / target_ratio_value)
-        # Calculate padding
-        pad_height = new_height - h
-        pad_top = pad_height // 2
-        pad_bottom = pad_height - pad_top
-        # Create canvas with padding
-        result = np.zeros((new_height, new_width, 3), dtype=np.uint8)
-        # Place original image in the center
-        result[pad_top:pad_top+h, :, :] = image_np
-        # Create mask for inpainting
-        inpaint_mask = np.ones((new_height, new_width), dtype=np.uint8) * 255
-        inpaint_mask[pad_top:pad_top+h, :] = 0
-        # Perform outpainting using Stable Diffusion
-        result = outpaint_regions(result, inpaint_mask, prompt)
-    return result
-def outpaint_regions(image, mask, prompt):
-    """Use Stable Diffusion to outpaint masked regions"""
-    # Convert to PIL images
-    image_pil = Image.fromarray(image)
     mask_pil = Image.fromarray(mask)
-    # If prompt is empty, use a generic one
     if not prompt or prompt.strip() == "":
-        prompt = "seamless extension of the image, same style, same scene"
-    # Generate the outpainting
     output = inpaint_model(
         prompt=prompt,
-        image=image_pil,
         mask_image=mask_pil,
         guidance_scale=7.5,
-        num_inference_steps=25
     ).images[0]
     return np.array(output)
@@ -140,7 +184,7 @@ def outpaint_regions(image, mask, prompt):
 def process_image(input_image, target_ratio="16:9", prompt=""):
     """Main processing function for the Gradio interface"""
     try:
-        # Convert from Gradio format
         if isinstance(input_image, dict) and 'image' in input_image:
             image = input_image['image']
         else:
@@ -152,11 +196,8 @@ def process_image(input_image, target_ratio="16:9", prompt=""):
         else:
             image_np = image
-        # Get SAM mask to identify important regions
-        mask = get_sam_mask(image_np)
         # Adjust aspect ratio while preserving content
-        result = adjust_aspect_ratio(image_np, mask, target_ratio, prompt)
         # Convert result to PIL for visualization
         result_pil = Image.fromarray(result)
@@ -168,9 +209,9 @@ def process_image(input_image, target_ratio="16:9", prompt=""):
         return None
 # Create the Gradio interface
-with gr.Blocks(title="Automatic Aspect Ratio Adjuster") as demo:
-    gr.Markdown("# Automatic Aspect Ratio Adjuster")
-    gr.Markdown("Upload an image, choose your target aspect ratio, and let the AI adjust it while preserving important content.")
     with gr.Row():
         with gr.Column():
@@ -178,7 +219,7 @@ with gr.Blocks(title="Automatic Aspect Ratio Adjuster") as demo:
             with gr.Row():
                 aspect_ratio = gr.Dropdown(
-                    choices=["16:9", "4:3", "1:1", "9:16", "3:4"],
                     value="16:9",
                     label="Target Aspect Ratio"
                 )
@@ -201,9 +242,9 @@ with gr.Blocks(title="Automatic Aspect Ratio Adjuster") as demo:
     gr.Markdown("""
     ## How it works
-    1. SAM (Segment Anything Model) identifies important content in your image
-    2. The algorithm calculates how to adjust the aspect ratio while preserving this content
-    3. Stable Diffusion fills in the new areas with AI-generated content that matches the original image
     ## Tips
     - For best results, provide a descriptive prompt that matches the scene

 import torch
 import numpy as np
 import cv2
+from PIL import Image, ImageOps
 from transformers import SamModel, SamProcessor
 from diffusers import StableDiffusionInpaintPipeline
+import os
 # Set up device
 device = "cuda" if torch.cuda.is_available() else "cpu"
     torch_dtype=torch.float16 if device == "cuda" else torch.float32
 ).to(device)
+def get_importance_map(image, points=None):
+    """Get importance map using SAM model to identify key content regions"""
+    # Convert to numpy if needed
+    if isinstance(image, Image.Image):
+        image_np = np.array(image)
     else:
+        image_np = image
+    h, w = image_np.shape[:2]
+    # If no points provided, use grid sampling to identify important areas
+    if points is None:
+        # Create a grid of points to sample the image
+        x_points = np.linspace(w//4, 3*w//4, 5, dtype=int)
+        y_points = np.linspace(h//4, 3*h//4, 5, dtype=int)
+        grid_points = []
+        for y in y_points:
+            for x in x_points:
+                grid_points.append([x, y])
+        points = [grid_points]
+    # Process image through SAM
     inputs = sam_processor(
+        images=image_np,
         input_points=points,
         return_tensors="pt"
     ).to(device)
+    # Generate masks
     with torch.no_grad():
         outputs = sam_model(**inputs)
         masks = sam_processor.image_processor.post_process_masks(
             inputs["reshaped_input_sizes"].cpu()
         )
+    # Combine all masks to create importance map
+    importance_map = np.zeros((h, w), dtype=np.float32)
+    for i in range(len(masks[0])):
+        importance_map += masks[0][i].numpy().astype(np.float32)
+    # Normalize to 0-1
+    if importance_map.max() > 0:
+        importance_map = importance_map / importance_map.max()
+    return importance_map
def find_optimal_placement(importance_map, original_size, new_size):
    """Choose where to paste the original image inside a larger canvas.

    Nine candidate offsets are considered (the corners, edge midpoints and
    centre of the feasible offset range). The winner is the one that brings
    the importance-weighted centroid of the image closest to the centre of
    the canvas, so the new (outpainted) area is added mostly on the side
    facing away from the important content.

    Args:
        importance_map: 2-D array of shape ``(oh, ow)`` holding non-negative
            per-pixel importance scores for the original image.
        original_size: ``(height, width)`` of the original image.
        new_size: ``(height, width)`` of the target canvas.

    Returns:
        ``(x_offset, y_offset)``: top-left corner of the original image on
        the canvas, as plain ``int`` values.
    """
    oh, ow = original_size
    nh, nw = new_size

    # Centred placement (clamped at 0) is the fallback for every case in
    # which importance cannot guide the decision.
    center_x = max(0, (nw - ow) // 2)
    center_y = max(0, (nh - oh) // 2)

    # The canvas cannot contain the image, or the image is empty.
    # A canvas that is merely *equal* in one dimension is fine: the offset
    # range along that axis simply collapses to {0}.
    if nh < oh or nw < ow or oh <= 0 or ow <= 0:
        return center_x, center_y

    importance = np.asarray(importance_map, dtype=np.float64)

    # Border weighting: content close to the image border counts up to 50%
    # more than content in the middle of the image.
    y_coords, x_coords = np.ogrid[:oh, :ow]
    edge_distance = np.minimum(
        np.minimum(x_coords, ow - 1 - x_coords),
        np.minimum(y_coords, oh - 1 - y_coords),
    )
    max_distance = edge_distance.max()
    if max_distance > 0:
        border_weight = 1.0 - edge_distance / max_distance
    else:
        # Images at most 2 px wide or tall: every pixel is a border pixel.
        border_weight = np.ones_like(edge_distance, dtype=np.float64)
    weighted_importance = importance * (1.0 + 0.5 * border_weight)

    total = weighted_importance.sum()
    if not np.isfinite(total) or total <= 0:
        # Nothing was marked important, so there is no centroid to speak of.
        return center_x, center_y

    # Importance-weighted centroid in image coordinates.
    centroid_x = (weighted_importance.sum(axis=0) * np.arange(ow)).sum() / total
    centroid_y = (weighted_importance.sum(axis=1) * np.arange(oh)).sum() / total

    # Geometric centre of the canvas in pixel coordinates.
    target_x = (nw - 1) / 2.0
    target_y = (nh - 1) / 2.0

    max_x = nw - ow
    max_y = nh - oh
    # The centred offset is listed first so that ties resolve to it
    # (``min`` keeps the first of several equal minima).
    x_candidates = (max_x // 2, 0, max_x)
    y_candidates = (max_y // 2, 0, max_y)
    candidates = [(x, y) for x in x_candidates for y in y_candidates]

    def centroid_miss(offset):
        """Squared distance between the placed centroid and the canvas centre."""
        x, y = offset
        return (x + centroid_x - target_x) ** 2 + (y + centroid_y - target_y) ** 2

    best_x, best_y = min(candidates, key=centroid_miss)
    return int(best_x), int(best_y)
def _parse_aspect_ratio(target_ratio):
    """Turn ``"W:H"``, ``"W/H"`` or a plain number into a positive float ratio."""
    if isinstance(target_ratio, (int, float)):
        value = float(target_ratio)
    else:
        # SECURITY: this string can come from outside the process (UI/API
        # input), so it is parsed explicitly instead of being passed to eval().
        text = str(target_ratio).strip().replace(':', '/')
        width_part, separator, height_part = text.partition('/')
        try:
            if separator:
                value = float(width_part) / float(height_part)
            else:
                value = float(width_part)
        except (ValueError, ZeroDivisionError) as err:
            raise ValueError(f"Invalid aspect ratio: {target_ratio!r}") from err
    if not np.isfinite(value) or value <= 0:
        raise ValueError(f"Invalid aspect ratio: {target_ratio!r}")
    return value


def adjust_aspect_ratio(image, target_ratio, prompt=""):
    """Adjust image to target aspect ratio while preserving important content.

    The original image is pasted onto a larger canvas with the requested
    aspect ratio (position chosen by ``find_optimal_placement`` from the
    importance map) and the empty area is filled by the inpainting pipeline.

    Args:
        image: PIL image or ``uint8`` numpy array (grayscale, RGB or RGBA).
        target_ratio: Ratio such as ``"16:9"`` (width:height) or a number.
        prompt: Text prompt for the outpainted area; a generic prompt is
            used when empty or whitespace-only.

    Returns:
        ``uint8`` RGB numpy array of shape ``(new_height, new_width, 3)``.
        The original pixels are preserved at the chosen placement.

    Raises:
        ValueError: If ``target_ratio`` cannot be parsed or the image is empty.
    """
    # Convert PIL to numpy if needed
    if isinstance(image, Image.Image):
        image_np = np.array(image)
    else:
        image_np = image

    # Remember whether the input carried an alpha channel (used only for the
    # default prompt), then normalise to 3-channel RGB so the image fits the
    # RGB canvas below.
    has_alpha = image_np.ndim == 3 and image_np.shape[2] == 4
    if image_np.ndim != 3 or image_np.shape[2] != 3:
        image_np = np.array(Image.fromarray(image_np).convert("RGB"))

    # Get dimensions
    h, w = image_np.shape[:2]
    if h == 0 or w == 0:
        raise ValueError("Cannot adjust the aspect ratio of an empty image")
    current_ratio = w / h
    target_ratio_value = _parse_aspect_ratio(target_ratio)

    # Calculate new dimensions. max() guards against float rounding producing
    # a canvas one pixel smaller than the image when the ratio already matches.
    if current_ratio < target_ratio_value:
        # Need to add width (outpaint left/right)
        new_width = max(w, int(round(h * target_ratio_value)))
        new_height = h
    else:
        # Need to add height (outpaint top/bottom)
        new_width = w
        new_height = max(h, int(round(w / target_ratio_value)))

    # Already at the target ratio: nothing to outpaint.
    if new_width == w and new_height == h:
        return image_np

    # Generate importance map to identify key regions
    importance_map = get_importance_map(image_np)

    # Find optimal placement based on importance map
    x_offset, y_offset = find_optimal_placement(
        importance_map, (h, w), (new_height, new_width)
    )

    # Create new canvas; mask is 255 where content must be generated and
    # 0 where the original image sits.
    result = np.zeros((new_height, new_width, 3), dtype=np.uint8)
    mask = np.full((new_height, new_width), 255, dtype=np.uint8)
    result[y_offset:y_offset + h, x_offset:x_offset + w] = image_np
    mask[y_offset:y_offset + h, x_offset:x_offset + w] = 0

    # Convert to PIL for inpainting
    result_pil = Image.fromarray(result)
    mask_pil = Image.fromarray(mask)

    # Use default prompt if none provided
    if not prompt or prompt.strip() == "":
        if has_alpha:
            prompt = "seamless extension of the image, same style and content"
        else:
            prompt = "seamless extension of the image, same style, same scene, consistent lighting"

    # Perform outpainting using Stable Diffusion
    output = inpaint_model(
        prompt=prompt,
        image=result_pil,
        mask_image=mask_pil,
        guidance_scale=7.5,
        num_inference_steps=30
    ).images[0]

    # The pipeline may return the image at its own working resolution rather
    # than the canvas size; bring it back to the requested dimensions.
    if output.size != (new_width, new_height):
        output = output.resize((new_width, new_height), Image.LANCZOS)
    output_np = np.array(output.convert("RGB"))

    # Restore the untouched original pixels so that only the new area is
    # AI-generated.
    output_np[y_offset:y_offset + h, x_offset:x_offset + w] = image_np
    return output_np
 def process_image(input_image, target_ratio="16:9", prompt=""):
     """Main processing function for the Gradio interface"""
     try:
+        # Convert from Gradio format if needed
         if isinstance(input_image, dict) and 'image' in input_image:
             image = input_image['image']
         else:
         else:
             image_np = image
         # Adjust aspect ratio while preserving content
+        result = adjust_aspect_ratio(image_np, target_ratio, prompt)
         # Convert result to PIL for visualization
         result_pil = Image.fromarray(result)
         return None
 # Create the Gradio interface
+with gr.Blocks(title="Smart Aspect Ratio Adjuster") as demo:
+    gr.Markdown("# Smart Aspect Ratio Adjuster")
+    gr.Markdown("Upload an image, choose your target aspect ratio, and the AI will adjust it while intelligently preserving important content.")
     with gr.Row():
         with gr.Column():
             with gr.Row():
                 aspect_ratio = gr.Dropdown(
+                    choices=["16:9", "4:3", "1:1", "9:16", "3:4", "2:1", "1:2"],
                     value="16:9",
                     label="Target Aspect Ratio"
                 )
     gr.Markdown("""
     ## How it works
+    1. **Content Analysis**: SAM (Segment Anything Model) identifies important regions in your image
+    2. **Smart Placement**: The algorithm calculates optimal positioning to preserve key content
+    3. **AI Outpainting**: Stable Diffusion fills in new areas with matching content
     ## Tips
     - For best results, provide a descriptive prompt that matches the scene