Spaces:

mostlycached
/

smart-cropper

Build error

App Files Files Community

mostlycached committed on Apr 28

Commit

7bab192

verified ·

1 Parent(s): 19af1ed

Update app.py

Browse files

Files changed (1) hide show

app.py +221 -170

app.py CHANGED Viewed

@@ -1,197 +1,248 @@
 import gradio as gr
 import numpy as np
 import torch
-from PIL import Image
 import cv2
 from transformers import SamModel, SamProcessor
-# Set up device
-device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 # Load SAM model and processor
-print("Loading SAM model...")
-sam_model = SamModel.from_pretrained("facebook/sam-vit-base").to(device)
-sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
-def smart_crop(image, target_ratio):
     """
-    Intelligently crop an image to a target aspect ratio using SAM to preserve important content
-    Args:
-        image: Input image as numpy array
-        target_ratio: Desired width/height ratio
-    Returns:
-        Cropped image
     """
-    # Convert image to RGB if needed
-    if len(image.shape) == 2:
-        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
-    elif image.shape[2] == 4:
-        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
-    # Get current dimensions and ratio
-    height, width = image.shape[:2]
-    current_ratio = width / height
-    # Convert to PIL Image for processor
-    pil_image = Image.fromarray(image)
-    # Generate input points (grid across the image)
-    points_per_side = 32
-    x_points = np.linspace(0, width, points_per_side)
-    y_points = np.linspace(0, height, points_per_side)
-    points = []
-    labels = []
-    for x in x_points:
-        for y in y_points:
-            points.append([x, y])
-            labels.append(1)  # 1 means foreground
-    # Process inputs
-    inputs = sam_processor(pil_image, input_points=[points], input_labels=[labels], return_tensors="pt")
-    inputs = {k: v.to(device) for k, v in inputs.items()}
-    # Generate masks
-    with torch.no_grad():
-        outputs = sam_model(**inputs)
-    # Get masks from model output
-    masks = outputs.pred_masks.squeeze().cpu().numpy()
-    # Combine all masks to get regions of interest
-    combined_mask = np.any(masks, axis=0)
-    # Find the bounding box of important content
-    y_indices, x_indices = np.where(combined_mask)
-    if len(x_indices) == 0 or len(y_indices) == 0:
-        # If no important content detected, use center crop
-        return center_crop(image, target_ratio)
-    x_min, x_max = x_indices.min(), x_indices.max()
-    y_min, y_max = y_indices.min(), y_indices.max()
-    # Calculate new dimensions to match target ratio while including important content
-    content_width = x_max - x_min
-    content_height = y_max - y_min
-    if target_ratio > current_ratio:
-        # Need wider image
-        new_width = min(width, int(height * target_ratio))
-        new_height = height
     else:
-        # Need taller image
-        new_width = width
-        new_height = min(height, int(width / target_ratio))
-    # Calculate crop coordinates ensuring important content is included
-    x_center = (x_min + x_max) // 2
-    y_center = (y_min + y_max) // 2
-    x1 = max(0, x_center - new_width // 2)
-    x2 = min(width, x1 + new_width)
-    y1 = max(0, y_center - new_height // 2)
-    y2 = min(height, y1 + new_height)
-    # Adjust if crop goes out of bounds
-    if x1 == 0:
-        x2 = new_width
-    if x2 == width:
-        x1 = width - new_width
-    if y1 == 0:
-        y2 = new_height
-    if y2 == height:
-        y1 = height - new_height
-    return image[y1:y2, x1:x2]
-def center_crop(image, target_ratio):
-    """Fallback center crop if no important content is detected"""
-    height, width = image.shape[:2]
-    current_ratio = width / height
-    if target_ratio > current_ratio:
-        new_width = width
-        new_height = int(width / target_ratio)
     else:
-        new_height = height
-        new_width = int(height * target_ratio)
-    x1 = (width - new_width) // 2
-    y1 = (height - new_height) // 2
-    return image[y1:y1+new_height, x1:x1+new_width]
-ASPECT_RATIOS = {
-    "Square (1:1)": 1.0,
-    "Landscape (16:9)": 16/9,
-    "Landscape (4:3)": 4/3,
-    "Portrait (3:4)": 3/4,
-    "Portrait (4:5)": 4/5,
-    "Portrait (9:16)": 9/16,
-    "Custom": "custom"
-}
-def process_image(input_image, ratio_choice, custom_ratio=1.0):
-    """Process the input image and return the cropped result"""
-    # Convert to numpy array if needed
-    if isinstance(input_image, str):
-        input_image = np.array(Image.open(input_image))
-    # Get target ratio based on selection
-    target_ratio = float(custom_ratio) if ratio_choice == "Custom" else ASPECT_RATIOS[ratio_choice]
-    # Perform smart cropping
-    result = smart_crop(input_image, target_ratio)
-    return result
-# Create Gradio interface
-with gr.Blocks(title="Smart Image Cropper") as iface:
-    gr.Markdown("# Smart Image Cropper")
-    gr.Markdown("Upload an image and choose the desired aspect ratio. The system will intelligently crop the image while preserving important content.")
-    with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(type="numpy", label="Input Image")
-            ratio_choice = gr.Dropdown(
-                choices=list(ASPECT_RATIOS.keys()),
-                value="Square (1:1)",
-                label="Aspect Ratio Preset"
-            )
-            custom_ratio = gr.Number(
-                label="Custom Aspect Ratio (width/height)",
-                value=1.0,
-                visible=False
-            )
-            def toggle_custom_ratio(choice):
-                return gr.Number.update(visible=choice == "Custom")
-            ratio_choice.change(fn=toggle_custom_ratio, inputs=ratio_choice, outputs=custom_ratio)
-            process_btn = gr.Button("Process Image")
-        with gr.Column():
-            output_image = gr.Image(type="numpy", label="Cropped Result")
-    process_btn.click(
-        fn=process_image,
-        inputs=[input_image, ratio_choice, custom_ratio],
-        outputs=output_image
-    )
-    # Add examples
-    gr.Examples(
-        examples=[
-            ["example_image.jpg", "Square (1:1)"],
-            ["example_image.jpg", "Landscape (16:9)"],
-            ["example_image.jpg", "Portrait (4:5)"]
-        ],
-        inputs=[input_image, ratio_choice]
-    )
-)
-# Launch the app
 if __name__ == "__main__":
-    iface.launch()

+import os
 import gradio as gr
 import numpy as np
 import torch
 import cv2
+from PIL import Image
+import matplotlib.pyplot as plt
 from transformers import SamModel, SamProcessor
+import warnings
+warnings.filterwarnings("ignore")
+# Prefer the GPU when one is present; otherwise run inference on the CPU
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
 print(f"Using device: {device}")
 # Load SAM model and processor
+# (one checkpoint id is shared so the processor and the weights always match)
+model_id = "facebook/sam-vit-base"
+processor = SamProcessor.from_pretrained(model_id)
+model = SamModel.from_pretrained(model_id)
+model = model.to(device)
+def get_sam_mask(image, points=None):
     """
+    Segment `image` with SAM and return one binary mask.
+
+    Args:
+        image: PIL image; converted to RGB when it is in another mode.
+        points: Optional list of [x, y] pixel coordinates used as the point
+            prompt. When None, SAM is run without any prompt.
+
+    Returns:
+        2-D uint8 numpy array holding 255 inside the selected mask and 0
+        elsewhere. If SAM yields no candidate, the whole image is marked.
     """
+    # Convert to RGB if needed
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    # Both modes run the same pipeline; only the point prompt is optional
+    processor_kwargs = {"images": image, "return_tensors": "pt"}
+    if points is not None:
+        processor_kwargs["input_points"] = [points]
+    inputs = processor(**processor_kwargs).to(device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Scale the predictions back to the original image size. Per the
+    # transformers SAM docs, [0][0] picks the first image and its first prompt,
+    # leaving one candidate mask per row.
+    masks = processor.image_processor.post_process_masks(
+        outputs.pred_masks.cpu(),
+        inputs["original_sizes"].cpu(),
+        inputs["reshaped_input_sizes"].cpu()
+    )[0][0].numpy()
+    if masks.shape[0] == 0:
+        # If no masks found, return full image mask
+        return np.ones((image.height, image.width), dtype=np.uint8) * 255
+    if points is None:
+        # Without a prompt, keep the candidate that covers the largest area
+        best_idx = int(np.argmax([np.sum(mask) for mask in masks]))
     else:
+        # With a prompt, keep the candidate SAM itself rates best instead of
+        # always taking index 0. The reshape tolerates an optional
+        # per-prompt dimension in `iou_scores`.
+        scores = outputs.iou_scores.reshape(-1, masks.shape[0])[0]
+        best_idx = int(scores.argmax())
+    return masks[best_idx].astype(np.uint8) * 255
+def find_optimal_crop(image, mask, target_aspect_ratio):
+    """
+    Find the optimal crop that preserves important content based on the mask
+
+    Args:
+        image: Not used by the search; kept so existing callers keep working.
+        mask: 2-D numpy array whose non-zero pixels mark content to preserve.
+        target_aspect_ratio: Desired width / height of the crop.
+
+    Returns:
+        (left, top, right, bottom) tuple of ints describing the window with
+        the largest mask sum. Ties go to the first window in top-to-bottom,
+        left-to-right order.
+    """
+    h, w = mask.shape
+    # Calculate target dimensions (clamped so the window is never empty and
+    # never larger than the image)
+    if target_aspect_ratio > w / h:
+        # Target is wider than original
+        target_h = min(h, max(1, int(w / target_aspect_ratio)))
+        target_w = w
     else:
+        # Target is taller than original
+        target_h = h
+        target_w = min(w, max(1, int(h * target_aspect_ratio)))
+    # Summed-area table: any window sum becomes four lookups instead of a
+    # fresh pass over the window's pixels
+    integral = np.zeros((h + 1, w + 1), dtype=np.int64)
+    integral[1:, 1:] = mask.astype(np.int64).cumsum(axis=0).cumsum(axis=1)
+    # Step size for searching (can be adjusted for performance)
+    step = max(1, min(h, w) // 50)
+    # Always include the last valid offset; a plain stepped range skips it
+    # whenever the slack is not a multiple of `step`, which could push
+    # content touching the right/bottom edge out of every candidate window
+    max_y = h - target_h
+    max_x = w - target_w
+    ys = np.unique(np.append(np.arange(0, max_y + 1, step), max_y))
+    xs = np.unique(np.append(np.arange(0, max_x + 1, step), max_x))
+    top, bottom = ys[:, None], ys[:, None] + target_h
+    left, right = xs[None, :], xs[None, :] + target_w
+    scores = (
+        integral[bottom, right]
+        - integral[top, right]
+        - integral[bottom, left]
+        + integral[top, left]
+    )
+    # argmax returns the first maximum in row-major order, matching a
+    # y-outer / x-inner scan that only replaces on a strictly better score
+    row, col = np.unravel_index(np.argmax(scores), scores.shape)
+    x, y = int(xs[col]), int(ys[row])
+    return (x, y, x + target_w, y + target_h)
+def smart_crop(input_image, target_aspect_ratio, point_x=None, point_y=None):
+    """
+    Main function to perform smart cropping
+
+    Args:
+        input_image: Numpy array or PIL image to crop; None short-circuits.
+        target_aspect_ratio: Desired width / height of the crop.
+        point_x, point_y: Optional pixel coordinates of a point of interest
+            used as the SAM prompt. Both must be given and non-negative,
+            otherwise SAM runs without a prompt.
+
+    Returns:
+        (cropped PIL image, path to a PNG showing original / mask / crop),
+        or (None, None) when no image was supplied, so callers can always
+        unpack two values.
+    """
+    import tempfile
+
+    if input_image is None:
+        return None, None
+    # Open image and convert to RGB
+    pil_image = Image.fromarray(input_image) if isinstance(input_image, np.ndarray) else input_image
+    if pil_image.mode != "RGB":
+        pil_image = pil_image.convert("RGB")
+    # Generate mask using SAM
+    points = None
+    # Coordinate 0 is a real pixel (first row/column), so accept it
+    if point_x is not None and point_y is not None and point_x >= 0 and point_y >= 0:
+        points = [[point_x, point_y]]
+    mask = get_sam_mask(pil_image, points)
+    # Calculate the best crop
+    crop_box = find_optimal_crop(pil_image, mask, target_aspect_ratio)
+    # Crop the image
+    cropped_img = pil_image.crop(crop_box)
+    # Visualize the process
+    fig, ax = plt.subplots(1, 3, figsize=(15, 5))
+    try:
+        ax[0].imshow(pil_image)
+        ax[0].set_title("Original Image")
+        ax[0].axis("off")
+        ax[1].imshow(mask, cmap='gray')
+        ax[1].set_title("SAM Segmentation Mask")
+        ax[1].axis("off")
+        ax[2].imshow(cropped_img)
+        ax[2].set_title(f"Smart Cropped ({target_aspect_ratio:.2f})")
+        ax[2].axis("off")
+        fig.tight_layout()
+        # Use a unique file per call: a fixed name would let concurrent
+        # requests overwrite each other's visualization. The file is left on
+        # disk on purpose because the caller reads it after this returns.
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+            vis_path = tmp.name
+        fig.savefig(vis_path)
+    finally:
+        # Close this specific figure even if plotting or saving fails
+        plt.close(fig)
+    return cropped_img, vis_path
+def aspect_ratio_options(choice):
+    """Translate a dropdown label into its width/height ratio (16:9 when unknown)"""
+    # (label, width, height) for every preset offered in the UI
+    presets = (
+        ("16:9 (Landscape)", 16, 9),
+        ("9:16 (Portrait)", 9, 16),
+        ("4:3 (Standard)", 4, 3),
+        ("3:4 (Portrait)", 3, 4),
+        ("1:1 (Square)", 1, 1),
+        ("21:9 (Ultrawide)", 21, 9),
+        ("2:3 (Portrait)", 2, 3),
+        ("3:2 (Landscape)", 3, 2),
+    )
+    ratios = {label: width / height for label, width, height in presets}
+    if choice in ratios:
+        return ratios[choice]
+    # Unrecognised labels fall back to widescreen
+    return 16/9
+def process_image(input_image, aspect_ratio_choice, point_x=None, point_y=None):
+    """
+    Resolve the chosen aspect-ratio label and run the smart crop.
+
+    Returns a (cropped image, visualization path) pair, or (None, None)
+    when no image has been provided.
+    """
+    # Nothing to crop: leave both outputs empty
+    if input_image is None:
+        return None, None
+    # Turn the dropdown label into a numeric width/height ratio
+    ratio = aspect_ratio_options(aspect_ratio_choice)
+    # Crop around the segmented content
+    cropped, visualization_path = smart_crop(input_image, ratio, point_x, point_y)
+    return cropped, visualization_path
+def create_app():
+    """
+    Build the Gradio Blocks interface for the smart cropper.
+
+    The layout has an image upload, an aspect-ratio dropdown and a button on
+    the left, and the cropped result plus a process visualization on the
+    right. Clicking the uploaded image stores the clicked pixel as the point
+    of interest; the stored point is cleared whenever the image changes.
+
+    Returns:
+        The assembled `gr.Blocks` app (not yet launched).
+    """
+    with gr.Blocks(title="Smart Image Cropper using SAM") as app:
+        gr.Markdown("# Smart Image Cropper using Segment Anything Model (SAM)")
+        gr.Markdown("""
+        Upload an image and choose your target aspect ratio. The app will use the Segment Anything Model (SAM)
+        to identify important content and crop intelligently to preserve it.
+        Optionally, you can click on the uploaded image to specify a point of interest.
+        """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                input_image = gr.Image(type="pil", label="Upload Image")
+                aspect_ratio = gr.Dropdown(
+                    choices=[
+                        "16:9 (Landscape)",
+                        "9:16 (Portrait)",
+                        "4:3 (Standard)",
+                        "3:4 (Portrait)",
+                        "1:1 (Square)",
+                        "21:9 (Ultrawide)",
+                        "2:3 (Portrait)",
+                        "3:2 (Landscape)"
+                    ],
+                    value="16:9 (Landscape)",
+                    label="Target Aspect Ratio"
+                )
+                # Per-session [x, y] of the last click; [None, None] = no point
+                point_coords = gr.State(value=[None, None])
+                def update_coords(img, evt: gr.SelectData):
+                    return [evt.index[0], evt.index[1]]
+                def reset_coords():
+                    return [None, None]
+                input_image.select(update_coords, inputs=[input_image], outputs=[point_coords])
+                # A click only makes sense for the image it was made on, so
+                # drop the stored point when the image is replaced or cleared;
+                # otherwise a stale point would prompt SAM on the new image
+                input_image.change(reset_coords, inputs=None, outputs=[point_coords])
+                process_btn = gr.Button("Process Image")
+            with gr.Column(scale=2):
+                output_image = gr.Image(type="pil", label="Cropped Result")
+                visualization = gr.Image(type="filepath", label="Process Visualization")
+        def run_crop(img, ratio, coords):
+            # Unpack the stored [x, y] pair into process_image's arguments
+            return process_image(img, ratio, coords[0], coords[1])
+        process_btn.click(
+            fn=run_crop,
+            inputs=[input_image, aspect_ratio, point_coords],
+            outputs=[output_image, visualization]
+        )
+        gr.Markdown("""
+        ## How It Works
+        1. The Segment Anything Model (SAM) analyzes your image to identify the important content
+        2. The app finds the optimal crop window that maximizes the preservation of that content
+        3. The image is cropped to your desired aspect ratio while keeping the important parts
+        ## Tips
+        - For better results with specific subjects, click on the important object in the image
+        - Try different aspect ratios to see how the model adapts the cropping
+        """)
+    return app
+# Create and launch the app
+demo = create_app()
+# The server starts both when this file is run as a script (local testing)
+# and when it is imported (Hugging Face Spaces), so the launch is
+# unconditional
+demo.launch()