Spaces:

sohamnk
/

lnf_v2_ai_pipeline

Sleeping

App Files Files Community

sohamnk committed on Aug 31

Commit

ccd38a7

verified ·

1 Parent(s): fdc1498

opencv

Browse files

Files changed (1) hide show

app.py +30 -8

app.py CHANGED Viewed

@@ -115,6 +115,7 @@ def segment_guided_object(image: Image.Image, object_label: str, colors: list =
     """
     Finds and segments ALL instances of an object based on a text label and colors,
     returning the original image with the detected objects segmented with transparency.
     """
     # Create a more descriptive prompt using colors, as per your new app's logic
     color_str = " ".join(c.lower() for c in colors if c)
@@ -122,7 +123,7 @@ def segment_guided_object(image: Image.Image, object_label: str, colors: list =
         prompt = f"a {color_str} {object_label}."
     else:
         prompt = f"a {object_label}."
     print(f"  [Segment] Using prompt: '{prompt}' for segmentation.")
     image_rgb = image.convert("RGB")
     image_np = np.array(image_rgb)
@@ -132,7 +133,7 @@ def segment_guided_object(image: Image.Image, object_label: str, colors: list =
     inputs = processor_gnd(images=image_rgb, text=prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model_gnd(**inputs)
     # Process results with a threshold
     results = processor_gnd.post_process_grounded_object_detection(
         outputs, inputs.input_ids, threshold=0.35, text_threshold=0.5, target_sizes=[(height, width)]
@@ -140,16 +141,15 @@ def segment_guided_object(image: Image.Image, object_label: str, colors: list =
     if not results or len(results[0]['boxes']) == 0:
         print(f"  [Segment] ⚠ Warning: Could not detect '{object_label}' with GroundingDINO. Returning original image.")
-        # Return the original RGB image converted to RGBA with a full alpha channel
         return Image.fromarray(np.concatenate([image_np, np.full((height, width, 1), 255, dtype=np.uint8)], axis=-1), 'RGBA')
     boxes = results[0]['boxes']
     scores = results[0]['scores']
     print(f"  [Segment] ✅ Found {len(boxes)} potential object(s) with confidence scores: {[round(s.item(), 2) for s in scores]}")
     # Set image for SAM
     sam_predictor.set_image(image_np)
     # Initialize an empty mask to combine all detections
     combined_mask = np.zeros((height, width), dtype=np.uint8)
@@ -158,14 +158,36 @@ def segment_guided_object(image: Image.Image, object_label: str, colors: list =
         box = box.cpu().numpy().astype(int)
         masks, _, _ = sam_predictor.predict(box=box, multimask_output=False)
         combined_mask = np.bitwise_or(combined_mask, masks[0]) # Combine masks
     print("  [Segment] Combined masks for all detected objects.")
-    # Create an RGBA image where the background is transparent outside the combined mask
     object_rgba = np.zeros((height, width, 4), dtype=np.uint8)
     object_rgba[:, :, :3] = image_np # Copy original RGB
-    object_rgba[:, :, 3] = combined_mask * 255 # Apply the combined mask as alpha channel
     return Image.fromarray(object_rgba, 'RGBA')

     """
     Finds and segments ALL instances of an object based on a text label and colors,
     returning the original image with the detected objects segmented with transparency.
+    This version includes a hole-filling step to create solid masks.
     """
     # Create a more descriptive prompt using colors, as per your new app's logic
     color_str = " ".join(c.lower() for c in colors if c)
         prompt = f"a {color_str} {object_label}."
     else:
         prompt = f"a {object_label}."
     print(f"  [Segment] Using prompt: '{prompt}' for segmentation.")
     image_rgb = image.convert("RGB")
     image_np = np.array(image_rgb)
     inputs = processor_gnd(images=image_rgb, text=prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model_gnd(**inputs)
     # Process results with a threshold
     results = processor_gnd.post_process_grounded_object_detection(
         outputs, inputs.input_ids, threshold=0.35, text_threshold=0.5, target_sizes=[(height, width)]
     if not results or len(results[0]['boxes']) == 0:
         print(f"  [Segment] ⚠ Warning: Could not detect '{object_label}' with GroundingDINO. Returning original image.")
         return Image.fromarray(np.concatenate([image_np, np.full((height, width, 1), 255, dtype=np.uint8)], axis=-1), 'RGBA')
     boxes = results[0]['boxes']
     scores = results[0]['scores']
     print(f"  [Segment] ✅ Found {len(boxes)} potential object(s) with confidence scores: {[round(s.item(), 2) for s in scores]}")
     # Set image for SAM
     sam_predictor.set_image(image_np)
     # Initialize an empty mask to combine all detections
     combined_mask = np.zeros((height, width), dtype=np.uint8)
         box = box.cpu().numpy().astype(int)
         masks, _, _ = sam_predictor.predict(box=box, multimask_output=False)
         combined_mask = np.bitwise_or(combined_mask, masks[0]) # Combine masks
     print("  [Segment] Combined masks for all detected objects.")
+    # --- START: HOLE FILLING LOGIC ---
+    # This new block will fill any holes within the combined mask.
+    print("  [Segment] Post-processing: Filling holes in the combined mask...")
+    # Find contours. RETR_EXTERNAL retrieves only the extreme outer contours.
+    contours, _ = cv2.findContours(combined_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    # Create a new blank mask to draw the filled contours on.
+    filled_mask = np.zeros_like(combined_mask)
+    if contours:
+        # Draw the detected contours onto the new mask and fill them.
+        # The -1 index means draw all contours, and cv2.FILLED fills them.
+        cv2.drawContours(filled_mask, contours, -1, 255, thickness=cv2.FILLED)
+    else:
+        # If for some reason no contours were found, fall back to the original mask.
+        filled_mask = combined_mask
+    print("  [Segment] ✅ Hole filling complete.")
+    # --- END: HOLE FILLING LOGIC ---
+    # Create an RGBA image where the background is transparent
     object_rgba = np.zeros((height, width, 4), dtype=np.uint8)
     object_rgba[:, :, :3] = image_np # Copy original RGB
+    # Apply the NEW filled mask as the alpha channel
+    object_rgba[:, :, 3] = filled_mask
     return Image.fromarray(object_rgba, 'RGBA')