sohamnk committed on
Commit 10deabd · verified · 1 Parent(s): 708c63e

changing segment_guided_object

Files changed (1): app.py (+41 -25)
app.py CHANGED
@@ -59,7 +59,7 @@ model_dinov2 = AutoModel.from_pretrained(dinov2_model_id).to(device)
 print("✅ DINOv2 model loaded.")
 
 print("...Loading Grounding DINO model for segmentation...")
-gnd_model_id = "IDEA-Research/grounding-dino-base"
+gnd_model_id = "IDEA-Research/grounding-dino-base"  # Kept base as you didn't specify changing this
 processor_gnd = AutoGndProcessor.from_pretrained(gnd_model_id)
 model_gnd = AutoModelForZeroShotObjectDetection.from_pretrained(gnd_model_id).to(device)
 print("✅ Grounding DINO model loaded.")
@@ -112,46 +112,62 @@ def jaccard_similarity(set1, set2):
     return len(intersection) / len(union)
 
 def segment_guided_object(image: Image.Image, object_label: str, colors: list = []) -> Image.Image:
-    # --- UPDATED: Create a more descriptive prompt using colors ---
+    """
+    Finds and segments ALL instances of an object based on a text label and colors,
+    returning the original image with the detected objects segmented with transparency.
+    """
+    # Create a more descriptive prompt using colors, as per your new app's logic
     color_str = " ".join(c.lower() for c in colors if c)
     if color_str:
         prompt = f"a {color_str} {object_label}."
     else:
         prompt = f"a {object_label}."
 
-    print(f" [Segment] Using prompt: '{prompt}'")
+    print(f" [Segment] Using prompt: '{prompt}' for segmentation.")
     image_rgb = image.convert("RGB")
     image_np = np.array(image_rgb)
-    h, w = image_np.shape[:2]
+    height, width = image_np.shape[:2]
 
+    # Grounding DINO detection
     inputs = processor_gnd(images=image_rgb, text=prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model_gnd(**inputs)
-    results = processor_gnd.post_process_grounded_object_detection(
-        outputs, inputs.input_ids, threshold=0.5, text_threshold=0.5, target_sizes=[(h, w)]
-    )
+
+    # Process results with a threshold
+    results = processor_gnd.post_process_grounded_object_detection(
+        outputs, inputs.input_ids, threshold=0.35, text_threshold=0.5, target_sizes=[(height, width)]
+    )
 
     if not results or len(results[0]['boxes']) == 0:
-        print(f" [Segment] ⚠ Warning: Could not detect object with Grounding DINO. Using full image.")
-        return image_rgb
+        print(f" [Segment] ⚠ Warning: Could not detect '{object_label}' with GroundingDINO. Returning original image.")
+        # Return the original RGB image converted to RGBA with a full alpha channel
+        return Image.fromarray(np.concatenate([image_np, np.full((height, width, 1), 255, dtype=np.uint8)], axis=-1), 'RGBA')
+
+    boxes = results[0]['boxes']
+    scores = results[0]['scores']
+    print(f" [Segment] ✅ Found {len(boxes)} potential object(s) with confidence scores: {[round(s.item(), 2) for s in scores]}")
 
-    print(f" [Segment] ✅ Object detected successfully.")
-    box = results[0]['boxes'][0].cpu().numpy()
+    # Set image for SAM
     sam_predictor.set_image(image_np)
-    masks, _, _ = sam_predictor.predict(box=box, multimask_output=False)
-    mask = masks[0]
-    image_rgba = np.concatenate([image_np, np.full((h, w, 1), 255, dtype=np.uint8)], axis=-1)
-    image_rgba[:, :, 3] = mask * 255
-    segmented_image = Image.fromarray(image_rgba, 'RGBA')
-
-    true_points = np.argwhere(mask)
-    if true_points.size > 0:
-        top_left = true_points.min(axis=0)
-        bottom_right = true_points.max(axis=0)
-        bbox = (top_left[1], top_left[0], bottom_right[1], bottom_right[0])
-        segmented_image = segmented_image.crop(bbox)
-
-    return segmented_image
+
+    # Initialize an empty mask to combine all detections
+    combined_mask = np.zeros((height, width), dtype=np.uint8)
+
+    # Predict masks for all detected boxes and combine them
+    for box in boxes:
+        box = box.cpu().numpy().astype(int)
+        masks, _, _ = sam_predictor.predict(box=box, multimask_output=False)
+        combined_mask = np.bitwise_or(combined_mask, masks[0])  # Combine masks
+
+    print(" [Segment] Combined masks for all detected objects.")
+
+    # Create an RGBA image where the background is transparent outside the combined mask
+    object_rgba = np.zeros((height, width, 4), dtype=np.uint8)
+    object_rgba[:, :, :3] = image_np  # Copy original RGB
+    object_rgba[:, :, 3] = combined_mask * 255  # Apply the combined mask as alpha channel
+
+    return Image.fromarray(object_rgba, 'RGBA')
 
 def upload_to_uploadcare(image: Image.Image) -> str:
     if not UPLOADCARE_PUBLIC_KEY:
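The core of this change is the loop over boxes: instead of cropping out the first detection, the per-box SAM masks are unioned into a single alpha channel. A minimal sketch of that compositing step in isolation, with synthetic boolean masks standing in for SAM output (numpy and Pillow only; the shapes and mask regions are made up for illustration):

import numpy as np
from PIL import Image

# Synthetic stand-ins for SAM output: two boolean masks over a 4x4 image.
height, width = 4, 4
mask_a = np.zeros((height, width), dtype=bool)
mask_b = np.zeros((height, width), dtype=bool)
mask_a[0:2, 0:2] = True   # "first detected object"
mask_b[2:4, 2:4] = True   # "second detected object"

# Union the per-detection masks, as the loop over `boxes` does above.
combined_mask = np.zeros((height, width), dtype=np.uint8)
for mask in (mask_a, mask_b):
    combined_mask = np.bitwise_or(combined_mask, mask)  # bool promotes to uint8 (0/1)

# Build an RGBA image: RGB copied from the source, alpha = mask * 255,
# so everything outside the union is fully transparent.
image_np = np.full((height, width, 3), 128, dtype=np.uint8)  # dummy gray image
object_rgba = np.zeros((height, width, 4), dtype=np.uint8)
object_rgba[:, :, :3] = image_np
object_rgba[:, :, 3] = combined_mask * 255

cutout = Image.fromarray(object_rgba, "RGBA")
print(cutout.getextrema())  # per-band (min, max); the alpha band spans (0, 255)

Because the alpha channel is the mask union, downstream consumers get one full-size RGBA cutout covering every detected instance, rather than a crop of the first box as in the previous revision.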
 
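Finally, a hypothetical end-to-end call against this revision of app.py; the image path, label, and color below are placeholders, not part of the commit:

from PIL import Image

# Hypothetical inputs: "photo.jpg", "handbag", and "red" are illustrative only.
source = Image.open("photo.jpg")
cutout = segment_guided_object(source, object_label="handbag", colors=["red"])

# The function now returns an RGBA image the same size as the input,
# transparent everywhere except the detected (possibly multiple) objects.
cutout.save("handbag_cutout.png")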