Spaces:

Agents-MCP-Hackathon
/

ImageAlfred

Sleeping

App Files Files Community

mahan_ym committed on Jun 10

Commit

79b337b

1 Parent(s): 4255d22

Add detection threshold slider and update privacy preservation functions. Add OWLv2 detector.

Browse files

Files changed (3) hide show

src/app.py +14 -3
src/modal_app.py +99 -6
src/tools.py +5 -0

src/app.py CHANGED Viewed

@@ -107,6 +107,14 @@ privacy_preserve_tool = gr.Interface(
             step=1,
             info="Higher values result in stronger blurring.",
         ),
     ],
     outputs=gr.Image(label="Output Image"),
     title="Privacy Preserving Tool",
@@ -114,18 +122,21 @@ privacy_preserve_tool = gr.Interface(
     examples=[
         [
             "https://raw.githubusercontent.com/mahan-ym/ImageAlfred/main/src/assets/examples/test_3.jpg",
-            "license plate.",
             10,
         ],
         [
             "https://raw.githubusercontent.com/mahan-ym/ImageAlfred/main/src/assets/examples/test_8.jpg",
-            "face.",
             15,
         ],
         [
             "https://raw.githubusercontent.com/mahan-ym/ImageAlfred/main/src/assets/examples/test_6.jpg",
-            "face.",
             20,
         ],
     ],
 )

             step=1,
             info="Higher values result in stronger blurring.",
         ),
+        gr.Slider(
+            label="Detection Threshold",
+            minimum=0.01,
+            maximum=0.99,
+            value=0.2,
+            step=0.01,
+            info="Model threshold for detecting objects.",
+        ),
     ],
     outputs=gr.Image(label="Output Image"),
     title="Privacy Preserving Tool",
     examples=[
         [
             "https://raw.githubusercontent.com/mahan-ym/ImageAlfred/main/src/assets/examples/test_3.jpg",
+            "license plate",
             10,
+            0.5,
         ],
         [
             "https://raw.githubusercontent.com/mahan-ym/ImageAlfred/main/src/assets/examples/test_8.jpg",
+            "face",
             15,
+            0.1,
         ],
         [
             "https://raw.githubusercontent.com/mahan-ym/ImageAlfred/main/src/assets/examples/test_6.jpg",
+            "face",
             20,
+            0.1,
         ],
     ],
 )

src/modal_app.py CHANGED Viewed

@@ -39,6 +39,7 @@ image = (
         "numpy",
         "transformers",
         "opencv-contrib-python-headless",
         gpu="A10G",
     )
     .pip_install(
@@ -102,6 +103,47 @@ def prompt_segment(
     volumes={volume_path: volume},
     timeout=60 * 3,
 )
 def sam2(image_pil: Image.Image, boxes: list[np.ndarray]) -> list[dict]:
     import torch
     from sam2.sam2_image_predictor import SAM2ImagePredictor
@@ -121,7 +163,56 @@ def sam2(image_pil: Image.Image, boxes: list[np.ndarray]) -> list[dict]:
 @app.function(
     image=image,
-    gpu="A10G",
     volumes={volume_path: volume},
     timeout=60 * 3,
 )
@@ -234,7 +325,7 @@ def clip(
 @app.function(
-    gpu="T4",
     image=image,
     volumes={volume_path: volume},
     timeout=60 * 3,
@@ -332,7 +423,7 @@ def change_image_objects_hsv(
 @app.function(
-    gpu="T4",
     image=image,
     volumes={volume_path: volume},
     timeout=60 * 3,
@@ -404,7 +495,7 @@ def change_image_objects_lab(
 @app.function(
-    gpu="T4",
     image=image,
     volumes={volume_path: volume},
     timeout=60 * 3,
@@ -432,7 +523,7 @@ def apply_mosaic_with_bool_mask(
 @app.function(
-    gpu="T4",
     image=image,
     volumes={volume_path: volume},
     timeout=60 * 3,
@@ -441,6 +532,7 @@ def preserve_privacy(
     image_pil: Image.Image,
     prompts: str,
     privacy_strength: int = 15,
 ) -> Image.Image:
     """
     Preserves privacy in an image by applying a mosaic effect to specified objects.
@@ -449,9 +541,10 @@ def preserve_privacy(
     if isinstance(prompts, str):
         prompts = [prompt.strip() for prompt in prompts.split(".")]
         print(f"Parsed prompts: {prompts}")
-    prompt_segment_results = prompt_segment.remote(
         image_pil=image_pil,
         prompts=prompts,
     )
     if not prompt_segment_results:
         return image_pil

         "numpy",
         "transformers",
         "opencv-contrib-python-headless",
+        "scipy",
         gpu="A10G",
     )
     .pip_install(
     volumes={volume_path: volume},
     timeout=60 * 3,
 )
+def privacy_prompt_segment(
+    image_pil: Image.Image,
+    prompts: list[str],
+    threshold: float,
+) -> list[dict]:
+    """
+    Detect prompted objects with OWLv2, then request SAM2 masks for the detected boxes.
+    Args:
+        image_pil (Image.Image): The input image as a PIL Image.
+        prompts (list[str]): Text labels forwarded to the `owlv2` remote function.
+        threshold (float): Detection threshold forwarded to the `owlv2` remote function.
+    Returns:
+        A dict with the keys "labels", "boxes", "owlv2_scores",
+        "sam_masking_scores" and "masks", or None when OWLv2 returns nothing
+        or no SAM2 mask element is truthy.
+        NOTE(review): the `list[dict]` annotation does not match the dict / None
+        that is actually returned -- confirm with callers before changing it.
+    """
+    # Stage 1: text-prompted detection, executed as a remote Modal call.
+    owlv2_results = owlv2.remote(image_pil, prompts, threshold=threshold)
+    if not owlv2_results:
+        print("No boxes returned from OWLV2.")
+        return None
+    # Stack the per-detection boxes into one array before handing them to SAM2.
+    boxes = np.array(owlv2_results["boxes"])
+    # Stage 2: box-prompted masking; the result is unpacked as (masks, scores).
+    sam_result_masks, sam_result_scores = sam2.remote(image_pil=image_pil, boxes=boxes)
+    # NOTE(review): this prints the whole mask array to the logs -- presumably debug output.
+    print(f"sam_result_mask {sam_result_masks}")
+    # `.any()` is False when no mask element is truthy, i.e. every mask is empty.
+    if not sam_result_masks.any():
+        print("No masks or scores returned from SAM2.")
+        return None
+    if sam_result_masks.ndim == 3:
+        # A 3-D mask array is wrapped in a one-element list so that it gains an
+        # extra leading level (the result is a list, not a 4-D ndarray).
+        # NOTE(review): presumably this matches the layout downstream code
+        # expects -- confirm against the consumer of "masks".
+        sam_result_masks = [sam_result_masks]
+    results = {
+        "labels": owlv2_results["labels"],
+        "boxes": boxes,
+        "owlv2_scores": owlv2_results["scores"],
+        "sam_masking_scores": sam_result_scores,
+        "masks": sam_result_masks,
+    }
+    return results
+@app.function(
+    image=image,
+    gpu="A100",
+    volumes={volume_path: volume},
+    timeout=60 * 3,
+)
 def sam2(image_pil: Image.Image, boxes: list[np.ndarray]) -> list[dict]:
     import torch
     from sam2.sam2_image_predictor import SAM2ImagePredictor
 @app.function(
     image=image,
+    gpu="A100",
+    volumes={volume_path: volume},
+)
+def owlv2(
+    image_pil: Image.Image,
+    labels: list[str],
+    threshold: float,
+) -> dict | None:
+    """
+    Perform zero-shot object detection on an image using the given text labels.
+    Args:
+        image_pil (Image.Image): The input image as a PIL Image.
+        labels (list[str]): Candidate labels to look for in the image.
+        threshold (float): Minimum detection score; predictions scoring lower are dropped.
+    Returns:
+        dict | None: A dict with the parallel lists "labels", "scores" and "boxes"
+        (each box is a NumPy array built from the values of the prediction's "box"
+        mapping), or None when no prediction reaches the threshold.
+    """
+    from transformers import pipeline
+    checkpoint = "google/owlv2-large-patch14-ensemble"
+    detector = pipeline(
+        model=checkpoint,
+        task="zero-shot-object-detection",
+        device="cuda",
+        use_fast=True,
+    )
+    # Forward the threshold to the pipeline. Without it the pipeline applies its
+    # own default cut-off first, so a caller-supplied threshold below that
+    # default could never surface any additional detections.
+    predictions = detector(
+        image_pil,
+        candidate_labels=labels,
+        threshold=threshold,
+    )
+    # Separate accumulators keep the `labels` argument from being rebound.
+    kept_labels = []
+    kept_scores = []
+    kept_boxes = []
+    for prediction in predictions:
+        # Explicit guard retained so the cut-off holds regardless of how the
+        # pipeline treats scores sitting exactly on the boundary.
+        if prediction["score"] < threshold:
+            continue
+        kept_labels.append(prediction["label"])
+        kept_scores.append(prediction["score"])
+        # Box coordinates are taken in the order the pipeline stores them.
+        kept_boxes.append(np.array(list(prediction["box"].values())))
+    if not kept_labels:
+        print("No predictions found with score above threshold.")
+        return None
+    return {"labels": kept_labels, "scores": kept_scores, "boxes": kept_boxes}
+@app.function(
+    image=image,
+    gpu="A100",
     volumes={volume_path: volume},
     timeout=60 * 3,
 )
 @app.function(
+    gpu="A10G",
     image=image,
     volumes={volume_path: volume},
     timeout=60 * 3,
 @app.function(
+    gpu="A10G",
     image=image,
     volumes={volume_path: volume},
     timeout=60 * 3,
 @app.function(
+    gpu="A10G",
     image=image,
     volumes={volume_path: volume},
     timeout=60 * 3,
 @app.function(
+    gpu="A10G",
     image=image,
     volumes={volume_path: volume},
     timeout=60 * 3,
     image_pil: Image.Image,
     prompts: str,
     privacy_strength: int = 15,
+    threshold: float = 0.2,
 ) -> Image.Image:
     """
     Preserves privacy in an image by applying a mosaic effect to specified objects.
     if isinstance(prompts, str):
         prompts = [prompt.strip() for prompt in prompts.split(".")]
         print(f"Parsed prompts: {prompts}")
+    prompt_segment_results = privacy_prompt_segment.remote(
         image_pil=image_pil,
         prompts=prompts,
+        threshold=threshold,
     )
     if not prompt_segment_results:
         return image_pil

src/tools.py CHANGED Viewed

@@ -42,6 +42,7 @@ def privacy_preserve_image(
     input_img,
     input_prompt,
     privacy_strength: int = 15,
 ) -> np.ndarray | Image.Image | str | Path | None:
     """
     Obscures specified objects in the input image based on a natural language prompt, using a privacy-preserving blur or distortion effect.
@@ -53,6 +54,7 @@ def privacy_preserve_image(
         input_img: Input image or can be URL string of the image or base64 string. Cannot be None.
         input_prompt (str): Object to obscure in the image. It can be a single word or multiple words, e.g., "left person face", "license plate".
         privacy_strength (int): Strength of the privacy preservation effect. Higher values result in stronger blurring. Default is 15.
     Returns:
         bytes: Binary image data of the modified image.
@@ -63,12 +65,15 @@ def privacy_preserve_image(
         raise gr.Error("Input image cannot be None or empty.")
     if not input_prompt or input_prompt.strip() == "":
         raise gr.Error("Input prompt cannot be None or empty.")
     func = modal.Function.from_name(modal_app_name, "preserve_privacy")
     output_pil = func.remote(
         image_pil=input_img,
         prompts=input_prompt,
         privacy_strength=privacy_strength,
     )
     if output_pil is None:

     input_img,
     input_prompt,
     privacy_strength: int = 15,
+    threshold: float = 0.2,
 ) -> np.ndarray | Image.Image | str | Path | None:
     """
     Obscures specified objects in the input image based on a natural language prompt, using a privacy-preserving blur or distortion effect.
         input_img: Input image or can be URL string of the image or base64 string. Cannot be None.
         input_prompt (str): Object to obscure in the image. It can be a single word or multiple words, e.g., "left person face", "license plate".
         privacy_strength (int): Strength of the privacy preservation effect. Higher values result in stronger blurring. Default is 15.
+        threshold (float): Model threshold for detecting objects. It should be between 0.01 and 0.99. Default is 0.2. For detecting smaller objects, small regions, or faces, a lower threshold is recommended.
     Returns:
         bytes: Binary image data of the modified image.
         raise gr.Error("Input image cannot be None or empty.")
     if not input_prompt or input_prompt.strip() == "":
         raise gr.Error("Input prompt cannot be None or empty.")
+    if threshold < 0.01 or threshold > 0.99:
+        raise gr.Error("Threshold must be between 0.01 and 0.99.")
     func = modal.Function.from_name(modal_app_name, "preserve_privacy")
     output_pil = func.remote(
         image_pil=input_img,
         prompts=input_prompt,
         privacy_strength=privacy_strength,
+        threshold=threshold,
     )
     if output_pil is None: