hoanshf committed on Aug 22, 2024

Commit

15a19e0

verified ·

1 Parent(s): 620541f

Upload 15 files

Browse files

Files changed (16) hide show

.gitattributes +2 -0
MyConfig.py +13 -0
MyPipe.py +76 -0
README.md +158 -3
briarmbg.py +458 -0
config.json +25 -0
example_inference.py +39 -0
example_input.jpg +0 -0
model.pth +3 -0
model.safetensors +3 -0
preprocessor_config.json +23 -0
pytorch_model.bin +3 -0
requirements.txt +8 -0
results.png +3 -0
t4.png +3 -0
utilities.py +25 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+results.png filter=lfs diff=lfs merge=lfs -text
+t4.png filter=lfs diff=lfs merge=lfs -text

MyConfig.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from transformers import PretrainedConfig
+from typing import List
+class RMBGConfig(PretrainedConfig):
+    model_type = "SegformerForSemanticSegmentation"
+    def __init__(
+        self,
+        in_ch=3,
+        out_ch=1,
+        **kwargs):
+      self.in_ch = in_ch
+      self.out_ch = out_ch
+      super().__init__(**kwargs)

MyPipe.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import torch, os
+import torch.nn.functional as F
+from torchvision.transforms.functional import normalize
+import numpy as np
+from transformers import Pipeline
+from transformers.image_utils import load_image
+from skimage import io
+from PIL import Image
+class RMBGPipe(Pipeline):
+  def __init__(self,**kwargs):
+    Pipeline.__init__(self,**kwargs)
+    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    self.model.to(self.device)
+    self.model.eval()
+  def _sanitize_parameters(self, **kwargs):
+    # parse parameters
+    preprocess_kwargs = {}
+    postprocess_kwargs = {}
+    if "model_input_size" in kwargs :
+      preprocess_kwargs["model_input_size"] = kwargs["model_input_size"]
+    if "return_mask" in kwargs:
+      postprocess_kwargs["return_mask"] = kwargs["return_mask"]
+    return preprocess_kwargs, {}, postprocess_kwargs
+  def preprocess(self,input_image,model_input_size: list=[1024,1024]):
+      # preprocess the input
+      orig_im = load_image(input_image)
+      orig_im = np.array(orig_im)
+      orig_im_size = orig_im.shape[0:2]
+      preprocessed_image = self.preprocess_image(orig_im, model_input_size).to(self.device)
+      inputs = {
+          "preprocessed_image":preprocessed_image,
+          "orig_im_size":orig_im_size,
+          "input_image" : input_image
+      }
+      return inputs
+  def _forward(self,inputs):
+    result = self.model(inputs.pop("preprocessed_image"))
+    inputs["result"] = result
+    return inputs
+  def postprocess(self,inputs,return_mask:bool=False ):
+    result = inputs.pop("result")
+    orig_im_size = inputs.pop("orig_im_size")
+    input_image = inputs.pop("input_image")
+    result_image = self.postprocess_image(result[0][0], orig_im_size)
+    pil_im = Image.fromarray(result_image)
+    if return_mask ==True :
+      return pil_im
+    no_bg_image = Image.new("RGBA", pil_im.size, (0,0,0,0))
+    input_image = load_image(input_image)
+    no_bg_image.paste(input_image, mask=pil_im)
+    return no_bg_image
+  # utilities functions
+  def preprocess_image(self,im: np.ndarray, model_input_size: list=[1024,1024]) -> torch.Tensor:
+    # same as utilities.py with minor modification
+    if len(im.shape) < 3:
+        im = im[:, :, np.newaxis]
+    im_tensor = torch.tensor(im, dtype=torch.float32).permute(2,0,1)
+    im_tensor = F.interpolate(torch.unsqueeze(im_tensor,0), size=model_input_size, mode='bilinear')
+    image = torch.divide(im_tensor,255.0)
+    image = normalize(image,[0.5,0.5,0.5],[1.0,1.0,1.0])
+    return image
+  def postprocess_image(self,result: torch.Tensor, im_size: list)-> np.ndarray:
+      result = torch.squeeze(F.interpolate(result, size=im_size, mode='bilinear') ,0)
+      ma = torch.max(result)
+      mi = torch.min(result)
+      result = (result-mi)/(ma-mi)
+      im_array = (result*255).permute(1,2,0).cpu().data.numpy().astype(np.uint8)
+      im_array = np.squeeze(im_array)
+      return im_array

README.md CHANGED Viewed

@@ -1,3 +1,158 @@
----
-license: apache-2.0
----

+---
+license: other
+license_name: bria-rmbg-1.4
+license_link: https://bria.ai/bria-huggingface-model-license-agreement/
+pipeline_tag: image-segmentation
+tags:
+- remove background
+- background
+- background-removal
+- Pytorch
+- vision
+- legal liability
+- transformers
+extra_gated_description: RMBG v1.4 is available as a source-available model for non-commercial use
+extra_gated_heading: "Fill in this form to get instant access"
+extra_gated_fields:
+  Name: text
+  Company/Org name: text
+  Org Type (Early/Growth Startup, Enterprise, Academy): text
+  Role: text
+  Country: text
+  Email: text
+  By submitting this form, I agree to BRIA’s Privacy policy and Terms & conditions, see links below: checkbox
+---
+# BRIA Background Removal v1.4 Model Card
+RMBG v1.4 is our state-of-the-art background removal model, designed to effectively separate foreground from background in a range of
+categories and image types. This model has been trained on a carefully selected dataset, which includes:
+general stock images, e-commerce, gaming, and advertising content, making it suitable for commercial use cases powering enterprise content creation at scale.
+The accuracy, efficiency, and versatility currently rival leading source-available models.
+It is ideal where content safety, legally licensed datasets, and bias mitigation are paramount.
+Developed by BRIA AI, RMBG v1.4 is available as a source-available model for non-commercial use.
+[CLICK HERE FOR A DEMO](https://huggingface.co/spaces/briaai/BRIA-RMBG-1.4)
+![examples](t4.png)
+### Model Description
+- **Developed by:** [BRIA AI](https://bria.ai/)
+- **Model type:** Background Removal
+- **License:** [bria-rmbg-1.4](https://bria.ai/bria-huggingface-model-license-agreement/)
+  - The model is released under a Creative Commons license for non-commercial use.
+  - Commercial use is subject to a commercial agreement with BRIA. [Contact Us](https://bria.ai/contact-us) for more information.
+- **Model Description:** BRIA RMBG 1.4 is a saliency segmentation model trained exclusively on a professional-grade dataset.
+- **BRIA:** Resources for more information: [BRIA AI](https://bria.ai/)
+## Training data
+Bria-RMBG model was trained with over 12,000 high-quality, high-resolution, manually labeled (pixel-wise accuracy), fully licensed images.
+Our benchmark included balanced gender, balanced ethnicity, and people with different types of disabilities.
+For clarity, we provide our data distribution according to different categories, demonstrating our model’s versatility.
+### Distribution of images:
+| Category | Distribution |
+| -----------------------------------| -----------------------------------:|
+| Objects only | 45.11% |
+| People with objects/animals | 25.24% |
+| People only | 17.35% |
+| People/objects/animals with text | 8.52% |
+| Text only | 2.52% |
+| Animals only | 1.89% |
+| Category | Distribution |
+| -----------------------------------| -----------------------------------------:|
+| Photorealistic | 87.70% |
+| Non-Photorealistic | 12.30% |
+| Category | Distribution |
+| -----------------------------------| -----------------------------------:|
+| Non Solid Background | 52.05% |
+| Solid Background | 47.95% |
+| Category | Distribution |
+| -----------------------------------| -----------------------------------:|
+| Single main foreground object | 51.42% |
+| Multiple objects in the foreground | 48.58% |
+## Qualitative Evaluation
+![examples](results.png)
+## Architecture
+RMBG v1.4 is developed on the [IS-Net](https://github.com/xuebinqin/DIS) enhanced with our unique training scheme and proprietary dataset.
+These modifications significantly improve the model’s accuracy and effectiveness in diverse image-processing scenarios.
+## Installation
+```bash
+pip install -qr https://huggingface.co/briaai/RMBG-1.4/resolve/main/requirements.txt
+```
+## Usage
+Either load the pipeline
+```python
+from transformers import pipeline
+image_path = "https://farm5.staticflickr.com/4007/4322154488_997e69e4cf_z.jpg"
+pipe = pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True)
+pillow_mask = pipe(image_path, return_mask = True) # outputs a pillow mask
+pillow_image = pipe(image_path) # applies mask on input and returns a pillow image
+```
+Or load the model
+```python
+from transformers import AutoModelForImageSegmentation
+from torchvision.transforms.functional import normalize
+model = AutoModelForImageSegmentation.from_pretrained("briaai/RMBG-1.4",trust_remote_code=True)
+def preprocess_image(im: np.ndarray, model_input_size: list) -> torch.Tensor:
+    if len(im.shape) < 3:
+        im = im[:, :, np.newaxis]
+    # orig_im_size=im.shape[0:2]
+    im_tensor = torch.tensor(im, dtype=torch.float32).permute(2,0,1)
+    im_tensor = F.interpolate(torch.unsqueeze(im_tensor,0), size=model_input_size, mode='bilinear')
+    image = torch.divide(im_tensor,255.0)
+    image = normalize(image,[0.5,0.5,0.5],[1.0,1.0,1.0])
+    return image
+def postprocess_image(result: torch.Tensor, im_size: list)-> np.ndarray:
+    result = torch.squeeze(F.interpolate(result, size=im_size, mode='bilinear') ,0)
+    ma = torch.max(result)
+    mi = torch.min(result)
+    result = (result-mi)/(ma-mi)
+    im_array = (result*255).permute(1,2,0).cpu().data.numpy().astype(np.uint8)
+    im_array = np.squeeze(im_array)
+    return im_array
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+model.to(device)
+# prepare input
+image_path = "https://farm5.staticflickr.com/4007/4322154488_997e69e4cf_z.jpg"
+orig_im = io.imread(image_path)
+orig_im_size = orig_im.shape[0:2]
+image = preprocess_image(orig_im, model_input_size).to(device)
+# inference
+result=model(image)
+# post process
+result_image = postprocess_image(result[0][0], orig_im_size)
+# save result
+pil_im = Image.fromarray(result_image)
+no_bg_image = Image.new("RGBA", pil_im.size, (0,0,0,0))
+orig_image = Image.open(image_path)
+no_bg_image.paste(orig_image, mask=pil_im)
+```

briarmbg.py ADDED Viewed

	@@ -0,0 +1,458 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import PreTrainedModel
+from .MyConfig import RMBGConfig
+class REBNCONV(nn.Module):
+    """3x3 convolution followed by batch normalization and an in-place ReLU.
+    ``dirate`` is used as both the dilation and the padding of the convolution.
+    """
+    def __init__(self,in_ch=3,out_ch=3,dirate=1,stride=1):
+        super(REBNCONV,self).__init__()
+        # padding == dilation: a 3x3 kernel then keeps the spatial size at stride 1
+        self.conv_s1 = nn.Conv2d(in_ch,out_ch,3,padding=1*dirate,dilation=1*dirate,stride=stride)
+        self.bn_s1 = nn.BatchNorm2d(out_ch)
+        self.relu_s1 = nn.ReLU(inplace=True)
+    def forward(self,x):
+        # conv -> batch norm -> ReLU
+        hx = x
+        xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
+        return xout
+## upsample tensor 'src' to have the same spatial size with tensor 'tar'
+def _upsample_like(src,tar):
+    src = F.interpolate(src,size=tar.shape[2:],mode='bilinear')
+    return src
+### RSU-7 ###
+class RSU7(nn.Module):
+    """Residual U-shaped block with seven REBNCONV encoder layers.
+    The input is projected by ``rebnconvin``, encoded through five max-pool
+    downsamplings, decoded with skip concatenations, and the decoder output is
+    added to the projected input.
+    """
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
+        # NOTE(review): img_size is accepted but not used anywhere in this class.
+        super(RSU7,self).__init__()
+        self.in_ch = in_ch
+        self.mid_ch = mid_ch
+        self.out_ch = out_ch
+        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1) # input projection; REBNCONV stride defaults to 1
+        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
+        self.pool1 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool2 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool3 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool4 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv5 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool5 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv6 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        # deepest layer: dilated convolution instead of another pooling step
+        self.rebnconv7 = REBNCONV(mid_ch,mid_ch,dirate=2)
+        # decoder layers take mid_ch*2 channels because of the skip concatenation
+        self.rebnconv6d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv5d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv4d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)
+    def forward(self,x):
+        # the unpacked sizes are not used below
+        b, c, h, w = x.shape
+        hx = x
+        hxin = self.rebnconvin(hx)
+        # encoder
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+        hx4 = self.rebnconv4(hx)
+        hx = self.pool4(hx4)
+        hx5 = self.rebnconv5(hx)
+        hx = self.pool5(hx5)
+        hx6 = self.rebnconv6(hx)
+        hx7 = self.rebnconv7(hx6)
+        # decoder: concatenate with the matching encoder feature, then upsample
+        hx6d =  self.rebnconv6d(torch.cat((hx7,hx6),1))
+        hx6dup = _upsample_like(hx6d,hx5)
+        hx5d =  self.rebnconv5d(torch.cat((hx6dup,hx5),1))
+        hx5dup = _upsample_like(hx5d,hx4)
+        hx4d = self.rebnconv4d(torch.cat((hx5dup,hx4),1))
+        hx4dup = _upsample_like(hx4d,hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4dup,hx3),1))
+        hx3dup = _upsample_like(hx3d,hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup,hx2),1))
+        hx2dup = _upsample_like(hx2d,hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup,hx1),1))
+        # residual connection with the projected input
+        return hx1d + hxin
+### RSU-6 ###
+class RSU6(nn.Module):
+    """Residual U-shaped block with six REBNCONV encoder layers.
+    Same layout as RSU7 with one pooling level fewer (four max-pools).
+    """
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU6,self).__init__()
+        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
+        self.pool1 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool2 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool3 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool4 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv5 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        # deepest layer: dilated convolution instead of another pooling step
+        self.rebnconv6 = REBNCONV(mid_ch,mid_ch,dirate=2)
+        # decoder layers take mid_ch*2 channels because of the skip concatenation
+        self.rebnconv5d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv4d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)
+    def forward(self,x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        # encoder
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+        hx4 = self.rebnconv4(hx)
+        hx = self.pool4(hx4)
+        hx5 = self.rebnconv5(hx)
+        hx6 = self.rebnconv6(hx5)
+        # decoder: concatenate with the matching encoder feature, then upsample
+        hx5d =  self.rebnconv5d(torch.cat((hx6,hx5),1))
+        hx5dup = _upsample_like(hx5d,hx4)
+        hx4d = self.rebnconv4d(torch.cat((hx5dup,hx4),1))
+        hx4dup = _upsample_like(hx4d,hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4dup,hx3),1))
+        hx3dup = _upsample_like(hx3d,hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup,hx2),1))
+        hx2dup = _upsample_like(hx2d,hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup,hx1),1))
+        # residual connection with the projected input
+        return hx1d + hxin
+### RSU-5 ###
+class RSU5(nn.Module):
+    """Residual U-shaped block with five REBNCONV encoder layers (three max-pools)."""
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU5,self).__init__()
+        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
+        self.pool1 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool2 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool3 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        # deepest layer: dilated convolution instead of another pooling step
+        self.rebnconv5 = REBNCONV(mid_ch,mid_ch,dirate=2)
+        # decoder layers take mid_ch*2 channels because of the skip concatenation
+        self.rebnconv4d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)
+    def forward(self,x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        # encoder
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+        hx4 = self.rebnconv4(hx)
+        hx5 = self.rebnconv5(hx4)
+        # decoder: concatenate with the matching encoder feature, then upsample
+        hx4d = self.rebnconv4d(torch.cat((hx5,hx4),1))
+        hx4dup = _upsample_like(hx4d,hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4dup,hx3),1))
+        hx3dup = _upsample_like(hx3d,hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup,hx2),1))
+        hx2dup = _upsample_like(hx2d,hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup,hx1),1))
+        # residual connection with the projected input
+        return hx1d + hxin
+### RSU-4 ###
+class RSU4(nn.Module):
+    """Residual U-shaped block with four REBNCONV encoder layers (two max-pools)."""
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU4,self).__init__()
+        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
+        self.pool1 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        self.pool2 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=1)
+        # deepest layer: dilated convolution instead of another pooling step
+        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=2)
+        # decoder layers take mid_ch*2 channels because of the skip concatenation
+        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)
+    def forward(self,x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        # encoder
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx4 = self.rebnconv4(hx3)
+        # decoder: concatenate with the matching encoder feature, then upsample
+        hx3d = self.rebnconv3d(torch.cat((hx4,hx3),1))
+        hx3dup = _upsample_like(hx3d,hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup,hx2),1))
+        hx2dup = _upsample_like(hx2d,hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup,hx1),1))
+        # residual connection with the projected input
+        return hx1d + hxin
+### RSU-4F ###
+class RSU4F(nn.Module):
+    """Residual block shaped like RSU4 but without pooling or upsampling.
+    Increasing dilation rates (1, 2, 4, 8) are used in place of the
+    downsampling steps, so no layer in this block changes the resolution.
+    """
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU4F,self).__init__()
+        self.rebnconvin = REBNCONV(in_ch,out_ch,dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch,mid_ch,dirate=1)
+        self.rebnconv2 = REBNCONV(mid_ch,mid_ch,dirate=2)
+        self.rebnconv3 = REBNCONV(mid_ch,mid_ch,dirate=4)
+        self.rebnconv4 = REBNCONV(mid_ch,mid_ch,dirate=8)
+        # decoder layers take mid_ch*2 channels because of the skip concatenation
+        self.rebnconv3d = REBNCONV(mid_ch*2,mid_ch,dirate=4)
+        self.rebnconv2d = REBNCONV(mid_ch*2,mid_ch,dirate=2)
+        self.rebnconv1d = REBNCONV(mid_ch*2,out_ch,dirate=1)
+    def forward(self,x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        # encoder with growing dilation
+        hx1 = self.rebnconv1(hxin)
+        hx2 = self.rebnconv2(hx1)
+        hx3 = self.rebnconv3(hx2)
+        hx4 = self.rebnconv4(hx3)
+        # decoder: skip concatenations only, no upsampling
+        hx3d = self.rebnconv3d(torch.cat((hx4,hx3),1))
+        hx2d = self.rebnconv2d(torch.cat((hx3d,hx2),1))
+        hx1d = self.rebnconv1d(torch.cat((hx2d,hx1),1))
+        # residual connection with the projected input
+        return hx1d + hxin
+class myrebnconv(nn.Module):
+    def __init__(self, in_ch=3,
+                       out_ch=1,
+                       kernel_size=3,
+                       stride=1,
+                       padding=1,
+                       dilation=1,
+                       groups=1):
+        super(myrebnconv,self).__init__()
+        self.conv = nn.Conv2d(in_ch,
+                              out_ch,
+                              kernel_size=kernel_size,
+                              stride=stride,
+                              padding=padding,
+                              dilation=dilation,
+                              groups=groups)
+        self.bn = nn.BatchNorm2d(out_ch)
+        self.rl = nn.ReLU(inplace=True)
+    def forward(self,x):
+        return self.rl(self.bn(self.conv(x)))
+class BriaRMBG(PreTrainedModel):
+    config_class = RMBGConfig
+    def __init__(self,config:RMBGConfig = RMBGConfig()):
+        super().__init__(config)
+        in_ch = config.in_ch # 3
+        out_ch = config.out_ch # 1
+        self.conv_in = nn.Conv2d(in_ch,64,3,stride=2,padding=1)
+        self.pool_in = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.stage1 = RSU7(64,32,64)
+        self.pool12 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.stage2 = RSU6(64,32,128)
+        self.pool23 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.stage3 = RSU5(128,64,256)
+        self.pool34 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.stage4 = RSU4(256,128,512)
+        self.pool45 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.stage5 = RSU4F(512,256,512)
+        self.pool56 = nn.MaxPool2d(2,stride=2,ceil_mode=True)
+        self.stage6 = RSU4F(512,256,512)
+        # decoder
+        self.stage5d = RSU4F(1024,256,512)
+        self.stage4d = RSU4(1024,128,256)
+        self.stage3d = RSU5(512,64,128)
+        self.stage2d = RSU6(256,32,64)
+        self.stage1d = RSU7(128,16,64)
+        self.side1 = nn.Conv2d(64,out_ch,3,padding=1)
+        self.side2 = nn.Conv2d(64,out_ch,3,padding=1)
+        self.side3 = nn.Conv2d(128,out_ch,3,padding=1)
+        self.side4 = nn.Conv2d(256,out_ch,3,padding=1)
+        self.side5 = nn.Conv2d(512,out_ch,3,padding=1)
+        self.side6 = nn.Conv2d(512,out_ch,3,padding=1)
+        # self.outconv = nn.Conv2d(6*out_ch,out_ch,1)
+    def forward(self,x):
+        hx = x
+        hxin = self.conv_in(hx)
+        #hx = self.pool_in(hxin)
+        #stage 1
+        hx1 = self.stage1(hxin)
+        hx = self.pool12(hx1)
+        #stage 2
+        hx2 = self.stage2(hx)
+        hx = self.pool23(hx2)
+        #stage 3
+        hx3 = self.stage3(hx)
+        hx = self.pool34(hx3)
+        #stage 4
+        hx4 = self.stage4(hx)
+        hx = self.pool45(hx4)
+        #stage 5
+        hx5 = self.stage5(hx)
+        hx = self.pool56(hx5)
+        #stage 6
+        hx6 = self.stage6(hx)
+        hx6up = _upsample_like(hx6,hx5)
+        #-------------------- decoder --------------------
+        hx5d = self.stage5d(torch.cat((hx6up,hx5),1))
+        hx5dup = _upsample_like(hx5d,hx4)
+        hx4d = self.stage4d(torch.cat((hx5dup,hx4),1))
+        hx4dup = _upsample_like(hx4d,hx3)
+        hx3d = self.stage3d(torch.cat((hx4dup,hx3),1))
+        hx3dup = _upsample_like(hx3d,hx2)
+        hx2d = self.stage2d(torch.cat((hx3dup,hx2),1))
+        hx2dup = _upsample_like(hx2d,hx1)
+        hx1d = self.stage1d(torch.cat((hx2dup,hx1),1))
+        #side output
+        d1 = self.side1(hx1d)
+        d1 = _upsample_like(d1,x)
+        d2 = self.side2(hx2d)
+        d2 = _upsample_like(d2,x)
+        d3 = self.side3(hx3d)
+        d3 = _upsample_like(d3,x)
+        d4 = self.side4(hx4d)
+        d4 = _upsample_like(d4,x)
+        d5 = self.side5(hx5d)
+        d5 = _upsample_like(d5,x)
+        d6 = self.side6(hx6)
+        d6 = _upsample_like(d6,x)
+        return [F.sigmoid(d1), F.sigmoid(d2), F.sigmoid(d3), F.sigmoid(d4), F.sigmoid(d5), F.sigmoid(d6)],[hx1d,hx2d,hx3d,hx4d,hx5d,hx6]

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "briaai/RMBG-1.4",
+  "architectures": [
+    "BriaRMBG"
+  ],
+  "auto_map": {
+    "AutoConfig": "MyConfig.RMBGConfig",
+    "AutoModelForImageSegmentation": "briarmbg.BriaRMBG"
+  },
+  "custom_pipelines": {
+    "image-segmentation": {
+      "impl": "MyPipe.RMBGPipe",
+      "pt": [
+        "AutoModelForImageSegmentation"
+      ],
+      "tf": [],
+      "type": "image"
+    }
+  },
+  "in_ch": 3,
+  "model_type": "SegformerForSemanticSegmentation",
+  "out_ch": 1,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.0.dev0"
+}

example_inference.py ADDED Viewed

	@@ -0,0 +1,39 @@

+from skimage import io
+import torch, os
+from PIL import Image
+from briarmbg import BriaRMBG
+from utilities import preprocess_image, postprocess_image
+from huggingface_hub import hf_hub_download
+def example_inference():
+    im_path = f"{os.path.dirname(os.path.abspath(__file__))}/example_input.jpg"
+    net = BriaRMBG()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    net = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
+    net.to(device)
+    net.eval()
+    # prepare input
+    model_input_size = [1024,1024]
+    orig_im = io.imread(im_path)
+    orig_im_size = orig_im.shape[0:2]
+    image = preprocess_image(orig_im, model_input_size).to(device)
+    # inference
+    result=net(image)
+    # post process
+    result_image = postprocess_image(result[0][0], orig_im_size)
+    # save result
+    pil_im = Image.fromarray(result_image)
+    no_bg_image = Image.new("RGBA", pil_im.size, (0,0,0,0))
+    orig_image = Image.open(im_path)
+    no_bg_image.paste(orig_image, mask=pil_im)
+    no_bg_image.save("example_image_no_bg.png")
+if __name__ == "__main__":
+    example_inference()

example_input.jpg ADDED Viewed

model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:893c16c340b1ddafc93e78457a4d94190da9b7179149f8574284c83caebf5e8c
+size 176718373

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46ef7fe46f2ae284d8f1aaa24bfa5fca5ef25a34e2c7caa890a0029eb100e87f
+size 176381984

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "do_normalize": true,
+  "do_pad": false,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "feature_extractor_type": "ImageFeatureExtractor",
+  "image_std": [
+    1,
+    1,
+    1
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "width": 1024,
+    "height": 1024
+  }
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59569acdb281ac9fc9f78f9d33b6f9f17f68e25086b74f9025c35bb5f2848967
+size 176574018

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+torch
+torchvision
+pillow
+numpy
+typing
+scikit-image
+huggingface_hub
+transformers>=4.39.1

results.png ADDED Viewed

Git LFS Details

SHA256: 2b7f08fc4c09db56b516186c0629f72523a5cbe328beaedda8b36349af4b04bc
Pointer size: 132 Bytes
Size of remote file: 1.25 MB

t4.png ADDED Viewed

Git LFS Details

SHA256: 43a9453f567d9bff7fe4481205575bbf302499379047ee6073247315452ba8fb
Pointer size: 132 Bytes
Size of remote file: 2.16 MB

utilities.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import torch
+import torch.nn.functional as F
+from torchvision.transforms.functional import normalize
+import numpy as np
+def preprocess_image(im: np.ndarray, model_input_size: list) -> torch.Tensor:
+    if len(im.shape) < 3:
+        im = im[:, :, np.newaxis]
+    # orig_im_size=im.shape[0:2]
+    im_tensor = torch.tensor(im, dtype=torch.float32).permute(2,0,1)
+    im_tensor = F.interpolate(torch.unsqueeze(im_tensor,0), size=model_input_size, mode='bilinear').type(torch.uint8)
+    image = torch.divide(im_tensor,255.0)
+    image = normalize(image,[0.5,0.5,0.5],[1.0,1.0,1.0])
+    return image
+def postprocess_image(result: torch.Tensor, im_size: list)-> np.ndarray:
+    result = torch.squeeze(F.interpolate(result, size=im_size, mode='bilinear') ,0)
+    ma = torch.max(result)
+    mi = torch.min(result)
+    result = (result-mi)/(ma-mi)
+    im_array = (result*255).permute(1,2,0).cpu().data.numpy().astype(np.uint8)
+    im_array = np.squeeze(im_array)
+    return im_array