Commit a3f0d6c · Anirudh Bhalekar · Parent(s): 6e87dc7
added models and util folder
Files changed:
- models_Facies.py +397 -0
- models_Fault.py +327 -0
- util/__pycache__/datasets.cpython-311.pyc +0 -0
- util/__pycache__/datasets.cpython-312.pyc +0 -0
- util/__pycache__/datasets.cpython-36.pyc +0 -0
- util/__pycache__/datasets.cpython-37.pyc +0 -0
- util/__pycache__/lars.cpython-36.pyc +0 -0
- util/__pycache__/lr_decay.cpython-311.pyc +0 -0
- util/__pycache__/lr_decay.cpython-312.pyc +0 -0
- util/__pycache__/lr_decay.cpython-36.pyc +0 -0
- util/__pycache__/lr_decay.cpython-37.pyc +0 -0
- util/__pycache__/lr_sched.cpython-311.pyc +0 -0
- util/__pycache__/lr_sched.cpython-312.pyc +0 -0
- util/__pycache__/lr_sched.cpython-36.pyc +0 -0
- util/__pycache__/lr_sched.cpython-37.pyc +0 -0
- util/__pycache__/metrics.cpython-36.pyc +0 -0
- util/__pycache__/misc.cpython-311.pyc +0 -0
- util/__pycache__/misc.cpython-312.pyc +0 -0
- util/__pycache__/misc.cpython-36.pyc +0 -0
- util/__pycache__/misc.cpython-37.pyc +0 -0
- util/__pycache__/msssim.cpython-311.pyc +0 -0
- util/__pycache__/msssim.cpython-312.pyc +0 -0
- util/__pycache__/msssim.cpython-36.pyc +0 -0
- util/__pycache__/msssim.cpython-37.pyc +0 -0
- util/__pycache__/pos_embed.cpython-311.pyc +0 -0
- util/__pycache__/pos_embed.cpython-312.pyc +0 -0
- util/__pycache__/pos_embed.cpython-36.pyc +0 -0
- util/__pycache__/pos_embed.cpython-37.pyc +0 -0
- util/__pycache__/size_aware_batching.cpython-312.pyc +0 -0
- util/__pycache__/skeletonize.cpython-312.pyc +0 -0
- util/__pycache__/tools.cpython-311.pyc +0 -0
- util/__pycache__/tools.cpython-312.pyc +0 -0
- util/__pycache__/tools.cpython-36.pyc +0 -0
- util/__pycache__/tools.cpython-37.pyc +0 -0
- util/__pycache__/variable_pos_embed.cpython-312.pyc +0 -0
- util/crop.py +42 -0
- util/datasets.py +599 -0
- util/lars.py +47 -0
- util/lr_decay.py +76 -0
- util/lr_sched.py +21 -0
- util/metrics.py +90 -0
- util/misc.py +340 -0
- util/msssim.py +146 -0
- util/pos_embed.py +104 -0
- util/pos_embedtest.py +127 -0
- util/post_processing.py +305 -0
- util/size_aware_batching.py +251 -0
- util/skeletonize.py +486 -0
- util/tools.py +143 -0
- util/variable_pos_embed.py +143 -0
models_Facies.py
ADDED
@@ -0,0 +1,397 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# References:
# timm: https://github.com/rwightman/pytorch-image-models/tree/master/timm
# DeiT: https://github.com/facebookresearch/deit
# --------------------------------------------------------

from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F
import timm.models.vision_transformer
import numpy as np
from util.pos_embed import get_2d_sincos_pos_embed
from util.variable_pos_embed import interpolate_pos_embed_variable


class FlexiblePatchEmbed(nn.Module):
    """ 2D Image to Patch Embedding that handles variable input sizes """
    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, bias=True):
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.in_chans = in_chans
        self.embed_dim = embed_dim

        self.num_patches = (img_size // patch_size) ** 2  # default number of patches
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias)

    def forward(self, x):
        B, C, H, W = x.shape
        # Calculate number of patches dynamically
        self.num_patches = (H // self.patch_size) * (W // self.patch_size)
        x = self.proj(x).flatten(2).transpose(1, 2)  # BCHW -> BNC
        return x


class VisionTransformer(timm.models.vision_transformer.VisionTransformer):
    """ Vision Transformer with support for variable image sizes and adaptive positional embeddings
    """
    def __init__(self, global_pool=False, **kwargs):
        super(VisionTransformer, self).__init__(**kwargs)

        self.global_pool = global_pool
        self.decoder = VIT_MLAHead(mla_channels=self.embed_dim, num_classes=self.num_classes)

        self.segmentation_head = SegmentationHead(
            in_channels=16,
            out_channels=self.num_classes,
            kernel_size=3,
        )
        if self.global_pool:
            norm_layer = kwargs['norm_layer']
            embed_dim = kwargs['embed_dim']
            self.fc_norm = norm_layer(embed_dim)
            del self.norm  # remove the original norm

    def interpolate_pos_encoding(self, x, h, w):
        """
        Interpolate positional embeddings for arbitrary input sizes
        """
        npatch = x.shape[1] - 1  # subtract 1 for cls token
        N = self.pos_embed.shape[1] - 1  # original number of patches

        if npatch == N and h == w:
            return self.pos_embed

        # Use the new variable position embedding utility
        return interpolate_pos_embed_variable(self.pos_embed, h, w, cls_token=True)

    def forward_features(self, x):
        B, C, H, W = x.shape

        # Handle padding for non-16-divisible images
        patch_size = self.patch_embed.patch_size
        pad_h = (patch_size - H % patch_size) % patch_size
        pad_w = (patch_size - W % patch_size) % patch_size

        if pad_h > 0 or pad_w > 0:
            x = F.pad(x, (0, pad_w, 0, pad_h), mode='reflect')
            H_padded, W_padded = H + pad_h, W + pad_w
        else:
            H_padded, W_padded = H, W

        # Extract patches
        x = self.patch_embed(x)
        _H, _W = H_padded // patch_size, W_padded // patch_size

        # Add class token
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        # Add interpolated positional embeddings
        pos_embed = self.interpolate_pos_encoding(x, _H, _W)
        x = x + pos_embed
        x = self.pos_drop(x)

        featureskip = []
        featureskipnum = 1
        for blk in self.blocks:
            x = blk(x)
            if featureskipnum % (len(self.blocks) // 4) == 0:
                featureskip.append(x[:, 1:, :])  # exclude cls token
            featureskipnum += 1

        # Pass original dimensions for proper reconstruction
        x = self.decoder(featureskip[0], featureskip[1], featureskip[2], featureskip[3],
                         h=_H, w=_W, target_h=H, target_w=W)
        return x

    def forward(self, x):
        x = self.forward_features(x)
        return x


class Conv2dReLU(nn.Sequential):
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            padding=0,
            stride=1,
            use_batchnorm=True,
    ):
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=not (use_batchnorm),
        )
        relu = nn.ReLU(inplace=True)

        bn = nn.BatchNorm2d(out_channels)

        super(Conv2dReLU, self).__init__(conv, bn, relu)


class DecoderBlock(nn.Module):
    def __init__(
            self,
            in_channels,
            out_channels,
            skip_channels=0,
            use_batchnorm=True,
    ):
        super().__init__()
        self.conv1 = Conv2dReLU(
            in_channels + skip_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_batchnorm=use_batchnorm,
        )
        self.conv2 = Conv2dReLU(
            out_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_batchnorm=use_batchnorm,
        )
        self.up = nn.UpsamplingBilinear2d(scale_factor=2)

    def forward(self, x, skip=None):
        # print(x.shape, skip.shape)
        if skip is not None:
            x = torch.cat([x, skip], dim=1)
        x = self.up(x)
        x = self.conv1(x)
        x = self.conv2(x)
        return x


class SegmentationHead(nn.Sequential):

    def __init__(self, in_channels, out_channels, kernel_size=3, upsampling=1):
        conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2)
        upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity()
        super().__init__(conv2d, upsampling)


class DecoderCup(nn.Module):
    def __init__(self):
        super().__init__()
        # self.config = config
        head_channels = 512
        self.conv_more = Conv2dReLU(
            1024,
            head_channels,
            kernel_size=3,
            padding=1,
            use_batchnorm=True,
        )

        decoder_channels = (256, 128, 64, 16)

        in_channels = [head_channels] + list(decoder_channels[:-1])
        out_channels = decoder_channels

        # if self.config.n_skip != 0:
        #     skip_channels = self.config.skip_channels
        #     for i in range(4 - self.config.n_skip):  # re-select the skip channels according to n_skip
        #         skip_channels[3 - i] = 0
        # else:
        #     skip_channels = [0, 0, 0, 0]
        skip_channels = [512, 256, 128, 64]
        self.conv_feature1 = Conv2dReLU(1024, skip_channels[0], kernel_size=3, padding=1, use_batchnorm=True)
        self.conv_feature2 = Conv2dReLU(1024, skip_channels[1], kernel_size=3, padding=1, use_batchnorm=True)
        self.up2 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.conv_feature3 = Conv2dReLU(1024, skip_channels[2], kernel_size=3, padding=1, use_batchnorm=True)
        self.up3 = nn.UpsamplingBilinear2d(scale_factor=4)
        self.conv_feature4 = Conv2dReLU(1024, skip_channels[3], kernel_size=3, padding=1, use_batchnorm=True)
        self.up4 = nn.UpsamplingBilinear2d(scale_factor=8)

        # skip_channels = [128, 64, 32, 8]
        blocks = [
            DecoderBlock(in_ch, out_ch, sk_ch) for in_ch, out_ch, sk_ch in zip(in_channels, out_channels, skip_channels)
        ]
        self.blocks = nn.ModuleList(blocks)

    def TransShape(self, x, head_channels=512, up=0):
        B, n_patch, hidden = x.size()  # reshape from (B, n_patch, hidden) to (B, h, w, hidden)

        h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch))
        x = x.permute(0, 2, 1)
        x = x.contiguous().view(B, hidden, h, w)
        if up == 0:
            x = self.conv_feature1(x)
        elif up == 1:
            x = self.conv_feature2(x)
            x = self.up2(x)
        elif up == 2:
            x = self.conv_feature3(x)
            x = self.up3(x)
        elif up == 3:
            x = self.conv_feature4(x)
            x = self.up4(x)
        return x

    def forward(self, hidden_states, features=None):
        B, n_patch, hidden = hidden_states.size()  # reshape from (B, n_patch, hidden) to (B, h, w, hidden)
        h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch))
        x = hidden_states.permute(0, 2, 1)
        x = x.contiguous().view(B, hidden, h, w)
        x = self.conv_more(x)
        skip_channels = [512, 256, 128, 64]
        for i, decoder_block in enumerate(self.blocks):
            if features is not None:
                skip = self.TransShape(features[i], head_channels=skip_channels[i], up=i)
            else:
                skip = None
            x = decoder_block(x, skip=skip)
        return x


class MLAHead(nn.Module):
    def __init__(self, mla_channels=256, mlahead_channels=128, norm_cfg=None):
        super(MLAHead, self).__init__()
        self.head2 = nn.Sequential(nn.Conv2d(mla_channels, mlahead_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(mlahead_channels), nn.ReLU(),
                                   nn.Conv2d(
                                       mlahead_channels, mlahead_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(mlahead_channels), nn.ReLU())
        self.head3 = nn.Sequential(nn.Conv2d(mla_channels, mlahead_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(mlahead_channels), nn.ReLU(),
                                   nn.Conv2d(
                                       mlahead_channels, mlahead_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(mlahead_channels), nn.ReLU())
        self.head4 = nn.Sequential(nn.Conv2d(mla_channels, mlahead_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(mlahead_channels), nn.ReLU(),
                                   nn.Conv2d(
                                       mlahead_channels, mlahead_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(mlahead_channels), nn.ReLU())
        self.head5 = nn.Sequential(nn.Conv2d(mla_channels, mlahead_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(mlahead_channels), nn.ReLU(),
                                   nn.Conv2d(
                                       mlahead_channels, mlahead_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(mlahead_channels), nn.ReLU())

    def forward(self, mla_p2, mla_p3, mla_p4, mla_p5):
        head2 = F.interpolate(self.head2(
            mla_p2), (4 * mla_p2.shape[-2], 4 * mla_p2.shape[-1]), mode='bilinear', align_corners=True)
        head3 = F.interpolate(self.head3(
            mla_p3), (4 * mla_p3.shape[-2], 4 * mla_p3.shape[-1]), mode='bilinear', align_corners=True)
        head4 = F.interpolate(self.head4(
            mla_p4), (4 * mla_p4.shape[-2], 4 * mla_p4.shape[-1]), mode='bilinear', align_corners=True)
        head5 = F.interpolate(self.head5(
            mla_p5), (4 * mla_p5.shape[-2], 4 * mla_p5.shape[-1]), mode='bilinear', align_corners=True)
        return torch.cat([head2, head3, head4, head5], dim=1)


class VIT_MLAHead(nn.Module):
    """ Vision Transformer with support for patch or hybrid CNN input stage
    """

    def __init__(self, img_size=768, mla_channels=256, mlahead_channels=128, num_classes=6,
                 norm_layer=nn.BatchNorm2d, norm_cfg=None, **kwargs):
        super(VIT_MLAHead, self).__init__(**kwargs)
        self.img_size = img_size
        self.norm_cfg = norm_cfg
        self.mla_channels = mla_channels
        self.BatchNorm = norm_layer
        self.mlahead_channels = mlahead_channels
        self.num_classes = num_classes
        self.mlahead = MLAHead(mla_channels=self.mla_channels,
                               mlahead_channels=self.mlahead_channels, norm_cfg=self.norm_cfg)
        self.cls = nn.Conv2d(4 * self.mlahead_channels,
                             self.num_classes, 3, padding=1)

    def forward(self, x1, x2, x3, x4, h=14, w=14, target_h=None, target_w=None):
        B, n_patch, hidden = x1.size()
        if h == w:
            h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch))

        # Reshape all feature maps
        x1 = x1.permute(0, 2, 1).contiguous().view(B, hidden, h, w)
        x2 = x2.permute(0, 2, 1).contiguous().view(B, hidden, h, w)
        x3 = x3.permute(0, 2, 1).contiguous().view(B, hidden, h, w)
        x4 = x4.permute(0, 2, 1).contiguous().view(B, hidden, h, w)

        # Apply MLA head
        x = self.mlahead(x1, x2, x3, x4)
        x = self.cls(x)

        # Calculate target size - if original image wasn't patch-size divisible
        patch_size = 16  # assuming patch size of 16
        if target_h is not None and target_w is not None:
            target_size = (target_h, target_w)
        else:
            target_size = (h * patch_size, w * patch_size)

        # Interpolate to target size
        x = F.interpolate(x, size=target_size, mode='bilinear', align_corners=True)
        return x


def mae_vit_small_patch16(**kwargs):
    model = VisionTransformer(
        patch_size=16, embed_dim=768, depth=6, num_heads=12, mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    # Replace with flexible patch embedding
    model.patch_embed = FlexiblePatchEmbed(
        img_size=kwargs.get('img_size', 224),
        patch_size=16,
        in_chans=kwargs.get('in_chans', 3),
        embed_dim=768
    )
    return model


def vit_base_patch16(**kwargs):
    model = VisionTransformer(
        patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    # Replace with flexible patch embedding
    model.patch_embed = FlexiblePatchEmbed(
        img_size=kwargs.get('img_size', 224),
        patch_size=16,
        in_chans=kwargs.get('in_chans', 3),
        embed_dim=768
    )
    return model


def vit_large_patch16(**kwargs):
    model = VisionTransformer(
        patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    # Replace with flexible patch embedding
    model.patch_embed = FlexiblePatchEmbed(
        img_size=kwargs.get('img_size', 224),
        patch_size=16,
        in_chans=kwargs.get('in_chans', 3),
        embed_dim=1024
    )
    return model


def vit_huge_patch14(**kwargs):
    model = VisionTransformer(
        patch_size=14, embed_dim=1280, depth=32, num_heads=16, mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    # Replace with flexible patch embedding
    model.patch_embed = FlexiblePatchEmbed(
        img_size=kwargs.get('img_size', 224),
        patch_size=14,
        in_chans=kwargs.get('in_chans', 3),
        embed_dim=1280
    )
    return model
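A minimal usage sketch for the file above (not part of the commit): it builds the facies segmentation ViT through one of the factory functions and runs a forward pass on a deliberately non-square, non-16-divisible input so the reflect padding, positional-embedding interpolation, and decoder resizing paths are all exercised. The class count, channel count, and image size are assumptions, and a compatible timm install plus the util helpers are required.

import torch
import models_Facies

# 6 facies classes, single-channel seismic input; img_size only sets the default pos-embed grid
model = models_Facies.vit_large_patch16(num_classes=6, in_chans=1, img_size=768)
model.eval()

with torch.no_grad():
    x = torch.randn(1, 1, 300, 500)   # arbitrary H x W; forward_features pads to multiples of 16
    logits = model(x)                 # VIT_MLAHead interpolates back to the original size
    print(logits.shape)               # expected: torch.Size([1, 6, 300, 500])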
models_Fault.py
ADDED
@@ -0,0 +1,327 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# References:
# timm: https://github.com/rwightman/pytorch-image-models/tree/master/timm
# DeiT: https://github.com/facebookresearch/deit
# --------------------------------------------------------


from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F
import timm.models.vision_transformer
import numpy as np
from util.msssim import MSSSIM
from util.pos_embed import get_2d_sincos_pos_embed
from util.variable_pos_embed import interpolate_pos_embed_variable


class FlexiblePatchEmbed(nn.Module):
    """ 2D Image to Patch Embedding that handles variable input sizes """
    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, bias=True):
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.in_chans = in_chans
        self.embed_dim = embed_dim

        self.num_patches = (img_size // patch_size) ** 2  # default number of patches
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias)

    def forward(self, x):
        B, C, H, W = x.shape
        # Calculate number of patches dynamically
        self.num_patches = (H // self.patch_size) * (W // self.patch_size)
        x = self.proj(x).flatten(2).transpose(1, 2)  # BCHW -> BNC
        return x


class VisionTransformer(timm.models.vision_transformer.VisionTransformer):
    """ Vision Transformer with support for global average pooling
    """
    def __init__(self, global_pool=False, **kwargs):
        super(VisionTransformer, self).__init__(**kwargs)

        self.global_pool = global_pool
        self.decoder = DecoderCup(in_channels=[self.embed_dim, 256, 128, 64])

        self.segmentation_head = SegmentationHead(
            in_channels=64,
            out_channels=self.num_classes,
            kernel_size=1
        )
        if self.global_pool:
            norm_layer = kwargs['norm_layer']
            embed_dim = kwargs['embed_dim']
            self.fc_norm = norm_layer(embed_dim)
            del self.norm  # remove the original norm

    def interpolate_pos_encoding(self, x, h, w):
        """
        Interpolate positional embeddings for arbitrary input sizes
        """
        npatch = x.shape[1] - 1  # subtract 1 for cls token
        N = self.pos_embed.shape[1] - 1  # original number of patches

        if npatch == N and h == w:
            return self.pos_embed

        # Use the new variable position embedding utility
        return interpolate_pos_embed_variable(self.pos_embed, h, w, cls_token=True)


    def generate_mask(self, input_tensor, ratio):
        mask = torch.zeros_like(input_tensor)
        indices = torch.randperm(mask.size(3) // 16)[:int(mask.size(3) // 16 * ratio)]
        sorted_indices = torch.sort(indices)[0]
        for i in range(0, len(sorted_indices)):
            mask[:, :, :, sorted_indices[i] * 16:(sorted_indices[i] + 1) * 16] = 1
        return mask

    def forward_features(self, x):
        B, C, H, W = x.shape

        # Handle padding for non-16-divisible images
        patch_size = self.patch_embed.patch_size
        pad_h = (patch_size - H % patch_size) % patch_size
        pad_w = (patch_size - W % patch_size) % patch_size

        if pad_h > 0 or pad_w > 0:
            x = F.pad(x, (0, pad_w, 0, pad_h), mode='reflect')
            H_padded, W_padded = H + pad_h, W + pad_w
        else:
            H_padded, W_padded = H, W

        img = x
        x = self.patch_embed(x)

        _H, _W = H_padded // patch_size, W_padded // patch_size

        # Add class token
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        # Add interpolated positional embeddings
        pos_embed = self.interpolate_pos_encoding(x, _H, _W)
        x = x + pos_embed
        x = self.pos_drop(x)

        for blk in self.blocks:
            x = blk(x)
        x = self.norm(x)

        x = self.decoder(x[:, 1:, :], img)
        x = self.segmentation_head(x)
        return x

    def forward(self, x):

        x = self.forward_features(x)

        return x

    def inference(self, x):
        x = self.forward_features(x)
        x = F.softmax(x, dim=1)

        return x


class Conv2dReLU(nn.Sequential):
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            padding=0,
            stride=1,
            use_batchnorm=True,
    ):
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=not (use_batchnorm),
        )
        relu = nn.ReLU(inplace=True)

        bn = nn.BatchNorm2d(out_channels)

        super(Conv2dReLU, self).__init__(conv, bn, relu)


class DecoderBlock(nn.Module):
    def __init__(
            self,
            in_channels,
            out_channels,
            skip_channels=0,
            use_batchnorm=True,
    ):
        super().__init__()
        self.conv1 = Conv2dReLU(
            in_channels + skip_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_batchnorm=use_batchnorm,
        )
        self.conv2 = Conv2dReLU(
            out_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            use_batchnorm=use_batchnorm,
        )
        self.up = nn.UpsamplingBilinear2d(scale_factor=2)

    def forward(self, x, skip=None):
        x = self.up(x)
        if skip is not None:
            x = torch.cat([x, skip], dim=1)
        x = self.conv1(x)
        x = self.conv2(x)
        return x


class SegmentationHead(nn.Sequential):

    def __init__(self, in_channels, out_channels, kernel_size=1, upsampling=1):
        conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=1, padding=0)
        upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity()
        super().__init__(conv2d, upsampling)


class DecoderCup(nn.Module):
    def __init__(self, in_channels=[1024, 256, 128, 64]):
        super().__init__()
        head_channels = 512
        self.conv_more = Conv2dReLU(
            1,
            32,
            kernel_size=3,
            padding=1,
            use_batchnorm=True,
        )
        skip_channels = [0, 0, 0, 32]
        out_channels = [256, 128, 64, 64]
        blocks = [
            DecoderBlock(in_ch, out_ch, sk_ch) for in_ch, out_ch, sk_ch in zip(in_channels, out_channels, skip_channels)
        ]
        self.blocks = nn.ModuleList(blocks)

    def forward(self, hidden_states, img, features=None):
        B, n_patch, hidden = hidden_states.size()  # reshape from (B, n_patch, hidden) to (B, h, w, hidden)
        h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch))
        x = hidden_states.permute(0, 2, 1)
        x = x.contiguous().view(B, hidden, h, w)
        skip_channels = [None, None, None, self.conv_more(img)]
        for i, decoder_block in enumerate(self.blocks):
            x = decoder_block(x, skip=skip_channels[i])
        return x


def forward_loss(imgs, pred):
    """
    imgs: [N, 3, H, W]
    pred: [N, L, p*p*3]
    mask: [N, L], 0 is keep, 1 is remove,
    """
    loss1f = torch.nn.MSELoss()
    loss1 = loss1f(imgs, pred)
    loss2f = MSSSIM()
    loss2 = loss2f(imgs, pred)
    a = 0.5
    loss = (1 - a) * loss1 + a * loss2
    return loss


def weighted_cross_entropy(pred, target):
    """
    Compute the weighted cross entropy loss.
    NEED VERIFICATION
    """

    # Function to compute weighted cross entropy loss
    # target: [batch, channel, s, s]
    # pred: [batch, channel, s, s]

    # print('pred shape ', pred.shape)
    # print('target shape ', target.shape)
    # print('--------------')
    # print('sums of pred', torch.sum(pred))
    # print('sums of target', torch.sum(target))
    # beta is the fraction of non-fault pixels in the target (i.e. the zeroes in the target)
    beta = torch.mean(target)  # fraction of fault pixels
    beta = 1 - beta  # fraction of non-fault pixels
    beta = torch.clamp(beta, min=0.01, max=0.99)  # avoid division by zero

    # print('beta', beta)

    # Compute the weighted cross entropy loss
    loss = -(beta * target * torch.log(pred + 1e-8) + (1 - beta) * (1 - target) * torch.log(1 - pred + 1e-8))
    return torch.mean(loss)


def mae_vit_small_patch16(**kwargs):
    model = VisionTransformer(
        patch_size=16, embed_dim=768, depth=6, num_heads=12, mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    # Replace with flexible patch embedding
    model.patch_embed = FlexiblePatchEmbed(
        img_size=kwargs.get('img_size', 224),
        patch_size=16,
        in_chans=kwargs.get('in_chans', 3),
        embed_dim=768
    )
    return model


def vit_base_patch16(**kwargs):
    model = VisionTransformer(
        patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    # Replace with flexible patch embedding
    model.patch_embed = FlexiblePatchEmbed(
        img_size=kwargs.get('img_size', 224),
        patch_size=16,
        in_chans=kwargs.get('in_chans', 3),
        embed_dim=768
    )
    return model


def vit_large_patch16(**kwargs):
    model = VisionTransformer(
        patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    # Replace with flexible patch embedding
    model.patch_embed = FlexiblePatchEmbed(
        img_size=kwargs.get('img_size', 224),
        patch_size=16,
        in_chans=kwargs.get('in_chans', 3),
        embed_dim=1024
    )
    return model


def vit_huge_patch14(**kwargs):
    model = VisionTransformer(
        patch_size=14, embed_dim=1280, depth=32, num_heads=16, mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    # Replace with flexible patch embedding
    model.patch_embed = FlexiblePatchEmbed(
        img_size=kwargs.get('img_size', 224),
        patch_size=14,
        in_chans=kwargs.get('in_chans', 3),
        embed_dim=1280
    )
    return model
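A hypothetical training-step sketch for the fault model and the losses above (not part of the commit): it assumes binary fault masks in [0, 1], a single-channel input, and a model built with num_classes=1 so its output can be passed through a sigmoid before the pixel-wise weighted cross-entropy.

import torch
import models_Fault

model = models_Fault.vit_base_patch16(num_classes=1, in_chans=1, img_size=224)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

seis = torch.randn(2, 1, 224, 224)                       # seismic patches
fault = torch.randint(0, 2, (2, 1, 224, 224)).float()    # binary fault labels

pred = torch.sigmoid(model(seis))                        # probabilities in (0, 1)
loss = models_Fault.weighted_cross_entropy(pred, fault)  # beta re-weights the sparse fault class
loss.backward()
optimizer.step()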
util/__pycache__/datasets.cpython-311.pyc · ADDED · Binary file (23.6 kB)
util/__pycache__/datasets.cpython-312.pyc · ADDED · Binary file (32.1 kB)
util/__pycache__/datasets.cpython-36.pyc · ADDED · Binary file (19.2 kB)
util/__pycache__/datasets.cpython-37.pyc · ADDED · Binary file (19.6 kB)
util/__pycache__/lars.cpython-36.pyc · ADDED · Binary file (1.34 kB)
util/__pycache__/lr_decay.cpython-311.pyc · ADDED · Binary file (2.66 kB)
util/__pycache__/lr_decay.cpython-312.pyc · ADDED · Binary file (2.39 kB)
util/__pycache__/lr_decay.cpython-36.pyc · ADDED · Binary file (1.6 kB)
util/__pycache__/lr_decay.cpython-37.pyc · ADDED · Binary file (1.59 kB)
util/__pycache__/lr_sched.cpython-311.pyc · ADDED · Binary file (1.02 kB)
util/__pycache__/lr_sched.cpython-312.pyc · ADDED · Binary file (1.12 kB)
util/__pycache__/lr_sched.cpython-36.pyc · ADDED · Binary file (595 Bytes)
util/__pycache__/lr_sched.cpython-37.pyc · ADDED · Binary file (599 Bytes)
util/__pycache__/metrics.cpython-36.pyc · ADDED · Binary file (3.83 kB)
util/__pycache__/misc.cpython-311.pyc · ADDED · Binary file (21.4 kB)
util/__pycache__/misc.cpython-312.pyc · ADDED · Binary file (19.4 kB)
util/__pycache__/misc.cpython-36.pyc · ADDED · Binary file (10.8 kB)
util/__pycache__/misc.cpython-37.pyc · ADDED · Binary file (10.8 kB)
util/__pycache__/msssim.cpython-311.pyc · ADDED · Binary file (8.9 kB)
util/__pycache__/msssim.cpython-312.pyc · ADDED · Binary file (7.84 kB)
util/__pycache__/msssim.cpython-36.pyc · ADDED · Binary file (4.51 kB)
util/__pycache__/msssim.cpython-37.pyc · ADDED · Binary file (4.49 kB)
util/__pycache__/pos_embed.cpython-311.pyc · ADDED · Binary file (4.35 kB)
util/__pycache__/pos_embed.cpython-312.pyc · ADDED · Binary file (4.14 kB)
util/__pycache__/pos_embed.cpython-36.pyc · ADDED · Binary file (2.43 kB)
util/__pycache__/pos_embed.cpython-37.pyc · ADDED · Binary file (2.42 kB)
util/__pycache__/size_aware_batching.cpython-312.pyc · ADDED · Binary file (10.6 kB)
util/__pycache__/skeletonize.cpython-312.pyc · ADDED · Binary file (35.5 kB)
util/__pycache__/tools.cpython-311.pyc · ADDED · Binary file (7.76 kB)
util/__pycache__/tools.cpython-312.pyc · ADDED · Binary file (6.81 kB)
util/__pycache__/tools.cpython-36.pyc · ADDED · Binary file (4.25 kB)
util/__pycache__/tools.cpython-37.pyc · ADDED · Binary file (4.26 kB)
util/__pycache__/variable_pos_embed.cpython-312.pyc · ADDED · Binary file (5.42 kB)
util/crop.py
ADDED
@@ -0,0 +1,42 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import math

import torch

from torchvision import transforms
from torchvision.transforms import functional as F


class RandomResizedCrop(transforms.RandomResizedCrop):
    """
    RandomResizedCrop for matching TF/TPU implementation: no for-loop is used.
    This may lead to results different with torchvision's version.
    Following BYOL's TF code:
    https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206
    """
    @staticmethod
    def get_params(img, scale, ratio):
        width, height = F._get_image_size(img)
        area = height * width

        target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
        log_ratio = torch.log(torch.tensor(ratio))
        aspect_ratio = torch.exp(
            torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
        ).item()

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        w = min(w, width)
        h = min(h, height)

        i = torch.randint(0, height - h + 1, size=(1,)).item()
        j = torch.randint(0, width - w + 1, size=(1,)).item()

        return i, j, h, w
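A short usage sketch for util/crop.py (not part of the commit): the class drops into a standard torchvision pipeline in place of transforms.RandomResizedCrop. Note that get_params calls the private torchvision helper F._get_image_size, which older torchvision releases provide; newer releases expose get_image_size instead, so the version pin matters.

from PIL import Image
from torchvision import transforms
from util.crop import RandomResizedCrop

train_transform = transforms.Compose([
    RandomResizedCrop(224, scale=(0.2, 1.0), interpolation=3),  # 3 = bicubic
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

img = Image.new('L', (640, 480))   # placeholder grayscale image
out = train_transform(img)         # tensor of shape [1, 224, 224]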
util/datasets.py
ADDED
@@ -0,0 +1,599 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# References:
# DeiT: https://github.com/facebookresearch/deit
# --------------------------------------------------------

import os
import PIL

import os, random, glob
import numpy as np
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
from os.path import isfile, join
import segyio
from itertools import permutations

random.seed(42)

from torchvision import datasets, transforms

from timm.data import create_transform
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD


def build_dataset(is_train, args):
    transform = build_transform(is_train, args)

    root = os.path.join(args.data_path, 'train' if is_train else 'val')
    dataset = datasets.ImageFolder(root, transform=transform)

    print(dataset)

    return dataset


def build_transform(is_train, args):
    mean = IMAGENET_DEFAULT_MEAN
    std = IMAGENET_DEFAULT_STD
    # train transform
    if is_train:
        # this should always dispatch to transforms_imagenet_train
        transform = create_transform(
            input_size=args.input_size,
            is_training=True,
            color_jitter=args.color_jitter,
            auto_augment=args.aa,
            interpolation='bicubic',
            re_prob=args.reprob,
            re_mode=args.remode,
            re_count=args.recount,
            mean=mean,
            std=std,
        )
        return transform

    # eval transform
    t = []
    if args.input_size <= 224:
        crop_pct = 224 / 256
    else:
        crop_pct = 1.0
    size = int(args.input_size / crop_pct)
    t.append(
        transforms.Resize(size, interpolation=PIL.Image.BICUBIC),  # to maintain same ratio w.r.t. 224 images
    )
    t.append(transforms.CenterCrop(args.input_size))

    t.append(transforms.ToTensor())
    t.append(transforms.Normalize(mean, std))
    return transforms.Compose(t)


## pretrain
class SeismicSet(data.Dataset):

    def __init__(self, path, input_size) -> None:
        super().__init__()
        # self.file_list = os.listdir(path)
        # self.file_list = [os.path.join(path, f) for f in self.file_list]
        self.get_file_list(path)
        self.input_size = input_size
        print(len(self.file_list))

    def __len__(self) -> int:
        return len(self.file_list)
        # return 100000

    def __getitem__(self, index):
        d = np.fromfile(self.file_list[index], dtype=np.float32)
        d = d.reshape(1, self.input_size, self.input_size)
        d = (d - d.mean()) / (d.std() + 1e-6)

        # return to_transforms(d, self.input_size)
        return d, torch.tensor([1])

    def get_file_list(self, path):
        dirs = [os.path.join(path, f) for f in os.listdir(path)]
        self.file_list = dirs

        # for ds in dirs:
        #     if os.path.isdir(ds):
        #         self.file_list += [os.path.join(ds, f) for f in os.listdir(ds)]

        return random.shuffle(self.file_list)


def to_transforms(d, input_size):
    t = transforms.Compose([
        transforms.RandomResizedCrop(input_size,
                                     scale=(0.2, 1.0),
                                     interpolation=3),  # 3 is bicubic
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])

    return t(d)


### finetune
class FacesSet(data.Dataset):
    # folder/train/data/**.dat, folder/train/label/**.dat
    # folder/test/data/**.dat, folder/test/label/**.dat
    def __init__(self,
                 folder,
                 shape=[768, 768],
                 is_train=True) -> None:
        super().__init__()
        self.shape = shape

        # self.data_list = sorted(glob.glob(folder + 'seismic/*.dat'))
        self.data_list = [folder + 'seismic/' + str(f) + '.dat' for f in range(117)]

        n = len(self.data_list)
        if is_train:
            self.data_list = self.data_list[:100]
        elif not is_train:
            self.data_list = self.data_list[100:]
        self.label_list = [
            f.replace('/seismic/', '/label/') for f in self.data_list
        ]

    def __getitem__(self, index):
        d = np.fromfile(self.data_list[index], np.float32)
        d = d.reshape([1] + self.shape)
        l = np.fromfile(self.label_list[index], np.float32).reshape(self.shape) - 1
        l = l.astype(int)
        return torch.tensor(d), torch.tensor(l)

    def __len__(self):
        return len(self.data_list)


class SaltSet(data.Dataset):

    def __init__(self,
                 folder,
                 shape=[224, 224],
                 is_train=True) -> None:
        super().__init__()
        self.shape = shape
        self.data_list = [folder + 'seismic/' + str(f) + '.dat' for f in range(4000)]
        n = len(self.data_list)
        if is_train:
            self.data_list = self.data_list[:3500]
        elif not is_train:
            self.data_list = self.data_list[3500:]
        self.label_list = [
            f.replace('/seismic/', '/label/') for f in self.data_list
        ]

    def __getitem__(self, index):
        d = np.fromfile(self.data_list[index], np.float32)
        d = d.reshape([1] + self.shape)
        l = np.fromfile(self.label_list[index], np.float32).reshape(self.shape)
        l = l.astype(int)
        return torch.tensor(d), torch.tensor(l)

    def __len__(self):
        return len(self.data_list)


class InterpolationSet(data.Dataset):
    # folder/train/data/**.dat, folder/train/label/**.dat
    # folder/test/data/**.dat, folder/test/label/**.dat
    def __init__(self,
                 folder,
                 shape=[224, 224],
                 is_train=True) -> None:
        super().__init__()
        self.shape = shape
        self.data_list = [folder + str(f) + '.dat' for f in range(6000)]
        n = len(self.data_list)
        if is_train:
            self.data_list = self.data_list
        elif not is_train:
            self.data_list = [folder + 'U' + str(f) + '.dat' for f in range(2000, 4000)]
        self.label_list = self.data_list

    def __getitem__(self, index):
        d = np.fromfile(self.data_list[index], np.float32)
        d = d.reshape([1] + self.shape)
        return torch.tensor(d), torch.tensor(d)

    def __len__(self):
        return len(self.data_list)
        # return 10000


class DenoiseSet(data.Dataset):
    def __init__(self,
                 folder,
                 shape=[224, 224],
                 is_train=True) -> None:
        super().__init__()
        self.shape = shape
        self.data_list = [folder + 'seismic/' + str(f) + '.dat' for f in range(2000)]
        n = len(self.data_list)
        if is_train:
            self.data_list = self.data_list
            self.label_list = [f.replace('/seismic/', '/label/') for f in self.data_list]
        elif not is_train:
            self.data_list = [folder + 'field/' + str(f) + '.dat' for f in range(4000)]
            self.label_list = self.data_list

    def __getitem__(self, index):
        d = np.fromfile(self.data_list[index], np.float32)
        d = d.reshape([1] + self.shape)
        # d = (d - d.mean())/d.std()
        l = np.fromfile(self.label_list[index], np.float32)
        l = l.reshape([1] + self.shape)
        # l = (l - d.mean())/l.std()
        return torch.tensor(d), torch.tensor(l)

    def __len__(self):
        return len(self.data_list)


class ReflectSet(data.Dataset):
    # folder/train/data/**.dat, folder/train/label/**.dat
    # folder/test/data/**.dat, folder/test/label/**.dat
    def __init__(self,
                 folder,
                 shape=[224, 224],
                 is_train=True) -> None:
        super().__init__()
        self.shape = shape
        self.data_list = [folder + 'seismic/' + str(f) + '.dat' for f in range(2200)]

        n = len(self.data_list)
        if is_train:
            self.data_list = self.data_list
            self.label_list = [
                f.replace('/seismic/', '/label/') for f in self.data_list
            ]
        elif not is_train:
            self.data_list = [folder + 'SEAMseismic/' + str(f) + '.dat' for f in range(4000)]
            self.label_list = [
                f.replace('/SEAMseismic/', '/SEAMreflect/') for f in self.data_list
            ]

    def __getitem__(self, index):
        d = np.fromfile(self.data_list[index], np.float32)
        d = d - d.mean()
        d = d / (d.std() + 1e-6)
        d = d.reshape([1] + self.shape)
        l = np.fromfile(self.label_list[index], np.float32)
        l = l - l.mean()
        l = l / (l.std() + 1e-6)
        l = l.reshape([1] + self.shape)
        return torch.tensor(d), torch.tensor(l)

    def __len__(self):
        return len(self.data_list)


class ThebeSet(data.Dataset):
    def __init__(self, folder, shape=[224, 224], mode='train') -> None:
        super().__init__()

        self.folder = folder
        if not os.path.exists(folder):
            raise FileNotFoundError(f"The folder {folder} does not exist.")

        self.num_files = len(os.listdir(join(folder, 'fault')))
        self.shape = shape
        self.fault_list = [folder + '/fault/{i}.npy'.format(i=i) for i in range(1, self.num_files + 1)]
        self.seis_list = [folder + '/seis/{i}.npy'.format(i=i) for i in range(1, self.num_files + 1)]

        self.train_size = int(0.75 * self.num_files)
        self.val_size = int(0.15 * self.num_files)
        self.test_size = self.num_files - self.train_size - self.val_size

        self.train_index = self.train_size
        self.val_index = self.train_index + self.val_size

        if mode == 'train':
            self.fault_list = self.fault_list[:self.train_index]
            self.seis_list = self.seis_list[:self.train_index]
        elif mode == 'val':
            self.fault_list = self.fault_list[self.train_index:self.val_index]
            self.seis_list = self.seis_list[self.train_index:self.val_index]
        elif mode == 'test':
            self.fault_list = self.fault_list[self.val_index:]
            self.seis_list = self.seis_list[self.val_index:]
        else:
            raise ValueError("Mode must be 'train', 'val', or 'test'.")

    def __len__(self):
        return len(self.fault_list)

    def retrieve_patch(self, fault, seis):
        # image will (probably) be of size [3174, 1537]
        # return a patch of size [224, 224]

        patch_height = self.shape[0]
        patch_width = self.shape[1]

        h, w = fault.shape
        if h < patch_height or w < patch_width:
            raise ValueError(f"Image dimensions must be at least {patch_height}x{patch_width}.")

        top = random.randint(0, h - patch_height)
        left = random.randint(0, w - patch_width)

        return fault[top:top + patch_height, left:left + patch_width], seis[top:top + patch_height, left:left + patch_width]

    def random_transform(self, fault, seis):
        # Apply the same random transformations to the fault and seismic data
        # Mirror the patch horizontally
        if random.random() > 0.5:
            fault = np.fliplr(fault)
            seis = np.fliplr(seis)

        # Mirror the patch vertically
        if random.random() > 0.5:
            fault = np.flipud(fault)
            seis = np.flipud(seis)

        return fault, seis

    def __getitem__(self, index):
        # need to see if we do normalization here (i.e. what data pre-treatment we do)
        fault = np.load(self.fault_list[index])
        seis = np.load(self.seis_list[index])

        fault, seis = self.retrieve_patch(fault, seis)
        fault, seis = self.random_transform(fault, seis)

        seis = (seis - seis.mean()) / (seis.std() + 1e-6)

        fault = torch.tensor(fault.copy(), dtype=torch.float32).unsqueeze(0)
        seis = torch.tensor(seis.copy(), dtype=torch.float32).unsqueeze(0)

        return seis, fault


class FSegSet(data.Dataset):
    def __init__(self, folder, shape=[128, 128], mode='train') -> None:
        super().__init__()

        self.folder = folder
        if not os.path.exists(folder):
            raise FileNotFoundError(f"The folder {folder} does not exist.")

        self.shape = shape
        self.mode = mode

        if mode == 'train':
            self.fault_path = join(self.folder, 'train/fault')
            self.seis_path = join(self.folder, 'train/seis')
        elif mode == 'val':
            self.fault_path = join(self.folder, 'val/fault')
            self.seis_path = join(self.folder, 'val/seis')
        else:
            raise ValueError("Mode must be 'train' or 'val'.")

        self.fault_list = [join(self.fault_path, f) for f in os.listdir(self.fault_path) if f.endswith('.npy')]
        self.seis_list = [join(self.seis_path, f) for f in os.listdir(self.seis_path) if f.endswith('.npy')]

    def __len__(self):
        return len(self.fault_list)

    def __getitem__(self, index):

        fault_img, seis_img = np.load(self.fault_list[index]), np.load(self.seis_list[index])
        # These will be 128x128

        seis_img = (seis_img - seis_img.mean()) / (seis_img.std() + 1e-6)

        fault = torch.tensor(fault_img.copy(), dtype=torch.float32).unsqueeze(0)
        seis = torch.tensor(seis_img.copy(), dtype=torch.float32).unsqueeze(0)

        return seis, fault


class F3DFaciesSet(data.Dataset):
    def __init__(self, folder, shape=[128, 128], mode='train', random_resize=False):
        super().__init__()

        self.folder = folder
        if not os.path.exists(folder):
            raise FileNotFoundError(f"The folder {folder} does not exist.")
+
|
| 418 |
+
self.seises = np.load(join(folder, "{}/seismic.npy".format(mode)))
|
| 419 |
+
self.labels = np.load(join(folder, "{}/labels.npy".format(mode)))
|
| 420 |
+
self.image_shape = shape
|
| 421 |
+
|
| 422 |
+
if mode == 'train':
|
| 423 |
+
self.size_categories = [
|
| 424 |
+
(401, 701),
|
| 425 |
+
(701, 255),
|
| 426 |
+
(401, 255)
|
| 427 |
+
]
|
| 428 |
+
elif mode == 'val':
|
| 429 |
+
self.size_categories = [
|
| 430 |
+
(601, 200),
|
| 431 |
+
(200, 255),
|
| 432 |
+
(601, 255)
|
| 433 |
+
]
|
| 434 |
+
|
| 435 |
+
elif mode == 'test':
|
| 436 |
+
self.size_categories = [
|
| 437 |
+
(701, 255),
|
| 438 |
+
(200, 701),
|
| 439 |
+
(200, 255)
|
| 440 |
+
]
|
| 441 |
+
|
| 442 |
+
else:
|
| 443 |
+
raise ValueError("Mode must be 'train', 'val', or 'test'.")
|
| 444 |
+
def __len__(self):
|
| 445 |
+
# We will take cross sections along each dimension, so the length is the sum of all dimensions
|
| 446 |
+
|
| 447 |
+
return sum(self.seises.shape)
|
| 448 |
+
|
| 449 |
+
def random_transform(self, label, seis):
|
| 450 |
+
# Apply the same random transformations to the fault and seismic data
|
| 451 |
+
# Mirror the patch horizontally
|
| 452 |
+
if random.random() > 0.5:
|
| 453 |
+
label = np.fliplr(label)
|
| 454 |
+
seis = np.fliplr(seis)
|
| 455 |
+
|
| 456 |
+
# Mirror the patch vertically
|
| 457 |
+
if random.random() > 0.5:
|
| 458 |
+
label = np.flipud(label)
|
| 459 |
+
seis = np.flipud(seis)
|
| 460 |
+
|
| 461 |
+
return label, seis
|
| 462 |
+
|
| 463 |
+
def __getitem__(self, index):
|
| 464 |
+
|
| 465 |
+
m1, m2, m3 = self.seises.shape
|
| 466 |
+
|
| 467 |
+
if index < m1:
|
| 468 |
+
seis, label = self.seises[index, :, :], self.labels[index, :, :]
|
| 469 |
+
elif index < m1 + m2:
|
| 470 |
+
seis, label = self.seises[:, index - m1, :], self.labels[:, index - m1, :]
|
| 471 |
+
elif index < m1 + m2 + m3:
|
| 472 |
+
seis, label = self.seises[:, :, index - m1 - m2], self.labels[:, :, index - m1 - m2]
|
| 473 |
+
else:
|
| 474 |
+
raise IndexError("Index out of bounds")
|
| 475 |
+
|
| 476 |
+
seis, label = self.random_transform(seis, label)
|
| 477 |
+
seis = (seis - seis.mean()) / (seis.std() + 1e-6)
|
| 478 |
+
|
| 479 |
+
seis, label = torch.tensor(seis.copy(), dtype=torch.float32).unsqueeze(0), torch.tensor(label.copy(), dtype=torch.float32).unsqueeze(0)
|
| 480 |
+
|
| 481 |
+
# label is now shape [1, H, W]
|
| 482 |
+
# we want shape [6, H, W] with each slice being a binary mask depending on the int value of label
|
| 483 |
+
label = label.squeeze(0)
|
| 484 |
+
label = (label == torch.arange(6, device=label.device).view(6, 1, 1)).float()
|
| 485 |
+
|
| 486 |
+
return seis, label
|
| 487 |
+
|
| 488 |
+
class P3DFaciesSet(data.Dataset):
|
| 489 |
+
def __init__(self, folder, shape=[128, 128], mode='train', random_resize = False):
|
| 490 |
+
super().__init__()
|
| 491 |
+
|
| 492 |
+
self.folder = folder
|
| 493 |
+
if not os.path.exists(folder):
|
| 494 |
+
raise FileNotFoundError(f"The folder {folder} does not exist.")
|
| 495 |
+
|
| 496 |
+
self.random_resize = random_resize
|
| 497 |
+
|
| 498 |
+
# Validation set will be validation set from F3DSet
|
| 499 |
+
if mode == 'val': mode = 'train' # TEMPORARY SINCE P3D does not have labelled val set
|
| 500 |
+
|
| 501 |
+
self.mode = mode
|
| 502 |
+
self.image_shape = shape
|
| 503 |
+
|
| 504 |
+
self.s_path = join(folder, "{}/seismic.segy".format(mode))
|
| 505 |
+
self.l_path = join(folder, "{}/labels.segy".format(mode))
|
| 506 |
+
|
| 507 |
+
if mode != 'val':
|
| 508 |
+
with segyio.open(self.s_path, ignore_geometry=True) as seis_file:
|
| 509 |
+
self.seises = seis_file.trace.raw[:]
|
| 510 |
+
|
| 511 |
+
if self.mode in ['val', 'train']:
|
| 512 |
+
with segyio.open(self.l_path, ignore_geometry=True) as label_file:
|
| 513 |
+
self.labels = label_file.trace.raw[:]
|
| 514 |
+
else:
|
| 515 |
+
# Since the test files are unlabeled
|
| 516 |
+
self.labels = np.zeros_like(self.seises)
|
| 517 |
+
else:
|
| 518 |
+
f3d_file_path = "C:\\Users\\abhalekar\\Desktop\\DATASETS\\F3D_facies_DATASET"
|
| 519 |
+
self.seises = np.load(join(f3d_file_path, "val/seismic.npy"))
|
| 520 |
+
self.labels = np.load(join(f3d_file_path, "val/labels.npy"))
|
| 521 |
+
|
| 522 |
+
if mode == 'train':
|
| 523 |
+
m1, m2, m3 = 590, 782, 1006
|
| 524 |
+
elif mode == 'val':
|
| 525 |
+
m1, m2, m3 = 601, 200, 255
|
| 526 |
+
elif mode == 'test_1':
|
| 527 |
+
m1, m2, m3 = 841, 334, 1006
|
| 528 |
+
elif mode == 'test_2':
|
| 529 |
+
m1, m2, m3 = 251, 782, 1006
|
| 530 |
+
else:
|
| 531 |
+
raise ValueError("Mode must be 'train', 'test_2', 'val', or 'test_1'.")
|
| 532 |
+
|
| 533 |
+
self.size_categories = list(permutations([m1, m2, m3], 2))
|
| 534 |
+
|
| 535 |
+
self.seises = self.seises.reshape(m1, m2, m3)
|
| 536 |
+
self.labels = self.labels.reshape(m1, m2, m3)
|
| 537 |
+
|
| 538 |
+
def __len__(self):
|
| 539 |
+
# We will take cross sections along the first 2 dimensions ONLY
|
| 540 |
+
return self.seises.shape[0] + self.seises.shape[1]
|
| 541 |
+
|
| 542 |
+
def _random_transform(self, label, seis):
|
| 543 |
+
# Apply the same random transformations to the fault and seismic data
|
| 544 |
+
# Mirror the patch horizontally
|
| 545 |
+
if random.random() > 0.5:
|
| 546 |
+
label = np.fliplr(label)
|
| 547 |
+
seis = np.fliplr(seis)
|
| 548 |
+
|
| 549 |
+
# Mirror the patch vertically
|
| 550 |
+
if random.random() > 0.5:
|
| 551 |
+
label = np.flipud(label)
|
| 552 |
+
seis = np.flipud(seis)
|
| 553 |
+
|
| 554 |
+
# random rotation to 2D image label,seis
|
| 555 |
+
#r_int = random.randint(0, 3)
|
| 556 |
+
#label = np.rot90(label, r_int)
|
| 557 |
+
#seis = np.rot90(seis, r_int)
|
| 558 |
+
|
| 559 |
+
return label, seis
|
| 560 |
+
|
| 561 |
+
def _random_resize(self, label, seis, min_size = (256, 256)):
|
| 562 |
+
# Randomly resize the label and seismic data
|
| 563 |
+
r_height = random.randint(min_size[0], seis.shape[0])
|
| 564 |
+
r_width = random.randint(min_size[1], seis.shape[1])
|
| 565 |
+
|
| 566 |
+
r_pos_x = random.randint(0, seis.shape[0] - r_height)
|
| 567 |
+
r_pos_y = random.randint(0, seis.shape[1] - r_width)
|
| 568 |
+
|
| 569 |
+
label = label[r_pos_x:r_pos_x + r_height, r_pos_y:r_pos_y + r_width]
|
| 570 |
+
seis = seis[r_pos_x:r_pos_x + r_height, r_pos_y:r_pos_y + r_width]
|
| 571 |
+
|
| 572 |
+
return label, seis
|
| 573 |
+
|
| 574 |
+
def __getitem__(self, index):
|
| 575 |
+
|
| 576 |
+
m1, m2, m3 = self.seises.shape
|
| 577 |
+
|
| 578 |
+
if index < m1:
|
| 579 |
+
seis, label = self.seises[index, :, :], self.labels[index, :, :]
|
| 580 |
+
elif index < m1 + m2:
|
| 581 |
+
seis, label = self.seises[:, index - m1, :], self.labels[:, index - m1, :]
|
| 582 |
+
elif index < m1 + m2 + m3:
|
| 583 |
+
seis, label = self.seises[:, :, index - m1 - m2], self.labels[:, :, index - m1 - m2]
|
| 584 |
+
else:
|
| 585 |
+
raise IndexError("Index out of bounds")
|
| 586 |
+
|
| 587 |
+
seis, label = self._random_transform(seis, label)
|
| 588 |
+
if self.random_resize: seis, label = self._random_resize(seis, label)
|
| 589 |
+
|
| 590 |
+
seis = (seis - seis.mean()) / (seis.std() + 1e-6)
|
| 591 |
+
|
| 592 |
+
seis, label = torch.tensor(seis.copy(), dtype=torch.float32).unsqueeze(0), torch.tensor(label.copy(), dtype=torch.float32).unsqueeze(0)
|
| 593 |
+
|
| 594 |
+
# label is now shape [1, H, W]
|
| 595 |
+
# we want shape [6, H, W] with each slice being a binary mask depending on the int value of label
|
| 596 |
+
label = label.squeeze(0)
|
| 597 |
+
label = (label == torch.arange(1, 7, device=label.device).view(6, 1, 1)).float()
|
| 598 |
+
|
| 599 |
+
return seis, label
|
util/lars.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
# --------------------------------------------------------
|
| 7 |
+
# LARS optimizer, implementation from MoCo v3:
|
| 8 |
+
# https://github.com/facebookresearch/moco-v3
|
| 9 |
+
# --------------------------------------------------------
|
| 10 |
+
|
| 11 |
+
import torch
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class LARS(torch.optim.Optimizer):
|
| 15 |
+
"""
|
| 16 |
+
LARS optimizer, no rate scaling or weight decay for parameters <= 1D.
|
| 17 |
+
"""
|
| 18 |
+
def __init__(self, params, lr=0, weight_decay=0, momentum=0.9, trust_coefficient=0.001):
|
| 19 |
+
defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum, trust_coefficient=trust_coefficient)
|
| 20 |
+
super().__init__(params, defaults)
|
| 21 |
+
|
| 22 |
+
@torch.no_grad()
|
| 23 |
+
def step(self):
|
| 24 |
+
for g in self.param_groups:
|
| 25 |
+
for p in g['params']:
|
| 26 |
+
dp = p.grad
|
| 27 |
+
|
| 28 |
+
if dp is None:
|
| 29 |
+
continue
|
| 30 |
+
|
| 31 |
+
if p.ndim > 1: # if not normalization gamma/beta or bias
|
| 32 |
+
dp = dp.add(p, alpha=g['weight_decay'])
|
| 33 |
+
param_norm = torch.norm(p)
|
| 34 |
+
update_norm = torch.norm(dp)
|
| 35 |
+
one = torch.ones_like(param_norm)
|
| 36 |
+
q = torch.where(param_norm > 0.,
|
| 37 |
+
torch.where(update_norm > 0,
|
| 38 |
+
(g['trust_coefficient'] * param_norm / update_norm), one),
|
| 39 |
+
one)
|
| 40 |
+
dp = dp.mul(q)
|
| 41 |
+
|
| 42 |
+
param_state = self.state[p]
|
| 43 |
+
if 'mu' not in param_state:
|
| 44 |
+
param_state['mu'] = torch.zeros_like(p)
|
| 45 |
+
mu = param_state['mu']
|
| 46 |
+
mu.mul_(g['momentum']).add_(dp)
|
| 47 |
+
p.add_(mu, alpha=-g['lr'])
|
util/lr_decay.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
# --------------------------------------------------------
|
| 7 |
+
# References:
|
| 8 |
+
# ELECTRA https://github.com/google-research/electra
|
| 9 |
+
# BEiT: https://github.com/microsoft/unilm/tree/master/beit
|
| 10 |
+
# --------------------------------------------------------
|
| 11 |
+
|
| 12 |
+
import json
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75):
|
| 16 |
+
"""
|
| 17 |
+
Parameter groups for layer-wise lr decay
|
| 18 |
+
Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58
|
| 19 |
+
"""
|
| 20 |
+
param_group_names = {}
|
| 21 |
+
param_groups = {}
|
| 22 |
+
|
| 23 |
+
num_layers = len(model.blocks) + 1
|
| 24 |
+
|
| 25 |
+
layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1))
|
| 26 |
+
|
| 27 |
+
for n, p in model.named_parameters():
|
| 28 |
+
if not p.requires_grad:
|
| 29 |
+
continue
|
| 30 |
+
|
| 31 |
+
# no decay: all 1D parameters and model specific ones
|
| 32 |
+
if p.ndim == 1 or n in no_weight_decay_list:
|
| 33 |
+
g_decay = "no_decay"
|
| 34 |
+
this_decay = 0.
|
| 35 |
+
else:
|
| 36 |
+
g_decay = "decay"
|
| 37 |
+
this_decay = weight_decay
|
| 38 |
+
|
| 39 |
+
layer_id = get_layer_id_for_vit(n, num_layers)
|
| 40 |
+
group_name = "layer_%d_%s" % (layer_id, g_decay)
|
| 41 |
+
|
| 42 |
+
if group_name not in param_group_names:
|
| 43 |
+
this_scale = layer_scales[layer_id]
|
| 44 |
+
|
| 45 |
+
param_group_names[group_name] = {
|
| 46 |
+
"lr_scale": this_scale,
|
| 47 |
+
"weight_decay": this_decay,
|
| 48 |
+
"params": [],
|
| 49 |
+
}
|
| 50 |
+
param_groups[group_name] = {
|
| 51 |
+
"lr_scale": this_scale,
|
| 52 |
+
"weight_decay": this_decay,
|
| 53 |
+
"params": [],
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
param_group_names[group_name]["params"].append(n)
|
| 57 |
+
param_groups[group_name]["params"].append(p)
|
| 58 |
+
|
| 59 |
+
# print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2))
|
| 60 |
+
|
| 61 |
+
return list(param_groups.values())
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def get_layer_id_for_vit(name, num_layers):
|
| 65 |
+
"""
|
| 66 |
+
Assign a parameter with its layer id
|
| 67 |
+
Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33
|
| 68 |
+
"""
|
| 69 |
+
if name in ['cls_token', 'pos_embed']:
|
| 70 |
+
return 0
|
| 71 |
+
elif name.startswith('patch_embed'):
|
| 72 |
+
return 0
|
| 73 |
+
elif name.startswith('blocks'):
|
| 74 |
+
return int(name.split('.')[1]) + 1
|
| 75 |
+
else:
|
| 76 |
+
return num_layers
|
util/lr_sched.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
import math
|
| 8 |
+
|
| 9 |
+
def adjust_learning_rate(optimizer, epoch, args):
|
| 10 |
+
"""Decay the learning rate with half-cycle cosine after warmup"""
|
| 11 |
+
if epoch < args.warmup_epochs:
|
| 12 |
+
lr = args.lr * epoch / args.warmup_epochs
|
| 13 |
+
else:
|
| 14 |
+
lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \
|
| 15 |
+
(1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs)))
|
| 16 |
+
for param_group in optimizer.param_groups:
|
| 17 |
+
if "lr_scale" in param_group:
|
| 18 |
+
param_group["lr"] = lr * param_group["lr_scale"]
|
| 19 |
+
else:
|
| 20 |
+
param_group["lr"] = lr
|
| 21 |
+
return lr
|
util/metrics.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
"""
|
| 3 |
+
refer to https://github.com/jfzhang95/pytorch-deeplab-xception/blob/master/utils/metrics.py
|
| 4 |
+
"""
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
__all__ = ['SegmentationMetric']
|
| 8 |
+
|
| 9 |
+
"""
|
| 10 |
+
confusionMetric # 注意:此处横着代表预测值,竖着代表真实值,与之前介绍的相反
|
| 11 |
+
P\L P N
|
| 12 |
+
P TP FP
|
| 13 |
+
N FN TN
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class SegmentationMetric(object):
|
| 18 |
+
def __init__(self, numClass):
|
| 19 |
+
self.numClass = numClass
|
| 20 |
+
self.confusionMatrix = np.zeros((self.numClass,) * 2) # 混淆矩阵(空)
|
| 21 |
+
|
| 22 |
+
def pixelAccuracy(self):
|
| 23 |
+
# return all class overall pixel accuracy 正确的像素占总像素的比例
|
| 24 |
+
# PA = acc = (TP + TN) / (TP + TN + FP + TN)
|
| 25 |
+
acc = np.diag(self.confusionMatrix).sum() / self.confusionMatrix.sum()
|
| 26 |
+
return acc
|
| 27 |
+
|
| 28 |
+
def classPixelAccuracy(self):
|
| 29 |
+
# return each category pixel accuracy(A more accurate way to call it precision)
|
| 30 |
+
# acc = (TP) / TP + FP
|
| 31 |
+
classAcc = np.diag(self.confusionMatrix) / self.confusionMatrix.sum(axis=1)
|
| 32 |
+
return classAcc # 返回的是一个列表值,如:[0.90, 0.80, 0.96],表示类别1 2 3各类别的预测准确率
|
| 33 |
+
|
| 34 |
+
def meanPixelAccuracy(self):
|
| 35 |
+
"""
|
| 36 |
+
Mean Pixel Accuracy(MPA,均像素精度):是PA的一种简单提升,计算每个类内被正确分类像素数的比例,之后求所有类的平均。
|
| 37 |
+
:return:
|
| 38 |
+
"""
|
| 39 |
+
classAcc = self.classPixelAccuracy()
|
| 40 |
+
meanAcc = np.nanmean(classAcc) # np.nanmean 求平均值,nan表示遇到Nan类型,其值取为0
|
| 41 |
+
return meanAcc # 返回单个值,如:np.nanmean([0.90, 0.80, 0.96, nan, nan]) = (0.90 + 0.80 + 0.96) / 3 = 0.89
|
| 42 |
+
|
| 43 |
+
def IntersectionOverUnion(self):
|
| 44 |
+
# Intersection = TP Union = TP + FP + FN
|
| 45 |
+
# IoU = TP / (TP + FP + FN)
|
| 46 |
+
intersection = np.diag(self.confusionMatrix) # 取对角元素的值,返回列表
|
| 47 |
+
union = np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) - np.diag(
|
| 48 |
+
self.confusionMatrix) # axis = 1表示混淆矩阵行的值,返回列表; axis = 0表示取混淆矩阵列的值,返回列表
|
| 49 |
+
IoU = intersection / union # 返回列表,其值为各个类别的IoU
|
| 50 |
+
return IoU
|
| 51 |
+
|
| 52 |
+
def meanIntersectionOverUnion(self):
|
| 53 |
+
mIoU = np.nanmean(self.IntersectionOverUnion()) # 求各类别IoU的平均
|
| 54 |
+
return mIoU
|
| 55 |
+
|
| 56 |
+
def genConfusionMatrix(self, imgPredict, imgLabel): #
|
| 57 |
+
"""
|
| 58 |
+
同FCN中score.py的fast_hist()函数,计算混淆矩阵
|
| 59 |
+
:param imgPredict:
|
| 60 |
+
:param imgLabel:
|
| 61 |
+
:return: 混淆矩阵
|
| 62 |
+
"""
|
| 63 |
+
# remove classes from unlabeled pixels in gt image and predict
|
| 64 |
+
mask = (imgLabel >= 0) & (imgLabel < self.numClass)
|
| 65 |
+
label = self.numClass * imgLabel[mask] + imgPredict[mask]
|
| 66 |
+
count = np.bincount(label, minlength=self.numClass ** 2)
|
| 67 |
+
confusionMatrix = count.reshape(self.numClass, self.numClass)
|
| 68 |
+
# print(confusionMatrix)
|
| 69 |
+
return confusionMatrix
|
| 70 |
+
|
| 71 |
+
def Frequency_Weighted_Intersection_over_Union(self):
|
| 72 |
+
"""
|
| 73 |
+
FWIoU,频权交并比:为MIoU的一种提升,这种方法根据每个类出现的频率为其设置权重。
|
| 74 |
+
FWIOU = [(TP+FN)/(TP+FP+TN+FN)] *[TP / (TP + FP + FN)]
|
| 75 |
+
"""
|
| 76 |
+
freq = np.sum(self.confusion_matrix, axis=1) / np.sum(self.confusion_matrix)
|
| 77 |
+
iu = np.diag(self.confusion_matrix) / (
|
| 78 |
+
np.sum(self.confusion_matrix, axis=1) + np.sum(self.confusion_matrix, axis=0) -
|
| 79 |
+
np.diag(self.confusion_matrix))
|
| 80 |
+
FWIoU = (freq[freq > 0] * iu[freq > 0]).sum()
|
| 81 |
+
return FWIoU
|
| 82 |
+
|
| 83 |
+
def addBatch(self, imgPredict, imgLabel):
|
| 84 |
+
assert imgPredict.shape == imgLabel.shape
|
| 85 |
+
self.confusionMatrix += self.genConfusionMatrix(imgPredict, imgLabel) # 得到混淆矩阵
|
| 86 |
+
return self.confusionMatrix
|
| 87 |
+
|
| 88 |
+
def reset(self):
|
| 89 |
+
self.confusionMatrix = np.zeros((self.numClass, self.numClass))
|
| 90 |
+
|
util/misc.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
# --------------------------------------------------------
|
| 7 |
+
# References:
|
| 8 |
+
# DeiT: https://github.com/facebookresearch/deit
|
| 9 |
+
# BEiT: https://github.com/microsoft/unilm/tree/master/beit
|
| 10 |
+
# --------------------------------------------------------
|
| 11 |
+
|
| 12 |
+
import builtins
|
| 13 |
+
import datetime
|
| 14 |
+
import os
|
| 15 |
+
import time
|
| 16 |
+
from collections import defaultdict, deque
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
import torch
|
| 20 |
+
import torch.distributed as dist
|
| 21 |
+
from torch import inf
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class SmoothedValue(object):
|
| 25 |
+
"""Track a series of values and provide access to smoothed values over a
|
| 26 |
+
window or the global series average.
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
def __init__(self, window_size=20, fmt=None):
|
| 30 |
+
if fmt is None:
|
| 31 |
+
fmt = "{median:.4f} ({global_avg:.4f})"
|
| 32 |
+
self.deque = deque(maxlen=window_size)
|
| 33 |
+
self.total = 0.0
|
| 34 |
+
self.count = 0
|
| 35 |
+
self.fmt = fmt
|
| 36 |
+
|
| 37 |
+
def update(self, value, n=1):
|
| 38 |
+
self.deque.append(value)
|
| 39 |
+
self.count += n
|
| 40 |
+
self.total += value * n
|
| 41 |
+
|
| 42 |
+
def synchronize_between_processes(self):
|
| 43 |
+
"""
|
| 44 |
+
Warning: does not synchronize the deque!
|
| 45 |
+
"""
|
| 46 |
+
if not is_dist_avail_and_initialized():
|
| 47 |
+
return
|
| 48 |
+
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
|
| 49 |
+
dist.barrier()
|
| 50 |
+
dist.all_reduce(t)
|
| 51 |
+
t = t.tolist()
|
| 52 |
+
self.count = int(t[0])
|
| 53 |
+
self.total = t[1]
|
| 54 |
+
|
| 55 |
+
@property
|
| 56 |
+
def median(self):
|
| 57 |
+
d = torch.tensor(list(self.deque))
|
| 58 |
+
return d.median().item()
|
| 59 |
+
|
| 60 |
+
@property
|
| 61 |
+
def avg(self):
|
| 62 |
+
d = torch.tensor(list(self.deque), dtype=torch.float32)
|
| 63 |
+
return d.mean().item()
|
| 64 |
+
|
| 65 |
+
@property
|
| 66 |
+
def global_avg(self):
|
| 67 |
+
return self.total / self.count
|
| 68 |
+
|
| 69 |
+
@property
|
| 70 |
+
def max(self):
|
| 71 |
+
return max(self.deque)
|
| 72 |
+
|
| 73 |
+
@property
|
| 74 |
+
def value(self):
|
| 75 |
+
return self.deque[-1]
|
| 76 |
+
|
| 77 |
+
def __str__(self):
|
| 78 |
+
return self.fmt.format(
|
| 79 |
+
median=self.median,
|
| 80 |
+
avg=self.avg,
|
| 81 |
+
global_avg=self.global_avg,
|
| 82 |
+
max=self.max,
|
| 83 |
+
value=self.value)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class MetricLogger(object):
|
| 87 |
+
def __init__(self, delimiter="\t"):
|
| 88 |
+
self.meters = defaultdict(SmoothedValue)
|
| 89 |
+
self.delimiter = delimiter
|
| 90 |
+
|
| 91 |
+
def update(self, **kwargs):
|
| 92 |
+
for k, v in kwargs.items():
|
| 93 |
+
if v is None:
|
| 94 |
+
continue
|
| 95 |
+
if isinstance(v, torch.Tensor):
|
| 96 |
+
v = v.item()
|
| 97 |
+
assert isinstance(v, (float, int))
|
| 98 |
+
self.meters[k].update(v)
|
| 99 |
+
|
| 100 |
+
def __getattr__(self, attr):
|
| 101 |
+
if attr in self.meters:
|
| 102 |
+
return self.meters[attr]
|
| 103 |
+
if attr in self.__dict__:
|
| 104 |
+
return self.__dict__[attr]
|
| 105 |
+
raise AttributeError("'{}' object has no attribute '{}'".format(
|
| 106 |
+
type(self).__name__, attr))
|
| 107 |
+
|
| 108 |
+
def __str__(self):
|
| 109 |
+
loss_str = []
|
| 110 |
+
for name, meter in self.meters.items():
|
| 111 |
+
loss_str.append(
|
| 112 |
+
"{}: {}".format(name, str(meter))
|
| 113 |
+
)
|
| 114 |
+
return self.delimiter.join(loss_str)
|
| 115 |
+
|
| 116 |
+
def synchronize_between_processes(self):
|
| 117 |
+
for meter in self.meters.values():
|
| 118 |
+
meter.synchronize_between_processes()
|
| 119 |
+
|
| 120 |
+
def add_meter(self, name, meter):
|
| 121 |
+
self.meters[name] = meter
|
| 122 |
+
|
| 123 |
+
def log_every(self, iterable, print_freq, header=None):
|
| 124 |
+
i = 0
|
| 125 |
+
if not header:
|
| 126 |
+
header = ''
|
| 127 |
+
start_time = time.time()
|
| 128 |
+
end = time.time()
|
| 129 |
+
iter_time = SmoothedValue(fmt='{avg:.4f}')
|
| 130 |
+
data_time = SmoothedValue(fmt='{avg:.4f}')
|
| 131 |
+
space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
|
| 132 |
+
log_msg = [
|
| 133 |
+
header,
|
| 134 |
+
'[{0' + space_fmt + '}/{1}]',
|
| 135 |
+
'eta: {eta}',
|
| 136 |
+
'{meters}',
|
| 137 |
+
'time: {time}',
|
| 138 |
+
'data: {data}'
|
| 139 |
+
]
|
| 140 |
+
if torch.cuda.is_available():
|
| 141 |
+
log_msg.append('max mem: {memory:.0f}')
|
| 142 |
+
log_msg = self.delimiter.join(log_msg)
|
| 143 |
+
MB = 1024.0 * 1024.0
|
| 144 |
+
for obj in iterable:
|
| 145 |
+
data_time.update(time.time() - end)
|
| 146 |
+
yield obj
|
| 147 |
+
iter_time.update(time.time() - end)
|
| 148 |
+
if i % print_freq == 0 or i == len(iterable) - 1:
|
| 149 |
+
eta_seconds = iter_time.global_avg * (len(iterable) - i)
|
| 150 |
+
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
|
| 151 |
+
if torch.cuda.is_available():
|
| 152 |
+
print(log_msg.format(
|
| 153 |
+
i, len(iterable), eta=eta_string,
|
| 154 |
+
meters=str(self),
|
| 155 |
+
time=str(iter_time), data=str(data_time),
|
| 156 |
+
memory=torch.cuda.max_memory_allocated() / MB))
|
| 157 |
+
else:
|
| 158 |
+
print(log_msg.format(
|
| 159 |
+
i, len(iterable), eta=eta_string,
|
| 160 |
+
meters=str(self),
|
| 161 |
+
time=str(iter_time), data=str(data_time)))
|
| 162 |
+
i += 1
|
| 163 |
+
end = time.time()
|
| 164 |
+
total_time = time.time() - start_time
|
| 165 |
+
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
|
| 166 |
+
print('{} Total time: {} ({:.4f} s / it)'.format(
|
| 167 |
+
header, total_time_str, total_time / len(iterable)))
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def setup_for_distributed(is_master):
|
| 171 |
+
"""
|
| 172 |
+
This function disables printing when not in master process
|
| 173 |
+
"""
|
| 174 |
+
builtin_print = builtins.print
|
| 175 |
+
|
| 176 |
+
def print(*args, **kwargs):
|
| 177 |
+
force = kwargs.pop('force', False)
|
| 178 |
+
force = force or (get_world_size() > 8)
|
| 179 |
+
if is_master or force:
|
| 180 |
+
now = datetime.datetime.now().time()
|
| 181 |
+
builtin_print('[{}] '.format(now), end='') # print with time stamp
|
| 182 |
+
builtin_print(*args, **kwargs)
|
| 183 |
+
|
| 184 |
+
builtins.print = print
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def is_dist_avail_and_initialized():
|
| 188 |
+
if not dist.is_available():
|
| 189 |
+
return False
|
| 190 |
+
if not dist.is_initialized():
|
| 191 |
+
return False
|
| 192 |
+
return True
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def get_world_size():
|
| 196 |
+
if not is_dist_avail_and_initialized():
|
| 197 |
+
return 1
|
| 198 |
+
return dist.get_world_size()
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def get_rank():
|
| 202 |
+
if not is_dist_avail_and_initialized():
|
| 203 |
+
return 0
|
| 204 |
+
return dist.get_rank()
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def is_main_process():
|
| 208 |
+
return get_rank() == 0
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def save_on_master(*args, **kwargs):
|
| 212 |
+
if is_main_process():
|
| 213 |
+
torch.save(*args, **kwargs)
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def init_distributed_mode(args):
|
| 217 |
+
if args.dist_on_itp:
|
| 218 |
+
args.rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
|
| 219 |
+
args.world_size = int(os.environ['OMPI_COMM_WORLD_SIZE'])
|
| 220 |
+
args.gpu = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
|
| 221 |
+
args.dist_url = "tcp://%s:%s" % (os.environ['MASTER_ADDR'], os.environ['MASTER_PORT'])
|
| 222 |
+
os.environ['LOCAL_RANK'] = str(args.gpu)
|
| 223 |
+
os.environ['RANK'] = str(args.rank)
|
| 224 |
+
os.environ['WORLD_SIZE'] = str(args.world_size)
|
| 225 |
+
# ["RANK", "WORLD_SIZE", "MASTER_ADDR", "MASTER_PORT", "LOCAL_RANK"]
|
| 226 |
+
elif 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
|
| 227 |
+
args.rank = int(os.environ["RANK"])
|
| 228 |
+
args.world_size = int(os.environ['WORLD_SIZE'])
|
| 229 |
+
args.gpu = int(os.environ['LOCAL_RANK'])
|
| 230 |
+
elif 'SLURM_PROCID' in os.environ:
|
| 231 |
+
args.rank = int(os.environ['SLURM_PROCID'])
|
| 232 |
+
args.gpu = args.rank % torch.cuda.device_count()
|
| 233 |
+
else:
|
| 234 |
+
print('Not using distributed mode')
|
| 235 |
+
setup_for_distributed(is_master=True) # hack
|
| 236 |
+
args.distributed = False
|
| 237 |
+
return
|
| 238 |
+
|
| 239 |
+
args.distributed = True
|
| 240 |
+
|
| 241 |
+
torch.cuda.set_device(args.gpu)
|
| 242 |
+
args.dist_backend = 'nccl'
|
| 243 |
+
print('| distributed init (rank {}): {}, gpu {}'.format(
|
| 244 |
+
args.rank, args.dist_url, args.gpu), flush=True)
|
| 245 |
+
torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
|
| 246 |
+
world_size=args.world_size, rank=args.rank)
|
| 247 |
+
torch.distributed.barrier()
|
| 248 |
+
setup_for_distributed(args.rank == 0)
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
class NativeScalerWithGradNormCount:
|
| 252 |
+
state_dict_key = "amp_scaler"
|
| 253 |
+
|
| 254 |
+
def __init__(self):
|
| 255 |
+
self._scaler = torch.cuda.amp.GradScaler()
|
| 256 |
+
|
| 257 |
+
def __call__(self, loss, optimizer, clip_grad=None, parameters=None, create_graph=False, update_grad=True):
|
| 258 |
+
self._scaler.scale(loss).backward(create_graph=create_graph)
|
| 259 |
+
if update_grad:
|
| 260 |
+
if clip_grad is not None:
|
| 261 |
+
assert parameters is not None
|
| 262 |
+
self._scaler.unscale_(optimizer) # unscale the gradients of optimizer's assigned params in-place
|
| 263 |
+
norm = torch.nn.utils.clip_grad_norm_(parameters, clip_grad)
|
| 264 |
+
else:
|
| 265 |
+
self._scaler.unscale_(optimizer)
|
| 266 |
+
norm = get_grad_norm_(parameters)
|
| 267 |
+
self._scaler.step(optimizer)
|
| 268 |
+
self._scaler.update()
|
| 269 |
+
else:
|
| 270 |
+
norm = None
|
| 271 |
+
return norm
|
| 272 |
+
|
| 273 |
+
def state_dict(self):
|
| 274 |
+
return self._scaler.state_dict()
|
| 275 |
+
|
| 276 |
+
def load_state_dict(self, state_dict):
|
| 277 |
+
self._scaler.load_state_dict(state_dict)
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def get_grad_norm_(parameters, norm_type: float = 2.0) -> torch.Tensor:
|
| 281 |
+
if isinstance(parameters, torch.Tensor):
|
| 282 |
+
parameters = [parameters]
|
| 283 |
+
parameters = [p for p in parameters if p.grad is not None]
|
| 284 |
+
norm_type = float(norm_type)
|
| 285 |
+
if len(parameters) == 0:
|
| 286 |
+
return torch.tensor(0.)
|
| 287 |
+
device = parameters[0].grad.device
|
| 288 |
+
if norm_type == inf:
|
| 289 |
+
total_norm = max(p.grad.detach().abs().max().to(device) for p in parameters)
|
| 290 |
+
else:
|
| 291 |
+
total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), norm_type)
|
| 292 |
+
return total_norm
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def save_model(args, epoch, model, model_without_ddp, optimizer, loss_scaler):
|
| 296 |
+
output_dir = Path(args.output_dir)
|
| 297 |
+
epoch_name = str(epoch)
|
| 298 |
+
if loss_scaler is not None:
|
| 299 |
+
checkpoint_paths = [output_dir / ('checkpoint-%s.pth' % epoch_name)]
|
| 300 |
+
for checkpoint_path in checkpoint_paths:
|
| 301 |
+
to_save = {
|
| 302 |
+
'model': model_without_ddp.state_dict(),
|
| 303 |
+
'optimizer': optimizer.state_dict(),
|
| 304 |
+
'epoch': epoch,
|
| 305 |
+
'scaler': loss_scaler.state_dict(),
|
| 306 |
+
'args': args,
|
| 307 |
+
}
|
| 308 |
+
|
| 309 |
+
save_on_master(to_save, checkpoint_path)
|
| 310 |
+
else:
|
| 311 |
+
client_state = {'epoch': epoch}
|
| 312 |
+
model.save_checkpoint(save_dir=args.output_dir, tag="checkpoint-%s" % epoch_name, client_state=client_state)
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
def load_model(args, model_without_ddp, optimizer, loss_scaler):
|
| 316 |
+
if args.resume:
|
| 317 |
+
if args.resume.startswith('https'):
|
| 318 |
+
checkpoint = torch.hub.load_state_dict_from_url(
|
| 319 |
+
args.resume, map_location='cpu', check_hash=True)
|
| 320 |
+
else:
|
| 321 |
+
checkpoint = torch.load(args.resume, map_location='cpu')
|
| 322 |
+
model_without_ddp.load_state_dict(checkpoint['model'])
|
| 323 |
+
print("Resume checkpoint %s" % args.resume)
|
| 324 |
+
if 'optimizer' in checkpoint and 'epoch' in checkpoint and not (hasattr(args, 'eval') and args.eval):
|
| 325 |
+
optimizer.load_state_dict(checkpoint['optimizer'])
|
| 326 |
+
args.start_epoch = checkpoint['epoch'] + 1
|
| 327 |
+
if 'scaler' in checkpoint:
|
| 328 |
+
loss_scaler.load_state_dict(checkpoint['scaler'])
|
| 329 |
+
print("With optim & sched!")
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
def all_reduce_mean(x):
|
| 333 |
+
world_size = get_world_size()
|
| 334 |
+
if world_size > 1:
|
| 335 |
+
x_reduce = torch.tensor(x).cuda()
|
| 336 |
+
dist.all_reduce(x_reduce)
|
| 337 |
+
x_reduce /= world_size
|
| 338 |
+
return x_reduce.item()
|
| 339 |
+
else:
|
| 340 |
+
return x
|
util/msssim.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn.functional as F
|
| 3 |
+
from math import exp
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def gaussian(window_size, sigma):
|
| 7 |
+
gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)])
|
| 8 |
+
return gauss/gauss.sum()
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def create_window(window_size, channel=1):
|
| 12 |
+
_1D_window = gaussian(window_size, 1.5).unsqueeze(1)
|
| 13 |
+
_2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
|
| 14 |
+
window = _2D_window.expand(channel, 1, window_size, window_size).contiguous()
|
| 15 |
+
return window
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def ssim(img1, img2, window_size=11, window=None, size_average=True, full=False, val_range=None):
|
| 19 |
+
# Value range can be different from 255. Other common ranges are 1 (sigmoid) and 2 (tanh).
|
| 20 |
+
if val_range is None:
|
| 21 |
+
if torch.max(img1) > 128:
|
| 22 |
+
max_val = 255
|
| 23 |
+
else:
|
| 24 |
+
max_val = 1
|
| 25 |
+
|
| 26 |
+
if torch.min(img1) < -0.5:
|
| 27 |
+
min_val = -1
|
| 28 |
+
else:
|
| 29 |
+
min_val = 0
|
| 30 |
+
L = max_val - min_val
|
| 31 |
+
else:
|
| 32 |
+
L = val_range
|
| 33 |
+
|
| 34 |
+
padd = 0
|
| 35 |
+
(_, channel, height, width) = img1.size()
|
| 36 |
+
if window is None:
|
| 37 |
+
real_size = min(window_size, height, width)
|
| 38 |
+
window = create_window(real_size, channel=channel).to(img1.device)
|
| 39 |
+
|
| 40 |
+
mu1 = F.conv2d(img1, window, padding=padd, groups=channel)
|
| 41 |
+
mu2 = F.conv2d(img2, window, padding=padd, groups=channel)
|
| 42 |
+
|
| 43 |
+
mu1_sq = mu1.pow(2)
|
| 44 |
+
mu2_sq = mu2.pow(2)
|
| 45 |
+
mu1_mu2 = mu1 * mu2
|
| 46 |
+
|
| 47 |
+
sigma1_sq = F.conv2d(img1 * img1, window, padding=padd, groups=channel) - mu1_sq
|
| 48 |
+
sigma2_sq = F.conv2d(img2 * img2, window, padding=padd, groups=channel) - mu2_sq
|
| 49 |
+
sigma12 = F.conv2d(img1 * img2, window, padding=padd, groups=channel) - mu1_mu2
|
| 50 |
+
|
| 51 |
+
C1 = (0.01 * L) ** 2
|
| 52 |
+
C2 = (0.03 * L) ** 2
|
| 53 |
+
|
| 54 |
+
v1 = 2.0 * sigma12 + C2
|
| 55 |
+
v2 = sigma1_sq + sigma2_sq + C2
|
| 56 |
+
cs = torch.mean(v1 / v2) # contrast sensitivity
|
| 57 |
+
|
| 58 |
+
ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2)
|
| 59 |
+
|
| 60 |
+
if size_average:
|
| 61 |
+
ret = ssim_map.mean()
|
| 62 |
+
else:
|
| 63 |
+
ret = ssim_map.mean(1).mean(1).mean(1)
|
| 64 |
+
|
| 65 |
+
if full:
|
| 66 |
+
return ret, cs
|
| 67 |
+
return ret
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def msssim(img1, img2, window_size=11, size_average=True, val_range=None, normalize=True):
|
| 71 |
+
device = img1.device
|
| 72 |
+
weights = torch.FloatTensor([0.0448, 0.2856, 0.3001, 0.2363, 0.1333]).to(device)
|
| 73 |
+
levels = weights.size()[0]
|
| 74 |
+
mssim = []
|
| 75 |
+
mcs = []
|
| 76 |
+
for _ in range(levels):
|
| 77 |
+
sim, cs = ssim(img1, img2, window_size=window_size, size_average=size_average, full=True, val_range=val_range)
|
| 78 |
+
mssim.append(sim)
|
| 79 |
+
mcs.append(cs)
|
| 80 |
+
|
| 81 |
+
img1 = F.avg_pool2d(img1, (2, 2))
|
| 82 |
+
img2 = F.avg_pool2d(img2, (2, 2))
|
| 83 |
+
|
| 84 |
+
mssim = torch.stack(mssim)
|
| 85 |
+
mcs = torch.stack(mcs)
|
| 86 |
+
|
| 87 |
+
# Normalize (to avoid NaNs during training unstable models, not compliant with original definition)
|
| 88 |
+
if normalize:
|
| 89 |
+
mssim = (mssim + 1) / 2
|
| 90 |
+
mcs = (mcs + 1) / 2
|
| 91 |
+
|
| 92 |
+
pow1 = mcs ** weights
|
| 93 |
+
pow2 = mssim ** weights
|
| 94 |
+
# From Matlab implementation https://ece.uwaterloo.ca/~z70wang/research/iwssim/
|
| 95 |
+
output = torch.prod(pow1[:-1] * pow2[-1])
|
| 96 |
+
return output
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# Classes to re-use window
|
| 100 |
+
class SSIM(torch.nn.Module):
|
| 101 |
+
def __init__(self, window_size=11, size_average=True, val_range=None):
|
| 102 |
+
super(SSIM, self).__init__()
|
| 103 |
+
self.window_size = window_size
|
| 104 |
+
self.size_average = size_average
|
| 105 |
+
self.val_range = val_range
|
| 106 |
+
|
| 107 |
+
# Assume 1 channel for SSIM
|
| 108 |
+
self.channel = 1
|
| 109 |
+
self.window = create_window(window_size)
|
| 110 |
+
|
| 111 |
+
def forward(self, img1, img2):
|
| 112 |
+
(_, channel, _, _) = img1.size()
|
| 113 |
+
|
| 114 |
+
if channel == self.channel and self.window.dtype == img1.dtype:
|
| 115 |
+
window = self.window
|
| 116 |
+
else:
|
| 117 |
+
window = create_window(self.window_size, channel).to(img1.device).type(img1.dtype)
|
| 118 |
+
self.window = window
|
| 119 |
+
self.channel = channel
|
| 120 |
+
|
| 121 |
+
return 1 - ssim(img1, img2, window=window, window_size=self.window_size, size_average=self.size_average)
|
| 122 |
+
|
| 123 |
+
class MSSSIM(torch.nn.Module):
|
| 124 |
+
def __init__(self, window_size=11, size_average=True, channel=1):
|
| 125 |
+
super(MSSSIM, self).__init__()
|
| 126 |
+
self.window_size = window_size
|
| 127 |
+
self.size_average = size_average
|
| 128 |
+
self.channel = channel
|
| 129 |
+
|
| 130 |
+
def forward(self, img1, img2):
|
| 131 |
+
# TODO: store window between calls if possible
|
| 132 |
+
return 1 - msssim(img1, img2, window_size=self.window_size, size_average=self.size_average)
|
| 133 |
+
|
| 134 |
+
class PSNR(torch.nn.Module):
|
| 135 |
+
def __init__(self):
|
| 136 |
+
super(PSNR, self).__init__()
|
| 137 |
+
|
| 138 |
+
def torchPSNR(self,tar_img, prd_img):
|
| 139 |
+
imdff = torch.clamp(prd_img,0,1) - torch.clamp(tar_img,0,1)
|
| 140 |
+
rmse = (imdff**2).mean().sqrt()
|
| 141 |
+
ps = 20*torch.log10(1/rmse)
|
| 142 |
+
return ps
|
| 143 |
+
|
| 144 |
+
def forward(self, img1, img2):
|
| 145 |
+
# TODO: store window between calls if possible
|
| 146 |
+
return self.torchPSNR(img1, img2)
|
util/pos_embed.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
# --------------------------------------------------------
|
| 7 |
+
# Position embedding utils
|
| 8 |
+
# --------------------------------------------------------
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
import torch
|
| 13 |
+
|
| 14 |
+
# --------------------------------------------------------
|
| 15 |
+
# 2D sine-cosine position embedding
|
| 16 |
+
# References:
|
| 17 |
+
# Transformer: https://github.com/tensorflow/models/blob/master/official/nlp/transformer/model_utils.py
|
| 18 |
+
# MoCo v3: https://github.com/facebookresearch/moco-v3
|
| 19 |
+
# --------------------------------------------------------
|
| 20 |
+
def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
|
| 21 |
+
"""
|
| 22 |
+
grid_size: int of the grid height and width
|
| 23 |
+
return:
|
| 24 |
+
pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
|
| 25 |
+
"""
|
| 26 |
+
grid_h = np.arange(grid_size, dtype=np.float32)
|
| 27 |
+
grid_w = np.arange(grid_size, dtype=np.float32)
|
| 28 |
+
grid = np.meshgrid(grid_w, grid_h) # here w goes first
|
| 29 |
+
grid = np.stack(grid, axis=0)
|
| 30 |
+
|
| 31 |
+
grid = grid.reshape([2, 1, grid_size, grid_size])
|
| 32 |
+
pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
|
| 33 |
+
if cls_token:
|
| 34 |
+
pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
|
| 35 |
+
return pos_embed
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
|
| 39 |
+
assert embed_dim % 2 == 0
|
| 40 |
+
|
| 41 |
+
# use half of dimensions to encode grid_h
|
| 42 |
+
emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2)
|
| 43 |
+
emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2)
|
| 44 |
+
|
| 45 |
+
emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D)
|
| 46 |
+
return emb
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
|
| 50 |
+
"""
|
| 51 |
+
embed_dim: output dimension for each position
|
| 52 |
+
pos: a list of positions to be encoded: size (M,)
|
| 53 |
+
out: (M, D)
|
| 54 |
+
"""
|
| 55 |
+
assert embed_dim % 2 == 0
|
| 56 |
+
omega = np.arange(embed_dim // 2, dtype=np.float)
|
| 57 |
+
omega /= embed_dim / 2.
|
| 58 |
+
omega = 1. / 10000**omega # (D/2,)
|
| 59 |
+
|
| 60 |
+
pos = pos.reshape(-1) # (M,)
|
| 61 |
+
out = np.einsum('m,d->md', pos, omega) # (M, D/2), outer product
|
| 62 |
+
|
| 63 |
+
emb_sin = np.sin(out) # (M, D/2)
|
| 64 |
+
emb_cos = np.cos(out) # (M, D/2)
|
| 65 |
+
|
| 66 |
+
emb = np.concatenate([emb_sin, emb_cos], axis=1) # (M, D)
|
| 67 |
+
return emb
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# --------------------------------------------------------
|
| 71 |
+
# Interpolate position embeddings for high-resolution
|
| 72 |
+
# References:
|
| 73 |
+
# DeiT: https://github.com/facebookresearch/deit
|
| 74 |
+
# --------------------------------------------------------
|
| 75 |
+
def interpolate_pos_embed(model, checkpoint_model,newsize1=None,newsize2=None):
|
| 76 |
+
if 'pos_embed' in checkpoint_model:
|
| 77 |
+
pos_embed_checkpoint = checkpoint_model['pos_embed']
|
| 78 |
+
embedding_size = pos_embed_checkpoint.shape[-1]
|
| 79 |
+
num_patches = model.patch_embed.num_patches
|
| 80 |
+
num_extra_tokens = model.pos_embed.shape[-2] - num_patches
|
| 81 |
+
# height (== width) for the checkpoint position embedding
|
| 82 |
+
orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
|
| 83 |
+
# height (== width) for the new position embedding
|
| 84 |
+
new_size = int(num_patches ** 0.5)
|
| 85 |
+
# class_token and dist_token are kept unchanged
|
| 86 |
+
if orig_size != new_size:
|
| 87 |
+
if newsize1 == None:
|
| 88 |
+
newsize1,newsize2 = new_size,new_size
|
| 89 |
+
print("Position interpolate from %dx%d to %dx%d" % (orig_size, orig_size, newsize1, newsize2))
|
| 90 |
+
extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
|
| 91 |
+
# only the position tokens are interpolated
|
| 92 |
+
pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
|
| 93 |
+
pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
|
| 94 |
+
pos_tokens = torch.nn.functional.interpolate(
|
| 95 |
+
pos_tokens, size=(newsize1, newsize2), mode='bicubic', align_corners=False)
|
| 96 |
+
pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
|
| 97 |
+
new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
|
| 98 |
+
checkpoint_model['pos_embed'] = new_pos_embed
|
| 99 |
+
# elif orig_size > new_size:
|
| 100 |
+
# print("Position generate from %dx%d to %dx%d" % (orig_size, orig_size, new_size, new_size))
|
| 101 |
+
# pos_tokens = get_2d_sincos_pos_embed(embedding_size, new_size, cls_token=True)
|
| 102 |
+
# pos_tokens = torch.from_numpy(pos_tokens).float().unsqueeze(0)
|
| 103 |
+
# checkpoint_model['pos_embed'] = pos_tokens
|
| 104 |
+
|
util/pos_embedtest.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
|
| 4 |
+
# This source code is licensed under the license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
# --------------------------------------------------------
|
| 7 |
+
# Position embedding utils
|
| 8 |
+
# --------------------------------------------------------
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
import torch
|
| 13 |
+
|
| 14 |
+
# --------------------------------------------------------
|
| 15 |
+
# 2D sine-cosine position embedding
|
| 16 |
+
# References:
|
| 17 |
+
# Transformer: https://github.com/tensorflow/models/blob/master/official/nlp/transformer/model_utils.py
|
| 18 |
+
# MoCo v3: https://github.com/facebookresearch/moco-v3
|
| 19 |
+
# --------------------------------------------------------
|
| 20 |
+
def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
|
| 21 |
+
"""
|
| 22 |
+
grid_size: int of the grid height and width
|
| 23 |
+
return:
|
| 24 |
+
pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
|
| 25 |
+
"""
|
| 26 |
+
grid_h = np.arange(grid_size, dtype=np.float32)
|
| 27 |
+
grid_w = np.arange(grid_size, dtype=np.float32)
|
| 28 |
+
grid = np.meshgrid(grid_w, grid_h) # here w goes first
|
| 29 |
+
grid = np.stack(grid, axis=0)
|
| 30 |
+
|
| 31 |
+
grid = grid.reshape([2, 1, grid_size, grid_size])
|
| 32 |
+
pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
|
| 33 |
+
if cls_token:
|
| 34 |
+
pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
|
| 35 |
+
return pos_embed
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
|
| 39 |
+
assert embed_dim % 2 == 0
|
| 40 |
+
|
| 41 |
+
# use half of dimensions to encode grid_h
|
| 42 |
+
emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2)
|
| 43 |
+
emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2)
|
| 44 |
+
|
| 45 |
+
emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D)
|
| 46 |
+
return emb
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
|
| 50 |
+
"""
|
| 51 |
+
embed_dim: output dimension for each position
|
| 52 |
+
pos: a list of positions to be encoded: size (M,)
|
| 53 |
+
out: (M, D)
|
| 54 |
+
"""
|
| 55 |
+
assert embed_dim % 2 == 0
|
| 56 |
+
omega = np.arange(embed_dim // 2, dtype=np.float)
|
| 57 |
+
omega /= embed_dim / 2.
|
| 58 |
+
omega = 1. / 10000**omega # (D/2,)
|
| 59 |
+
|
| 60 |
+
pos = pos.reshape(-1) # (M,)
|
| 61 |
+
out = np.einsum('m,d->md', pos, omega) # (M, D/2), outer product
|
| 62 |
+
|
| 63 |
+
emb_sin = np.sin(out) # (M, D/2)
|
| 64 |
+
emb_cos = np.cos(out) # (M, D/2)
|
| 65 |
+
|
| 66 |
+
emb = np.concatenate([emb_sin, emb_cos], axis=1) # (M, D)
|
| 67 |
+
return emb
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# --------------------------------------------------------
|
| 71 |
+
# Interpolate position embeddings for high-resolution
|
| 72 |
+
# References:
|
| 73 |
+
# DeiT: https://github.com/facebookresearch/deit
|
| 74 |
+
# --------------------------------------------------------
|
| 75 |
+
def interpolate_pos_embed(model, checkpoint_model,newsize1=None,newsize2=None):
|
| 76 |
+
if 'pos_embed' in checkpoint_model:
|
| 77 |
+
pos_embed_checkpoint = checkpoint_model['pos_embed']
|
| 78 |
+
embedding_size = pos_embed_checkpoint.shape[-1]
|
| 79 |
+
num_patches = model.patch_embed.num_patches
|
| 80 |
+
num_extra_tokens = model.pos_embed.shape[-2] - num_patches
|
| 81 |
+
# height (== width) for the checkpoint position embedding
|
| 82 |
+
orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
|
| 83 |
+
# height (== width) for the new position embedding
|
| 84 |
+
new_size = int(num_patches ** 0.5)
|
| 85 |
+
# class_token and dist_token are kept unchanged
|
| 86 |
+
if orig_size != new_size:
|
| 87 |
+
if newsize1 == None:
|
| 88 |
+
newsize1,newsize2 = new_size,new_size
|
| 89 |
+
print("Position interpolate from %dx%d to %dx%d" % (orig_size, orig_size, newsize1, newsize2))
|
| 90 |
+
extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
|
| 91 |
+
# only the position tokens are interpolated
|
| 92 |
+
pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
|
| 93 |
+
pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
|
| 94 |
+
pos_tokens = torch.nn.functional.interpolate(
|
| 95 |
+
pos_tokens, size=(newsize1, newsize2), mode='bicubic', align_corners=False)
|
| 96 |
+
pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
|
| 97 |
+
new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
|
| 98 |
+
checkpoint_model['pos_embed'] = new_pos_embed
|
| 99 |
+
|
| 100 |
+
def interpolate_dec_embed(model, checkpoint_model):
|
| 101 |
+
if 'decoder_pos_embed' in checkpoint_model:
|
| 102 |
+
pos_embed_checkpoint = checkpoint_model['decoder_pos_embed']
|
| 103 |
+
embedding_size = pos_embed_checkpoint.shape[-1]
|
| 104 |
+
num_patches = model.decoder_pos_embed.num_patches
|
| 105 |
+
num_extra_tokens = model.decoder_pos_embed.shape[-2] - num_patches
|
| 106 |
+
# height (== width) for the checkpoint position embedding
|
| 107 |
+
orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
|
| 108 |
+
# height (== width) for the new position embedding
|
| 109 |
+
new_size = int(num_patches ** 0.5)
|
| 110 |
+
# class_token and dist_token are kept unchanged
|
| 111 |
+
if orig_size != new_size:
|
| 112 |
+
print("Position interpolate from %dx%d to %dx%d" % (orig_size, orig_size, new_size, new_size))
|
| 113 |
+
extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
|
| 114 |
+
# only the position tokens are interpolated
|
| 115 |
+
pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
|
| 116 |
+
pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
|
| 117 |
+
pos_tokens = torch.nn.functional.interpolate(
|
| 118 |
+
pos_tokens, size=(new_size, new_size), mode='bicubic', align_corners=False)
|
| 119 |
+
pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
|
| 120 |
+
new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
|
| 121 |
+
checkpoint_model['decoder_pos_embed'] = new_pos_embed
|
| 122 |
+
# elif orig_size > new_size:
|
| 123 |
+
# print("Position generate from %dx%d to %dx%d" % (orig_size, orig_size, new_size, new_size))
|
| 124 |
+
# pos_tokens = get_2d_sincos_pos_embed(embedding_size, new_size, cls_token=True)
|
| 125 |
+
# pos_tokens = torch.from_numpy(pos_tokens).float().unsqueeze(0)
|
| 126 |
+
# checkpoint_model['pos_embed'] = pos_tokens
|
| 127 |
+
|
util/post_processing.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import numpy as np
|
| 3 |
+
import PIL.Image as Image
|
| 4 |
+
import torchvision.transforms as transforms
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
from typing import Optional, Tuple, Union
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def morphological_open(image: torch.Tensor, kernel_size: int = 3) -> torch.Tensor:
|
| 10 |
+
"""
|
| 11 |
+
Perform morphological opening on a 2D torch tensor (image).
|
| 12 |
+
|
| 13 |
+
Args:
|
| 14 |
+
image (torch.Tensor): image to open
|
| 15 |
+
kernel_size (int): size of the structuring element - roughly the size of hole to be opened
|
| 16 |
+
|
| 17 |
+
Returns:
|
| 18 |
+
torch.Tensor: The opened image.
|
| 19 |
+
"""
|
| 20 |
+
kernel = torch.ones((1, 1, kernel_size, kernel_size), dtype=torch.float32, device=image.device)
|
| 21 |
+
eroded = F.conv2d(image.unsqueeze(0), kernel, stride=1, padding=kernel_size // 2)
|
| 22 |
+
eroded = (eroded > 0).float()
|
| 23 |
+
dilated = F.conv2d(eroded, kernel, stride=1, padding=kernel_size // 2)
|
| 24 |
+
return (dilated > 0).float()
|
| 25 |
+
|
| 26 |
+
def morphological_close(image: torch.Tensor, kernel_size: int = 3) -> torch.Tensor:
|
| 27 |
+
"""
|
| 28 |
+
Perform morphological closing on a 2D torch tensor (image).
|
| 29 |
+
|
| 30 |
+
Args:
|
| 31 |
+
image (torch.Tensor): image to close
|
| 32 |
+
kernel_size (int): size of the structuring element - roughly the size of hole to be closed
|
| 33 |
+
|
| 34 |
+
Returns:
|
| 35 |
+
torch.Tensor: The closed image.
|
| 36 |
+
"""
|
| 37 |
+
kernel = torch.ones((1, 1, kernel_size, kernel_size), dtype=torch.float32, device=image.device)
|
| 38 |
+
dilated = F.conv2d(image.unsqueeze(0), kernel, stride=1, padding=kernel_size // 2)
|
| 39 |
+
dilated = (dilated > 0).float()
|
| 40 |
+
eroded = F.conv2d(dilated, kernel, stride=1, padding=kernel_size // 2)
|
| 41 |
+
return (eroded > 0).float()
|
| 42 |
+
|
| 43 |
+
def gaussian_convolve(image: torch.Tensor, kernel_size: int = 5, sigma: float = 1.0) -> torch.Tensor:
|
| 44 |
+
"""
|
| 45 |
+
Gaussian Convolution to smooth image
|
| 46 |
+
|
| 47 |
+
Args:
|
| 48 |
+
image (torch.Tensor): image to convolve
|
| 49 |
+
kernel_size (int): size of the Gaussian kernel
|
| 50 |
+
sigma (float): standard deviation of the Gaussian distribution
|
| 51 |
+
|
| 52 |
+
Returns:
|
| 53 |
+
torch.Tensor: The convolved image.
|
| 54 |
+
"""
|
| 55 |
+
x = torch.arange(-kernel_size // 2 + 1, kernel_size // 2 + 1, dtype=torch.float32)
|
| 56 |
+
y = torch.arange(-kernel_size // 2 + 1, kernel_size // 2 + 1, dtype=torch.float32)
|
| 57 |
+
x, y = torch.meshgrid(x, y)
|
| 58 |
+
kernel = torch.exp(-(x**2 + y**2) / (2 * sigma**2))
|
| 59 |
+
kernel = kernel / kernel.sum()
|
| 60 |
+
# Apply the Gaussian kernel
|
| 61 |
+
return F.conv2d(image.unsqueeze(0), kernel.unsqueeze(0).unsqueeze(0), stride=1, padding=kernel_size // 2)
|
| 62 |
+
|
| 63 |
+
def hysteresis_filter(image: torch.Tensor, low_threshold: float, high_threshold: float) -> torch.Tensor:
|
| 64 |
+
"""
|
| 65 |
+
Hysteresis Filter Function - for Canny Edge detection
|
| 66 |
+
|
| 67 |
+
Args:
|
| 68 |
+
image (torch.Tensor): image to process
|
| 69 |
+
low_threshold (float): low threshold for hysteresis
|
| 70 |
+
high_threshold (float): high threshold for hysteresis
|
| 71 |
+
|
| 72 |
+
Returns:
|
| 73 |
+
edge (torch.Tensor): The edges detected in the image.
|
| 74 |
+
|
| 75 |
+
"""
|
| 76 |
+
edges = (image > high_threshold).float()
|
| 77 |
+
# Perform hysteresis thresholding
|
| 78 |
+
edges = torch.where(image > low_threshold, edges, 0)
|
| 79 |
+
return edges
|
| 80 |
+
|
| 81 |
+
def non_maxima_suppression_2d(
|
| 82 |
+
image: torch.Tensor,
|
| 83 |
+
kernel_size: int = 3,
|
| 84 |
+
threshold: Optional[float] = None,
|
| 85 |
+
return_mask: bool = False
|
| 86 |
+
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
|
| 87 |
+
"""
|
| 88 |
+
Perform non-maxima suppression on a 2D torch tensor (image).
|
| 89 |
+
|
| 90 |
+
Args:
|
| 91 |
+
image (torch.Tensor): Input tensor of shape (H, W) or (B, C, H, W) or (C, H, W)
|
| 92 |
+
kernel_size (int): Size of the local neighborhood for maxima detection (default: 3)
|
| 93 |
+
threshold (float, optional): Minimum value threshold for considering pixels
|
| 94 |
+
return_mask (bool): If True, return both suppressed image and binary mask
|
| 95 |
+
|
| 96 |
+
Returns:
|
| 97 |
+
torch.Tensor: Image with non-maxima suppressed
|
| 98 |
+
torch.Tensor (optional): Binary mask of local maxima if return_mask=True
|
| 99 |
+
"""
|
| 100 |
+
original_shape = image.shape
|
| 101 |
+
|
| 102 |
+
# Handle different input shapes
|
| 103 |
+
if len(image.shape) == 2: # (H, W)
|
| 104 |
+
image = image.unsqueeze(0).unsqueeze(0) # (1, 1, H, W)
|
| 105 |
+
elif len(image.shape) == 3: # (C, H, W)
|
| 106 |
+
image = image.unsqueeze(0) # (1, C, H, W)
|
| 107 |
+
elif len(image.shape) == 4: # (B, C, H, W)
|
| 108 |
+
pass
|
| 109 |
+
else:
|
| 110 |
+
raise ValueError(f"Unsupported tensor shape: {original_shape}")
|
| 111 |
+
|
| 112 |
+
batch_size, channels, height, width = image.shape
|
| 113 |
+
|
| 114 |
+
# Apply threshold if specified
|
| 115 |
+
if threshold is not None:
|
| 116 |
+
image = torch.where(image >= threshold, image, torch.tensor(0.0, device=image.device))
|
| 117 |
+
|
| 118 |
+
# Perform max pooling to find local maxima
|
| 119 |
+
padding = kernel_size // 2
|
| 120 |
+
max_pooled = F.max_pool2d(image, kernel_size=kernel_size, stride=1, padding=padding)
|
| 121 |
+
|
| 122 |
+
# Create mask where original values equal max pooled values (local maxima)
|
| 123 |
+
mask = (image == max_pooled) & (image > 0)
|
| 124 |
+
|
| 125 |
+
# Apply non-maxima suppression
|
| 126 |
+
suppressed = image * mask.float()
|
| 127 |
+
|
| 128 |
+
# Reshape back to original shape
|
| 129 |
+
if len(original_shape) == 2:
|
| 130 |
+
suppressed = suppressed.squeeze(0).squeeze(0)
|
| 131 |
+
mask = mask.squeeze(0).squeeze(0)
|
| 132 |
+
elif len(original_shape) == 3:
|
| 133 |
+
suppressed = suppressed.squeeze(0)
|
| 134 |
+
mask = mask.squeeze(0)
|
| 135 |
+
|
| 136 |
+
if return_mask:
|
| 137 |
+
return suppressed, mask
|
| 138 |
+
return suppressed
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def non_maxima_suppression_with_orientation(
|
| 142 |
+
magnitude: torch.Tensor,
|
| 143 |
+
orientation: torch.Tensor,
|
| 144 |
+
threshold: Optional[float] = None
|
| 145 |
+
) -> torch.Tensor:
|
| 146 |
+
"""
|
| 147 |
+
Perform oriented non-maxima suppression (commonly used in edge detection).
|
| 148 |
+
|
| 149 |
+
Args:
|
| 150 |
+
magnitude (torch.Tensor): Gradient magnitude tensor of shape (H, W) or (B, C, H, W)
|
| 151 |
+
orientation (torch.Tensor): Gradient orientation tensor (in radians) of same shape
|
| 152 |
+
threshold (float, optional): Minimum magnitude threshold
|
| 153 |
+
|
| 154 |
+
Returns:
|
| 155 |
+
torch.Tensor: Non-maxima suppressed magnitude
|
| 156 |
+
"""
|
| 157 |
+
original_shape = magnitude.shape
|
| 158 |
+
|
| 159 |
+
# Handle different input shapes
|
| 160 |
+
if len(magnitude.shape) == 2:
|
| 161 |
+
magnitude = magnitude.unsqueeze(0).unsqueeze(0)
|
| 162 |
+
orientation = orientation.unsqueeze(0).unsqueeze(0)
|
| 163 |
+
elif len(magnitude.shape) == 3:
|
| 164 |
+
magnitude = magnitude.unsqueeze(0)
|
| 165 |
+
orientation = orientation.unsqueeze(0)
|
| 166 |
+
|
| 167 |
+
batch_size, channels, height, width = magnitude.shape
|
| 168 |
+
device = magnitude.device
|
| 169 |
+
|
| 170 |
+
# Apply threshold if specified
|
| 171 |
+
if threshold is not None:
|
| 172 |
+
magnitude = torch.where(magnitude >= threshold, magnitude, torch.tensor(0.0, device=device))
|
| 173 |
+
|
| 174 |
+
# Convert orientation to degrees and normalize to [0, 180)
|
| 175 |
+
angle = torch.rad2deg(orientation) % 180
|
| 176 |
+
|
| 177 |
+
# Create padded magnitude for neighbor comparison
|
| 178 |
+
mag_padded = F.pad(magnitude, (1, 1, 1, 1), mode='constant', value=0)
|
| 179 |
+
|
| 180 |
+
# Initialize output
|
| 181 |
+
suppressed = torch.zeros_like(magnitude)
|
| 182 |
+
|
| 183 |
+
# Define 8-connectivity neighbors
|
| 184 |
+
for b in range(batch_size):
|
| 185 |
+
for c in range(channels):
|
| 186 |
+
mag = magnitude[b, c]
|
| 187 |
+
ang = angle[b, c]
|
| 188 |
+
mag_pad = mag_padded[b, c]
|
| 189 |
+
|
| 190 |
+
for i in range(1, height + 1):
|
| 191 |
+
for j in range(1, width + 1):
|
| 192 |
+
current_mag = mag_pad[i, j]
|
| 193 |
+
current_angle = ang[i-1, j-1]
|
| 194 |
+
|
| 195 |
+
if current_mag == 0:
|
| 196 |
+
continue
|
| 197 |
+
|
| 198 |
+
# Determine interpolation direction based on angle
|
| 199 |
+
if (0 <= current_angle < 22.5) or (157.5 <= current_angle < 180):
|
| 200 |
+
# Horizontal direction (0°)
|
| 201 |
+
neighbor1 = mag_pad[i, j-1]
|
| 202 |
+
neighbor2 = mag_pad[i, j+1]
|
| 203 |
+
elif 22.5 <= current_angle < 67.5:
|
| 204 |
+
# Diagonal direction (45°)
|
| 205 |
+
neighbor1 = mag_pad[i-1, j+1]
|
| 206 |
+
neighbor2 = mag_pad[i+1, j-1]
|
| 207 |
+
elif 67.5 <= current_angle < 112.5:
|
| 208 |
+
# Vertical direction (90°)
|
| 209 |
+
neighbor1 = mag_pad[i-1, j]
|
| 210 |
+
neighbor2 = mag_pad[i+1, j]
|
| 211 |
+
else: # 112.5 <= current_angle < 157.5
|
| 212 |
+
# Diagonal direction (135°)
|
| 213 |
+
neighbor1 = mag_pad[i-1, j-1]
|
| 214 |
+
neighbor2 = mag_pad[i+1, j+1]
|
| 215 |
+
|
| 216 |
+
# Keep pixel if it's a local maximum
|
| 217 |
+
if current_mag >= neighbor1 and current_mag >= neighbor2:
|
| 218 |
+
suppressed[b, c, i-1, j-1] = current_mag
|
| 219 |
+
|
| 220 |
+
# Reshape back to original shape
|
| 221 |
+
if len(original_shape) == 2:
|
| 222 |
+
suppressed = suppressed.squeeze(0).squeeze(0)
|
| 223 |
+
elif len(original_shape) == 3:
|
| 224 |
+
suppressed = suppressed.squeeze(0)
|
| 225 |
+
|
| 226 |
+
return suppressed
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def adaptive_non_maxima_suppression(
|
| 230 |
+
image: torch.Tensor,
|
| 231 |
+
num_points: int,
|
| 232 |
+
min_distance: int = 5,
|
| 233 |
+
threshold: Optional[float] = None
|
| 234 |
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
| 235 |
+
"""
|
| 236 |
+
Adaptive non-maxima suppression that selects a fixed number of strongest points
|
| 237 |
+
while maintaining minimum distance between them.
|
| 238 |
+
|
| 239 |
+
Args:
|
| 240 |
+
image (torch.Tensor): Input tensor of shape (H, W)
|
| 241 |
+
num_points (int): Number of points to select
|
| 242 |
+
min_distance (int): Minimum distance between selected points
|
| 243 |
+
threshold (float, optional): Minimum value threshold
|
| 244 |
+
|
| 245 |
+
Returns:
|
| 246 |
+
Tuple[torch.Tensor, torch.Tensor]: Coordinates (y, x) and values of selected points
|
| 247 |
+
"""
|
| 248 |
+
if len(image.shape) != 2:
|
| 249 |
+
raise ValueError("Input must be a 2D tensor")
|
| 250 |
+
|
| 251 |
+
height, width = image.shape
|
| 252 |
+
device = image.device
|
| 253 |
+
|
| 254 |
+
# Apply threshold if specified
|
| 255 |
+
if threshold is not None:
|
| 256 |
+
image = torch.where(image >= threshold, image, torch.tensor(0.0, device=device))
|
| 257 |
+
|
| 258 |
+
# Find all local maxima using simple NMS
|
| 259 |
+
nms_result = non_maxima_suppression_2d(image, kernel_size=3)
|
| 260 |
+
|
| 261 |
+
# Get coordinates and values of all local maxima
|
| 262 |
+
y_coords, x_coords = torch.nonzero(nms_result > 0, as_tuple=True)
|
| 263 |
+
values = nms_result[y_coords, x_coords]
|
| 264 |
+
|
| 265 |
+
if len(values) == 0:
|
| 266 |
+
return torch.empty((0, 2), device=device), torch.empty(0, device=device)
|
| 267 |
+
|
| 268 |
+
# Sort by strength (descending)
|
| 269 |
+
sorted_indices = torch.argsort(values, descending=True)
|
| 270 |
+
y_coords = y_coords[sorted_indices]
|
| 271 |
+
x_coords = x_coords[sorted_indices]
|
| 272 |
+
values = values[sorted_indices]
|
| 273 |
+
|
| 274 |
+
# Select points with minimum distance constraint
|
| 275 |
+
selected_coords = []
|
| 276 |
+
selected_values = []
|
| 277 |
+
|
| 278 |
+
for i in range(len(values)):
|
| 279 |
+
if len(selected_coords) >= num_points:
|
| 280 |
+
break
|
| 281 |
+
|
| 282 |
+
current_y, current_x = y_coords[i].item(), x_coords[i].item()
|
| 283 |
+
current_val = values[i].item()
|
| 284 |
+
|
| 285 |
+
# Check distance to all previously selected points
|
| 286 |
+
valid = True
|
| 287 |
+
for sel_y, sel_x in selected_coords:
|
| 288 |
+
distance = ((current_y - sel_y) ** 2 + (current_x - sel_x) ** 2) ** 0.5
|
| 289 |
+
if distance < min_distance:
|
| 290 |
+
valid = False
|
| 291 |
+
break
|
| 292 |
+
|
| 293 |
+
if valid:
|
| 294 |
+
selected_coords.append((current_y, current_x))
|
| 295 |
+
selected_values.append(current_val)
|
| 296 |
+
|
| 297 |
+
if selected_coords:
|
| 298 |
+
coords_tensor = torch.tensor(selected_coords, device=device, dtype=torch.float32)
|
| 299 |
+
values_tensor = torch.tensor(selected_values, device=device, dtype=torch.float32)
|
| 300 |
+
else:
|
| 301 |
+
coords_tensor = torch.empty((0, 2), device=device)
|
| 302 |
+
values_tensor = torch.empty(0, device=device)
|
| 303 |
+
|
| 304 |
+
return coords_tensor, values_tensor
|
| 305 |
+
|
util/size_aware_batching.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Size-aware batching utilities for variable-sized seismic images
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from torch.utils.data import DataLoader, Sampler
|
| 7 |
+
import numpy as np
|
| 8 |
+
from collections import defaultdict
|
| 9 |
+
import random
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class SizeAwareSampler(Sampler):
|
| 13 |
+
"""
|
| 14 |
+
Groups samples by size and creates batches with images of the same size
|
| 15 |
+
"""
|
| 16 |
+
def __init__(self, dataset, batch_size, get_size_fn=None):
|
| 17 |
+
"""
|
| 18 |
+
Args:
|
| 19 |
+
dataset: PyTorch dataset
|
| 20 |
+
batch_size: batch size for each size group
|
| 21 |
+
get_size_fn: function that takes dataset index and returns (height, width)
|
| 22 |
+
If None, will try to infer from dataset
|
| 23 |
+
"""
|
| 24 |
+
self.dataset = dataset
|
| 25 |
+
self.batch_size = batch_size
|
| 26 |
+
self.get_size_fn = get_size_fn
|
| 27 |
+
|
| 28 |
+
# Group indices by size
|
| 29 |
+
self.size_groups = self._group_by_size()
|
| 30 |
+
|
| 31 |
+
# Create batches
|
| 32 |
+
self.batches = self._create_batches()
|
| 33 |
+
|
| 34 |
+
def _group_by_size(self):
|
| 35 |
+
"""Group dataset indices by image size"""
|
| 36 |
+
size_groups = defaultdict(list)
|
| 37 |
+
|
| 38 |
+
for idx in range(len(self.dataset)):
|
| 39 |
+
if self.get_size_fn:
|
| 40 |
+
size = self.get_size_fn(idx)
|
| 41 |
+
else:
|
| 42 |
+
# Try to get size from dataset item
|
| 43 |
+
sample = self.dataset[idx]
|
| 44 |
+
if isinstance(sample, (tuple, list)):
|
| 45 |
+
# Assume first element is the image tensor
|
| 46 |
+
img_tensor = sample[0]
|
| 47 |
+
else:
|
| 48 |
+
img_tensor = sample
|
| 49 |
+
|
| 50 |
+
# Get size from tensor shape (assuming shape is [C, H, W] or [H, W])
|
| 51 |
+
if len(img_tensor.shape) == 3:
|
| 52 |
+
size = (img_tensor.shape[1], img_tensor.shape[2]) # H, W
|
| 53 |
+
elif len(img_tensor.shape) == 2:
|
| 54 |
+
size = (img_tensor.shape[0], img_tensor.shape[1]) # H, W
|
| 55 |
+
else:
|
| 56 |
+
raise ValueError(f"Unexpected tensor shape: {img_tensor.shape}")
|
| 57 |
+
|
| 58 |
+
size_groups[size].append(idx)
|
| 59 |
+
|
| 60 |
+
return size_groups
|
| 61 |
+
def _create_batches(self, random_size = True):
|
| 62 |
+
"""Create batches from size groups"""
|
| 63 |
+
batches = []
|
| 64 |
+
|
| 65 |
+
for size, indices in self.size_groups.items():
|
| 66 |
+
# Shuffle indices within each size group
|
| 67 |
+
random.shuffle(indices)
|
| 68 |
+
|
| 69 |
+
# Create batches of the specified size
|
| 70 |
+
for i in range(0, len(indices), self.batch_size):
|
| 71 |
+
batch = indices[i:i + self.batch_size]
|
| 72 |
+
batches.append(batch)
|
| 73 |
+
|
| 74 |
+
return batches
|
| 75 |
+
|
| 76 |
+
def __iter__(self):
|
| 77 |
+
# Shuffle the order of batches
|
| 78 |
+
random.shuffle(self.batches)
|
| 79 |
+
for batch in self.batches:
|
| 80 |
+
yield batch
|
| 81 |
+
|
| 82 |
+
def __len__(self):
|
| 83 |
+
return len(self.batches)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class FixedSizeSampler(Sampler):
|
| 87 |
+
"""
|
| 88 |
+
Sampler for datasets where you know the exact 3 size categories
|
| 89 |
+
More efficient than SizeAwareSampler when sizes are known
|
| 90 |
+
"""
|
| 91 |
+
def __init__(self, dataset, batch_size, size_categories):
|
| 92 |
+
"""
|
| 93 |
+
Args:
|
| 94 |
+
dataset: PyTorch dataset
|
| 95 |
+
batch_size: batch size for each size category
|
| 96 |
+
size_categories: list of (height, width) tuples for the 3 categories
|
| 97 |
+
e.g., [(601, 200), (200, 255), (601, 255)]
|
| 98 |
+
"""
|
| 99 |
+
self.dataset = dataset
|
| 100 |
+
self.batch_size = batch_size
|
| 101 |
+
self.size_categories = size_categories
|
| 102 |
+
|
| 103 |
+
# Map indices to size categories
|
| 104 |
+
self.size_to_indices = {size: [] for size in size_categories}
|
| 105 |
+
self._categorize_indices()
|
| 106 |
+
|
| 107 |
+
# Create batches
|
| 108 |
+
self.batches = self._create_batches()
|
| 109 |
+
|
| 110 |
+
def _categorize_indices(self):
|
| 111 |
+
"""Categorize dataset indices by their size"""
|
| 112 |
+
for idx in range(len(self.dataset)):
|
| 113 |
+
sample = self.dataset[idx]
|
| 114 |
+
if isinstance(sample, (tuple, list)):
|
| 115 |
+
img_tensor = sample[0]
|
| 116 |
+
else:
|
| 117 |
+
img_tensor = sample
|
| 118 |
+
|
| 119 |
+
# Get size from tensor
|
| 120 |
+
if len(img_tensor.shape) == 3:
|
| 121 |
+
size = (img_tensor.shape[1], img_tensor.shape[2])
|
| 122 |
+
elif len(img_tensor.shape) == 2:
|
| 123 |
+
size = (img_tensor.shape[0], img_tensor.shape[1])
|
| 124 |
+
else:
|
| 125 |
+
raise ValueError(f"Unexpected tensor shape: {img_tensor.shape}")
|
| 126 |
+
|
| 127 |
+
# Find matching category
|
| 128 |
+
if size in self.size_categories:
|
| 129 |
+
self.size_to_indices[size].append(idx)
|
| 130 |
+
else:
|
| 131 |
+
# Find closest size category (optional)
|
| 132 |
+
closest_size = min(self.size_categories,
|
| 133 |
+
key=lambda cat: abs(cat[0] - size[0]) + abs(cat[1] - size[1]))
|
| 134 |
+
print(f"Warning: Size {size} not in categories, assigning to {closest_size}")
|
| 135 |
+
self.size_to_indices[closest_size].append(idx)
|
| 136 |
+
|
| 137 |
+
def _create_batches(self, random_size = True):
|
| 138 |
+
"""Create batches from size categories"""
|
| 139 |
+
batches = []
|
| 140 |
+
|
| 141 |
+
for size, indices in self.size_to_indices.items():
|
| 142 |
+
if not indices:
|
| 143 |
+
continue
|
| 144 |
+
|
| 145 |
+
# Shuffle indices within each size category
|
| 146 |
+
random.shuffle(indices)
|
| 147 |
+
|
| 148 |
+
# Create batches
|
| 149 |
+
for i in range(0, len(indices), self.batch_size):
|
| 150 |
+
batch = indices[i:i + self.batch_size]
|
| 151 |
+
batches.append(batch)
|
| 152 |
+
|
| 153 |
+
return batches
|
| 154 |
+
|
| 155 |
+
def __iter__(self):
|
| 156 |
+
# Shuffle the order of batches across all size categories
|
| 157 |
+
random.shuffle(self.batches)
|
| 158 |
+
for batch in self.batches:
|
| 159 |
+
yield batch
|
| 160 |
+
|
| 161 |
+
def __len__(self):
|
| 162 |
+
return len(self.batches)
|
| 163 |
+
|
| 164 |
+
def get_size_distribution(self):
|
| 165 |
+
"""Get the distribution of samples across size categories"""
|
| 166 |
+
distribution = {}
|
| 167 |
+
for size, indices in self.size_to_indices.items():
|
| 168 |
+
distribution[size] = len(indices)
|
| 169 |
+
return distribution
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def create_size_aware_dataloader(dataset, batch_size=8, size_categories=None,
|
| 173 |
+
num_workers=4, pin_memory=True, **kwargs):
|
| 174 |
+
"""
|
| 175 |
+
Create a DataLoader that batches samples by size
|
| 176 |
+
|
| 177 |
+
Args:
|
| 178 |
+
dataset: PyTorch dataset
|
| 179 |
+
batch_size: batch size for each size group
|
| 180 |
+
size_categories: list of (height, width) tuples for known size categories
|
| 181 |
+
If None, will auto-detect sizes
|
| 182 |
+
num_workers: number of worker processes
|
| 183 |
+
pin_memory: whether to pin memory
|
| 184 |
+
**kwargs: additional arguments for DataLoader
|
| 185 |
+
|
| 186 |
+
Returns:
|
| 187 |
+
DataLoader with size-aware batching
|
| 188 |
+
"""
|
| 189 |
+
if size_categories:
|
| 190 |
+
sampler = FixedSizeSampler(dataset, batch_size, size_categories)
|
| 191 |
+
else:
|
| 192 |
+
sampler = SizeAwareSampler(dataset, batch_size)
|
| 193 |
+
|
| 194 |
+
# Remove batch_size from kwargs since we're using a custom sampler
|
| 195 |
+
kwargs.pop('batch_size', None)
|
| 196 |
+
kwargs.pop('shuffle', None) # Sampler handles shuffling
|
| 197 |
+
|
| 198 |
+
return DataLoader(
|
| 199 |
+
dataset,
|
| 200 |
+
batch_sampler=sampler,
|
| 201 |
+
num_workers=num_workers,
|
| 202 |
+
pin_memory=pin_memory,
|
| 203 |
+
**kwargs
|
| 204 |
+
)
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
# Custom collate function for same-size batches (no padding needed)
|
| 208 |
+
def same_size_collate_fn(batch):
|
| 209 |
+
"""
|
| 210 |
+
Collate function for batches where all items have the same size
|
| 211 |
+
No padding required since all images in batch are same size
|
| 212 |
+
"""
|
| 213 |
+
if isinstance(batch[0], (tuple, list)):
|
| 214 |
+
# Assuming (image, target) pairs
|
| 215 |
+
images, targets = zip(*batch)
|
| 216 |
+
return torch.stack(images), torch.stack(targets)
|
| 217 |
+
else:
|
| 218 |
+
# Just images
|
| 219 |
+
return torch.stack(batch)
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
# Utility function to check batch sizes
|
| 224 |
+
def validate_batch_sizes(dataloader, num_batches_to_check=5):
|
| 225 |
+
"""
|
| 226 |
+
Validate that all images in each batch have the same size
|
| 227 |
+
"""
|
| 228 |
+
print("Validating batch sizes...")
|
| 229 |
+
|
| 230 |
+
for i, batch in enumerate(dataloader):
|
| 231 |
+
if i >= num_batches_to_check:
|
| 232 |
+
break
|
| 233 |
+
|
| 234 |
+
if isinstance(batch, (tuple, list)):
|
| 235 |
+
images = batch[0]
|
| 236 |
+
else:
|
| 237 |
+
images = batch
|
| 238 |
+
|
| 239 |
+
batch_size = images.shape[0]
|
| 240 |
+
height = images.shape[2]
|
| 241 |
+
width = images.shape[3]
|
| 242 |
+
|
| 243 |
+
print(f"Batch {i}: {batch_size} images of size {height}x{width}")
|
| 244 |
+
|
| 245 |
+
# Verify all images in batch have same size
|
| 246 |
+
for j in range(batch_size):
|
| 247 |
+
img_h, img_w = images[j].shape[1], images[j].shape[2]
|
| 248 |
+
if img_h != height or img_w != width:
|
| 249 |
+
print(f" WARNING: Image {j} has different size {img_h}x{img_w}")
|
| 250 |
+
|
| 251 |
+
print("Validation complete!")
|
util/skeletonize.py
ADDED
|
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Courtesy of Martin Mentan:
|
| 3 |
+
|
| 4 |
+
Works Cited
|
| 5 |
+
Menten, Martin J., et al. ‘A Skeletonization Algorithm for Gradient-Based Optimization’.
|
| 6 |
+
Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV), 2023.
|
| 7 |
+
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
import torch.nn as nn
|
| 12 |
+
import torch.nn.functional as F
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class Skeletonize(torch.nn.Module):
|
| 16 |
+
"""
|
| 17 |
+
Class based on PyTorch's Module class to skeletonize two- or three-dimensional input images
|
| 18 |
+
while being fully compatible with PyTorch's autograd automatic differention engine as proposed in [1].
|
| 19 |
+
|
| 20 |
+
Attributes:
|
| 21 |
+
propabilistic: a Boolean that indicates whether the input image should be binarized using
|
| 22 |
+
the reparametrization trick and straight-through estimator.
|
| 23 |
+
It should always be set to True if non-binary inputs are being provided.
|
| 24 |
+
beta: scale of added logistic noise during the reparametrization trick. If too small, there will not be any learning via
|
| 25 |
+
gradient-based optimization; if too large, the learning is very slow.
|
| 26 |
+
tau: Boltzmann temperature for reparametrization trick.
|
| 27 |
+
simple_point_detection: decides whether simple points should be identified using Boolean characterization of their 26-neighborhood (Boolean) [2]
|
| 28 |
+
or by checking whether the Euler characteristic changes under their deletion (EulerCharacteristic) [3].
|
| 29 |
+
num_iter: number of iterations that each include one end-point check, eight checks for simple points and eight subsequent deletions.
|
| 30 |
+
The number of iterations should be tuned to the type of input image.
|
| 31 |
+
|
| 32 |
+
[1] Martin J. Menten et al. A skeletonization algorithm for gradient-based optimization.
|
| 33 |
+
Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV), 2023.
|
| 34 |
+
[2] Gilles Bertrand. A boolean characterization of three- dimensional simple points.
|
| 35 |
+
Pattern recognition letters, 17(2):115-124, 1996.
|
| 36 |
+
[3] Steven Lobregt et al. Three-dimensional skeletonization:principle and algorithm.
|
| 37 |
+
IEEE Transactions on pattern analysis and machine intelligence, 2(1):75-77, 1980.
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
def __init__(self, probabilistic=True, beta=0.33, tau=1.0, simple_point_detection='Boolean', num_iter=5):
|
| 41 |
+
|
| 42 |
+
super(Skeletonize, self).__init__()
|
| 43 |
+
|
| 44 |
+
self.probabilistic = probabilistic
|
| 45 |
+
self.tau = tau
|
| 46 |
+
self.beta = beta
|
| 47 |
+
|
| 48 |
+
self.num_iter = num_iter
|
| 49 |
+
self.endpoint_check = self._single_neighbor_check
|
| 50 |
+
if simple_point_detection == 'Boolean':
|
| 51 |
+
self.simple_check = self._boolean_simple_check
|
| 52 |
+
elif simple_point_detection == 'EulerCharacteristic':
|
| 53 |
+
self.simple_check = self._euler_characteristic_simple_check
|
| 54 |
+
else:
|
| 55 |
+
raise Exception()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def forward(self, img):
|
| 59 |
+
|
| 60 |
+
img = self._prepare_input(img)
|
| 61 |
+
|
| 62 |
+
if self.probabilistic:
|
| 63 |
+
img = self._stochastic_discretization(img)
|
| 64 |
+
|
| 65 |
+
for current_iter in range(self.num_iter):
|
| 66 |
+
|
| 67 |
+
# At each iteration create a new map of the end-points
|
| 68 |
+
is_endpoint = self.endpoint_check(img)
|
| 69 |
+
|
| 70 |
+
# Sub-iterate through eight different subfields
|
| 71 |
+
x_offsets = [0, 1, 0, 1, 0, 1, 0, 1]
|
| 72 |
+
y_offsets = [0, 0, 1, 1, 0, 0, 1, 1]
|
| 73 |
+
z_offsets = [0, 0, 0, 0, 1, 1, 1, 1]
|
| 74 |
+
|
| 75 |
+
for x_offset, y_offset, z_offset in zip(x_offsets, y_offsets, z_offsets):
|
| 76 |
+
|
| 77 |
+
# At each sub-iteration detect all simple points and delete all simple points that are not end-points
|
| 78 |
+
is_simple = self.simple_check(img[:, :, x_offset:, y_offset:, z_offset:])
|
| 79 |
+
deletion_candidates = is_simple * (1 - is_endpoint[:, :, x_offset::2, y_offset::2, z_offset::2])
|
| 80 |
+
img[:, :, x_offset::2, y_offset::2, z_offset::2] = torch.min(img[:, :, x_offset::2, y_offset::2, z_offset::2].clone(), 1 - deletion_candidates)
|
| 81 |
+
|
| 82 |
+
img = self._prepare_output(img)
|
| 83 |
+
|
| 84 |
+
return img
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _prepare_input(self, img):
|
| 89 |
+
"""
|
| 90 |
+
Function to check that the input image is compatible with the subsequent calculations.
|
| 91 |
+
Only two- and three-dimensional images with values between 0 and 1 are supported.
|
| 92 |
+
If the input image is two-dimensional then it is converted into a three-dimensional one for further processing.
|
| 93 |
+
"""
|
| 94 |
+
|
| 95 |
+
if img.dim() == 5:
|
| 96 |
+
self.expanded_dims = False
|
| 97 |
+
elif img.dim() == 4:
|
| 98 |
+
self.expanded_dims = True
|
| 99 |
+
img = img.unsqueeze(2)
|
| 100 |
+
else:
|
| 101 |
+
raise Exception("Only two-or three-dimensional images (tensor dimensionality of 4 or 5) are supported as input.")
|
| 102 |
+
|
| 103 |
+
if img.shape[2] == 2 or img.shape[3] == 2 or img.shape[4] == 2 or img.shape[3] == 1 or img.shape[4] == 1:
|
| 104 |
+
raise Exception()
|
| 105 |
+
|
| 106 |
+
if img.min() < 0.0 or img.max() > 1.0:
|
| 107 |
+
raise Exception("Image values must lie between 0 and 1.")
|
| 108 |
+
|
| 109 |
+
img = F.pad(img, (1, 1, 1, 1, 1, 1), value=0)
|
| 110 |
+
|
| 111 |
+
return img
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _stochastic_discretization(self, img):
|
| 115 |
+
"""
|
| 116 |
+
Function to binarize the image so that it can be processed by our skeletonization method.
|
| 117 |
+
In order to remain compatible with backpropagation we utilize the reparameterization trick and a straight-through estimator.
|
| 118 |
+
"""
|
| 119 |
+
|
| 120 |
+
alpha = (img + 1e-8) / (1.0 - img + 1e-8)
|
| 121 |
+
|
| 122 |
+
uniform_noise = torch.rand_like(img)
|
| 123 |
+
uniform_noise = torch.empty_like(img).uniform_(1e-8, 1 - 1e-8)
|
| 124 |
+
logistic_noise = (torch.log(uniform_noise) - torch.log(1 - uniform_noise))
|
| 125 |
+
|
| 126 |
+
img = torch.sigmoid((torch.log(alpha) + logistic_noise * self.beta) / self.tau)
|
| 127 |
+
img = (img.detach() > 0.5).float() - img.detach() + img
|
| 128 |
+
|
| 129 |
+
return img
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def _single_neighbor_check(self, img):
|
| 133 |
+
"""
|
| 134 |
+
Function that characterizes points as endpoints if they have a single neighbor or no neighbor at all.
|
| 135 |
+
"""
|
| 136 |
+
|
| 137 |
+
img = F.pad(img, (1, 1, 1, 1, 1, 1))
|
| 138 |
+
|
| 139 |
+
# Check that number of ones in twentysix-neighborhood is exactly 0 or 1
|
| 140 |
+
K = torch.tensor([[[1.0, 1.0, 1.0],
|
| 141 |
+
[1.0, 1.0, 1.0],
|
| 142 |
+
[1.0, 1.0, 1.0]],
|
| 143 |
+
[[1.0, 1.0, 1.0],
|
| 144 |
+
[1.0, 0.0, 1.0],
|
| 145 |
+
[1.0, 1.0, 1.0]],
|
| 146 |
+
[[1.0, 1.0, 1.0],
|
| 147 |
+
[1.0, 1.0, 1.0],
|
| 148 |
+
[1.0, 1.0, 1.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 149 |
+
|
| 150 |
+
num_twentysix_neighbors = F.conv3d(img, K)
|
| 151 |
+
condition1 = F.hardtanh(-(num_twentysix_neighbors - 2), min_val=0, max_val=1) # 1 or fewer neigbors
|
| 152 |
+
|
| 153 |
+
return condition1
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def _boolean_simple_check(self, img):
|
| 157 |
+
"""
|
| 158 |
+
Function that identifies simple points using Boolean conditions introduced by Bertrand et al. [1].
|
| 159 |
+
Each Boolean conditions can be assessed via convolutions with a limited number of pre-defined kernels.
|
| 160 |
+
It total, four conditions are checked. If any one is fulfilled, the point is deemed simple.
|
| 161 |
+
|
| 162 |
+
[1] Gilles Bertrand. A boolean characterization of three- dimensional simple points.
|
| 163 |
+
Pattern recognition letters, 17(2):115-124, 1996.
|
| 164 |
+
"""
|
| 165 |
+
|
| 166 |
+
img = F.pad(img, (1, 1, 1, 1, 1, 1), value=0)
|
| 167 |
+
|
| 168 |
+
# Condition 1: number of zeros in the six-neighborhood is exactly 1
|
| 169 |
+
K_N6 = torch.tensor([[[0.0, 0.0, 0.0],
|
| 170 |
+
[0.0, 1.0, 0.0],
|
| 171 |
+
[0.0, 0.0, 0.0]],
|
| 172 |
+
[[0.0, 1.0, 0.0],
|
| 173 |
+
[1.0, 0.0, 1.0],
|
| 174 |
+
[0.0, 1.0, 0.0]],
|
| 175 |
+
[[0.0, 0.0, 0.0],
|
| 176 |
+
[0.0, 1.0, 0.0],
|
| 177 |
+
[0.0, 0.0, 0.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 178 |
+
|
| 179 |
+
num_six_neighbors = F.conv3d(1 - img, K_N6, stride=2)
|
| 180 |
+
|
| 181 |
+
subcondition1a = F.hardtanh(num_six_neighbors, min_val=0, max_val=1) # 1 or more neighbors
|
| 182 |
+
subcondition1b = F.hardtanh(-(num_six_neighbors - 2), min_val=0, max_val=1) # 1 or fewer neighbors
|
| 183 |
+
|
| 184 |
+
condition1 = subcondition1a * subcondition1b
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
# Condition 2: number of ones in twentysix-neighborhood is exactly 1
|
| 188 |
+
K_N26 = torch.tensor([[[1.0, 1.0, 1.0],
|
| 189 |
+
[1.0, 1.0, 1.0],
|
| 190 |
+
[1.0, 1.0, 1.0]],
|
| 191 |
+
[[1.0, 1.0, 1.0],
|
| 192 |
+
[1.0, 0.0, 1.0],
|
| 193 |
+
[1.0, 1.0, 1.0]],
|
| 194 |
+
[[1.0, 1.0, 1.0],
|
| 195 |
+
[1.0, 1.0, 1.0],
|
| 196 |
+
[1.0, 1.0, 1.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 197 |
+
|
| 198 |
+
num_twentysix_neighbors = F.conv3d(img, K_N26, stride=2)
|
| 199 |
+
|
| 200 |
+
subcondition2a = F.hardtanh(num_twentysix_neighbors, min_val=0, max_val=1) # 1 or more neighbors
|
| 201 |
+
subcondition2b = F.hardtanh(-(num_twentysix_neighbors - 2), min_val=0, max_val=1) # 1 or fewer neigbors
|
| 202 |
+
|
| 203 |
+
condition2 = subcondition2a * subcondition2b
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
# Condition 3: Number of ones in eighteen-neigborhood exactly 1...
|
| 207 |
+
K_N18 = torch.tensor([[[0.0, 1.0, 0.0],
|
| 208 |
+
[1.0, 1.0, 1.0],
|
| 209 |
+
[0.0, 1.0, 0.0]],
|
| 210 |
+
[[1.0, 1.0, 1.0],
|
| 211 |
+
[1.0, 0.0, 1.0],
|
| 212 |
+
[1.0, 1.0, 1.0]],
|
| 213 |
+
[[0.0, 1.0, 0.0],
|
| 214 |
+
[1.0, 1.0, 1.0],
|
| 215 |
+
[0.0, 1.0, 0.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 216 |
+
|
| 217 |
+
num_eighteen_neighbors = F.conv3d(img, K_N18, stride=2)
|
| 218 |
+
|
| 219 |
+
subcondition3a = F.hardtanh(num_eighteen_neighbors, min_val=0, max_val=1) # 1 or more neighbors
|
| 220 |
+
subcondition3b = F.hardtanh(-(num_eighteen_neighbors - 2), min_val=0, max_val=1) # 1 or fewer neigbors
|
| 221 |
+
|
| 222 |
+
# ... and cell configration B26 does not exist
|
| 223 |
+
K_B26 = torch.tensor([[[1.0, -1.0, 0.0],
|
| 224 |
+
[-1.0, -1.0, 0.0],
|
| 225 |
+
[0.0, 0.0, 0.0]],
|
| 226 |
+
[[-1.0, -1.0, 0.0],
|
| 227 |
+
[-1.0, 0.0, 0.0],
|
| 228 |
+
[0.0, 0.0, 0.0]],
|
| 229 |
+
[[0.0, 0.0, 0.0],
|
| 230 |
+
[0.0, 0.0, 0.0],
|
| 231 |
+
[0.0, 0.0, 0.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 232 |
+
|
| 233 |
+
B26_1_present = F.relu(F.conv3d(2.0 * img - 1.0, K_B26, stride=2) - 6)
|
| 234 |
+
B26_2_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[2]), stride=2) - 6)
|
| 235 |
+
B26_3_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[3]), stride=2) - 6)
|
| 236 |
+
B26_4_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[4]), stride=2) - 6)
|
| 237 |
+
B26_5_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[2, 3]), stride=2) - 6)
|
| 238 |
+
B26_6_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[2, 4]), stride=2) - 6)
|
| 239 |
+
B26_7_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[3, 4]), stride=2) - 6)
|
| 240 |
+
B26_8_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[2, 3, 4]), stride=2) - 6)
|
| 241 |
+
num_B26_cells = B26_1_present + B26_2_present + B26_3_present + B26_4_present + B26_5_present + B26_6_present + B26_7_present + B26_8_present
|
| 242 |
+
|
| 243 |
+
subcondition3c = F.hardtanh(-(num_B26_cells - 1), min_val=0, max_val=1)
|
| 244 |
+
|
| 245 |
+
condition3 = subcondition3a * subcondition3b * subcondition3c
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
# Condition 4: cell configuration A6 does not exist...
|
| 249 |
+
K_A6 = torch.tensor([[[0.0, 1.0, 0.0],
|
| 250 |
+
[1.0, -1.0, 1.0],
|
| 251 |
+
[0.0, 1.0, 0.0]],
|
| 252 |
+
[[0.0, 0.0, 0.0],
|
| 253 |
+
[0.0, 0.0, 0.0],
|
| 254 |
+
[0.0, 0.0, 0.0]],
|
| 255 |
+
[[0.0, 0.0, 0.0],
|
| 256 |
+
[0.0, 0.0, 0.0],
|
| 257 |
+
[0.0, 0.0, 0.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 258 |
+
|
| 259 |
+
A6_1_present = F.relu(F.conv3d(2.0 * img - 1.0, K_A6, stride=2) - 4)
|
| 260 |
+
A6_2_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_A6, dims=[2, 3]), stride=2) - 4)
|
| 261 |
+
A6_3_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_A6, dims=[2, 4]), stride=2) - 4)
|
| 262 |
+
A6_4_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_A6, dims=[2]), stride=2) - 4)
|
| 263 |
+
A6_5_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.flip(K_A6, dims=[2]), dims=[2, 3]), stride=2) - 4)
|
| 264 |
+
A6_6_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.flip(K_A6, dims=[2]), dims=[2, 4]), stride=2) - 4)
|
| 265 |
+
num_A6_cells = A6_1_present + A6_2_present + A6_3_present + A6_4_present + A6_5_present + A6_6_present
|
| 266 |
+
|
| 267 |
+
subcondition4a = F.hardtanh(-(num_A6_cells - 1), min_val=0, max_val=1)
|
| 268 |
+
|
| 269 |
+
# ... and cell configuration B26 does not exist...
|
| 270 |
+
K_B26 = torch.tensor([[[1.0, -1.0, 0.0],
|
| 271 |
+
[-1.0, -1.0, 0.0],
|
| 272 |
+
[0.0, 0.0, 0.0]],
|
| 273 |
+
[[-1.0, -1.0, 0.0],
|
| 274 |
+
[-1.0, 0.0, 0.0],
|
| 275 |
+
[0.0, 0.0, 0.0]],
|
| 276 |
+
[[0.0, 0.0, 0.0],
|
| 277 |
+
[0.0, 0.0, 0.0],
|
| 278 |
+
[0.0, 0.0, 0.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 279 |
+
|
| 280 |
+
B26_1_present = F.relu(F.conv3d(2.0 * img - 1.0, K_B26, stride=2) - 6)
|
| 281 |
+
B26_2_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[2]), stride=2) - 6)
|
| 282 |
+
B26_3_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[3]), stride=2) - 6)
|
| 283 |
+
B26_4_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[4]), stride=2) - 6)
|
| 284 |
+
B26_5_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[2, 3]), stride=2) - 6)
|
| 285 |
+
B26_6_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[2, 4]), stride=2) - 6)
|
| 286 |
+
B26_7_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[3, 4]), stride=2) - 6)
|
| 287 |
+
B26_8_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_B26, dims=[2, 3, 4]), stride=2) - 6)
|
| 288 |
+
num_B26_cells = B26_1_present + B26_2_present + B26_3_present + B26_4_present + B26_5_present + B26_6_present + B26_7_present + B26_8_present
|
| 289 |
+
|
| 290 |
+
subcondition4b = F.hardtanh(-(num_B26_cells - 1), min_val=0, max_val=1)
|
| 291 |
+
|
| 292 |
+
# ... and cell configuration B18 does not exist...
|
| 293 |
+
K_B18 = torch.tensor([[[0.0, 1.0, 0.0],
|
| 294 |
+
[-1.0, -1.0, -1.0],
|
| 295 |
+
[0.0, 0.0, 0.0]],
|
| 296 |
+
[[-1.0, -1.0, -1.0],
|
| 297 |
+
[-1.0, 0.0, -1.0],
|
| 298 |
+
[0.0, 0.0, 0.0]],
|
| 299 |
+
[[0.0, 0.0, 0.0],
|
| 300 |
+
[0.0, 0.0, 0.0],
|
| 301 |
+
[0.0, 0.0, 0.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 302 |
+
|
| 303 |
+
B18_1_present = F.relu(F.conv3d(2.0 * img - 1.0, K_B18, stride=2) - 8)
|
| 304 |
+
B18_2_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_B18, dims=[2, 4]), stride=2) - 8)
|
| 305 |
+
B18_3_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_B18, dims=[2, 4], k=2), stride=2) - 8)
|
| 306 |
+
B18_4_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_B18, dims=[2, 4], k=3), stride=2) - 8)
|
| 307 |
+
B18_5_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_B18, dims=[3, 4]), stride=2) - 8)
|
| 308 |
+
B18_6_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_B18, dims=[3, 4]), dims=[2, 4]), stride=2) - 8)
|
| 309 |
+
B18_7_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_B18, dims=[3, 4]), dims=[2, 4], k=2), stride=2) - 8)
|
| 310 |
+
B18_8_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_B18, dims=[3, 4]), dims=[2, 4], k=3), stride=2) - 8)
|
| 311 |
+
B18_9_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_B18, dims=[3, 4], k=2), stride=2) - 8)
|
| 312 |
+
B18_10_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_B18, dims=[3, 4], k=2), dims=[2, 4]), stride=2) - 8)
|
| 313 |
+
B18_11_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_B18, dims=[3, 4], k=2), dims=[2, 4], k=2), stride=2) - 8)
|
| 314 |
+
B18_12_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_B18, dims=[3, 4], k=2), dims=[2, 4], k=3), stride=2) - 8)
|
| 315 |
+
num_B18_cells = B18_1_present + B18_2_present + B18_3_present + B18_4_present + B18_5_present + B18_6_present + B18_7_present + B18_8_present + B18_9_present + B18_10_present + B18_11_present + B18_12_present
|
| 316 |
+
|
| 317 |
+
subcondition4c = F.hardtanh(-(num_B18_cells - 1), min_val=0, max_val=1)
|
| 318 |
+
|
| 319 |
+
# ... and the number of zeros in the six-neighborhood minus the number of A18 cell configurations plus the number of A26 cell configurations is exactly one
|
| 320 |
+
K_N6 = torch.tensor([[[0.0, 0.0, 0.0],
|
| 321 |
+
[0.0, 1.0, 0.0],
|
| 322 |
+
[0.0, 0.0, 0.0]],
|
| 323 |
+
[[0.0, 1.0, 0.0],
|
| 324 |
+
[1.0, 0.0, 1.0],
|
| 325 |
+
[0.0, 1.0, 0.0]],
|
| 326 |
+
[[0.0, 0.0, 0.0],
|
| 327 |
+
[0.0, 1.0, 0.0],
|
| 328 |
+
[0.0, 0.0, 0.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 329 |
+
|
| 330 |
+
num_six_neighbors = F.conv3d(1-img, K_N6, stride=2)
|
| 331 |
+
|
| 332 |
+
K_A18 = torch.tensor([[[0.0, -1.0, 0.0],
|
| 333 |
+
[0.0, -1.0, 0.0],
|
| 334 |
+
[0.0, 0.0, 0.0]],
|
| 335 |
+
[[0.0, -1.0, 0.0],
|
| 336 |
+
[0.0, 0.0, 0.0],
|
| 337 |
+
[0.0, 0.0, 0.0]],
|
| 338 |
+
[[0.0, 0.0, 0.0],
|
| 339 |
+
[0.0, 0.0, 0.0],
|
| 340 |
+
[0.0, 0.0, 0.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 341 |
+
|
| 342 |
+
A18_1_present = F.relu(F.conv3d(2.0 * img - 1.0, K_A18, stride=2) - 2)
|
| 343 |
+
A18_2_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_A18, dims=[2, 4]), stride=2) - 2)
|
| 344 |
+
A18_3_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_A18, dims=[2, 4], k=2), stride=2) - 2)
|
| 345 |
+
A18_4_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_A18, dims=[2, 4], k=3), stride=2) - 2)
|
| 346 |
+
A18_5_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_A18, dims=[3, 4]), stride=2) - 2)
|
| 347 |
+
A18_6_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_A18, dims=[3, 4]), dims=[2, 4]), stride=2) - 2)
|
| 348 |
+
A18_7_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_A18, dims=[3, 4]), dims=[2, 4], k=2), stride=2) - 2)
|
| 349 |
+
A18_8_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_A18, dims=[3, 4]), dims=[2, 4], k=3), stride=2) - 2)
|
| 350 |
+
A18_9_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(K_A18, dims=[3, 4], k=2), stride=2) - 2)
|
| 351 |
+
A18_10_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_A18, dims=[3, 4], k=2), dims=[2, 4]), stride=2) - 2)
|
| 352 |
+
A18_11_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_A18, dims=[3, 4], k=2), dims=[2, 4], k=2), stride=2) - 2)
|
| 353 |
+
A18_12_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.rot90(torch.rot90(K_A18, dims=[3, 4], k=2), dims=[2, 4], k=3), stride=2) - 2)
|
| 354 |
+
num_A18_cells = A18_1_present + A18_2_present + A18_3_present + A18_4_present + A18_5_present + A18_6_present + A18_7_present + A18_8_present + A18_9_present + A18_10_present + A18_11_present + A18_12_present
|
| 355 |
+
|
| 356 |
+
K_A26 = torch.tensor([[[-1.0, -1.0, 0.0],
|
| 357 |
+
[-1.0, -1.0, 0.0],
|
| 358 |
+
[0.0, 0.0, 0.0]],
|
| 359 |
+
[[-1.0, -1.0, 0.0],
|
| 360 |
+
[-1.0, 0.0, 0.0],
|
| 361 |
+
[0.0, 0.0, 0.0]],
|
| 362 |
+
[[0.0, 0.0, 0.0],
|
| 363 |
+
[0.0, 0.0, 0.0],
|
| 364 |
+
[0.0, 0.0, 0.0]]], device=img.device).view(1, 1, 3, 3, 3)
|
| 365 |
+
|
| 366 |
+
A26_1_present = F.relu(F.conv3d(2.0 * img - 1.0, K_A26, stride=2) - 6)
|
| 367 |
+
A26_2_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_A26, dims=[2]), stride=2) - 6)
|
| 368 |
+
A26_3_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_A26, dims=[3]), stride=2) - 6)
|
| 369 |
+
A26_4_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_A26, dims=[4]), stride=2) - 6)
|
| 370 |
+
A26_5_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_A26, dims=[2, 3]), stride=2) - 6)
|
| 371 |
+
A26_6_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_A26, dims=[2, 4]), stride=2) - 6)
|
| 372 |
+
A26_7_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_A26, dims=[3, 4]), stride=2) - 6)
|
| 373 |
+
A26_8_present = F.relu(F.conv3d(2.0 * img - 1.0, torch.flip(K_A26, dims=[2, 3, 4]), stride=2) - 6)
|
| 374 |
+
num_A26_cells = A26_1_present + A26_2_present + A26_3_present + A26_4_present + A26_5_present + A26_6_present + A26_7_present + A26_8_present
|
| 375 |
+
|
| 376 |
+
subcondition4d = F.hardtanh(num_six_neighbors - num_A18_cells + num_A26_cells, min_val=0, max_val=1) # 1 or more configurations
|
| 377 |
+
subcondition4e = F.hardtanh(-(num_six_neighbors - num_A18_cells + num_A26_cells - 2), min_val=0, max_val=1) # 1 or fewer configurations
|
| 378 |
+
|
| 379 |
+
condition4 = subcondition4a * subcondition4b * subcondition4c * subcondition4d * subcondition4e
|
| 380 |
+
|
| 381 |
+
# If any of the four conditions is fulfilled the point is simple
|
| 382 |
+
combined = torch.cat([condition1, condition2, condition3, condition4], dim=1)
|
| 383 |
+
is_simple = torch.amax(combined, dim=1, keepdim=True)
|
| 384 |
+
|
| 385 |
+
return is_simple
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
# Specifically designed to be used with the eight-subfield iterative scheme from above.
|
| 389 |
+
def _euler_characteristic_simple_check(self, img):
|
| 390 |
+
"""
|
| 391 |
+
Function that identifies simple points by assessing whether the Euler characteristic changes when deleting it [1].
|
| 392 |
+
In order to calculate the Euler characteristic, the amount of vertices, edges, faces and octants are counted using convolutions with pre-defined kernels.
|
| 393 |
+
The function is meant to be used in combination with the subfield-based iterative scheme employed in the forward function.
|
| 394 |
+
|
| 395 |
+
[1] Steven Lobregt et al. Three-dimensional skeletonization:principle and algorithm.
|
| 396 |
+
IEEE Transactions on pattern analysis and machine intelligence, 2(1):75-77, 1980.
|
| 397 |
+
"""
|
| 398 |
+
|
| 399 |
+
img = F.pad(img, (1, 1, 1, 1, 1, 1), value=0)
|
| 400 |
+
|
| 401 |
+
# Create masked version of the image where the center of 26-neighborhoods is changed to zero
|
| 402 |
+
mask = torch.ones_like(img)
|
| 403 |
+
mask[:, :, 1::2, 1::2, 1::2] = 0
|
| 404 |
+
masked_img = img.clone() * mask
|
| 405 |
+
|
| 406 |
+
# Count vertices
|
| 407 |
+
vertices = F.relu(-(2.0 * img - 1.0))
|
| 408 |
+
num_vertices = F.avg_pool3d(vertices, (3, 3, 3), stride=2) * 27
|
| 409 |
+
|
| 410 |
+
masked_vertices = F.relu(-(2.0 * masked_img - 1.0))
|
| 411 |
+
num_masked_vertices = F.avg_pool3d(masked_vertices, (3, 3, 3), stride=2) * 27
|
| 412 |
+
|
| 413 |
+
# Count edges
|
| 414 |
+
K_ud_edge = torch.tensor([0.5, 0.5], device=img.device).view(1, 1, 2, 1, 1)
|
| 415 |
+
K_ns_edge = torch.tensor([0.5, 0.5], device=img.device).view(1, 1, 1, 2, 1)
|
| 416 |
+
K_we_edge = torch.tensor([0.5, 0.5], device=img.device).view(1, 1, 1, 1, 2)
|
| 417 |
+
|
| 418 |
+
ud_edges = F.relu(F.conv3d(-(2.0 * img - 1.0), K_ud_edge))
|
| 419 |
+
num_ud_edges = F.avg_pool3d(ud_edges, (2, 3, 3), stride=2) * 18
|
| 420 |
+
ns_edges = F.relu(F.conv3d(-(2.0 * img - 1.0), K_ns_edge))
|
| 421 |
+
num_ns_edges = F.avg_pool3d(ns_edges, (3, 2, 3), stride=2) * 18
|
| 422 |
+
        we_edges = F.relu(F.conv3d(-(2.0 * img - 1.0), K_we_edge))
        num_we_edges = F.avg_pool3d(we_edges, (3, 3, 2), stride=2) * 18
        num_edges = num_ud_edges + num_ns_edges + num_we_edges

        masked_ud_edges = F.relu(F.conv3d(-(2.0 * masked_img - 1.0), K_ud_edge))
        num_masked_ud_edges = F.avg_pool3d(masked_ud_edges, (2, 3, 3), stride=2) * 18
        masked_ns_edges = F.relu(F.conv3d(-(2.0 * masked_img - 1.0), K_ns_edge))
        num_masked_ns_edges = F.avg_pool3d(masked_ns_edges, (3, 2, 3), stride=2) * 18
        masked_we_edges = F.relu(F.conv3d(-(2.0 * masked_img - 1.0), K_we_edge))
        num_masked_we_edges = F.avg_pool3d(masked_we_edges, (3, 3, 2), stride=2) * 18
        num_masked_edges = num_masked_ud_edges + num_masked_ns_edges + num_masked_we_edges

        # Count faces
        K_ud_face = torch.tensor([[0.25, 0.25], [0.25, 0.25]], device=img.device).view(1, 1, 1, 2, 2)
        K_ns_face = torch.tensor([[0.25, 0.25], [0.25, 0.25]], device=img.device).view(1, 1, 2, 1, 2)
        K_we_face = torch.tensor([[0.25, 0.25], [0.25, 0.25]], device=img.device).view(1, 1, 2, 2, 1)

        ud_faces = F.relu(F.conv3d(-(2.0 * img - 1.0), K_ud_face) - 0.5) * 2
        num_ud_faces = F.avg_pool3d(ud_faces, (3, 2, 2), stride=2) * 12
        ns_faces = F.relu(F.conv3d(-(2.0 * img - 1.0), K_ns_face) - 0.5) * 2
        num_ns_faces = F.avg_pool3d(ns_faces, (2, 3, 2), stride=2) * 12
        we_faces = F.relu(F.conv3d(-(2.0 * img - 1.0), K_we_face) - 0.5) * 2
        num_we_faces = F.avg_pool3d(we_faces, (2, 2, 3), stride=2) * 12
        num_faces = num_ud_faces + num_ns_faces + num_we_faces

        masked_ud_faces = F.relu(F.conv3d(-(2.0 * masked_img - 1.0), K_ud_face) - 0.5) * 2
        num_masked_ud_faces = F.avg_pool3d(masked_ud_faces, (3, 2, 2), stride=2) * 12
        masked_ns_faces = F.relu(F.conv3d(-(2.0 * masked_img - 1.0), K_ns_face) - 0.5) * 2
        num_masked_ns_faces = F.avg_pool3d(masked_ns_faces, (2, 3, 2), stride=2) * 12
        masked_we_faces = F.relu(F.conv3d(-(2.0 * masked_img - 1.0), K_we_face) - 0.5) * 2
        num_masked_we_faces = F.avg_pool3d(masked_we_faces, (2, 2, 3), stride=2) * 12
        num_masked_faces = num_masked_ud_faces + num_masked_ns_faces + num_masked_we_faces

        # Count octants
        K_octants = torch.tensor([[[0.125, 0.125], [0.125, 0.125]], [[0.125, 0.125], [0.125, 0.125]]], device=img.device).view(1, 1, 2, 2, 2)

        octants = F.relu(F.conv3d(-(2.0 * img - 1.0), K_octants) - 0.75) * 4
        num_octants = F.avg_pool3d(octants, (2, 2, 2), stride=2) * 8

        masked_octants = F.relu(F.conv3d(-(2.0 * masked_img - 1.0), K_octants) - 0.75) * 4
        num_masked_octants = F.avg_pool3d(masked_octants, (2, 2, 2), stride=2) * 8

        # Combine the counts of vertices, edges, faces and octants into the Euler characteristic
        euler_characteristic = num_vertices - num_edges + num_faces - num_octants
        masked_euler_characteristic = num_masked_vertices - num_masked_edges + num_masked_faces - num_masked_octants

        # If the Euler characteristic is unchanged after switching a point from 1 to 0, the point is simple
        euler_change = F.hardtanh(torch.abs(masked_euler_characteristic - euler_characteristic), min_val=0, max_val=1)
        is_simple = 1 - euler_change
        # Straight-through estimator: hard-threshold in the forward pass, keep the soft gradient in the backward pass
        is_simple = (is_simple.detach() > 0.5).float() - is_simple.detach() + is_simple

        return is_simple

    def _prepare_output(self, img):
        """
        Remove the padding and the extra dimensions added by the _prepare_input function.
        """
        img = img[:, :, 1:-1, 1:-1, 1:-1]

        if self.expanded_dims:
            img = torch.squeeze(img, dim=2)

        return img
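The final reassignment of is_simple above is a straight-through estimator: the forward pass snaps the soft simple-point score to a hard 0/1 value, while the backward pass keeps the gradient of the soft value. A minimal, self-contained sketch of the same pattern (the tensor x and the sigmoid scoring are illustrative, not code from the repository):

import torch

x = torch.tensor([-1.0, 0.3, 2.0], requires_grad=True)
soft = torch.sigmoid(x)               # differentiable soft score in (0, 1)
hard = (soft.detach() > 0.5).float()  # non-differentiable hard decision
out = hard - soft.detach() + soft     # value == hard, gradient == d(soft)/dx

out.sum().backward()
print(out)     # tensor([0., 1., 1.], grad_fn=...)
print(x.grad)  # sigmoid'(x): the thresholding is invisible to the gradient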
util/tools.py ADDED
@@ -0,0 +1,143 @@
'''
Author: Jintao Li
Date: 2022-05-30 16:42:14
LastEditors: Jintao Li
LastEditTime: 2022-07-11 23:05:53
2022 by CIG.
'''

import os, shutil
import yaml, argparse
from sklearn.metrics import confusion_matrix
import numpy as np
import torch


def accuracy(output, target):
    '''
    output: [N, num_classes, ...], torch.float
    target: [N, ...], torch.int
    '''
    output = output.argmax(dim=1).flatten().detach().cpu().numpy()
    target = target.flatten().detach().cpu().numpy()
    return pixel_acc(output, target), _miou(output, target)


def pixel_acc(output, target):
    r"""
    Compute the pixel accuracy (PA):

    $$ PA = \frac{\sum_{i=0}^k p_{ii}}{\sum_{i=0}^k \sum_{j=0}^k p_{ij}} $$

    where $n_{class} = k + 1$.

    Parameters:
    -----------
    output, target: shape [N, ] (use flatten() first)

    Returns:
    --------
    PA
    """
    assert output.shape == target.shape, "shapes must be same"
    cm = confusion_matrix(target, output)
    return np.diag(cm).sum() / cm.sum()


def _miou(output, target):
    r"""
    Compute the mean intersection over union (MIoU):

    $$ MIoU = \frac{1}{k+1} \sum_{i=0}^k \frac{p_{ii}}{\sum_{j=0}^k p_{ij} + \sum_{j=0}^k p_{ji} - p_{ii}} $$

    Parameters:
    -----------
    output, target: [N, ]

    Returns:
    --------
    MIoU
    """
    assert output.shape == target.shape, "shapes must be same"
    cm = confusion_matrix(target, output)
    intersection = np.diag(cm)
    union = np.sum(cm, 1) + np.sum(cm, 0) - np.diag(cm)
    iou = intersection / union
    miou = np.nanmean(iou)

    return miou
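A quick worked example of the two metrics above (the prediction and target arrays are made up for illustration, not taken from the repository):

import numpy as np
from sklearn.metrics import confusion_matrix

pred   = np.array([0, 0, 1, 1, 1, 2])
target = np.array([0, 1, 1, 1, 2, 2])

cm = confusion_matrix(target, pred)
# rows = target class, columns = predicted class:
# [[1 0 0]
#  [1 2 0]
#  [0 1 1]]

pa = np.diag(cm).sum() / cm.sum()            # (1 + 2 + 1) / 6 ~ 0.667
union = cm.sum(1) + cm.sum(0) - np.diag(cm)  # [2, 4, 2]
miou = np.nanmean(np.diag(cm) / union)       # mean(0.5, 0.5, 0.5) = 0.5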
def yaml_config_hook(config_file: str) -> argparse.Namespace:
    """
    Load the parameter settings from a yaml file and build an argparse-style namespace from them.
    """
    with open(config_file) as f:
        cfg = yaml.safe_load(f)
        for d in cfg.get("defaults", []):
            config_dir, cf = d.popitem()
            cf = os.path.join(os.path.dirname(config_file), config_dir,
                              cf + ".yaml")
            with open(cf) as f:
                l = yaml.safe_load(f)
                cfg.update(l)

    if "defaults" in cfg.keys():
        del cfg["defaults"]

    parser = argparse.ArgumentParser()
    for k, v in cfg.items():
        parser.add_argument(f"--{k}", default=v, type=type(v))
    args = parser.parse_args()

    return args


def backup_code(work_dir, back_dir, exceptions=[], include=[]):
    r"""
    Back up the code used for this run to a target directory, excluding certain files and directories.

    Args:
        work_dir: working directory, i.e. the code to back up
        back_dir: target directory where the backed-up code is placed
        exceptions (list): excluded directories and file suffixes; the defaults are
            ["__pycache__", ".pyc", ".dat", "backup", ".vscode"]
        include (list): files that must be backed up even if they match an exclusion pattern
    """
    _exp = [
        "*__pycache__*", "*.pyc", "*.dat", "backup", ".vscode", "*.log",
        "*log*"
    ]
    exceptions = exceptions + _exp

    # if not os.path.exists(back_dir):
    os.makedirs(back_dir, exist_ok=True)

    shutil.copytree(work_dir,
                    back_dir + 'code/',
                    ignore=shutil.ignore_patterns(*exceptions),
                    dirs_exist_ok=True)

    for f in include:
        shutil.copyfile(os.path.join(work_dir, f),
                        os.path.join(back_dir + 'code', f))


def list_files(path, full=False):
    r"""
    Recursively list all files under a directory, including files in subdirectories.
    """
    out = []
    for f in os.listdir(path):
        fname = os.path.join(path, f)
        if os.path.isdir(fname):
            fname = list_files(fname)
            out += [os.path.join(f, i) for i in fname]
        else:
            out.append(f)
    if full:
        out = [os.path.join(path, i) for i in out]
    return out


if __name__ == "__main__":
    output = torch.randn(4, 2, 6, 6)
    target = torch.randn(4, 2, 6, 6)
    # output = output.cuda()
    # target = target.cuda()
    target = target.argmax(1)

    accuracy(output, target)
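yaml_config_hook above merges a top-level yaml file with any files listed under its "defaults" key and exposes every entry as a command-line flag. A hypothetical usage sketch, assuming a config.yaml with the contents shown in the comment and the repository root on PYTHONPATH (neither the config file nor train.py is part of the repository):

# config.yaml
# -----------
# batch_size: 32
# lr: 0.001
# epochs: 100

from util.tools import yaml_config_hook

args = yaml_config_hook("config.yaml")
print(args.batch_size, args.lr, args.epochs)  # 32 0.001 100
# Each key becomes an argparse flag with type=type(default), so any entry can still be
# overridden on the command line, e.g. `python train.py --lr 0.0005`.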
util/variable_pos_embed.py ADDED
@@ -0,0 +1,143 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# Variable size position embedding utils for handling different image dimensions
# --------------------------------------------------------

import numpy as np
import torch
import torch.nn.functional as F


def get_2d_sincos_pos_embed_variable(embed_dim, grid_h, grid_w, cls_token=False):
    """
    Create 2D sine-cosine position embeddings for variable grid sizes.

    Args:
        embed_dim: embedding dimension
        grid_h: height of the grid (number of patches in height)
        grid_w: width of the grid (number of patches in width)
        cls_token: whether to include a class token

    Returns:
        pos_embed: [grid_h*grid_w, embed_dim] or [1+grid_h*grid_w, embed_dim] (w/ or w/o cls_token)
    """
    grid_h_coords = np.arange(grid_h, dtype=np.float32)
    grid_w_coords = np.arange(grid_w, dtype=np.float32)
    grid = np.meshgrid(grid_w_coords, grid_h_coords)  # here w goes first
    grid = np.stack(grid, axis=0)

    grid = grid.reshape([2, 1, grid_h, grid_w])
    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
    if cls_token:
        pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
    return pos_embed


def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
    assert embed_dim % 2 == 0

    # use half of the dimensions to encode grid_h
    emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0])  # (H*W, D/2)
    emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1])  # (H*W, D/2)

    emb = np.concatenate([emb_h, emb_w], axis=1)  # (H*W, D)
    return emb


def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """
    embed_dim: output dimension for each position
    pos: a list of positions to be encoded: size (M,)
    out: (M, D)
    """
    assert embed_dim % 2 == 0
    # np.float was removed in NumPy 1.24; use float64 instead
    omega = np.arange(embed_dim // 2, dtype=np.float64)
    omega /= embed_dim / 2.
    omega = 1. / 10000**omega  # (D/2,)

    pos = pos.reshape(-1)  # (M,)
    out = np.einsum('m,d->md', pos, omega)  # (M, D/2), outer product

    emb_sin = np.sin(out)  # (M, D/2)
    emb_cos = np.cos(out)  # (M, D/2)

    emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
    return emb


def interpolate_pos_embed_variable(original_pos_embed, target_h, target_w, cls_token=True):
    """
    Interpolate position embeddings to an arbitrary target size.

    Args:
        original_pos_embed: original positional embeddings [1, N, D]
        target_h: target height in patches
        target_w: target width in patches
        cls_token: whether the first token is a class token

    Returns:
        interpolated_pos_embed: [1, target_h*target_w + cls_token, D]
    """
    embed_dim = original_pos_embed.shape[-1]

    if cls_token:
        class_pos_embed = original_pos_embed[:, 0:1]  # [1, 1, D]
        patch_pos_embed = original_pos_embed[:, 1:]   # [1, N-1, D]
        orig_num_patches = patch_pos_embed.shape[1]
    else:
        class_pos_embed = None
        patch_pos_embed = original_pos_embed
        orig_num_patches = patch_pos_embed.shape[1]

    # Determine the original grid size (the original grid is assumed to be square)
    orig_h = orig_w = int(np.sqrt(orig_num_patches))

    if orig_h * orig_w != orig_num_patches:
        raise ValueError(f"Original number of patches {orig_num_patches} is not a perfect square")

    # Reshape to spatial dimensions
    patch_pos_embed = patch_pos_embed.reshape(1, orig_h, orig_w, embed_dim)
    patch_pos_embed = patch_pos_embed.permute(0, 3, 1, 2)  # [1, D, orig_h, orig_w]

    # Interpolate to the target size
    patch_pos_embed = F.interpolate(
        patch_pos_embed,
        size=(target_h, target_w),
        mode='bicubic',
        align_corners=False
    )

    # Reshape back to a token sequence
    patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1)  # [1, target_h, target_w, D]
    patch_pos_embed = patch_pos_embed.flatten(1, 2)  # [1, target_h*target_w, D]

    if cls_token:
        new_pos_embed = torch.cat([class_pos_embed, patch_pos_embed], dim=1)
    else:
        new_pos_embed = patch_pos_embed

    return new_pos_embed


def create_variable_pos_embed(embed_dim, height_patches, width_patches, cls_token=True):
    """
    Create positional embeddings for specific patch grid dimensions.

    Args:
        embed_dim: embedding dimension
        height_patches: number of patches in height
        width_patches: number of patches in width
        cls_token: whether to include a class token

    Returns:
        pos_embed: positional embeddings tensor
    """
    pos_embed_np = get_2d_sincos_pos_embed_variable(
        embed_dim, height_patches, width_patches, cls_token=cls_token
    )
    pos_embed = torch.from_numpy(pos_embed_np).float().unsqueeze(0)
    return pos_embed
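A short usage sketch tying the helpers above together: build embeddings for a square 14x14 pretrained patch grid, then interpolate them to a non-square 12x18 grid (the dimensions below are illustrative, not values used elsewhere in the repository; the two functions are assumed to be imported from util.variable_pos_embed):

embed_dim = 768
# Class token plus a 14x14 patch grid -> shape [1, 1 + 14*14, 768]
pretrained = create_variable_pos_embed(embed_dim, 14, 14, cls_token=True)

# Bicubic interpolation to a 12x18 patch grid -> shape [1, 1 + 12*18, 768]
adapted = interpolate_pos_embed_variable(pretrained, target_h=12, target_w=18, cls_token=True)
print(adapted.shape)  # torch.Size([1, 217, 768])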