Spaces:
Sleeping
Sleeping
File size: 1,793 Bytes
1f584ff f535d79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# -*- coding: utf-8 -*-
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
class ResnetEncoderDecoder(nn.Module):
    """Per-position character classifier on top of timm ``resnet18`` blocks.

    A fresh 3x3 convolution + batch norm + SiLU, followed by a 2x2 max pool,
    feeds the backbone children selected by ``[4:-2]``. A linear layer then
    maps the 512 features at each spatial position to ``len(char_dict)``
    scores, which are normalised with softmax.

    Args:
        char_dict: Sized collection describing the output classes. Only its
            length is used here (to size the output layer); the object is
            also kept on the instance as ``self.char_dict``.
        drop_rate: Forwarded to ``timm.create_model``.
        drop_path_rate: Forwarded to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``. Pass ``False`` to
            skip loading pretrained weights, e.g. when a checkpoint is
            restored right after construction.
    """

    def __init__(self, char_dict, drop_rate=0.2, drop_path_rate=0.3,
                 pretrained=True):
        super().__init__()
        # Sub-modules are registered in the original order (bn, conv, cnn,
        # out) so parameter ordering and state_dict layout stay the same.
        self.bn = nn.BatchNorm2d(64)
        resnet = timm.create_model(
            'resnet18',
            pretrained=pretrained,
            drop_rate=drop_rate,
            drop_path_rate=drop_path_rate,
        )
        self.conv = nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1)
        # Drop the first four and last two children of the backbone.
        # NOTE(review): presumably this leaves only the residual stages
        # (stem and pooling/classifier removed) - confirm against the
        # installed timm version.
        self.cnn = nn.Sequential(*list(resnet.children())[4:-2])
        self.out = nn.Linear(512, len(char_dict))
        self.char_dict = char_dict

    def forward(self, input):
        """Return per-position class probabilities.

        Args:
            input: 4-D tensor with 3 channels in dimension 1 (required by
                ``self.conv``).

        Returns:
            Tensor whose last dimension has ``len(char_dict)`` entries that
            sum to 1 (softmax output, not logits or log-probabilities).
            The channel dimension has been moved last by ``permute``.
        """
        # SiLU is applied in place on the batch-norm output.
        features = F.silu(self.bn(self.conv(input)), True)
        features = F.max_pool2d(features, kernel_size=(2, 2), stride=(2, 2))
        features = self.cnn(features)
        # Channels-first -> channels-last so the linear layer acts on the
        # feature dimension.
        features = features.permute(0, 2, 3, 1)
        return F.softmax(self.out(features), dim=-1)
class CaformerEncoderDecoder(nn.Module):
    """Per-position character classifier on top of a timm CAFormer backbone.

    A fresh 3x3 convolution + batch norm + SiLU, followed by a 2x2 max pool,
    feeds the backbone children selected by ``[1:-1]``. A linear layer then
    maps the 512 features at each spatial position to ``len(char_dict)``
    scores, which are normalised with softmax.

    Args:
        char_dict: Sized collection describing the output classes. Only its
            length is used here (to size the output layer); the object is
            also kept on the instance as ``self.char_dict``.
        drop_rate: Forwarded to ``timm.create_model``.
        drop_path_rate: Forwarded to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``. Pass ``False`` to
            skip loading pretrained weights, e.g. when a checkpoint is
            restored right after construction.
    """

    def __init__(self, char_dict, drop_rate=0.2, drop_path_rate=0.3,
                 pretrained=True):
        super().__init__()
        # Sub-modules are registered in the original order (bn, conv, cnn,
        # out) so parameter ordering and state_dict layout stay the same.
        self.bn = nn.BatchNorm2d(64)
        backbone = timm.create_model(
            'caformer_s18.sail_in22k_ft_in1k',
            pretrained=pretrained,
            drop_rate=drop_rate,
            drop_path_rate=drop_path_rate,
        )
        # Enabled on the full backbone before its children are extracted.
        # NOTE(review): presumably the flag propagates to the retained
        # stages - confirm for the installed timm version.
        backbone.set_grad_checkpointing(True)
        self.conv = nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1)
        # Drop the first and last child of the backbone.
        # NOTE(review): presumably these are the stem and the classifier
        # head - confirm against the installed timm version.
        self.cnn = nn.Sequential(*list(backbone.children())[1:-1])
        self.out = nn.Linear(512, len(char_dict))
        self.char_dict = char_dict

    def forward(self, input):
        """Return per-position class probabilities.

        Args:
            input: 4-D tensor with 3 channels in dimension 1 (required by
                ``self.conv``).

        Returns:
            Tensor whose last dimension has ``len(char_dict)`` entries that
            sum to 1 (softmax output, not logits or log-probabilities).
            The channel dimension has been moved last by ``permute``.
        """
        # SiLU is applied in place on the batch-norm output.
        features = F.silu(self.bn(self.conv(input)), True)
        features = F.max_pool2d(features, kernel_size=(2, 2), stride=(2, 2))
        features = self.cnn(features)
        # Channels-first -> channels-last so the linear layer acts on the
        # feature dimension.
        features = features.permute(0, 2, 3, 1)
        return F.softmax(self.out(features), dim=-1)