Henishma committed on
Commit
efb665a
·
verified ·
1 Parent(s): 157bea6

Create model.py

Browse files
Files changed (1) hide show
  1. model.py +52 -0
model.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from transformers import BertModel
4
+
5
class MultimodalClassifier(nn.Module):
    """Late-fusion multimodal classifier.

    Combines BERT-pooled text features with a precomputed image feature
    vector: each modality is projected to a 256-dim space, the two
    projections are concatenated (512-dim), and an MLP head produces
    class logits.

    Args:
        text_hidden_size: Hidden size of the BERT pooled output
            (768 for bert-base-uncased).
        image_feat_size: Dimensionality of the incoming image feature
            vector (2048 — presumably a ResNet-style global feature;
            TODO confirm against the feature extractor used upstream).
        num_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, text_hidden_size=768, image_feat_size=2048, num_classes=5):
        super().__init__()
        # Pretrained text encoder; its pooler_output feeds text_fc.
        self.bert = BertModel.from_pretrained("bert-base-uncased")

        # Per-modality projection heads: Linear -> BatchNorm1d -> ReLU -> Dropout.
        # Both map into a shared 256-dim space before fusion.
        self.text_fc = nn.Sequential(
            nn.Linear(text_hidden_size, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),
        )
        self.image_fc = nn.Sequential(
            nn.Linear(image_feat_size, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        # Fusion head: concatenated 256+256 features -> class logits.
        self.fusion_fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, num_classes),
        )

    def _project(self, branch, x):
        """Run one modality branch (Linear, BatchNorm1d, ReLU, Dropout) on x.

        BatchNorm1d cannot compute batch statistics from a single sample,
        so the BN layer is skipped only when *training* with batch size 1.
        In eval mode BN uses its running statistics and is always applied,
        so single-sample inference matches the same sample inside a batch.
        (The original code skipped BN for batch size 1 even in eval mode,
        making outputs depend on the inference batch size.)
        """
        x = branch[0](x)  # Linear projection
        if self.training and x.size(0) == 1:
            return branch[2:](x)  # skip BatchNorm1d: no batch stats from one sample
        return branch[1:](x)

    def forward(self, input_ids, attention_mask, image_vector):
        """Compute class logits for a batch of (text, image) pairs.

        Args:
            input_ids: Token-id tensor for BERT, shape (batch, seq_len).
            attention_mask: Attention mask for BERT, shape (batch, seq_len).
            image_vector: Precomputed image features,
                shape (batch, image_feat_size).

        Returns:
            Logit tensor of shape (batch, num_classes).
        """
        text_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        text_feat = self._project(self.text_fc, text_output.pooler_output)
        image_feat = self._project(self.image_fc, image_vector)

        # Late fusion: concatenate modality embeddings along the feature dim.
        fused = torch.cat((text_feat, image_feat), dim=1)
        return self.fusion_fc(fused)
51
+
52
+