Upload 10 files
Browse files
- base/base_refcoco+.pth +3 -0
- base/base_refcoco.pth +3 -0
- base/base_refcocog_g.pth +3 -0
- base/base_refcocog_u.pth +3 -0
- large/lagre_refcoco.pth +3 -0
- large/large_refcoco+.pth +3 -0
- large/large_refcocog_g.pth +3 -0
- large/large_refcocog_u.pth +3 -0
- mix/DETRIS_large_mixed.yaml +64 -0
- mix/mixed.pth +3 -0
base/base_refcoco+.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cdcc780f9ee49b561db0a6812dd5ec3879227d5e1aff049056c0ff170cf01899
|
| 3 |
+
size 702505557
|
base/base_refcoco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f52c5b00b713281c7c894d17003bd641b11e2a9a993f41e44a4a06ad2ca431d5
|
| 3 |
+
size 702504561
|
base/base_refcocog_g.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5ac5531fdb02df8caecde84329c48e77dd845f8063a6cd1a705f8ac2ccd4c5c
|
| 3 |
+
size 702507549
|
base/base_refcocog_u.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afec3bca7f02abde52713b4b2de4a13aa1a9a72312dc959e238e8595a0fc7e79
|
| 3 |
+
size 702507549
|
large/lagre_refcoco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c623b640a86968fdd286d501b758a41ddce682545eb71ff2963e32cea07c0318
|
| 3 |
+
size 1576863055
|
large/large_refcoco+.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77a15dfd95968f0d5ea9433acc12ac9d0c82caeb81aa0dd8b173a669fc0090e6
|
| 3 |
+
size 1576864731
|
large/large_refcocog_g.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4017f91fcf1dee25fe3ea133036dc28b38846ee1ba8f76c866781250022299b
|
| 3 |
+
size 1576867059
|
large/large_refcocog_u.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ca45376202dd6ba7ce9b7cbc6069a54c3c2710ce1d76abe5213547f4875a52b
|
| 3 |
+
size 1576867059
|
mix/DETRIS_large_mixed.yaml
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DATA:
|
| 2 |
+
dataset: refcoco
|
| 3 |
+
train_lmdb: datasets/lmdb/refcoco/train.lmdb
|
| 4 |
+
train_split: train
|
| 5 |
+
val_lmdb: datasets/lmdb/refcoco/val.lmdb
|
| 6 |
+
val_split: val
|
| 7 |
+
mask_root: datasets/masks/refcoco
|
| 8 |
+
TRAIN:
|
| 9 |
+
visual_adapter_dim: 384
|
| 10 |
+
visual_adapter_layer: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
|
| 11 |
+
txt_adapter_dim: 384
|
| 12 |
+
txtual_adapter_layer: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
| 13 |
+
# Base Arch
|
| 14 |
+
clip_pretrain: pretrain/ViT-B-16.pt
|
| 15 |
+
dino_pretrain: pretrain/dinov2_vitl14_reg4_pretrain.pth
|
| 16 |
+
dino_name: dinov2-large
|
| 17 |
+
dino_layers: 24
|
| 18 |
+
output_dinov2: [8, 16]
|
| 19 |
+
model_name: CLIP-b-16
|
| 20 |
+
input_size: 448
|
| 21 |
+
word_len: 17
|
| 22 |
+
word_dim: 512
|
| 23 |
+
ladder_dim: 64
|
| 24 |
+
nhead: 8
|
| 25 |
+
multi_stage: 3
|
| 26 |
+
stride: [1, 1, 1]
|
| 27 |
+
vis_dim: 512
|
| 28 |
+
fpn_in: [1024, 1024, 1024] #dinov2 backbone
|
| 29 |
+
fpn_out: [256, 512, 1024]
|
| 30 |
+
sync_bn: True
|
| 31 |
+
|
| 32 |
+
# Decoder
|
| 33 |
+
num_layers: 3
|
| 34 |
+
num_head: 8
|
| 35 |
+
dim_ffn: 512
|
| 36 |
+
dropout: 0.1
|
| 37 |
+
intermediate: False
|
| 38 |
+
# Training Setting
|
| 39 |
+
workers: 32 # data loader workers
|
| 40 |
+
workers_val: 16
|
| 41 |
+
epochs: 50
|
| 42 |
+
milestones: [35]
|
| 43 |
+
start_epoch: 0
|
| 44 |
+
batch_size: 32 # batch size for training
|
| 45 |
+
batch_size_val: 32 # batch size for validation during training, memory and speed tradeoff
|
| 46 |
+
base_lr: 0.0001
|
| 47 |
+
lr_decay: 0.1
|
| 48 |
+
lr_multi: 1
|
| 49 |
+
weight_decay: 0.
|
| 50 |
+
max_norm: 0.
|
| 51 |
+
manual_seed: 0
|
| 52 |
+
print_freq: 100
|
| 53 |
+
# Resume & Save
|
| 54 |
+
exp_name: DETRIS_large
|
| 55 |
+
output_folder: exp/refcoco
|
| 56 |
+
save_freq: 1
|
| 57 |
+
weight: # path to initial weight (default: none)
|
| 58 |
+
resume: # path to latest checkpoint (default: none)
|
| 59 |
+
evaluate: True # evaluate on the validation set; extra GPU memory is needed, so a small batch_size_val is recommended
|
| 60 |
+
|
| 61 |
+
TEST:
|
| 62 |
+
test_split: val-test
|
| 63 |
+
test_lmdb: datasets/lmdb/refcoco/val.lmdb
|
| 64 |
+
visualize: False
|
mix/mixed.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53905e1efed16578ef4c94a44f6e7a2afca65c694ab30b85f58181028bcf0a1c
|
| 3 |
+
size 1698155435
|