Spaces:
Runtime error
Runtime error
```yaml
MODEL:
  META_ARCHITECTURE: "GeneralizedVLRCNN"
  WEIGHT: "swin_base_patch4_window12_384_22k.pth"
  RPN_ONLY: True
  RPN_ARCHITECTURE: "VLDYHEAD"

  ATSS:
    PRE_NMS_TOP_N: 3000
    DETECTIONS_PER_IMG: 100
    INFERENCE_TH: 0.0

  SWINT:
    VERSION: "fusion"
    EMBED_DIM: 128
    DEPTHS: (2, 2, 18, 2)
    NUM_HEADS: (4, 8, 16, 32)
    WINDOW_SIZE: 12
    OUT_CHANNELS: (128, 256, 512, 1024)
    DROP_PATH_RATE: 0.4

  BACKBONE:
    FUSION_VERSION: "v3"
    CONV_BODY: "SWINT-FPN-RETINANET"
    OUT_CHANNELS: 256
    USE_CHECKPOINT: True
    FREEZE_CONV_BODY_AT: -1

  LANGUAGE_BACKBONE:
    FREEZE: False
    MODEL_TYPE: "roberta-fused-v2"
    TOKENIZER_TYPE: "roberta-base"
    LANG_DIM: 768
    MASK_SPECIAL: False
    USE_CHECKPOINT: False

  RPN:
    USE_FPN: True
    ANCHOR_SIZES: (64, 128, 256, 512, 1024)
    ANCHOR_STRIDE: (8, 16, 32, 64, 128)
    ASPECT_RATIOS: (1.0,)
    SCALES_PER_OCTAVE: 1

  DYHEAD:
    CHANNELS: 256
    NUM_CONVS: 6
    USE_GN: True
    USE_DYRELU: True
    USE_DFCONV: True
    USE_DYFUSE: True
    TOPK: 9
    SCORE_AGG: "MEAN"
    LOG_SCALE: 0.0
    USE_CHECKPOINT: True

    FUSE_CONFIG:
      EARLY_FUSE_ON: False
      TYPE: "NONE" # "MHA-B", "MHA-S", "FILM", "SCAN", "NONE"
      USE_CLASSIFICATION_LOSS: False
      USE_TOKEN_LOSS: False
      USE_CONTRASTIVE_ALIGN_LOSS: False
      CONTRASTIVE_HIDDEN_DIM: 64
      USE_DOT_PRODUCT_TOKEN_LOSS: True
      USE_LAYER_SCALE: True
      CLAMP_MIN_FOR_UNDERFLOW: True
      CLAMP_MAX_FOR_OVERFLOW: True
      CLAMP_BERTATTN_MIN_FOR_UNDERFLOW: True
      CLAMP_BERTATTN_MAX_FOR_OVERFLOW: True
      CLAMP_DOT_PRODUCT: True

# use for grounding model
DATASETS:
  TRAIN: ("refcoco_train", )
  TEST: ("refcoco_val", )
  DISABLE_SHUFFLE: True

INPUT:
  PIXEL_MEAN: [ 103.530, 116.280, 123.675 ]
  PIXEL_STD: [ 57.375, 57.120, 58.395 ]
  MIN_SIZE_TRAIN: 800
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333

AUGMENT:
  MULT_MIN_SIZE_TRAIN: (480,560,640,720,800)
  FLIP_PROB_TRAIN: 0.0 # Important for refcoco esp

DATALOADER:
  SIZE_DIVISIBILITY: 32

SOLVER:
  OPTIMIZER: ADAMW
  BASE_LR: 0.00001
  LANG_LR: 0.00001
  WEIGHT_DECAY: 0.0001
  STEPS: (0.67, 0.89)
  MAX_EPOCH: 20
  IMS_PER_BATCH: 16
  WARMUP_ITERS: 2000
  WARMUP_FACTOR: 0.001
  TEST_WITH_INFERENCE: True
  FIND_UNUSED_PARAMETERS: False
  USE_AMP: True
  MODEL_EMA: 0.999

  CLIP_GRADIENTS:
    ENABLED: False
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 1.0
    NORM_TYPE: 2.0

TEST:
  DURING_TRAINING: True
  EVAL_TASK: "grounding"
  IMS_PER_BATCH: 16
```