# Usage
```shell
# accelerate launch --config_file amlt_configs/accelerate_deepspeed_config.local.yaml \
python \
-m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca_multitask_v2 \
model.cache_dir=.model.cache/ \
training.do_train=True \
training.do_eval=True \
training.fp16=True \
training.num_masks_per_sample=16 \
training.per_device_train_batch_size=1 \
training.dataloader_num_workers=4 \
training.max_steps=99 \
training.logging_first_step=True \
training.logging_steps=5 \
training.evaluate_before_train=True \
training.max_eval_samples=3 \
training.eval_steps=50 \
training.save_steps=50 \
wandb.log=False \
training.lr_scheduler_type=cosine \
+data_transforms=lsj-0_1-2_0 \
model.lm_head_model_name_or_path=gpt2 \
model.sam_model_name_or_path=facebook/sam-vit-base
# model.lm_head_model_name_or_path=openlm-research/open_llama_3b_v2
# To use a LLaMA-based LM head, you need to install sentencepiece.
# training.gradient_checkpointing=true
# Extra args can be passed to the data module, e.g.:
# train_data_overrides='[data.streaming\=True]'
```
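The commented-out first line above shows how to launch the same training with `accelerate` (DeepSpeed config) instead of plain `python`. A minimal sketch, assuming the config file path from that comment and an `accelerate` version that supports launching modules via `-m`; the overrides are a subset of the command above:
```shell
accelerate launch --config_file amlt_configs/accelerate_deepspeed_config.local.yaml \
-m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca_multitask_v2 \
training.do_train=True \
training.do_eval=True \
training.fp16=True \
training.per_device_train_batch_size=1
```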
## SCA
Training:
```shell
python \
-m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_train=True \
training.do_eval=True \
training.num_masks_per_sample=32 \
+data.streaming=False \
training.per_device_train_batch_size=1 \
training.fp16=True \
training.dataloader_num_workers=4 \
training.logging_first_step=True \
training.trainable_params='[mask_decoder.additional_transformer,mask_decoder.caption_tokens,task_tokens,language_project,language_model]' \
+training.custom_param_lrs='{language_model:1e-5}' \
training.compute_metrics=null
# training.num_masks_per_sample=10
# training.num_masks_per_sample=4
# model.lm_head_model_name_or_path=gpt2-large
# model.lm_head_model_name_or_path=gpt2-xl
# training.compute_metrics: compute METEOR during training. If true, generation is used (about 0.4 it/s on A100); if false or null, only the loss is computed (about 1.5 it/s).
```
Inference:
```shell
python \
-m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_train=False \
training.do_eval=False \
training.do_inference=True \
training.output_dir=amlt/train-sca-vg_densecap-081023/gpt2-large/ \
wandb.log=False \
model.model_name_or_path=amlt/train-sca-vg_densecap-081023/gpt2-large/checkpoint-9000
# training.fp16_full_eval=True
# FIXME: when loading weights from an existing SCA model, use the same tokenizer as that model, e.g.:
# model.lm_head_model_name_or_path=$(grep lm_head_model_name_or_path $AMLT_MAP_INPUT_DIR/.hydra/config.yaml | tail -n1 | sed 's/ *//g' | cut -d ':' -f2)
# model.sam_model_name_or_path=$(grep sam_model_name_or_path $AMLT_MAP_INPUT_DIR/.hydra/config.yaml | tail -n1 | sed 's/ *//g' | cut -d ':' -f2)
```
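As an alternative to the `grep`/`sed` one-liners in the comments above, the same values can be read from the saved Hydra config with PyYAML. This is only a sketch: it assumes the resolved config nests these keys under `model:`, that the `.hydra/` directory sits inside the training output directory, and that `CKPT_DIR` points at that directory.
```shell
CKPT_DIR=amlt/train-sca-vg_densecap-081023/gpt2-large
# Read the LM head and SAM checkpoints recorded at training time (assumed to live under the `model:` group).
LM_HEAD=$(python -c "import sys, yaml; print(yaml.safe_load(open(sys.argv[1]))['model']['lm_head_model_name_or_path'])" $CKPT_DIR/.hydra/config.yaml)
SAM=$(python -c "import sys, yaml; print(yaml.safe_load(open(sys.argv[1]))['model']['sam_model_name_or_path'])" $CKPT_DIR/.hydra/config.yaml)
python -m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_inference=True \
training.output_dir=$CKPT_DIR/ \
model.model_name_or_path=$CKPT_DIR/checkpoint-9000 \
model.lm_head_model_name_or_path=$LM_HEAD \
model.sam_model_name_or_path=$SAM
```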
## Data Configs
```shell
src/conf/data
├── coco_caption-pseudo_region.yaml
├── coco-instance-local.yaml
├── coco-instance-task_type_caption-local.yaml
├── coco-instance-task_type_caption.yaml
├── coco-instance.yaml
├── objects365-local.yaml
├── objects365-task_type_caption-local.yaml
├── refclef-berkeley.yaml
├── refclef-unc.yaml
├── refcocog-google.yaml
├── refcoco-google.yaml
├── refcocog-umd.yaml
├── refcoco+-unc-split_testA.yaml
├── refcoco-unc-split_testA.yaml
├── refcoco+-unc-split_testB.yaml
├── refcoco-unc-split_testB.yaml
├── refcoco+-unc.yaml
├── refcoco-unc.yaml
├── sa1b-cap-streaming-hard_code_filter-num_tars_11.yaml
├── sa1b-cap-streaming-hard_code_filter-num_tars_2.yaml
├── sa1b-cap-streaming-hard_code_filter-num_tars_6.yaml
├── sa1b-cap-streaming-num_tars_11.yaml
├── sa1b-cap-streaming-num_tars_2.yaml
├── sa1b-cap-streaming-num_tars_6.yaml
├── sa1b-cap-streaming.yaml
├── sbu-pseudo_region-local.yaml
├── sbu-pseudo_region.yaml
├── v3det-local.yaml
├── v3det-task_type_caption-local.yaml
├── vg-densecap-local.yaml
├── vg-densecap-mask_region_descriptions.yaml
├── vg-densecap-region_descriptions.yaml
├── vg_densecap.yaml
├── vg-full-vg-densecap-mask_region_descriptions.yaml
├── vg-full-vg-densecap-region_descriptions.yaml
└── vg-grit-local.yaml
```
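Each file above is a dataset config addressed by its file name without the `.yaml` suffix, which is the form used for `train_data`/`eval_data` throughout this page. As a sketch, the bracket syntax suggests several datasets can be listed at once; whether a particular mixture is supported and fits in memory depends on the data module:
```shell
python -m src.train \
train_data='[vg-densecap-local,vg-grit-local]' \
eval_data='[refcocog-umd]' \
+model=base_sca \
training.do_train=True \
training.do_eval=True
```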
## Debug
Use the VS Code debugger; the config is in `.vscode/launch.json`.
```shell
python -m debugpy --wait-for-client --listen 0.0.0.0:5678 \
-m src.train \
train_data='[vg-densecap-region_descriptions]' eval_data='[vg-densecap-region_descriptions]' \
+model=base_sam_captioner \
training.do_train=True \
training.do_eval=True \
training.num_masks_per_sample=6 \
+data.streaming=False \
training.max_eval_samples=1 \
training.max_train_samples=1 \
training.logging_steps=5 \
training.evaluation_strategy=steps \
training.eval_steps=5 \
training.max_steps=1000 \
training.save_strategy=steps \
training.save_steps=10 \
training.save_total_limit=2 \
training.optim=adamw_torch \
training.learning_rate=5e-5 \
wandb.log=False \
wandb.project=sca \
wandb.group=debug \
wandb.name=sca-debug \
training.evaluate_before_train=False \
training.log_level="info" \
hydra.verbose=true
# training.max_train_samples / training.max_eval_samples limit the number of samples for quick debugging.
# training.log_level sets the log level in `transformers`: debug = 10, info = 20, warning = 30, error = 40, critical = 50.
# By default it is `passive`, which behaves like warning (30).
# hydra.verbose=true sets log_level=DEBUG in our own loggers, which are controlled by hydra.
```
## About Wandb Resume
We save the wandb run id in `training.output_dir/wandb_id`, so a different `output_dir` results in a different wandb run id.
- Reference: https://github.com/wandb/wandb/issues/335#issuecomment-493284910
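Below is a sketch of how the saved id can be reused when restarting a run. `WANDB_RUN_ID` and `WANDB_RESUME` are standard wandb environment variables; the training script may already pick up `wandb_id` on its own, so treat this as illustrative:
```shell
# Illustrative only: reuse the saved run id so wandb appends to the existing run instead of starting a new one.
OUTPUT_DIR=amlt/train-sca-vg_densecap-081023/gpt2-large/
WANDB_RUN_ID=$(cat ${OUTPUT_DIR}/wandb_id) WANDB_RESUME=allow \
python -m src.train \
train_data='[vg-densecap-local]' eval_data='[vg-densecap-local]' \
+model=base_sca \
training.do_train=True \
training.output_dir=${OUTPUT_DIR}
```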