| | # env_defaults: defines SHARED_CMD_ARGS, an argument string that starts with `-m src.train` |
| | # followed by key=value / +key=value overrides (looks like Hydra override syntax - confirm). |
| | # The overrides select an inference-only run: do_train=False, do_eval=False, do_inference=True, |
| | # with train and eval data both set to vg-densecap-region_descriptions and max_eval_samples=10. |
| | # NOTE(review): max_train_samples and num_train_epochs are set although do_train=False - |
| | # presumably unused in this mode; confirm. |
| | # NOTE(review): none of the job commands visible in this file reference SHARED_CMD_ARGS - |
| | # confirm where it is consumed. |
| | env_defaults: |
| |
|
| | SHARED_CMD_ARGS: ' |
| | -m src.train |
| | train_data=[vg-densecap-region_descriptions] eval_data=[vg-densecap-region_descriptions] |
| | +model=base_sam_captioner |
| | training.do_train=False |
| | training.do_eval=False |
| | training.do_inference=True |
| | training.num_masks_per_sample=1 |
| | +data.streaming=False |
| | training.max_eval_samples=10 |
| | training.max_train_samples=1 |
| | training.num_train_epochs=10 |
| | training.fp16=True |
| | training.output_dir=$AMLT_OUTPUT_DIR |
| | training.output_log_dir=$AMLT_LOGS_DIR |
| | model.cache_dir=/mnt/blob/weights/.model.cache/ |
| | training.dataloader_num_workers=4 |
| | ' |
| |
|
| |
|
| |
|
| | # environment: container image for the job - nvidia/pytorch:23.07-py3, with nvcr.io as its registry. |
| | environment: |
| | image: nvidia/pytorch:23.07-py3 |
| | registry: nvcr.io |
| |
|
| | # code: local directory to use as the job's code - the parent directory of $CONFIG_DIR. |
| | # NOTE(review): $CONFIG_DIR is not defined in this file; presumably the directory holding this config - confirm. |
| | code: |
| | local_dir: $CONFIG_DIR/../ |
| |
|
| |
|
| |
|
| | # jobs: a single non-preemptible debug job, one process per node; the sku is "G" followed by |
| | # $NUM_GPUS (not defined in this file). |
| | # The command list sources, in order: amlt_configs/setup.sh, ~/.bashrc, |
| | # amlt_configs/setup_accelerate_on_azure.sh and amlt_configs/post_process.sh. |
| | # NOTE(review): no visible command launches src.train or uses $SHARED_CMD_ARGS - the actual |
| | # run step may be missing between the setup and post-processing scripts; confirm. |
| | # submit_args passes two environment variables (AZFUSE_USE_FUSE, SHARED_MEMORY_PERCENT) and |
| | # a container shm_size of 256g. |
| | jobs: |
| | - name: sam_captioner-infer-debug |
| | sku: G$NUM_GPUS |
| | preemptible: False |
| | process_count_per_node: 1 |
| | command: |
| | - . amlt_configs/setup.sh |
| | - source ~/.bashrc |
| | - . amlt_configs/setup_accelerate_on_azure.sh |
| | - . amlt_configs/post_process.sh |
| | |
| |
|
| | |
| | submit_args: |
| | env: |
| | AZFUSE_USE_FUSE: "1" |
| | SHARED_MEMORY_PERCENT: 0.5 |
| | container_args: |
| | shm_size: 256g |