#!/usr/bin/env bash
#
# Launch main_deepspeed.py on a Slurm partition through srun.
#
# Usage:
#   <this script> PARTITION JOB_NAME CONFIG [EXTRA_ARGS...]
#
# Positional arguments:
#   PARTITION   value passed to `srun -p`
#   JOB_NAME    value passed to `srun --job-name`
#   CONFIG      value passed to `main_deepspeed.py --cfg`
#   EXTRA_ARGS  everything from the 4th argument on is forwarded verbatim to
#               main_deepspeed.py
#
# Environment overrides (defaults in parentheses):
#   GPUS           (8)   value for `--ntasks`
#   GPUS_PER_NODE  (8)   value for `--gres=gpu:` and `--ntasks-per-node`
#   CPUS_PER_TASK  (10)  value for `--cpus-per-task`
#   SRUN_ARGS      ("")  whitespace-separated extra options inserted into the
#                        srun command line

# Fail early with a usage message instead of handing srun a malformed
# command line when a required positional argument is missing.
if (( $# < 3 )); then
  printf 'Usage: %s PARTITION JOB_NAME CONFIG [EXTRA_ARGS...]\n' "${0##*/}" >&2
  exit 1
fi

# Trace the launch command so the exact srun invocation shows up in the log.
set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-10}
SRUN_ARGS=${SRUN_ARGS:-""}

# Split SRUN_ARGS on whitespace into separate words. Using an array (rather
# than an unquoted expansion) keeps the word splitting but avoids accidental
# pathname expansion of characters such as `*` or `[`.
srun_extra_args=()
read -r -a srun_extra_args <<< "${SRUN_ARGS}"

# Prepend the parent of this script's directory to PYTHONPATH for the srun
# command only. The `:+` form avoids a dangling ':' when PYTHONPATH is empty.
# NOTE(review): `--quotatype` looks like a site-specific srun option and
# `--local-rank 0` is hard-coded for every task — confirm both are intended.
PYTHONPATH="$(dirname -- "$0")/..${PYTHONPATH:+:${PYTHONPATH}}" \
srun -p "${PARTITION}" \
    --job-name="${JOB_NAME}" \
    --gres=gpu:"${GPUS_PER_NODE}" \
    --ntasks="${GPUS}" \
    --ntasks-per-node="${GPUS_PER_NODE}" \
    --cpus-per-task="${CPUS_PER_TASK}" \
    --kill-on-bad-exit=1 \
    --quotatype=spot \
    "${srun_extra_args[@]}" \
    python -u main_deepspeed.py \
    --cfg "${CONFIG}" \
    --local-rank 0 \
    --data-path /mnt/lustre/share/images \
    --output work_dirs_deepspeed "${@:4}"