#!/usr/bin/env bash
# Launch a vLLM OpenAI-compatible API server for the "medguide-v" multimodal model.
# Required env: MODEL_PATH — path to the model checkpoint directory.
set -euo pipefail

# GPUs to use; the count must match --tensor-parallel-size below (4).
export CUDA_VISIBLE_DEVICES=0,1,2,3
# Permit --max-model-len to exceed the model's default context window.
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
# Force the legacy (v0) engine — NOTE(review): presumably needed for this
# model's multimodal path; confirm before removing.
export VLLM_USE_V1=0

# Fail fast with a clear message instead of passing an empty path to vllm
# (the original hardcoded MODEL_PATH='').
MODEL_PATH=${MODEL_PATH:?set MODEL_PATH to the model checkpoint directory}

vllm serve "$MODEL_PATH" \
  --served-model-name medguide-v \
  --port 8232 \
  --host 0.0.0.0 \
  --dtype bfloat16 \
  --limit-mm-per-prompt image=6,video=0 \
  --max-model-len 12000 \
  --gpu-memory-utilization 0.9 \
  --tensor-parallel-size 4 \
  --api-key medguide-v \
  --generation-config vllm