#!/usr/bin/env bash
# Launch `vllm serve` for the DeepSeek-R1-Distill-Qwen-14B checkpoint.
#
# Environment:
#   VLLM_API_KEY  Optional. Value passed to --api-key; when unset or empty the
#                 placeholder "token-abc123" is used. Set a real secret here
#                 instead of editing this file.

# Expose GPUs 0-7 to the server; the device count matches
# --tensor-parallel-size below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# `export` and the command must be separate statements: on a single line,
# `export` would treat `vllm`, `serve`, and every flag as variable names and
# the server would never start. Each backslash must be the last character on
# its line to act as a continuation.
vllm serve \
  "/data/zeju/DeepSeek-R1-Distill-Qwen-14B" \
  --served-model-name "DeepSeek-R1-Distill-Qwen-14B" \
  --port 8014 \
  --tensor-parallel-size 8 \
  --dtype auto \
  --api-key "${VLLM_API_KEY:-token-abc123}"

# Optional flags, currently disabled. To enable one, append it to the command
# above (adding a trailing backslash to the preceding line). They are kept
# outside the command because a comment cannot sit inside a continued line.
#   --gpu_memory_utilization 0.8
#   --enable-prefix-caching