Dyve_plus_RL_copy / run_vllm_LM.sh
zeju-0727's picture
Upload run_vllm_LM.sh with huggingface_hub
e6cbef5 verified
raw
history blame contribute delete
328 Bytes
# Launch `vllm serve` for the DeepSeek-R1-Distill-Qwen-14B checkpoint.
#
# Usage: bash run_vllm_LM.sh
#
# Optional environment overrides (the defaults reproduce the original,
# previously hard-coded settings):
#   LM_MODEL_PATH   model directory handed to `vllm serve`
#   LM_SERVED_NAME  value for --served-model-name
#   LM_PORT         value for --port
#   LM_TP_SIZE      value for --tensor-parallel-size
#   LM_API_KEY      value for --api-key
#
# The script's exit status is the exit status of `vllm`.

# Fail loudly on a misspelled variable name instead of expanding it to "".
set -u

# Make GPUs 0-7 visible; the default --tensor-parallel-size below is 8, so
# keep the two in step if either one is changed.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# The arguments live in an array rather than a backslash-continued command:
# comments are legal between array elements, so optional flags can be toggled
# by (un)commenting a line without leaving a dangling `\` that would splice
# the command into a comment or into the next line.
#
# NOTE(review): the default API key is a placeholder committed to the repo and
# is passed on the command line (visible in `ps`); set LM_API_KEY to a real
# secret for anything beyond local experiments.
serve_args=(
  serve "${LM_MODEL_PATH:-/data/zeju/DeepSeek-R1-Distill-Qwen-14B}"
  --served-model-name "${LM_SERVED_NAME:-DeepSeek-R1-Distill-Qwen-14B}"
  --port "${LM_PORT:-8014}"
  --tensor-parallel-size "${LM_TP_SIZE:-8}"
  --dtype auto
  --api-key "${LM_API_KEY:-token-abc123}"
  # Optional tuning flags, disabled by default; uncomment to enable.
  # --gpu_memory_utilization 0.8
  # --enable-prefix-caching
)

vllm "${serve_args[@]}"