zeju-0727 committed on
Commit
e6cbef5
·
verified ·
1 Parent(s): 6fc6611

Upload run_vllm_LM.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. run_vllm_LM.sh +10 -0
run_vllm_LM.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# Start `vllm serve` for the DeepSeek-R1-Distill-Qwen-14B checkpoint.
#
# Optional environment overrides (the defaults reproduce the original command):
#   MODEL_PATH    model directory given to `vllm serve`
#                 (default: /data/zeju/DeepSeek-R1-Distill-Qwen-14B)
#   PORT          value passed to --port      (default: 8014)
#   VLLM_API_KEY  value passed to --api-key   (default: token-abc123)
set -euo pipefail

# Expose eight GPUs; --tensor-parallel-size below is set to the same count,
# so this list is intentionally not overridable on its own.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

model_path="${MODEL_PATH:-/data/zeju/DeepSeek-R1-Distill-Qwen-14B}"
port="${PORT:-8014}"
# NOTE(review): the fallback is the key that was hard-coded in the original
# script; set VLLM_API_KEY to a real secret instead of relying on it.
api_key="${VLLM_API_KEY:-token-abc123}"

# Fail early with a clear message instead of a bare "command not found".
if ! command -v vllm >/dev/null 2>&1; then
  printf 'error: vllm executable not found in PATH\n' >&2
  exit 127
fi

# Arguments live in an array so individual flags can be commented out safely.
# The original used backslash continuations that ran into commented lines,
# which only worked because the comment happened to terminate the command.
serve_args=(
  "${model_path}"
  --served-model-name "DeepSeek-R1-Distill-Qwen-14B"
  --port "${port}"
  --tensor-parallel-size 8
  --dtype auto
  --api-key "${api_key}"
  # Optional flags, disabled in the original script; uncomment to enable.
  # --gpu_memory_utilization 0.8
  # --enable-prefix-caching
)

# exec replaces this shell with the server process, so signals sent to the
# script's PID are delivered to vllm directly.
exec vllm serve "${serve_args[@]}"