#!/bin/bash
VLLM_VERSION="v0.8.4"
BASE_MODEL_NAME="Qwen/Qwen2.5-3B-Instruct"
ADAPTER_NAME="id4thomas/emotion-predictor-Qwen2.5-3B-Instruct"
ADAPTER_DIR="..." # Path to the directory containing the LoRA adapter
# Serve the base model through the vLLM OpenAI-compatible server with the LoRA adapter attached.
# The adapter directory is mounted into the container and registered via --lora-modules;
# the JSON argument is double-quoted so the shell variables expand.
docker run --runtime nvidia --gpus all \
    -v "${ADAPTER_DIR}":/vllm-workspace/adapter \
    -v ./cache:/root/.cache/huggingface \
    -p 8010:8000 \
    --ipc=host \
    vllm/vllm-openai:${VLLM_VERSION} \
    --model "${BASE_MODEL_NAME}" \
    --lora-modules "{\"name\": \"${ADAPTER_NAME}\", \"path\": \"/vllm-workspace/adapter\", \"base_model_name\": \"${BASE_MODEL_NAME}\"}" \
    --enable-lora \
    --max-lora-rank 16 \
    --served-model-name "${BASE_MODEL_NAME}" \
    --gpu-memory-utilization 0.5
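
# Example request (a sketch): once the server is up, the adapter is selected by passing the
# name registered in --lora-modules as the "model" field; port 8010 comes from the -p mapping
# above. The prompt text is only a hypothetical illustration.
# curl http://localhost:8010/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -d '{
#         "model": "id4thomas/emotion-predictor-Qwen2.5-3B-Instruct",
#         "messages": [{"role": "user", "content": "I finally got the offer I was waiting for!"}]
#       }'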