# Image that serves Qwen/Qwen3-0.6B through vLLM's OpenAI-compatible HTTP
# server on port 7860.

# Pin the base image instead of tracking `latest`: vLLM renames and removes
# CLI flags between releases, so an unpinned tag can break the command below
# without any change to this file. Override at build time with
#   docker build --build-arg VLLM_VERSION=<tag> .
# and re-check every flag in CMD against that release when you do.
ARG VLLM_VERSION=v0.8.5
FROM vllm/vllm-openai:${VLLM_VERSION}

WORKDIR /app

# Documentation only; publish the port with `docker run -p 7860:7860`.
EXPOSE 7860

# The base image ships its own exec-form ENTRYPOINT, and a shell-form CMD
# would be appended to it as arguments (`/bin/sh -c ...`) rather than run.
# Declare the entrypoint explicitly and keep CMD as pure, overridable
# arguments; exec form also lets the server receive SIGTERM directly.
ENTRYPOINT ["vllm", "serve"]

# Default arguments for `vllm serve`:
#   - the model is given positionally, which is the form `vllm serve` expects
#     (the `--model` option is warned about or rejected, depending on release);
#   - `--engine-use-ray` was dropped: the option was removed from vLLM before
#     Qwen3 support was added, so releases able to load this model reject it;
#   - `--enable-reasoning` is kept for the pinned release.
#     NOTE(review): newer vLLM releases reportedly fold it into
#     `--reasoning-parser`; confirm when bumping VLLM_VERSION.
CMD ["Qwen/Qwen3-0.6B", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "8192", \
     "--dtype", "float32", \
     "--enable-reasoning", \
     "--reasoning-parser", "deepseek_r1", \
     "--trust-remote-code", \
     "--disable-log-requests"]