# Serves Qwen/Qwen3-0.6B through vLLM's OpenAI-compatible HTTP server on port 7860.

# Base image tag. Pinned (instead of :latest) so rebuilds are reproducible and the
# CLI flags below stay valid. Override with `--build-arg VLLM_VERSION=<tag>`.
# The reasoning flags in CMD follow the vLLM 0.8.x syntax; on vLLM >= 0.9
# `--enable-reasoning` is deprecated and should be dropped when bumping the pin.
ARG VLLM_VERSION=v0.8.5
FROM vllm/vllm-openai:${VLLM_VERSION}

WORKDIR /app

# Documentation only; the port still has to be published at `docker run` time.
EXPOSE 7860

# NOTE(review): no USER is set, so the server runs as whatever user the base image
# defaults to (presumably root). Switching to a non-root user would move the
# Hugging Face / vLLM cache directories, so it is left for a deliberate follow-up.

# The base image ships its own ENTRYPOINT, and Docker passes CMD to it as arguments.
# Set the entrypoint explicitly, in exec form, so the command line does not depend
# on the base image's entrypoint and vllm runs as PID 1 and receives SIGTERM on
# `docker stop`.
ENTRYPOINT ["vllm", "serve"]

# Default arguments for `vllm serve`; can be overridden at `docker run`.
# - The model is given positionally, which is the form `vllm serve` documents.
# - `--engine-use-ray` is intentionally absent: it has been removed from vLLM and is
#   rejected as an unknown argument by releases new enough to load Qwen3.
# - NOTE(review): `--dtype float32` doubles weight memory versus the checkpoint's
#   native half precision; kept as-is, confirm it is intentional.
# - NOTE(review): `--trust-remote-code` allows executing code shipped in the model
#   repository; kept as-is, confirm it is actually required for this model.
CMD ["Qwen/Qwen3-0.6B", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "8192", \
     "--dtype", "float32", \
     "--enable-reasoning", \
     "--reasoning-parser", "deepseek_r1", \
     "--trust-remote-code", \
     "--disable-log-requests"]