#!/bin/bash
# Point the Hugging Face Hub cache and app config at writable directories
export HF_HOME=/app/.cache
export XDG_CONFIG_HOME=/app/.config
mkdir -p /app/.cache /app/.config
# Optionally set a USER_AGENT to identify your requests
export USER_AGENT="vllm_huggingface_space"
# Launch the vLLM server with the model tag as a positional argument
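# --enable-auto-tool-choice with the llama3_json parser lets the OpenAI-compatible endpoint
# return structured tool/function calls; the bitsandbytes quantization/load-format flags load
# the pre-quantized 4-bit checkpoint, and the trailing & backgrounds the server so the Gradio
# app below can run in the same container.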
vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
--enable-auto-tool-choice \
--tool-call-parser llama3_json \
--chat-template examples/tool_chat_template_llama3.1_json.jinja \
--quantization bitsandbytes \
--load-format bitsandbytes \
--dtype half \
--enforce-eager \
--max-model-len 8192 &
# Wait until the vLLM server reports healthy on its default port (loading the 8B model can take several minutes)
until curl -sf http://localhost:8000/health > /dev/null 2>&1; do sleep 5; done
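# Optional smoke test (a sketch, not part of the required startup): this assumes the server's
# default port 8000 and uses a hypothetical get_weather tool to confirm that tool calling is
# wired up. Uncomment to run it once before the UI starts.
# curl -s http://localhost:8000/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -d '{
#         "model": "unsloth/llama-3-8b-Instruct-bnb-4bit",
#         "messages": [{"role": "user", "content": "What is the weather in Paris right now?"}],
#         "tools": [{
#           "type": "function",
#           "function": {
#             "name": "get_weather",
#             "description": "Get the current weather for a city",
#             "parameters": {
#               "type": "object",
#               "properties": {"city": {"type": "string"}},
#               "required": ["city"]
#             }
#           }
#         }]
#       }'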
# Start the Gradio application using python3
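# app.py (not shown here) is assumed to call the local OpenAI-compatible endpoint at http://localhost:8000/v1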
python3 app.py