AgenticRAG / start.sh
#!/bin/bash
# Set a writable cache directory for Hugging Face Hub
export HF_HOME=/app/.cache
export XDG_CONFIG_HOME=/app/.config
mkdir -p /app/.cache /app/.config
# Optionally set a USER_AGENT to identify your requests
export USER_AGENT="vllm_huggingface_space"
# Launch the vLLM server in the background, passing the model tag as a positional argument
vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
--enable-auto-tool-choice \
--tool-call-parser llama3_json \
--chat-template examples/tool_chat_template_llama3.1_json.jinja \
--quantization bitsandbytes \
--load-format bitsandbytes \
--dtype half \
--enforce-eager \
--max-model-len 8192 &
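# Optional addition: record the server PID so the background vLLM process is
# shut down when this script exits (e.g. when the Gradio app stops).
VLLM_PID=$!
trap 'kill "$VLLM_PID" 2>/dev/null' EXIT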
# Give the vLLM server time to finish loading before starting the UI
# (adjust as needed, or poll readiness as sketched below)
sleep 10
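# A fixed sleep can race against model load time. A more robust sketch,
# assuming curl is available in the image and vLLM listens on its default
# port 8000, is to poll the server's /health endpoint until it responds:
#   until curl -sf http://localhost:8000/health > /dev/null; do sleep 2; done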
# Start the Gradio application using python3
python3 app.py