Spaces:

Sumkh
/

AgenticRAG

Sleeping

Sumkh committed on Feb 25

Commit

004f73b

verified ·

1 Parent(s): 6201226

Update start.sh

Files changed (1) hide show

start.sh CHANGED Viewed

@@ -12,10 +12,13 @@ vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
   --enable-auto-tool-choice \
   --tool-call-parser llama3_json \
   --chat-template examples/tool_chat_template_llama3.1_json.jinja \
-  --dtype half &
 # Wait to ensure the vLLM server is fully started (adjust if needed)
 sleep 10
 # Start the Gradio application using python3
-python3 app.py

   --enable-auto-tool-choice \
   --tool-call-parser llama3_json \
   --chat-template examples/tool_chat_template_llama3.1_json.jinja \
+  --quantization bitsandbytes \
+  --load-format bitsandbytes \
+  --dtype half \
+  --enforce-eager &
 # Wait to ensure the vLLM server is fully started (adjust if needed)
 sleep 10
 # Start the Gradio application using python3
+python3 app.py