Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
start.sh
CHANGED
@@ -15,10 +15,11 @@ vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
|
|
15 |
--quantization bitsandbytes \
|
16 |
--load-format bitsandbytes \
|
17 |
--dtype half \
|
18 |
-
--enforce-eager
|
|
|
19 |
|
20 |
# Wait to ensure the vLLM server is fully started (adjust if needed)
|
21 |
sleep 10
|
22 |
|
23 |
# Start the Gradio application using python3
|
24 |
-
python3 app.py
|
|
|
15 |
--quantization bitsandbytes \
|
16 |
--load-format bitsandbytes \
|
17 |
--dtype half \
|
18 |
+
--enforce-eager \
|
19 |
+
--max-model-len 8192 &
|
20 |
|
21 |
# Wait to ensure the vLLM server is fully started (adjust if needed)
|
22 |
sleep 10
|
23 |
|
24 |
# Start the Gradio application using python3
|
25 |
+
python3 app.py
|