#!/bin/bash
set -e

# Start llama-server in the background
cd /llama.cpp/build
./bin/llama-server \
    --host 0.0.0.0 \
    --port 8081 \
    --model /models/model.q8_0.gguf \
    --ctx-size 32768 &

# Wait for llama-server to finish loading the model. The -f flag makes curl
# exit non-zero on HTTP error statuses (llama-server answers 503 while the
# model is still loading), so the loop waits for actual readiness rather than
# just a listening socket.
echo "Waiting for llama-server to start..."
until curl -sf "http://localhost:8081/v1/models" >/dev/null; do
    sleep 1
done
echo "llama-server is ready."

# Start the Go application (main service). exec replaces this shell so the Go
# binary receives signals directly (e.g. when running as PID 1 in a container).
echo "Starting Go application..."
cd /app
exec ./main
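
# Optional hardening (a sketch, not part of the original script): bound the
# readiness wait so the container fails fast instead of hanging forever if
# llama-server crashes on startup. The 120-second budget is an assumed value;
# tune it to how long the model actually takes to load. To use it, replace the
# until-loop above with something like:
#
#   for _ in $(seq 1 120); do
#       curl -sf "http://localhost:8081/v1/models" >/dev/null && break
#       sleep 1
#   done
#   curl -sf "http://localhost:8081/v1/models" >/dev/null \
#       || { echo "llama-server failed to start within 120s" >&2; exit 1; }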