#!/bin/bash
set -e

# Start llama-server in the background
cd /llama.cpp/build
./bin/llama-server \
    --host 0.0.0.0 \
    --port 8081 \
    --model /models/model.q8_0.gguf \
    --ctx-size 32768 &

# Wait for llama-server to finish loading the model. The -f flag makes curl
# exit non-zero on HTTP error statuses (llama-server answers 503 while the
# model is still loading), so the loop waits for actual readiness rather than
# just a listening socket.
echo "Waiting for llama-server to start..."
until curl -sf "http://localhost:8081/v1/models" >/dev/null; do
    sleep 1
done
echo "llama-server is ready."

# Start the Go application (main service). exec replaces this shell so the Go
# binary receives signals directly (e.g. when running as PID 1 in a container).
echo "Starting Go application..."
cd /app
exec ./main
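
# Optional hardening (a sketch, not part of the original script): bound the
# readiness wait so the container fails fast instead of hanging forever if
# llama-server crashes on startup. The 120-second budget is an assumed value;
# tune it to how long the model actually takes to load. To use it, replace the
# until-loop above with something like:
#
#   for _ in $(seq 1 120); do
#       curl -sf "http://localhost:8081/v1/models" >/dev/null && break
#       sleep 1
#   done
#   curl -sf "http://localhost:8081/v1/models" >/dev/null \
#       || { echo "llama-server failed to start within 120s" >&2; exit 1; }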