---
# Docker Compose stack: an "intelligence router" in front of a local
# llama.cpp server that acts as the fallback small language model (SLM).

# Kept for older docker-compose releases; Compose V2 treats this key as
# informational only.
version: '3.8'

services:
  # The Intelligence Router
  router:
    build: ./intelligence-router
    ports:
      - "9000:9000"
    environment:
      # NOTE(review): "10.0.4.x" looks like a placeholder, not a resolvable
      # host -- replace the last octet with the main PC's real address.
      - MAIN_PC_URL=http://10.0.4.x:8080/v1
      # Reaches the SLM over the Compose network by service name, on the
      # container port (8080), not the host-published port.
      - LOCAL_SLM_URL=http://llama-slm:8080/v1
      # Substituted from the shell environment or a .env file at `up` time.
      - OPENAI_API_KEY=${OPENAI_API_KEY}
    # Start-order only: this does not wait for the model server to be ready.
    depends_on:
      - llama-slm

  # The Local SLM (Fallback Brain)
  llama-slm:
    image: ghcr.io/ggerganov/llama.cpp:server
    volumes:
      # The GGUF file referenced in `command` must exist under ./models.
      - ./models:/models
    # Command to run a small, fast model (e.g., Llama-3-8B GGUF).
    # Folded scalar (`>-`): the lines below are joined with spaces into a
    # single argument string, with no trailing newline.
    command: >-
      -m /models/llama-3-8b-instruct.Q4_K_M.gguf
      --host 0.0.0.0
      --port 8080
      --ctx-size 2048
    ports:
      # Host port 8081 -> container port 8080.
      - "8081:8080"