# intelligence-router/docker-compose.yml
# (file-viewer metadata carried over from the original paste: 29 lines, 665 B, YAML)

# Compose stack with two services: `router` (built from a local context) and
# `llama-slm` (a llama.cpp server image). The router is given the llama-slm
# endpoint through LOCAL_SLM_URL.
version: '3.8'

services:
  # The Intelligence Router
  router:
    # NOTE(review): a relative build context is resolved against the directory
    # that contains this compose file — confirm ./intelligence-router exists
    # there.
    build: ./intelligence-router
    ports:
      - "9000:9000"
    environment:
      # NOTE(review): "10.0.4.x" looks like a placeholder rather than a
      # resolvable address — replace it with the main PC's real IP/hostname.
      - MAIN_PC_URL=http://10.0.4.x:8080/v1
      # Addresses the llama-slm service by its service name on the container
      # port (8080), not the host-published port (8081).
      - LOCAL_SLM_URL=http://llama-slm:8080/v1
      # Interpolated by Compose from the invoking environment or a .env file.
      - OPENAI_API_KEY=${OPENAI_API_KEY}
    # Start-order dependency only; it does not wait for llama-slm to be ready.
    depends_on:
      - llama-slm

  # The Local SLM (Fallback Brain)
  llama-slm:
    image: ghcr.io/ggerganov/llama.cpp:server
    volumes:
      # Host ./models is mounted at /models; the -m path below points into it.
      - ./models:/models
    # Command to run a small, fast model (e.g., Llama-3-8B GGUF).
    # Folded with `>-` so the lines join into one argument string with no
    # trailing newline.
    command: >-
      -m /models/llama-3-8b-instruct.Q4_K_M.gguf
      --host 0.0.0.0
      --port 8080
      --ctx-size 2048
    ports:
      # Host 8081 -> container 8080 (matches --port above).
      - "8081:8080"