Fix build context, port conflict, and improve proxy/health-check logic

This commit is contained in:
Tudorel Oprisan 2026-06-09 17:34:07 +01:00
parent cb01b42f38
commit d7090b1644
2 changed files with 19 additions and 15 deletions

View File

@ -3,9 +3,9 @@ version: '3.8'
services: services:
# The Intelligence Router # The Intelligence Router
router: router:
build: ./intelligence-router build: .
ports: ports:
- "9000:9000" - "9001:9000"
environment: environment:
- MAIN_PC_URL=http://10.0.4.11:8080/v1 - MAIN_PC_URL=http://10.0.4.11:8080/v1
- LOCAL_SLM_URL=http://10.0.4.200:8080/v1 - LOCAL_SLM_URL=http://10.0.4.200:8080/v1

30
main.py
View File

@ -10,21 +10,28 @@ load_dotenv()
app = FastAPI() app = FastAPI()
# Configuration from environment variables # Configuration from environment variables
MAIN_PC_URL = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1") # We use removesuffix to ensure we have the base URL without the /v1 part,
LOCAL_SLM_URL = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1") # as the incoming path already includes 'v1/...' (e.g. /v1/chat/completions)
OPENAI_URL = "https://api.openai.com/v1" MAIN_PC_BASE = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1").removesuffix("/v1")
LOCAL_SLM_BASE = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1").removesuffix("/v1")
OPENAI_BASE = "https://api.openai.com"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
# Health check endpoint for the Main PC # Health check endpoint for the Main PC
async def check_main_pc_health(): async def check_main_pc_health():
try: try:
# We check a simple endpoint or just attempt a connection to the base URL # We check the /v1/models endpoint
async with httpx.AsyncClient(timeout=2.0) as client: async with httpx.AsyncClient(timeout=2.0) as client:
response = await client.get(f"{MAIN_PC_URL}/models") response = await client.get(f"{MAIN_PC_BASE}/v1/models")
return response.status_code == 200 return response.status_code == 200
except Exception: except Exception:
return False return False
@app.get("/health")
async def health():
"""Local router health check."""
return {"status": "router_online"}
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"]) @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy( async def proxy(
request: Request, request: Request,
@ -37,17 +44,18 @@ async def proxy(
target_url = None target_url = None
# 1. Check for "Turbo" (High Intelligence) request # 1. Check for "Turbo" (High Intelligence) request
# Note: OPENAI_API_KEY must be set in the environment
if x_intelligence_level == "High" and OPENAI_API_KEY: if x_intelligence_level == "High" and OPENAI_API_KEY:
target_url = f"{OPENAI_URL}/{path}" target_url = f"{OPENAI_BASE}/{path}"
# 2. Try Primary (Main PC) # 2. Try Primary (Main PC)
else: else:
is_main_pc_online = await check_main_pc_health() is_main_pc_online = await check_main_pc_health()
if is_main_pc_online: if is_main_pc_online:
target_url = f"{MAIN_PC_URL}/{path}" target_url = f"{MAIN_PC_BASE}/{path}"
else: else:
# 3. Fall back to Local SLM (on Docker host) # 3. Fall back to Local SLM (on Docker host)
target_url = f"{LOCAL_SLM_URL}/{path}" target_url = f"{LOCAL_SLM_BASE}/{path}"
if not target_url: if not target_url:
return Response(content="No valid target available (Main PC offline, SLM unavailable, and no OpenAI key)", status_code=503) return Response(content="No valid target available (Main PC offline, SLM unavailable, and no OpenAI key)", status_code=503)
@ -77,7 +85,7 @@ async def proxy(
# Handle streaming responses (essential for LLM) # Handle streaming responses (essential for LLM)
accept_header = request.headers.get("accept", "") accept_header = request.headers.get("accept", "")
if "text/event-stream" in accept_header or "application/x-ndjson" in accept_header: if "text/event-stream" in accept_header or "application/x-ndjson" in accept_header:
return StreamingResponse(stream_generator(), status_code=200, background=None) return StreamingResponse(stream_generator(), status_code=200)
# For non-streaming, we'll just use a simple proxy logic # For non-streaming, we'll just use a simple proxy logic
async with httpx.AsyncClient(timeout=60.0) as client: async with httpx.AsyncClient(timeout=60.0) as client:
@ -95,7 +103,3 @@ async def proxy(
) )
except Exception as e: except Exception as e:
return Response(content=str(e), status_code=500) return Response(content=str(e), status_code=500)
@app.get("/health")
async def health():
return {"status": "router_online"}