diff --git a/docker-compose.yml b/docker-compose.yml
index 56b5394..6543646 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,9 +3,9 @@ version: '3.8'
 services:
   # The Intelligence Router
   router:
-    build: ./intelligence-router
+    build: .
     ports:
-      - "9000:9000"
+      - "9001:9000"
     environment:
       - MAIN_PC_URL=http://10.0.4.11:8080/v1
       - LOCAL_SLM_URL=http://10.0.4.200:8080/v1
diff --git a/main.py b/main.py
index 2bc8b38..ef9e5d1 100644
--- a/main.py
+++ b/main.py
@@ -10,21 +10,28 @@ load_dotenv()
 app = FastAPI()
 
 # Configuration from environment variables
-MAIN_PC_URL = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1")
-LOCAL_SLM_URL = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1")
-OPENAI_URL = "https://api.openai.com/v1"
+# We use removesuffix to ensure we have the base URL without the /v1 part,
+# as the incoming path already includes 'v1/...' (e.g. /v1/chat/completions)
+MAIN_PC_BASE = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1").removesuffix("/v1")
+LOCAL_SLM_BASE = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1").removesuffix("/v1")
+OPENAI_BASE = "https://api.openai.com"
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
 
 # Health check endpoint for the Main PC
 async def check_main_pc_health():
     try:
-        # We check a simple endpoint or just attempt a connection to the base URL
+        # We check the /v1/models endpoint
         async with httpx.AsyncClient(timeout=2.0) as client:
-            response = await client.get(f"{MAIN_PC_URL}/models")
+            response = await client.get(f"{MAIN_PC_BASE}/v1/models")
         return response.status_code == 200
     except Exception:
         return False
 
+@app.get("/health")
+async def health():
+    """Local router health check."""
+    return {"status": "router_online"}
+
 @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
 async def proxy(
     request: Request,
@@ -37,17 +44,18 @@ async def proxy(
     target_url = None
 
     # 1. Check for "Turbo" (High Intelligence) request
+    # Note: OPENAI_API_KEY must be set in environment
     if x_intelligence_level == "High" and OPENAI_API_KEY:
-        target_url = f"{OPENAI_URL}/{path}"
+        target_url = f"{OPENAI_BASE}/{path}"
 
     # 2. Try Primary (Main PC)
     else:
         is_main_pc_online = await check_main_pc_health()
         if is_main_pc_online:
-            target_url = f"{MAIN_PC_URL}/{path}"
+            target_url = f"{MAIN_PC_BASE}/{path}"
         else:
             # 3. Fallback to Local SLM (on Docker host)
-            target_url = f"{LOCAL_SLM_URL}/{path}"
+            target_url = f"{LOCAL_SLM_BASE}/{path}"
 
     if not target_url:
         return Response(content="No valid target available (Main PC offline, SLM unavailable, and no OpenAI key)", status_code=503)
@@ -77,7 +85,7 @@ async def proxy(
         # Handle streaming responses (essential for LLM)
         accept_header = request.headers.get("accept", "")
         if "text/event-stream" in accept_header or "application/x-ndjson" in accept_header:
-            return StreamingResponse(stream_generator(), status_code=200, background=None)
+            return StreamingResponse(stream_generator(), status_code=200)
 
         # For non-streaming, we'll just use a simple proxy logic
         async with httpx.AsyncClient(timeout=60.0) as client:
@@ -95,7 +103,3 @@ async def proxy(
             )
     except Exception as e:
         return Response(content=str(e), status_code=500)
-
-@app.get("/health")
-async def health():
-    return {"status": "router_online"}