diff --git a/docker-compose.yml b/docker-compose.yml
index 56b5394..6543646 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,9 +3,9 @@ version: '3.8'
 services:
   # The Intelligence Router
   router:
-    build: ./intelligence-router
+    build: .
     ports:
-      - "9000:9000"
+      - "9001:9000"
     environment:
       - MAIN_PC_URL=http://10.0.4.11:8080/v1
       - LOCAL_SLM_URL=http://10.0.4.200:8080/v1
diff --git a/main.py b/main.py
index 2bc8b38..ef9e5d1 100644
--- a/main.py
+++ b/main.py
@@ -10,21 +10,28 @@ load_dotenv()
 app = FastAPI()
 
 # Configuration from environment variables
-MAIN_PC_URL = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1")
-LOCAL_SLM_URL = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1")
-OPENAI_URL = "https://api.openai.com/v1"
+# Normalise to a base URL: drop any trailing "/" and then a trailing "/v1",
+# because the incoming path already includes 'v1/...' (e.g. /v1/chat/completions).
+MAIN_PC_BASE = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1").rstrip("/").removesuffix("/v1")
+LOCAL_SLM_BASE = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1").rstrip("/").removesuffix("/v1")
+OPENAI_BASE = "https://api.openai.com"
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
 
 # Health check endpoint for the Main PC
 async def check_main_pc_health():
     try:
-        # We check a simple endpoint or just attempt a connection to the base URL
+        # Probe GET {MAIN_PC_BASE}/v1/models with a 2 s timeout; only HTTP 200 counts as healthy
         async with httpx.AsyncClient(timeout=2.0) as client:
-            response = await client.get(f"{MAIN_PC_URL}/models")
+            response = await client.get(f"{MAIN_PC_BASE}/v1/models")
             return response.status_code == 200
     except Exception:
         return False
 
+@app.get("/health")
+async def health():
+    """Router liveness probe; declared before the catch-all proxy route, which would otherwise capture GET /health."""
+    return {"status": "router_online"}
+
 @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
 async def proxy(
     request: Request, 
@@ -37,17 +44,18 @@ async def proxy(
     target_url = None
     
     # 1. Check for "Turbo" (High Intelligence) request
+    # Note: this branch is only taken when OPENAI_API_KEY is non-empty; otherwise "High" requests use the Main PC / SLM path below
     if x_intelligence_level == "High" and OPENAI_API_KEY:
-        target_url = f"{OPENAI_URL}/{path}"
+        target_url = f"{OPENAI_BASE}/{path}"
     
     # 2. Try Primary (Main PC)
     else:
         is_main_pc_online = await check_main_pc_health()
         if is_main_pc_online:
-            target_url = f"{MAIN_PC_URL}/{path}"
+            target_url = f"{MAIN_PC_BASE}/{path}"
         else:
             # 3. Fallback to Local SLM (on Docker host)
-            target_url = f"{LOCAL_SLM_URL}/{path}"
+            target_url = f"{LOCAL_SLM_BASE}/{path}"
 
     if not target_url:
         return Response(content="No valid target available (Main PC offline, SLM unavailable, and no OpenAI key)", status_code=503)
@@ -77,7 +85,7 @@ async def proxy(
     # Handle streaming responses (essential for LLM)
     accept_header = request.headers.get("accept", "")
     if "text/event-stream" in accept_header or "application/x-ndjson" in accept_header:
-        return StreamingResponse(stream_generator(), status_code=200, background=None)
+        return StreamingResponse(stream_generator(), status_code=200)
     
     # For non-streaming, we'll just use a simple proxy logic
     async with httpx.AsyncClient(timeout=60.0) as client:
@@ -95,7 +103,3 @@ async def proxy(
             )
         except Exception as e:
             return Response(content=str(e), status_code=500)
-
-@app.get("/health")
-async def health():
-    return {"status": "router_online"}