# intelligence-router/main.py

import os
import asyncio
import httpx
from fastapi import FastAPI, Request, Response, Header
from fastapi.responses import StreamingResponse
from dotenv import load_dotenv

# Load settings from a .env file (python-dotenv) into the process environment.
# This has to run before the os.getenv() lookups further down the module.
load_dotenv()

# The ASGI application object; every route in this module is registered on it.
app = FastAPI()

# Configuration from environment variables.
# Incoming request paths already start with "v1/..." (e.g. /v1/chat/completions),
# so each backend base URL is stored WITHOUT a trailing "/v1".
# Surrounding whitespace and trailing slashes are removed before and after the
# suffix is stripped, so values such as "http://host:8080/v1/" normalise to
# "http://host:8080" instead of producing ".../v1//v1/..." targets.
# A variable that is set but empty falls back to the default rather than
# yielding a relative URL.
MAIN_PC_BASE = (
    (os.getenv("MAIN_PC_URL", "").strip() or "http://10.0.4.11:8080/v1")
    .rstrip("/")
    .removesuffix("/v1")
    .rstrip("/")
)
LOCAL_SLM_BASE = (
    (os.getenv("LOCAL_SLM_URL", "").strip() or "http://10.0.4.200:8080/v1")
    .rstrip("/")
    .removesuffix("/v1")
    .rstrip("/")
)
OPENAI_BASE = "https://api.openai.com"
# Stripped so that a stray space/newline in the .env file does not end up
# inside the "Authorization: Bearer ..." header.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "").strip()

# Startup diagnostics: show which backends this router instance resolved.
print(f"MAIN_PC_BASE={MAIN_PC_BASE}")
print(f"LOCAL_SLM_BASE={LOCAL_SLM_BASE}")

# Reachability probe for the Main PC backend (internal helper, not a route).
async def check_main_pc_health():
    """Report whether the Main PC backend currently looks usable.

    Sends ``GET {MAIN_PC_BASE}/v1/models`` with a 2 second timeout.

    Returns:
        True only when the reply has HTTP status 200. Any exception raised
        while probing is swallowed and reported as False, so callers can
        treat the result as a plain online/offline flag.
    """
    try:
        probe_url = f"{MAIN_PC_BASE}/v1/models"
        async with httpx.AsyncClient(timeout=2.0) as probe_client:
            reply = await probe_client.get(probe_url)
    except Exception:
        # Best effort: every failure mode simply means "do not route there".
        return False
    return reply.status_code == 200

@app.get("/health")
async def health():
    """Liveness probe for the router process itself.

    Returns a fixed JSON payload and does not contact any backend.
    """
    status_payload = {"status": "router_online"}
    return status_payload

# Headers that only describe a single connection hop (RFC 9110, section 7.6.1)
# and therefore must not be relayed in either direction.
_HOP_BY_HOP_HEADERS = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
})

# Media types that are relayed chunk-by-chunk instead of being buffered.
_STREAMING_MEDIA_TYPES = ("text/event-stream", "application/x-ndjson")


def _build_upstream_headers(incoming, use_openai) -> dict:
    """Return the headers to send upstream for one proxied request."""
    # host / content-length are recomputed by httpx for the new target.
    # accept-encoding is dropped so httpx advertises only the encodings it can
    # actually decode; the body is decoded before it is relayed to the client.
    dropped = _HOP_BY_HOP_HEADERS | {"host", "content-length", "accept-encoding"}
    if use_openai:
        # Remove the caller's own credentials so exactly one Authorization
        # header (the router's key) reaches OpenAI.
        dropped = dropped | {"authorization"}
    headers = {
        name: value
        for name, value in incoming.items()
        if name.lower() not in dropped
    }
    if use_openai:
        headers["Authorization"] = f"Bearer {OPENAI_API_KEY}"
    return headers


def _build_client_headers(upstream_headers) -> dict:
    """Return the upstream response headers that are safe to relay."""
    # The body handed to the client is already decoded and is re-framed by
    # Starlette, so the upstream encoding/length headers would be wrong.
    dropped = _HOP_BY_HOP_HEADERS | {"content-encoding", "content-length"}
    return {
        name: value
        for name, value in upstream_headers.items()
        if name.lower() not in dropped
    }


@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(
    request: Request,
    path: str,
    x_intelligence_level: str = Header(None)
):
    """
    Smart Proxy: Routes requests based on target availability and intelligence requirements.

    Target selection, in priority order:
      1. OpenAI, when the ``X-Intelligence-Level`` header equals ``"High"``
         and ``OPENAI_API_KEY`` is configured.
      2. The Main PC, when ``check_main_pc_health()`` reports it online.
      3. The local SLM, as the unconditional fallback (it is not probed, so
         a target is always chosen).

    Args:
        request: Incoming request; method, body, headers and query string
            are forwarded to the selected target.
        path: Request path without the leading slash (e.g.
            ``v1/chat/completions``); appended to the target base URL.
        x_intelligence_level: Value of the ``X-Intelligence-Level`` header,
            or None when absent.

    Returns:
        A ``StreamingResponse`` when either the client's ``Accept`` header or
        the upstream ``Content-Type`` names an event-stream / NDJSON media
        type, otherwise a buffered ``Response``. Both carry the upstream
        status code and filtered upstream headers. If the upstream cannot be
        reached or read, a 502 response containing the error text is returned.
    """
    # 1. "Turbo" (High Intelligence) request -> OpenAI, if a key is configured.
    use_openai = x_intelligence_level == "High" and bool(OPENAI_API_KEY)
    if use_openai:
        base_url = OPENAI_BASE
    # 2. Primary (Main PC) when it answers its health probe.
    elif await check_main_pc_health():
        base_url = MAIN_PC_BASE
    # 3. Fallback to Local SLM (on Docker host).
    else:
        base_url = LOCAL_SLM_BASE

    target_url = f"{base_url}/{path}"
    print(f"Routing {path} -> {target_url}")

    # The path parameter does not include the query string; forward it too.
    query_string = request.url.query
    if query_string:
        target_url = f"{target_url}?{query_string}"

    body = await request.body()
    headers = _build_upstream_headers(request.headers, use_openai)

    # The client outlives this function on the streaming path, so it is
    # closed explicitly on every exit route instead of via "async with".
    client = httpx.AsyncClient(timeout=60.0)
    try:
        upstream_request = client.build_request(
            request.method,
            target_url,
            content=body,
            headers=headers,
        )
        # stream=True returns once the response headers have arrived, so the
        # real status code and content type are known before replying.
        upstream = await client.send(upstream_request, stream=True)
    except Exception as e:
        await client.aclose()
        return Response(content=str(e), status_code=502)

    response_headers = _build_client_headers(upstream.headers)

    # Stream when the client asked for it OR when the upstream is actually
    # streaming (many LLM clients request streaming in the JSON body only).
    accept_header = request.headers.get("accept", "")
    upstream_type = upstream.headers.get("content-type", "")
    should_stream = any(
        media_type in accept_header or media_type in upstream_type
        for media_type in _STREAMING_MEDIA_TYPES
    )

    if should_stream:
        async def relay_chunks():
            try:
                async for chunk in upstream.aiter_bytes():
                    yield chunk
            finally:
                await upstream.aclose()
                await client.aclose()

        return StreamingResponse(
            relay_chunks(),
            status_code=upstream.status_code,
            headers=response_headers,
        )

    # Non-streaming: buffer the whole (decoded) body, then release resources.
    try:
        payload = await upstream.aread()
    except Exception as e:
        return Response(content=str(e), status_code=502)
    finally:
        await upstream.aclose()
        await client.aclose()

    return Response(
        content=payload,
        status_code=upstream.status_code,
        headers=response_headers,
    )
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00			`import os`
			`import asyncio`
			`import httpx`
			`from fastapi import FastAPI, Request, Response, Header`
			`from fastapi.responses import StreamingResponse`
			`from dotenv import load_dotenv`

			`load_dotenv()`

			`app = FastAPI()`

			`# Configuration from environment variables`
Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`# We use removesuffix to ensure we have the base URL without the /v1 part,`
			`# as the incoming path already includes 'v1/...' (e.g. /v1/chat/completions)`
added debug 2026-06-09 20:05:10 +03:00			`MAIN_PC_BASE = os.getenv("MAIN_PC_URL", "http://10.0.4.11:8080/v1").removesuffix("/v1")`
			`LOCAL_SLM_BASE = os.getenv("LOCAL_SLM_URL", "http://10.0.4.200:8080/v1").removesuffix("/v1")`
Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`OPENAI_BASE = "https://api.openai.com"`
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00			`OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")`

test 2026-06-09 21:54:03 +03:00			`print(f"MAIN_PC_BASE={MAIN_PC_BASE}")`
			`print(f"LOCAL_SLM_BASE={LOCAL_SLM_BASE}")`

Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00			`# Health check endpoint for the Main PC`
			`async def check_main_pc_health():`
			`try:`
Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`# We check the /v1/models endpoint`
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00			`async with httpx.AsyncClient(timeout=2.0) as client:`
Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`response = await client.get(f"{MAIN_PC_BASE}/v1/models")`
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00			`return response.status_code == 200`
			`except Exception:`
			`return False`

Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`@app.get("/health")`
			`async def health():`
			`"""Local router health check."""`
			`return {"status": "router_online"}`

Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00			`@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])`
			`async def proxy(`
			`request: Request,`
			`path: str,`
			`x_intelligence_level: str = Header(None)`
			`):`
			`"""`
			`Smart Proxy: Routes requests based on target availability and intelligence requirements.`
			`"""`
			`target_url = None`

			`# 1. Check for "Turbo" (High Intelligence) request`
Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`# Note: OPENAI_API_KEY must be set in environment`
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00			`if x_intelligence_level == "High" and OPENAI_API_KEY:`
Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`target_url = f"{OPENAI_BASE}/{path}"`
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00
			`# 2. Try Primary (Main PC)`
			`else:`
			`is_main_pc_online = await check_main_pc_health()`
			`if is_main_pc_online:`
Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`target_url = f"{MAIN_PC_BASE}/{path}"`
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00			`else:`
			`# 3. Fallback to Local SLM (on Docker host)`
Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`target_url = f"{LOCAL_SLM_BASE}/{path}"`
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00
			`if not target_url:`
			`return Response(content="No valid target available (Main PC offline, SLM unavailable, and no OpenAI key)", status_code=503)`

added debug 2026-06-09 20:05:10 +03:00			`print(f"Routing {path} -> {target_url}")`
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00			`# Prepare request for proxying`
			`body = await request.body()`
			`headers = dict(request.headers)`

			`# Update headers for the target`
			`headers.pop("host", None)`
			`headers.pop("content-length", None)`
			`if target_url.startswith("https://api.openai.com"):`
			`headers["Authorization"] = f"Bearer {OPENAI_API_KEY}"`

			`# Execute the request`
			`async def stream_generator():`
			`async with httpx.AsyncClient(timeout=60.0) as client:`
			`async with client.stream(`
			`request.method,`
			`target_url,`
			`content=body,`
			`headers=headers,`
			`) as resp:`
			`async for chunk in resp.aiter_bytes():`
			`yield chunk`

			`# Handle streaming responses (essential for LLM)`
			`accept_header = request.headers.get("accept", "")`
			`if "text/event-stream" in accept_header or "application/x-ndjson" in accept_header:`
Fix build context, port conflict, and improve proxy/health-check logic 2026-06-09 19:34:07 +03:00			`return StreamingResponse(stream_generator(), status_code=200)`
Initial commit: migrate intelligence-router files 2026-06-09 13:48:43 +03:00
			`# For non-streaming, we'll just use a simple proxy logic`
			`async with httpx.AsyncClient(timeout=60.0) as client:`
			`try:`
			`resp = await client.request(`
			`method=request.method,`
			`url=target_url,`
			`content=body,`
			`headers=headers,`
			`)`
			`return Response(`
			`content=resp.content,`
			`status_code=resp.status_code,`
			`headers=dict(resp.headers)`
			`)`
			`except Exception as e:`
			`return Response(content=str(e), status_code=500)`