# intelligence-router/main.py

import os
import asyncio
import httpx
from fastapi import FastAPI, Request, Response, Header
from fastapi.responses import StreamingResponse
from dotenv import load_dotenv
# Load variables from a .env file (if one is present) into the environment
# before any os.getenv lookups in this module run.
load_dotenv()
# FastAPI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI()
# Configuration from environment variables.
# The incoming request path already includes "v1/..." (e.g.
# /v1/chat/completions), so backend base URLs are stored WITHOUT the "/v1"
# suffix. Trailing slashes are stripped first so that a configured value such
# as "http://host:8080/v1/" is normalized the same way as "http://host:8080/v1".
MAIN_PC_BASE = (
    os.getenv("MAIN_PC_URL", "http://10.0.4.11:8080/v1").rstrip("/").removesuffix("/v1")
)
LOCAL_SLM_BASE = (
    os.getenv("LOCAL_SLM_URL", "http://10.0.4.200:8080/v1").rstrip("/").removesuffix("/v1")
)
OPENAI_BASE = "https://api.openai.com"
# Empty string means "no key configured"; the proxy then never routes to OpenAI.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

# Echo the resolved backend URLs once at import time to ease deployment debugging.
print(f"MAIN_PC_BASE={MAIN_PC_BASE}")
print(f"LOCAL_SLM_BASE={LOCAL_SLM_BASE}")
# Reachability probe for the Main PC backend (a helper, not an HTTP endpoint).
async def check_main_pc_health() -> bool:
    """Return True when the Main PC answers GET /v1/models with HTTP 200.

    The probe uses a 2-second timeout. Any exception raised while probing is
    treated as "offline" and produces False instead of propagating.
    """
    models_url = f"{MAIN_PC_BASE}/v1/models"
    try:
        async with httpx.AsyncClient(timeout=2.0) as probe_client:
            probe = await probe_client.get(models_url)
    except Exception:
        # Best-effort check: an unreachable or misbehaving backend counts as down.
        return False
    return probe.status_code == 200
@app.get("/health")
async def health():
    """Liveness check for the router process itself; no backend is contacted."""
    return dict(status="router_online")
# Upstream response headers that must not be relayed to the client:
#  * hop-by-hop headers describe one connection, not the payload;
#  * httpx hands back an already-decoded body, so the upstream
#    Content-Encoding / Content-Length no longer match the bytes we send.
_DROPPED_RESPONSE_HEADERS = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
    "content-encoding",
    "content-length",
})


def _forwardable_response_headers(upstream_headers):
    """Return the upstream headers that are safe to relay to the client."""
    return {
        name: value
        for name, value in upstream_headers.items()
        if name.lower() not in _DROPPED_RESPONSE_HEADERS
    }


@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(
    request: Request,
    path: str,
    x_intelligence_level: str = Header(None)
):
    """
    Smart Proxy: Routes requests based on target availability and intelligence requirements.

    Target selection, in priority order:
      1. OpenAI, when the X-Intelligence-Level header equals "High" and
         OPENAI_API_KEY is set.
      2. The Main PC, when check_main_pc_health() reports it online.
      3. The local SLM otherwise.

    The request method, body, query string and headers are forwarded. Host,
    Content-Length and Accept-Encoding are left for httpx to set, so the
    upstream only uses encodings httpx can decode. When the client's Accept
    header asks for "text/event-stream" or "application/x-ndjson", the
    upstream body is streamed chunk by chunk; otherwise it is buffered. In
    both cases the upstream status code and relayable headers are passed back.

    Returns:
        A StreamingResponse or Response mirroring the upstream reply, or a
        500 Response carrying the exception text when the upstream request
        itself fails.
    """
    # 1. "Turbo" (High Intelligence) request -> OpenAI, 2. Main PC, 3. Local SLM.
    use_openai = x_intelligence_level == "High" and bool(OPENAI_API_KEY)
    if use_openai:
        base_url = OPENAI_BASE
    elif await check_main_pc_health():
        base_url = MAIN_PC_BASE
    else:
        base_url = LOCAL_SLM_BASE
    target_url = f"{base_url}/{path}"
    print(f"Routing {path} -> {target_url}")

    # The path parameter excludes the query string; re-attach it so that
    # requests with query parameters reach the upstream unchanged.
    query = request.url.query
    if query:
        target_url = f"{target_url}?{query}"

    # Prepare request for proxying
    body = await request.body()
    headers = dict(request.headers)
    headers.pop("host", None)
    headers.pop("content-length", None)
    headers.pop("accept-encoding", None)
    if use_openai:
        # Incoming header names are lower-case, so using the lower-case key
        # replaces any client-supplied credential instead of sending two.
        headers["authorization"] = f"Bearer {OPENAI_API_KEY}"

    # Handle streaming responses (essential for LLM)
    accept_header = request.headers.get("accept", "")
    wants_stream = (
        "text/event-stream" in accept_header
        or "application/x-ndjson" in accept_header
    )

    if wants_stream:
        # Open the upstream stream BEFORE building our response so the real
        # status code and content type can be relayed, and so a connection
        # failure can still be reported as an error response.
        client = httpx.AsyncClient(timeout=60.0)
        try:
            upstream = await client.send(
                client.build_request(
                    request.method,
                    target_url,
                    content=body,
                    headers=headers,
                ),
                stream=True,
            )
        except Exception as e:
            await client.aclose()
            return Response(content=str(e), status_code=500)

        async def stream_generator():
            try:
                async for chunk in upstream.aiter_bytes():
                    yield chunk
            finally:
                # Release the upstream connection even if the client goes away.
                await upstream.aclose()
                await client.aclose()

        return StreamingResponse(
            stream_generator(),
            status_code=upstream.status_code,
            headers=_forwardable_response_headers(upstream.headers),
        )

    # Non-streaming: buffer the whole upstream reply, then relay it.
    async with httpx.AsyncClient(timeout=60.0) as client:
        try:
            resp = await client.request(
                method=request.method,
                url=target_url,
                content=body,
                headers=headers,
            )
        except Exception as e:
            return Response(content=str(e), status_code=500)
    return Response(
        content=resp.content,
        status_code=resp.status_code,
        headers=_forwardable_response_headers(resp.headers),
    )