"""Smart LLM router.

Forwards incoming OpenAI-style HTTP requests to one of three upstreams:
OpenAI (when the caller asks for "High" intelligence and an API key is
configured), the Main PC (when its health check passes), or the local SLM
as the fallback.
"""

import asyncio
import os

import httpx
from dotenv import load_dotenv
from fastapi import FastAPI, Header, Request, Response
from fastapi.responses import StreamingResponse

load_dotenv()

app = FastAPI()

# Configuration from environment variables.
# The incoming path already contains the "v1/..." prefix (e.g.
# /v1/chat/completions), so the configured URLs are reduced to their base by
# removing a trailing "/v1". A trailing slash is stripped first so that a value
# such as "http://host:8080/v1/" is normalised as well.
MAIN_PC_BASE = (
    os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1").rstrip("/").removesuffix("/v1")
)
LOCAL_SLM_BASE = (
    os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1").rstrip("/").removesuffix("/v1")
)
OPENAI_BASE = "https://api.openai.com"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

# Timeouts (seconds) for the Main PC probe and for proxied upstream calls.
HEALTH_CHECK_TIMEOUT = 2.0
UPSTREAM_TIMEOUT = 60.0

# Accept values that make the proxy relay the upstream body incrementally.
_STREAMING_MEDIA_TYPES = ("text/event-stream", "application/x-ndjson")

# Request headers that must not be copied to the upstream request:
# - host / content-length are recomputed by httpx for the new request;
# - accept-encoding is left to httpx so the upstream only uses encodings that
#   httpx is able to decode (the proxy relays the *decoded* body).
_STRIPPED_REQUEST_HEADERS = frozenset({"host", "content-length", "accept-encoding"})

# Response headers that must not be copied back to the client: hop-by-hop
# headers, plus the headers describing the upstream body framing/encoding,
# which no longer match the decoded body this proxy sends.
_STRIPPED_RESPONSE_HEADERS = frozenset(
    {
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailer",
        "transfer-encoding",
        "upgrade",
        "content-encoding",
        "content-length",
    }
)


async def check_main_pc_health():
    """Return True when the Main PC answers GET /v1/models with HTTP 200.

    Any failure (timeout, connection error, malformed URL, ...) is treated as
    "offline" and yields False.
    """
    try:
        async with httpx.AsyncClient(timeout=HEALTH_CHECK_TIMEOUT) as client:
            response = await client.get(f"{MAIN_PC_BASE}/v1/models")
    except Exception:
        # Deliberate best-effort probe: whatever went wrong, the Main PC is
        # not usable right now, so the caller should fall back.
        return False
    return response.status_code == 200


@app.get("/health")
async def health():
    """Local router health check."""
    return {"status": "router_online"}


async def _select_target_base(x_intelligence_level):
    """Pick the upstream base URL; return ``(base_url, use_openai)``."""
    # 1. "Turbo" (High Intelligence) request; only possible with an API key.
    if x_intelligence_level == "High" and OPENAI_API_KEY:
        return OPENAI_BASE, True
    # 2. Primary (Main PC), when it is reachable.
    if await check_main_pc_health():
        return MAIN_PC_BASE, False
    # 3. Fallback to Local SLM (on Docker host).
    return LOCAL_SLM_BASE, False


def _build_upstream_headers(request, *, use_openai):
    """Copy the client's headers, adjusted for the upstream request."""
    headers = {
        name: value
        for name, value in request.headers.items()
        if name not in _STRIPPED_REQUEST_HEADERS
    }
    if use_openai:
        # Incoming header names are lowercase, so use the lowercase key to
        # REPLACE any client-supplied credential instead of sending both.
        headers["authorization"] = f"Bearer {OPENAI_API_KEY}"
    return headers


def _filter_response_headers(upstream):
    """Return the upstream response headers that are safe to relay."""
    return {
        name: value
        for name, value in upstream.headers.items()
        if name.lower() not in _STRIPPED_RESPONSE_HEADERS
    }


async def _proxy_streaming(method, target_url, body, headers):
    """Relay an upstream response chunk by chunk.

    The upstream request is sent before the response object is created so
    that the real status code and headers can be relayed, and so that a
    connection failure can still be reported as 502.
    """
    client = httpx.AsyncClient(timeout=UPSTREAM_TIMEOUT)
    try:
        upstream_request = client.build_request(
            method, target_url, content=body, headers=headers
        )
        upstream = await client.send(upstream_request, stream=True)
    except httpx.HTTPError as exc:
        await client.aclose()
        return Response(content=str(exc), status_code=502)
    except BaseException:
        # Do not leak the client on unexpected errors or cancellation.
        await client.aclose()
        raise

    async def stream_generator():
        try:
            async for chunk in upstream.aiter_bytes():
                yield chunk
        finally:
            # Release the upstream connection once the body is exhausted or
            # the generator is closed early.
            await upstream.aclose()
            await client.aclose()

    return StreamingResponse(
        stream_generator(),
        status_code=upstream.status_code,
        headers=_filter_response_headers(upstream),
    )


async def _proxy_buffered(method, target_url, body, headers):
    """Fetch the full upstream response and relay it in one piece."""
    async with httpx.AsyncClient(timeout=UPSTREAM_TIMEOUT) as client:
        try:
            upstream = await client.request(
                method=method,
                url=target_url,
                content=body,
                headers=headers,
            )
        except httpx.HTTPError as exc:
            # The upstream could not be reached or timed out: Bad Gateway.
            return Response(content=str(exc), status_code=502)
    return Response(
        content=upstream.content,
        status_code=upstream.status_code,
        headers=_filter_response_headers(upstream),
    )


@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(
    request: Request,
    path: str,
    x_intelligence_level: str = Header(None),
):
    """
    Smart Proxy: Routes requests based on target availability and intelligence requirements.

    Routing order: OpenAI when ``x_intelligence_level`` is "High" and
    OPENAI_API_KEY is set; otherwise the Main PC if its health check passes;
    otherwise the local SLM. The request method, path, query string, body and
    headers (minus the ones the proxy must rewrite) are forwarded.

    Returns the upstream response (status, relayable headers, body). When the
    client's Accept header names an event-stream / NDJSON media type, the body
    is streamed; otherwise it is buffered. Upstream transport failures are
    reported as 502 with the error text as the body.
    """
    base_url, use_openai = await _select_target_base(x_intelligence_level)

    target_url = f"{base_url}/{path}"
    # The path parameter excludes the query string; forward it explicitly.
    if request.url.query:
        target_url = f"{target_url}?{request.url.query}"

    body = await request.body()
    headers = _build_upstream_headers(request, use_openai=use_openai)

    # Handle streaming responses (essential for LLM).
    accept_header = request.headers.get("accept", "")
    if any(media_type in accept_header for media_type in _STREAMING_MEDIA_TYPES):
        return await _proxy_streaming(request.method, target_url, body, headers)

    return await _proxy_buffered(request.method, target_url, body, headers)