import os
import asyncio
from contextlib import AsyncExitStack

import httpx
from fastapi import FastAPI, Request, Response, Header
from fastapi.responses import StreamingResponse
from dotenv import load_dotenv

load_dotenv()

app = FastAPI()

# Configuration from environment variables.
# NOTE(review): the MAIN_PC_URL default host ("10.0.4.x") is not a valid IP
# literal and looks like a placeholder -- confirm MAIN_PC_URL is always set
# in deployment, otherwise every request falls through to the local SLM.
MAIN_PC_URL = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1")
LOCAL_SLM_URL = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1")
OPENAI_URL = "https://api.openai.com/v1"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

# Timeouts (seconds) for the availability probe and for proxied requests.
HEALTH_CHECK_TIMEOUT = 2.0
UPSTREAM_TIMEOUT = 60.0

# Headers that describe a single connection and must not be relayed by a proxy.
_HOP_BY_HOP_HEADERS = frozenset(
    {
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailer",
        "transfer-encoding",
        "upgrade",
    }
)

# Request headers httpx must recompute itself. "accept-encoding" is dropped so
# httpx only advertises encodings it is able to decode.
_DROPPED_REQUEST_HEADERS = _HOP_BY_HOP_HEADERS | {
    "host",
    "content-length",
    "accept-encoding",
}

# httpx hands us an already-decoded body, so the upstream length/encoding
# headers no longer describe the bytes we send back to the client.
_DROPPED_RESPONSE_HEADERS = _HOP_BY_HOP_HEADERS | {
    "content-length",
    "content-encoding",
}

# Media types that are relayed chunk by chunk instead of being buffered.
_STREAMING_MEDIA_TYPES = ("text/event-stream", "application/x-ndjson")


# Registered BEFORE the catch-all proxy route: routes are matched in
# registration order, so declaring it afterwards would send GET /health to the
# upstream target instead of answering locally.
@app.get("/health")
async def health():
    """Liveness probe for the router itself (does not touch any upstream)."""
    return {"status": "router_online"}


# Health check endpoint for the Main PC
async def check_main_pc_health():
    """Return True when the Main PC answers ``GET {MAIN_PC_URL}/models`` with 200.

    Any failure (timeout, connection error, malformed URL, ...) is treated as
    "offline"; the broad ``except`` is deliberate because this is a
    best-effort probe that must never break request routing.
    """
    try:
        async with httpx.AsyncClient(timeout=HEALTH_CHECK_TIMEOUT) as client:
            response = await client.get(f"{MAIN_PC_URL}/models")
            return response.status_code == 200
    except Exception:
        return False


async def _select_base_url(x_intelligence_level):
    """Pick the upstream base URL: OpenAI, then Main PC, then the local SLM."""
    # 1. "Turbo" (High Intelligence) request, only usable with an API key.
    if x_intelligence_level == "High" and OPENAI_API_KEY:
        return OPENAI_URL
    # 2. Primary (Main PC) when it is reachable.
    if await check_main_pc_health():
        return MAIN_PC_URL
    # 3. Fallback to the local SLM (on the Docker host).
    return LOCAL_SLM_URL


def _build_upstream_headers(request: Request, use_openai_key: bool) -> dict:
    """Copy the client's headers, minus those the proxy must not forward."""
    headers = dict(request.headers)
    for name in _DROPPED_REQUEST_HEADERS:
        headers.pop(name, None)
    if use_openai_key:
        # Incoming header names are lower-case; writing the lower-case key
        # replaces any client-supplied credential instead of sending both.
        headers["authorization"] = f"Bearer {OPENAI_API_KEY}"
    return headers


def _copy_upstream_headers(response: Response, upstream: httpx.Response) -> None:
    """Append the relayable upstream headers to *response*.

    ``multi_items`` keeps repeated headers (e.g. several ``Set-Cookie``
    lines) separate rather than collapsing them into one value.
    """
    for name, value in upstream.headers.multi_items():
        if name.lower() not in _DROPPED_RESPONSE_HEADERS:
            response.headers.append(name, value)


def _wants_streaming(accept_header: str, upstream_content_type: str) -> bool:
    """Return True if the client asked for, or the upstream sent, a stream."""
    return any(
        media_type in accept_header or media_type in upstream_content_type
        for media_type in _STREAMING_MEDIA_TYPES
    )


async def _relay(upstream: httpx.Response, resources: AsyncExitStack):
    """Yield the upstream body chunk by chunk, then release the connection."""
    try:
        async for chunk in upstream.aiter_bytes():
            yield chunk
    finally:
        # Closes the upstream response and its client once streaming ends,
        # whether it finished normally or the consumer stopped early.
        await resources.aclose()


@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(
    request: Request,
    path: str,
    x_intelligence_level: str = Header(None),
):
    """
    Smart Proxy: Routes requests based on target availability and
    intelligence requirements.

    Routing order:
      1. OpenAI, when ``X-Intelligence-Level: High`` is sent and an API key
         is configured.
      2. The Main PC, when its health probe succeeds.
      3. The local SLM otherwise.

    The method, path, query string, body and end-to-end headers are forwarded.
    The upstream status code and headers are relayed back. Event-stream /
    NDJSON responses are streamed; everything else is buffered. Upstream
    transport failures are reported as ``502 Bad Gateway`` with the error
    text as the body.
    """
    base_url = await _select_base_url(x_intelligence_level)
    target_url = f"{base_url}/{path}"
    if request.url.query:
        # Keep the client's query parameters on the forwarded request.
        target_url = f"{target_url}?{request.url.query}"

    body = await request.body()
    headers = _build_upstream_headers(
        request, use_openai_key=base_url == OPENAI_URL
    )

    stack = AsyncExitStack()
    try:
        client = await stack.enter_async_context(
            httpx.AsyncClient(timeout=UPSTREAM_TIMEOUT)
        )
        # Open the upstream response before building ours so that its real
        # status code and headers are known in both the streaming and the
        # buffered case.
        upstream = await stack.enter_async_context(
            client.stream(
                request.method,
                target_url,
                content=body,
                headers=headers,
            )
        )

        # Handle streaming responses (essential for LLM)
        accept_header = request.headers.get("accept", "")
        upstream_content_type = upstream.headers.get("content-type", "")
        if _wants_streaming(accept_header, upstream_content_type):
            # Hand ownership of the open connection to the generator; the
            # ``finally`` below then has nothing left to close.
            resources = stack.pop_all()
            streaming_response = StreamingResponse(
                _relay(upstream, resources),
                status_code=upstream.status_code,
            )
            _copy_upstream_headers(streaming_response, upstream)
            return streaming_response

        payload = await upstream.aread()
        buffered_response = Response(
            content=payload,
            status_code=upstream.status_code,
        )
        _copy_upstream_headers(buffered_response, upstream)
        return buffered_response
    except httpx.HTTPError as exc:
        # The selected upstream could not be reached or failed mid-transfer.
        return Response(content=str(exc), status_code=502)
    finally:
        await stack.aclose()