From 0e05390be27a47ac12c6b4a174ddf7e3db8b7d01 Mon Sep 17 00:00:00 2001
From: Tudorel Oprisan
Date: Tue, 9 Jun 2026 11:48:43 +0100
Subject: [PATCH] Initial commit: migrate intelligence-router files

---
 Dockerfile         |  13 ++++
 docker-compose.yml |  28 ++++++++
 main.py            | 161 +++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt   |   4 ++
 4 files changed, 206 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 docker-compose.yml
 create mode 100644 main.py
 create mode 100644 requirements.txt

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c75d205
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY main.py .
+
+# Expose the proxy port
+EXPOSE 9000
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "9000"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..cc08e57
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,28 @@
+version: '3.8'
+
+services:
+  # The Intelligence Router
+  router:
+    build: ./intelligence-router
+    ports:
+      - "9000:9000"
+    environment:
+      - MAIN_PC_URL=http://10.0.4.x:8080/v1
+      - LOCAL_SLM_URL=http://llama-slm:8080/v1
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+    depends_on:
+      - llama-slm
+
+  # The Local SLM (Fallback Brain)
+  llama-slm:
+    image: ghcr.io/ggerganov/llama.cpp:server
+    volumes:
+      - ./models:/models
+    # Command to run a small, fast model (e.g., Llama-3-8B GGUF)
+    command: >
+      -m /models/llama-3-8b-instruct.Q4_K_M.gguf
+      --host 0.0.0.0
+      --port 8080
+      --ctx-size 2048
+    ports:
+      - "8081:8080"
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..2bc8b38
--- /dev/null
+++ b/main.py
@@ -0,0 +1,161 @@
+import os
+import asyncio
+import httpx
+from fastapi import FastAPI, Request, Response, Header
+from fastapi.responses import StreamingResponse
+from dotenv import load_dotenv
+
+load_dotenv()
+
+app = FastAPI()
+
+# Configuration from environment variables
+MAIN_PC_URL = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1")
+LOCAL_SLM_URL = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1")
+OPENAI_URL = "https://api.openai.com/v1"
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
+
+# Upstream response headers that must not be copied onto our own response:
+# hop-by-hop headers describe the upstream connection only, and httpx has
+# already decoded the body, so the upstream Content-Encoding/Content-Length
+# no longer match the bytes we send.
+EXCLUDED_RESPONSE_HEADERS = frozenset({
+    "connection",
+    "keep-alive",
+    "proxy-authenticate",
+    "proxy-authorization",
+    "te",
+    "trailer",
+    "transfer-encoding",
+    "upgrade",
+    "content-encoding",
+    "content-length",
+})
+
+
+def forwardable_headers(upstream_headers):
+    """Return the upstream response headers that are safe to relay to the client."""
+    return {
+        name: value
+        for name, value in upstream_headers.items()
+        if name.lower() not in EXCLUDED_RESPONSE_HEADERS
+    }
+
+
+# Health check endpoint for the Main PC
+async def check_main_pc_health():
+    """Return True when the Main PC answers GET {MAIN_PC_URL}/models with 200."""
+    try:
+        # We check a simple endpoint or just attempt a connection to the base URL
+        async with httpx.AsyncClient(timeout=2.0) as client:
+            response = await client.get(f"{MAIN_PC_URL}/models")
+            return response.status_code == 200
+    except Exception:
+        # Best-effort probe: any failure just means "treat the Main PC as offline".
+        return False
+
+
+# Registered before the catch-all proxy route: routes are matched in
+# registration order, so defining it afterwards would make it unreachable.
+@app.get("/health")
+async def health():
+    """Liveness probe for the router itself (never proxied)."""
+    return {"status": "router_online"}
+
+
+@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
+async def proxy(
+    request: Request,
+    path: str,
+    x_intelligence_level: str = Header(None)
+):
+    """
+    Smart Proxy: Routes requests based on target availability and intelligence requirements.
+
+    Target selection, in order:
+      1. OpenAI, when the X-Intelligence-Level header is "High" and an API key is set.
+      2. The Main PC, when its health check passes.
+      3. The local SLM otherwise.
+
+    The upstream status code and headers are relayed; a failure to reach the
+    upstream is reported as 502 Bad Gateway.
+    """
+    # 1. Check for "Turbo" (High Intelligence) request
+    if x_intelligence_level == "High" and OPENAI_API_KEY:
+        base_url = OPENAI_URL
+    # 2. Try Primary (Main PC)
+    elif await check_main_pc_health():
+        base_url = MAIN_PC_URL
+    # 3. Fallback to Local SLM (on Docker host)
+    else:
+        base_url = LOCAL_SLM_URL
+
+    # Keep the query string; the path parameter alone does not include it
+    target_url = f"{base_url}/{path}"
+    if request.url.query:
+        target_url = f"{target_url}?{request.url.query}"
+
+    # Prepare request for proxying
+    body = await request.body()
+    headers = dict(request.headers)
+
+    # Update headers for the target
+    headers.pop("host", None)
+    headers.pop("content-length", None)
+    if target_url.startswith("https://api.openai.com"):
+        # Incoming header names are lowercase; drop the caller's credentials so
+        # only our own key is sent (and not two Authorization headers).
+        headers.pop("authorization", None)
+        headers["Authorization"] = f"Bearer {OPENAI_API_KEY}"
+
+    # Handle streaming responses (essential for LLM)
+    accept_header = request.headers.get("accept", "")
+    if "text/event-stream" in accept_header or "application/x-ndjson" in accept_header:
+        # Open the upstream response first so its real status and headers can
+        # be relayed; the client is closed once the stream has been consumed.
+        client = httpx.AsyncClient(timeout=60.0)
+        try:
+            upstream = await client.send(
+                client.build_request(
+                    request.method,
+                    target_url,
+                    content=body,
+                    headers=headers,
+                ),
+                stream=True,
+            )
+        except httpx.HTTPError as e:
+            await client.aclose()
+            return Response(content=str(e), status_code=502)
+
+        async def stream_generator():
+            try:
+                async for chunk in upstream.aiter_bytes():
+                    yield chunk
+            finally:
+                await upstream.aclose()
+                await client.aclose()
+
+        return StreamingResponse(
+            stream_generator(),
+            status_code=upstream.status_code,
+            headers=forwardable_headers(upstream.headers),
+        )
+
+    # For non-streaming, we'll just use a simple proxy logic
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        try:
+            resp = await client.request(
+                method=request.method,
+                url=target_url,
+                content=body,
+                headers=headers,
+            )
+        except httpx.HTTPError as e:
+            return Response(content=str(e), status_code=502)
+
+    return Response(
+        content=resp.content,
+        status_code=resp.status_code,
+        headers=forwardable_headers(resp.headers),
+    )
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3263361
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+fastapi
+uvicorn
+httpx
+python-dotenv