Fix build context and port conflict; improve proxy/health-check logic
This commit is contained in:
parent
cb01b42f38
commit
d7090b1644
@ -3,9 +3,9 @@ version: '3.8'
|
|||||||
services:
|
services:
|
||||||
# The Intelligence Router
|
# The Intelligence Router
|
||||||
router:
|
router:
|
||||||
build: ./intelligence-router
|
build: .
|
||||||
ports:
|
ports:
|
||||||
- "9000:9000"
|
- "9001:9000"
|
||||||
environment:
|
environment:
|
||||||
- MAIN_PC_URL=http://10.0.4.11:8080/v1
|
- MAIN_PC_URL=http://10.0.4.11:8080/v1
|
||||||
- LOCAL_SLM_URL=http://10.0.4.200:8080/v1
|
- LOCAL_SLM_URL=http://10.0.4.200:8080/v1
|
||||||
|
|||||||
30
main.py
30
main.py
@ -10,21 +10,28 @@ load_dotenv()
|
|||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
# Configuration from environment variables
|
# Configuration from environment variables
|
||||||
MAIN_PC_URL = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1")
|
# We use removesuffix to ensure we have the base URL without the /v1 part,
|
||||||
LOCAL_SLM_URL = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1")
|
# as the incoming path already includes 'v1/...' (e.g. /v1/chat/completions)
|
||||||
OPENAI_URL = "https://api.openai.com/v1"
|
MAIN_PC_BASE = os.getenv("MAIN_PC_URL", "http://10.0.4.x:8080/v1").removesuffix("/v1")
|
||||||
|
LOCAL_SLM_BASE = os.getenv("LOCAL_SLM_URL", "http://llama-slm:8080/v1").removesuffix("/v1")
|
||||||
|
OPENAI_BASE = "https://api.openai.com"
|
||||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
||||||
|
|
||||||
# Health check endpoint for the Main PC
|
# Health check endpoint for the Main PC
|
||||||
async def check_main_pc_health():
|
async def check_main_pc_health():
|
||||||
try:
|
try:
|
||||||
# We check a simple endpoint or just attempt a connection to the base URL
|
# We check the /v1/models endpoint
|
||||||
async with httpx.AsyncClient(timeout=2.0) as client:
|
async with httpx.AsyncClient(timeout=2.0) as client:
|
||||||
response = await client.get(f"{MAIN_PC_URL}/models")
|
response = await client.get(f"{MAIN_PC_BASE}/v1/models")
|
||||||
return response.status_code == 200
|
return response.status_code == 200
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
async def health():
|
||||||
|
"""Local router health check."""
|
||||||
|
return {"status": "router_online"}
|
||||||
|
|
||||||
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
|
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
|
||||||
async def proxy(
|
async def proxy(
|
||||||
request: Request,
|
request: Request,
|
||||||
@ -37,17 +44,18 @@ async def proxy(
|
|||||||
target_url = None
|
target_url = None
|
||||||
|
|
||||||
# 1. Check for "Turbo" (High Intelligence) request
|
# 1. Check for "Turbo" (High Intelligence) request
|
||||||
|
# Note: this branch is taken only when OPENAI_API_KEY is set in the environment; otherwise the request falls through to the Main PC / local SLM path
|
||||||
if x_intelligence_level == "High" and OPENAI_API_KEY:
|
if x_intelligence_level == "High" and OPENAI_API_KEY:
|
||||||
target_url = f"{OPENAI_URL}/{path}"
|
target_url = f"{OPENAI_BASE}/{path}"
|
||||||
|
|
||||||
# 2. Try Primary (Main PC)
|
# 2. Try Primary (Main PC)
|
||||||
else:
|
else:
|
||||||
is_main_pc_online = await check_main_pc_health()
|
is_main_pc_online = await check_main_pc_health()
|
||||||
if is_main_pc_online:
|
if is_main_pc_online:
|
||||||
target_url = f"{MAIN_PC_URL}/{path}"
|
target_url = f"{MAIN_PC_BASE}/{path}"
|
||||||
else:
|
else:
|
||||||
# 3. Fallback to Local SLM (on Docker host)
|
# 3. Fallback to Local SLM (on Docker host)
|
||||||
target_url = f"{LOCAL_SLM_URL}/{path}"
|
target_url = f"{LOCAL_SLM_BASE}/{path}"
|
||||||
|
|
||||||
if not target_url:
|
if not target_url:
|
||||||
return Response(content="No valid target available (Main PC offline, SLM unavailable, and no OpenAI key)", status_code=503)
|
return Response(content="No valid target available (Main PC offline, SLM unavailable, and no OpenAI key)", status_code=503)
|
||||||
@ -77,7 +85,7 @@ async def proxy(
|
|||||||
# Handle streaming responses (essential for LLM)
|
# Handle streaming responses (essential for LLM)
|
||||||
accept_header = request.headers.get("accept", "")
|
accept_header = request.headers.get("accept", "")
|
||||||
if "text/event-stream" in accept_header or "application/x-ndjson" in accept_header:
|
if "text/event-stream" in accept_header or "application/x-ndjson" in accept_header:
|
||||||
return StreamingResponse(stream_generator(), status_code=200, background=None)
|
return StreamingResponse(stream_generator(), status_code=200)
|
||||||
|
|
||||||
# For non-streaming, we'll just use a simple proxy logic
|
# For non-streaming, we'll just use a simple proxy logic
|
||||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||||
@ -95,7 +103,3 @@ async def proxy(
|
|||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return Response(content=str(e), status_code=500)
|
return Response(content=str(e), status_code=500)
|
||||||
|
|
||||||
@app.get("/health")
|
|
||||||
async def health():
|
|
||||||
return {"status": "router_online"}
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user