fix: circuit breaker deadlock — always query sidecar for status

The circuit breaker opened after MAX_RECOVERY_ATTEMPTS failures but
was never reset: the sidecar status query (which calls
circuit_reset()) was guarded by the circuit check, so it was skipped
whenever the circuit was open.  Once open, the circuit could therefore
never close again.  This caused a permanent deadlock: all subsequent
requests went to the LXC fallback with no recovery possible.

Fix: always query the sidecar for /models/status, even when the
circuit is open.  If the sidecar responds successfully, reset the
circuit; if it does not, record a failure as before.  The circuit
breaker now only prevents the model SWITCH operation, not the status
health check.  If a model is already running (the sidecar reports
llama_server_running) when the circuit is open, route to it directly.
This commit is contained in:
root 2026-06-16 22:09:16 +00:00
parent bcf45129f1
commit 7e9b3f43e1

17
main.py
View File

@ -403,12 +403,7 @@ async def proxy(
error: Optional[str] = None error: Optional[str] = None
sidecar_status = None sidecar_status = None
# Circuit breaker check # Always query the sidecar first (to detect recovery even when circuit is open)
if not await circuit_breaker_check():
error = "circuit_open"
else:
# Query Sidecar for active model
sidecar_status = None
async with httpx.AsyncClient(timeout=3.0) as client: async with httpx.AsyncClient(timeout=3.0) as client:
try: try:
resp = await client.get(f"{SIDECAR_URL}/models/status") resp = await client.get(f"{SIDECAR_URL}/models/status")
@ -416,13 +411,19 @@ async def proxy(
sidecar_status = resp.json() sidecar_status = resp.json()
circuit_reset() circuit_reset()
except Exception: except Exception:
error = "sidecar_down" pass # Handled below
if sidecar_status is None: if sidecar_status is None:
circuit_record_failure() circuit_record_failure()
error = "sidecar_down" error = "sidecar_down"
elif not await circuit_breaker_check():
# Sidecar is up but circuit is open from prior switch failures
# Only block the switch — allow routing to already-active backend
error = "circuit_open"
if sidecar_status.get("llama_server_running"):
target_url = f"{MAIN_PC_BASE}/{path}"
else: else:
# Extract requested model from request body # Both sidecar reachable and circuit closed — proceed normally
body = await request.body() body = await request.body()
body_data = json.loads(body) if body else {} body_data = json.loads(body) if body else {}
requested_model = body_data.get("model") requested_model = body_data.get("model")