intelligence-router/tests/test_router_model_detection.py
root 4914363089 Epic: Model Switching via Sidecar — Issues #4-#7 + #8 deployment
Issue #4: Automatic model detection and switch
- Router extracts model from chat body, queries sidecar, triggers switch on mismatch
- Matching active model routes directly to Main PC
- No active model triggers cold start switch
- Tests: 4 test_router_model_detection.py

Issue #5: SSE switch progress feedback
- _sse_format() correctly serializes SSE events
- sse_progress_stream() generates phase progression events
- Proxy yields SSE events then actual response
- Tests: 3 test_router_sse_progress.py

Issue #6: Circuit breaker + OpenRouter fallback
- Circuit tracks Sidecar failures, opens after MAX_RECOVERY_ATTEMPTS (3)
- OpenRouter API key from env, no longer uses x-intelligence-level header
- Fixes: OPENROUTER_BASE, SSE format, circuit state isolation
- Tests: 7 test_router_circuit_breaker.py

Issue #7: LXC fallback chain completion
- Full fallback: Main PC → OpenRouter → LXC
- Each backend health-checked via /v1/models before routing
- All backends down → 503 response
- Fixed: execute() wrapped in try/except to trigger fallback chain
- Tests: 3 test_router_fallback_lxc.py

Issue #8: Systemd service deployment
- deploy/llm-sidecar.service: systemd unit with Restart=always
- deploy/manifest.yaml: example manifest with 3 profiles
- deploy/README.md: deployment instructions
- Updated: docker-compose.yml, requirements.txt, Dockerfile

Test framework improvements:
- tests/conftest.py: shared URL patches for all router tests
- Fixed global state pollution in circuit breaker tests
- Fixed the sidecar switch test (use AsyncMock for the async function)

Total: 42 tests passing
2026-06-15 01:13:36 +00:00

104 lines
4.0 KiB
Python

"""Tests for automatic model detection — Issue #4.
Router extracts model from chat body, queries sidecar, triggers switch on mismatch.
"""
import asyncio
import pytest
from unittest.mock import patch
from httpx import Response, ASGITransport, AsyncClient
from main import app as router_app
# Sidecar base URL; patched in as ``main.SIDECAR_URL`` and used to build the
# mocked sidecar endpoints below.
SIDECAR_URL = "http://localhost:8081"
# Main PC base URL; patched in as ``main.MAIN_PC_BASE`` and used to build the
# mocked chat-completions endpoint below.
MAIN_PC_URL = "http://localhost:8080"
@pytest.fixture(autouse=True)
def setup():
    """Prepare the ``main`` module for each test.

    Resets ``main._circuit_open`` and ``main._recovery_attempts`` and, for the
    duration of the test, overrides the backend settings on ``main`` with the
    local test values.
    """
    import main

    main._recovery_attempts = 0
    main._circuit_open = False

    # Attribute overrides applied to the ``main`` module; they are reverted
    # automatically when the ``with`` block exits after the test.
    overrides = {
        "SIDECAR_URL": SIDECAR_URL,
        "MAIN_PC_BASE": MAIN_PC_URL,
        "FALLBACK_SLM_URL": "http://localhost:9999",
        "OPENROUTER_API_KEY": "",
    }
    with patch.multiple("main", **overrides):
        yield
def test_active_model_match_routes_directly():
    """Matching active model → routes to Main PC without switch.

    The mocked sidecar reports ``qwen-3-8b`` as the active profile and the
    chat request asks for the same model, so the request is expected to reach
    the Main PC mock while the sidecar switch endpoint stays untouched.
    """
    import respx

    async def run_test():
        # The switch endpoint is mocked explicitly: an unmocked request would
        # raise inside the router without being recorded in the call history,
        # so scanning the history alone could pass even if a switch was
        # attempted. ``assert_all_called`` is disabled because this route is
        # expected to remain unused; the other routes are asserted by hand.
        with respx.mock(assert_all_called=False) as mock_router:
            status_route = mock_router.get(f"{SIDECAR_URL}/models/status").mock(
                return_value=Response(
                    200,
                    json={"active_profile": "qwen-3-8b", "llama_server_running": True},
                )
            )
            switch_route = mock_router.post(f"{SIDECAR_URL}/models/switch").mock(
                return_value=Response(
                    200,
                    json={"status": "ready", "active_profile": "qwen-3-8b"},
                )
            )
            chat_route = mock_router.post(f"{MAIN_PC_URL}/v1/chat/completions").mock(
                return_value=Response(
                    200,
                    json={"choices": [{"message": {"content": "Hello"}}]},
                )
            )

            transport = ASGITransport(app=router_app)
            async with AsyncClient(transport=transport, base_url="http://test") as ac:
                resp = await ac.post(
                    "/v1/chat/completions",
                    json={
                        "model": "qwen-3-8b",
                        "messages": [{"role": "user", "content": "hi"}],
                    },
                )

            assert resp.status_code == 200
            assert status_route.called, "router did not query the sidecar status"
            assert chat_route.called, "router did not forward the chat to Main PC"
            assert not switch_route.called, "router switched despite matching model"

            # Keep the original, broader check on any recorded switch-like path.
            switch_calls = [
                call for call in mock_router.calls
                if "switch" in call.request.url.path
            ]
            assert len(switch_calls) == 0

    asyncio.run(run_test())
def test_mismatch_triggers_switch():
    """Mismatching model → triggers switch via sidecar.

    The mocked sidecar reports ``llama-4-maverick`` as active while the chat
    request asks for ``qwen-3-8b``; the test checks that the request succeeds
    and that the sidecar switch endpoint was actually called.
    """
    import respx

    async def run_test():
        with respx.mock:
            respx.get(f"{SIDECAR_URL}/models/status").mock(
                return_value=Response(
                    200,
                    json={
                        "active_profile": "llama-4-maverick",
                        "llama_server_running": True,
                    },
                )
            )
            # Keep a handle on the switch route so the behaviour under test is
            # asserted explicitly instead of only via respx's teardown check.
            switch_route = respx.post(f"{SIDECAR_URL}/models/switch").mock(
                return_value=Response(
                    200,
                    json={"status": "ready", "active_profile": "qwen-3-8b"},
                )
            )
            respx.post(f"{MAIN_PC_URL}/v1/chat/completions").mock(
                return_value=Response(
                    200,
                    json={"choices": [{"message": {"content": "Hello"}}]},
                )
            )

            transport = ASGITransport(app=router_app)
            async with AsyncClient(transport=transport, base_url="http://test") as ac:
                resp = await ac.post(
                    "/v1/chat/completions",
                    json={
                        "model": "qwen-3-8b",
                        "messages": [{"role": "user", "content": "hi"}],
                    },
                )

            assert resp.status_code == 200
            assert switch_route.called, "model mismatch did not trigger a switch"

    asyncio.run(run_test())
def test_no_active_model_triggers_cold_start():
    """No active model → triggers cold start switch.

    The mocked sidecar reports no active profile and no running llama server;
    the test checks that the chat request succeeds and that the sidecar
    switch endpoint was actually called.
    """
    import respx

    async def run_test():
        with respx.mock:
            respx.get(f"{SIDECAR_URL}/models/status").mock(
                return_value=Response(
                    200,
                    json={"active_profile": None, "llama_server_running": False},
                )
            )
            # Keep a handle on the switch route so the cold start is asserted
            # explicitly instead of only via respx's teardown check.
            switch_route = respx.post(f"{SIDECAR_URL}/models/switch").mock(
                return_value=Response(
                    200,
                    json={"status": "ready", "active_profile": "qwen-3-8b"},
                )
            )
            respx.post(f"{MAIN_PC_URL}/v1/chat/completions").mock(
                return_value=Response(
                    200,
                    json={"choices": [{"message": {"content": "Hello"}}]},
                )
            )

            transport = ASGITransport(app=router_app)
            async with AsyncClient(transport=transport, base_url="http://test") as ac:
                resp = await ac.post(
                    "/v1/chat/completions",
                    json={
                        "model": "qwen-3-8b",
                        "messages": [{"role": "user", "content": "hi"}],
                    },
                )

            assert resp.status_code == 200
            assert switch_route.called, "missing active model did not trigger a switch"

    asyncio.run(run_test())