intelligence-router/tests/test_sidecar_switch.py
root 4914363089 Epic: Model Switching via Sidecar — Issues #4-#7 + #8 deployment
Issue #4: Automatic model detection and switch
- Router extracts model from chat body, queries sidecar, triggers switch on mismatch
- Matching active model routes directly to Main PC
- No active model triggers cold start switch
- Tests: 4 test_router_model_detection.py

Issue #5: SSE switch progress feedback
- _sse_format() correctly serializes SSE events
- sse_progress_stream() generates phase progression events
- Proxy yields SSE events then actual response
- Tests: 3 test_router_sse_progress.py

Issue #6: Circuit breaker + OpenRouter fallback
- Circuit tracks Sidecar failures, opens after MAX_RECOVERY_ATTEMPTS (3)
- OpenRouter API key from env, no longer uses x-intelligence-level header
- Fixes: OPENROUTER_BASE, SSE format, circuit state isolation
- Tests: 7 test_router_circuit_breaker.py

Issue #7: LXC fallback chain completion
- Full fallback: Main PC → OpenRouter → LXC
- Each backend health-checked via /v1/models before routing
- All backends down → 503 response
- Fixed: execute() wrapped in try/except to trigger fallback chain
- Tests: 3 test_router_fallback_lxc.py

Issue #8: Systemd service deployment
- deploy/llm-sidecar.service: systemd unit with Restart=always
- deploy/manifest.yaml: example manifest with 3 profiles
- deploy/README.md: deployment instructions
- Updated: docker-compose.yml, requirements.txt, Dockerfile

Test framework improvements:
- tests/conftest.py: shared URL patches for all router tests
- Fixed global state pollution in circuit breaker tests
- Fixed the sidecar switch test (use AsyncMock when patching an async function)

Total: 42 tests passing
2026-06-15 01:13:36 +00:00

106 lines
4.5 KiB
Python

"""Tests for sidecar model switch — Issue #3."""
import pytest
from unittest.mock import patch, AsyncMock, MagicMock
from httpx import Response
from fastapi.testclient import TestClient
from sidecar.app import app as sidecar_app
@pytest.fixture(autouse=True)
def reset_sidecar_state():
    """Reset shared sidecar state between tests.

    Before each test, ``sidecar.app._active_profile`` and
    ``sidecar.app._llama_server_process`` are set to ``None``. After the
    test, the values they held beforehand are put back.
    """
    # Bind the module itself: the globals have to be read and rebound through
    # the module attribute. A ``from sidecar.app import ...`` would only copy
    # the current values into local names and could not reset anything.
    import sidecar.app

    old_active = sidecar.app._active_profile
    old_proc = sidecar.app._llama_server_process

    sidecar.app._active_profile = None
    sidecar.app._llama_server_process = None

    yield

    # Teardown: restore whatever was there before this test ran.
    sidecar.app._active_profile = old_active
    sidecar.app._llama_server_process = old_proc
@pytest.fixture
def tmp_manifest(tmp_path):
    """Write a two-profile manifest under ``tmp_path`` and return its path.

    The file is a YAML list with two entries, ``qwen-3-8b`` (which carries a
    ``flags`` mapping) and ``llama-4-maverick`` (which has none). Tests point
    ``sidecar.app.MANIFEST_PATH`` at the returned path.
    """
    manifest_file = tmp_path / "manifest.yaml"
    # Indentation inside these literals is significant YAML: keys of one list
    # entry must line up with ``id`` (two spaces), and the children of
    # ``flags`` must be indented further (four spaces).
    manifest_file.write_text(
        "- id: qwen-3-8b\n"
        "  name: \"Qwen 3 8B\"\n"
        "  model_path: /home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf\n"
        "  flags:\n"
        "    n_ctx: 8192\n"
        "    n_gpu_layers: 35\n"
        "- id: llama-4-maverick\n"
        "  name: \"Llama 4 Maverick\"\n"
        "  model_path: /home/bigt/AI/llm/llama4/llama4-maverick-q4.gguf\n"
    )
    return manifest_file
class TestSwitchEndpoint:
    """Tests for POST /models/switch.

    Every test points ``sidecar.app.MANIFEST_PATH`` at the temporary manifest
    from the ``tmp_manifest`` fixture. Where a server start is expected, the
    sidecar's ``_start_llama_server`` and ``_poll_llama_server_ready`` helpers
    are replaced with ``AsyncMock`` objects, so the test itself launches no
    process and the readiness result is fixed.
    """

    def test_switch_to_new_profile(self, tmp_manifest):
        """Switching to a new profile starts llama-server and waits for readiness."""
        # Both helpers are patched as AsyncMock explicitly, matching
        # test_switch_returns_error_when_unready, so the mocks are awaitable
        # without relying on patch()'s auto-detection of async targets.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
                patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
                patch("sidecar.app._poll_llama_server_ready",
                      new_callable=AsyncMock, return_value=True):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"

    def test_switch_profile_not_found(self, tmp_manifest):
        """Switching to a non-existent profile returns 404."""
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "nonexistent"})
            assert response.status_code == 404
            data = response.json()
            assert data["status"] == "error"
            assert "not found" in data["message"]

    def test_switch_returns_error_when_unready(self, tmp_manifest):
        """If llama-server doesn't become ready, switch returns error."""
        # The readiness poll resolves to False, i.e. the server never came up.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
                patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
                patch("sidecar.app._poll_llama_server_ready",
                      new_callable=AsyncMock, return_value=False):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 500
            data = response.json()
            assert data["status"] == "error"

    def test_switch_when_already_running_same_profile(self, tmp_manifest):
        """Already running this profile — returns ready immediately."""
        # The start/poll helpers are deliberately left unpatched here; the
        # requested profile is pre-set as the active one.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
                patch("sidecar.app._active_profile", "qwen-3-8b"):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"
class TestStatusEndpoint:
    """Checks on GET /models/status while a profile is marked active."""

    def test_status_reflects_running_server(self, tmp_manifest):
        """Status reports the active profile and a running llama-server.

        The sidecar module state is patched directly: a stand-in process
        whose ``returncode`` is ``None`` and ``qwen-3-8b`` as the active
        profile. The endpoint is then expected to echo that state back.
        """
        live_process = MagicMock(returncode=None)
        overrides = {
            "MANIFEST_PATH": str(tmp_manifest),
            "_llama_server_process": live_process,
            "_active_profile": "qwen-3-8b",
        }
        with patch.multiple("sidecar.app", **overrides):
            reply = TestClient(sidecar_app).get("/models/status")
            assert reply.status_code == 200
            payload = reply.json()
            assert payload["active_profile"] == "qwen-3-8b"
            assert payload["llama_server_running"] is True