intelligence-router/tests/test_sidecar_switch.py

"""Tests for sidecar model switch — Issue #3."""
import pytest
from unittest.mock import patch, AsyncMock, MagicMock
from httpx import Response
from fastapi.testclient import TestClient

from sidecar.app import app as sidecar_app


@pytest.fixture(autouse=True)
def reset_sidecar_state():
    """Reset shared sidecar state between tests.

    Saves the current values of ``sidecar.app._active_profile`` and
    ``sidecar.app._llama_server_process``, sets both to ``None`` before the
    test body runs, and puts the saved values back once the test is done.
    """
    # Work through the module object: names pulled in with
    # ``from sidecar.app import ...`` would only be local copies, so
    # rebinding them would never reach the state the application reads.
    import sidecar.app

    old_active = sidecar.app._active_profile
    old_proc = sidecar.app._llama_server_process
    sidecar.app._active_profile = None
    sidecar.app._llama_server_process = None
    yield
    sidecar.app._active_profile = old_active
    sidecar.app._llama_server_process = old_proc


@pytest.fixture
def tmp_manifest(tmp_path):
    """Write a two-profile YAML manifest under ``tmp_path`` and return its path.

    The first profile (``qwen-3-8b``) carries a ``flags`` mapping; the second
    (``llama-4-maverick``) has only ``id``, ``name`` and ``model_path``.
    """
    profile_lines = (
        "- id: qwen-3-8b\n",
        "  name: \"Qwen 3 8B\"\n",
        "  model_path: /home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf\n",
        "  flags:\n",
        "    n_ctx: 8192\n",
        "    n_gpu_layers: 35\n",
        "- id: llama-4-maverick\n",
        "  name: \"Llama 4 Maverick\"\n",
        "  model_path: /home/bigt/AI/llm/llama4/llama4-maverick-q4.gguf\n",
    )
    destination = tmp_path / "manifest.yaml"
    destination.write_text("".join(profile_lines))
    return destination


class TestSwitchEndpoint:
    """Tests for POST /models/switch.

    Every test points ``sidecar.app.MANIFEST_PATH`` at the temporary manifest
    from the ``tmp_manifest`` fixture. Tests that go past profile lookup
    replace ``_start_llama_server`` and ``_poll_llama_server_ready`` with
    ``AsyncMock`` objects.
    """

    def test_switch_to_new_profile(self, tmp_manifest):
        """Switching to a new profile starts llama-server and waits for readiness."""
        # Mock the readiness poll explicitly as an AsyncMock, matching
        # test_switch_returns_error_when_unready, so both tests replace the
        # helper the same way instead of relying on patch() auto-detection.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
             patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
             patch("sidecar.app._poll_llama_server_ready", new_callable=AsyncMock, return_value=True):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"

    def test_switch_profile_not_found(self, tmp_manifest):
        """Switching to a non-existent profile returns 404."""
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "nonexistent"})
            assert response.status_code == 404
            data = response.json()
            assert data["status"] == "error"
            assert "not found" in data["message"]

    def test_switch_returns_error_when_unready(self, tmp_manifest):
        """If llama-server doesn't become ready, switch returns error."""
        # The readiness poll resolves to False, i.e. the server never came up.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
             patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
             patch("sidecar.app._poll_llama_server_ready", new_callable=AsyncMock, return_value=False):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 500
            data = response.json()
            assert data["status"] == "error"

    def test_switch_when_already_running_same_profile(self, tmp_manifest):
        """Already running this profile — returns ready immediately."""
        # Only the active profile is pre-set; the start/poll helpers are left
        # unpatched.
        # NOTE(review): presumably the endpoint returns before reaching them
        # when the requested profile is already active; confirm in sidecar.app.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
             patch("sidecar.app._active_profile", "qwen-3-8b"):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"


class TestStatusEndpoint:
    """Tests for GET /models/status after switch."""

    def test_status_reflects_running_server(self, tmp_manifest):
        """Status reports the active profile and a running llama-server.

        The module state is patched to look as if a switch to ``qwen-3-8b``
        had already succeeded; no switch request is actually issued.
        """
        # NOTE(review): presumably the endpoint treats ``returncode is None``
        # as "process still alive"; confirm against sidecar.app.
        fake_server = MagicMock(returncode=None)

        manifest_patch = patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest))
        process_patch = patch("sidecar.app._llama_server_process", fake_server)
        profile_patch = patch("sidecar.app._active_profile", "qwen-3-8b")

        with manifest_patch, process_patch, profile_patch:
            status = TestClient(sidecar_app).get("/models/status")
            assert status.status_code == 200
            payload = status.json()
            assert payload["active_profile"] == "qwen-3-8b"
            assert payload["llama_server_running"] is True
Epic: Model Switching via Sidecar — Issues #2-#3

Issue #2: Manifest schema + Sidecar foundation
- sidecar/manifest.py: YAML manifest loading and profile validation
- sidecar/app.py: FastAPI sidecar service with /models/available, /models/status endpoints
- Router GET /v1/models: proxies to sidecar, returns OpenAI-compatible model list
- Tests: 12 manifest tests, 6 sidecar endpoint tests, 3 router tests (21 total)

Issue #3: Sidecar model switch + Router request queue
- Sidecar POST /models/switch: stops current llama-server, starts new one, polls for readiness
- Switch lock prevents concurrent switches (threading.Lock for TestClient compatibility)
- Router request queue: max 10 requests, 120s hard timeout, 429 when full
- Router automatic model detection: extracts model from chat body, matches against sidecar status
- Full proxy endpoint with Sidecar → Main PC routing and fallback chain
- Tests: 5 sidecar switch tests, 4 queue tests, 3 router integration tests (12 total)

Total: 33 tests, all passing

2026-06-15 03:49:24 +03:00
			`"""Tests for sidecar model switch — Issue #3."""`
			`import pytest`
			`from unittest.mock import patch, AsyncMock, MagicMock`
			`from httpx import Response`
			`from fastapi.testclient import TestClient`

			`from sidecar.app import app as sidecar_app`


			`@pytest.fixture(autouse=True)`
			`def reset_sidecar_state():`
			`"""Reset shared sidecar state between tests."""`
			`from sidecar.app import _active_profile, _llama_server_process`
			`import sidecar.app`
			`old_active = sidecar.app._active_profile`
			`old_proc = sidecar.app._llama_server_process`
			`sidecar.app._active_profile = None`
			`sidecar.app._llama_server_process = None`
			`yield`
			`sidecar.app._active_profile = old_active`
			`sidecar.app._llama_server_process = old_proc`


			`@pytest.fixture`
			`def tmp_manifest(tmp_path):`
			`manifest_file = tmp_path / "manifest.yaml"`
			`manifest_file.write_text(`
			`"- id: qwen-3-8b\n"`
			`" name: \"Qwen 3 8B\"\n"`
			`" model_path: /home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf\n"`
			`" flags:\n"`
			`" n_ctx: 8192\n"`
			`" n_gpu_layers: 35\n"`
			`"- id: llama-4-maverick\n"`
			`" name: \"Llama 4 Maverick\"\n"`
			`" model_path: /home/bigt/AI/llm/llama4/llama4-maverick-q4.gguf\n"`
			`)`
			`return manifest_file`


			`class TestSwitchEndpoint:`
			`"""Tests for POST /models/switch."""`

			`def test_switch_to_new_profile(self, tmp_manifest):`
			`"""Switching to a new profile starts llama-server and waits for readiness."""`
			`with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \`
			`patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \`
			`patch("sidecar.app._poll_llama_server_ready", return_value=True):`
			`client = TestClient(sidecar_app)`
			`response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})`
			`assert response.status_code == 200`
			`data = response.json()`
			`assert data["status"] == "ready"`
			`assert data["active_profile"] == "qwen-3-8b"`

			`def test_switch_profile_not_found(self, tmp_manifest):`
			`"""Switching to a non-existent profile returns 404."""`
			`with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)):`
			`client = TestClient(sidecar_app)`
			`response = client.post("/models/switch", json={"profile_id": "nonexistent"})`
			`assert response.status_code == 404`
			`data = response.json()`
			`assert data["status"] == "error"`
			`assert "not found" in data["message"]`

			`def test_switch_returns_error_when_unready(self, tmp_manifest):`
			`"""If llama-server doesn't become ready, switch returns error."""`
			`with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \`
			`patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \`
Epic: Model Switching via Sidecar — Issues #4-#7 + #8 deployment

Issue #4: Automatic model detection and switch
- Router extracts model from chat body, queries sidecar, triggers switch on mismatch
- Matching active model routes directly to Main PC
- No active model triggers cold start switch
- Tests: 4 in test_router_model_detection.py

Issue #5: SSE switch progress feedback
- _sse_format() correctly serializes SSE events
- sse_progress_stream() generates phase progression events
- Proxy yields SSE events, then the actual response
- Tests: 3 in test_router_sse_progress.py

Issue #6: Circuit breaker + OpenRouter fallback
- Circuit tracks Sidecar failures, opens after MAX_RECOVERY_ATTEMPTS (3)
- OpenRouter API key from env, no longer uses x-intelligence-level header
- Fixes: OPENROUTER_BASE, SSE format, circuit state isolation
- Tests: 7 in test_router_circuit_breaker.py

Issue #7: LXC fallback chain completion
- Full fallback: Main PC → OpenRouter → LXC
- Each backend health-checked via /v1/models before routing
- All backends down → 503 response
- Fixed: execute() wrapped in try/except to trigger fallback chain
- Tests: 3 in test_router_fallback_lxc.py

Issue #8: Systemd service deployment
- deploy/llm-sidecar.service: systemd unit with Restart=always
- deploy/manifest.yaml: example manifest with 3 profiles
- deploy/README.md: deployment instructions
- Updated: docker-compose.yml, requirements.txt, Dockerfile

Test framework improvements:
- tests/conftest.py: shared URL patches for all router tests
- Fixed global state pollution in circuit breaker tests
- Fixed the sidecar switch test (AsyncMock for async function)

Total: 42 tests passing

2026-06-15 04:13:36 +03:00
			`patch("sidecar.app._poll_llama_server_ready", new_callable=AsyncMock, return_value=False):`
Epic: Model Switching via Sidecar — Issues #2-#3

Issue #2: Manifest schema + Sidecar foundation
- sidecar/manifest.py: YAML manifest loading and profile validation
- sidecar/app.py: FastAPI sidecar service with /models/available, /models/status endpoints
- Router GET /v1/models: proxies to sidecar, returns OpenAI-compatible model list
- Tests: 12 manifest tests, 6 sidecar endpoint tests, 3 router tests (21 total)

Issue #3: Sidecar model switch + Router request queue
- Sidecar POST /models/switch: stops current llama-server, starts new one, polls for readiness
- Switch lock prevents concurrent switches (threading.Lock for TestClient compatibility)
- Router request queue: max 10 requests, 120s hard timeout, 429 when full
- Router automatic model detection: extracts model from chat body, matches against sidecar status
- Full proxy endpoint with Sidecar → Main PC routing and fallback chain
- Tests: 5 sidecar switch tests, 4 queue tests, 3 router integration tests (12 total)

Total: 33 tests, all passing

2026-06-15 03:49:24 +03:00
			`client = TestClient(sidecar_app)`
			`response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})`
			`assert response.status_code == 500`
			`data = response.json()`
			`assert data["status"] == "error"`

			`def test_switch_when_already_running_same_profile(self, tmp_manifest):`
			`"""Already running this profile — returns ready immediately."""`
			`with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \`
			`patch("sidecar.app._active_profile", "qwen-3-8b"):`
			`client = TestClient(sidecar_app)`
			`response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})`
			`assert response.status_code == 200`
			`data = response.json()`
			`assert data["status"] == "ready"`
			`assert data["active_profile"] == "qwen-3-8b"`


			`class TestStatusEndpoint:`
			`"""Tests for GET /models/status after switch."""`

			`def test_status_reflects_running_server(self, tmp_manifest):`
			`"""After a successful switch, status shows active_profile and running server."""`
			`mock_process = MagicMock()`
			`mock_process.returncode = None`

			`with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \`
			`patch("sidecar.app._llama_server_process", mock_process), \`
			`patch("sidecar.app._active_profile", "qwen-3-8b"):`
			`client = TestClient(sidecar_app)`
			`response = client.get("/models/status")`
			`assert response.status_code == 200`
			`data = response.json()`
			`assert data["active_profile"] == "qwen-3-8b"`
			`assert data["llama_server_running"] is True`