# intelligence-router/tests/test_sidecar_switch.py

"""Tests for sidecar model switch — Issue #3."""
import pytest
from unittest.mock import patch, AsyncMock, MagicMock
from httpx import Response
from fastapi.testclient import TestClient

from sidecar.app import app as sidecar_app


@pytest.fixture(autouse=True)
def reset_sidecar_state():
    """Isolate each test from the sidecar's module-level state.

    Before the test runs, ``sidecar.app._active_profile`` and
    ``sidecar.app._llama_server_process`` are saved and set to ``None``.
    After the test, the saved values are written back.
    """
    # Always go through the module attribute: a ``from sidecar.app import ...``
    # would only copy the current values into local names, and rebinding
    # those locals would never reach the module globals.
    import sidecar.app

    saved_profile = sidecar.app._active_profile
    saved_process = sidecar.app._llama_server_process
    sidecar.app._active_profile = None
    sidecar.app._llama_server_process = None
    yield
    sidecar.app._active_profile = saved_profile
    sidecar.app._llama_server_process = saved_process


@pytest.fixture
def tmp_manifest(tmp_path):
    """Create ``manifest.yaml`` under ``tmp_path`` and return its path.

    The file lists two profiles: ``qwen-3-8b`` (with an ``n_ctx`` and an
    ``n_gpu_layers`` flag) and ``llama-4-maverick`` (no flags).
    """
    yaml_lines = (
        "- id: qwen-3-8b",
        '  name: "Qwen 3 8B"',
        "  model_path: /home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf",
        "  flags:",
        "    n_ctx: 8192",
        "    n_gpu_layers: 35",
        "- id: llama-4-maverick",
        '  name: "Llama 4 Maverick"',
        "  model_path: /home/bigt/AI/llm/llama4/llama4-maverick-q4.gguf",
    )
    path = tmp_path / "manifest.yaml"
    # Every entry, including the last, is terminated by a newline.
    path.write_text("".join(f"{line}\n" for line in yaml_lines))
    return path


class TestSwitchEndpoint:
    """Tests for POST /models/switch.

    Every test points ``sidecar.app.MANIFEST_PATH`` at the temporary manifest
    from the ``tmp_manifest`` fixture. The llama-server helpers are replaced
    with mocks wherever a test patches them.
    """

    def test_switch_to_new_profile(self, tmp_manifest):
        """Switching to a new profile starts llama-server and waits for readiness."""
        # Both helpers are patched with an explicit AsyncMock, matching
        # test_switch_returns_error_when_unready, so the mocks are awaitable
        # without relying on patch() detecting a coroutine function.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
             patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
             patch("sidecar.app._poll_llama_server_ready", new_callable=AsyncMock, return_value=True):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"

    def test_switch_profile_not_found(self, tmp_manifest):
        """Switching to a non-existent profile returns 404."""
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)):
            client = TestClient(sidecar_app)
            # "nonexistent" is not one of the ids written by tmp_manifest.
            response = client.post("/models/switch", json={"profile_id": "nonexistent"})
            assert response.status_code == 404
            data = response.json()
            assert data["status"] == "error"
            assert "not found" in data["message"]

    def test_switch_returns_error_when_unready(self, tmp_manifest):
        """If llama-server doesn't become ready, switch returns error."""
        # The readiness poll is mocked to resolve to False.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
             patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
             patch("sidecar.app._poll_llama_server_ready", new_callable=AsyncMock, return_value=False):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 500
            data = response.json()
            assert data["status"] == "error"

    def test_switch_when_already_running_same_profile(self, tmp_manifest):
        """Already running this profile — returns ready immediately."""
        # The active profile is pre-set to the one being requested.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
             patch("sidecar.app._active_profile", "qwen-3-8b"):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"


class TestStatusEndpoint:
    """Tests for GET /models/status when a profile and process are set."""

    def test_status_reflects_running_server(self, tmp_manifest):
        """Status reports the active profile and a running llama-server.

        The module state is patched to hold a process mock whose
        ``returncode`` is ``None`` and an active profile of ``qwen-3-8b``.
        """
        fake_process = MagicMock(returncode=None)

        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
             patch("sidecar.app._llama_server_process", fake_process), \
             patch("sidecar.app._active_profile", "qwen-3-8b"):
            status_response = TestClient(sidecar_app).get("/models/status")
            assert status_response.status_code == 200

            payload = status_response.json()
            assert payload["active_profile"] == "qwen-3-8b"
            assert payload["llama_server_running"] is True