intelligence-router/tests/test_sidecar_switch.py
root c491779248 Epic: Model Switching via Sidecar — Issues #2-#3
Issue #2: Manifest schema + Sidecar foundation
- sidecar/manifest.py: YAML manifest loading and profile validation
- sidecar/app.py: FastAPI sidecar service with /models/available, /models/status endpoints
- Router GET /v1/models: proxies to sidecar, returns OpenAI-compatible model list
- Tests: 12 manifest tests, 6 sidecar endpoint tests, 3 router tests (21 total)

Issue #3: Sidecar model switch + Router request queue
- Sidecar POST /models/switch: stops current llama-server, starts new one, polls for readiness
- Switch lock prevents concurrent switches (threading.Lock for TestClient compatibility)
- Router request queue: max 10 requests, 120s hard timeout, 429 when full
- Router automatic model detection: extracts model from chat body, matches against sidecar status
- Full proxy endpoint with Sidecar → Main PC routing and fallback chain
- Tests: 5 sidecar switch tests, 4 queue tests, 3 router integration tests (12 total)

Total: 33 tests, all passing
2026-06-15 00:49:24 +00:00

106 lines
4.4 KiB
Python

"""Tests for sidecar model switch — Issue #3."""
import pytest
from unittest.mock import patch, AsyncMock, MagicMock
from httpx import Response
from fastapi.testclient import TestClient
from sidecar.app import app as sidecar_app
@pytest.fixture(autouse=True)
def reset_sidecar_state():
    """Isolate every test from the sidecar module's shared globals.

    Before the test body runs, ``sidecar.app._active_profile`` and
    ``sidecar.app._llama_server_process`` are both set to ``None``. Once the
    test has finished, the values that were present beforehand are restored.
    """
    import sidecar.app

    # State is read and written through the module attribute on purpose: a
    # ``from sidecar.app import ...`` binding would only be a local snapshot,
    # and rebinding it would never reach the module's globals.
    saved_profile = sidecar.app._active_profile
    saved_process = sidecar.app._llama_server_process

    sidecar.app._active_profile = None
    sidecar.app._llama_server_process = None
    yield
    sidecar.app._active_profile = saved_profile
    sidecar.app._llama_server_process = saved_process
@pytest.fixture
def tmp_manifest(tmp_path):
    """Write a two-profile YAML manifest into *tmp_path* and return its path.

    The manifest is a YAML list with two entries: ``qwen-3-8b`` (which carries
    a nested ``flags`` mapping) and ``llama-4-maverick`` (which has no flags).

    Args:
        tmp_path: Temporary directory supplied by pytest's ``tmp_path`` fixture.

    Returns:
        The path object of the written ``manifest.yaml`` file.
    """
    manifest_file = tmp_path / "manifest.yaml"
    # Whitespace inside these literals is significant YAML structure: keys of
    # a list entry must line up under the text following "- ", and the
    # children of ``flags`` must be indented deeper than ``flags`` itself.
    manifest_file.write_text(
        "- id: qwen-3-8b\n"
        "  name: \"Qwen 3 8B\"\n"
        "  model_path: /home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf\n"
        "  flags:\n"
        "    n_ctx: 8192\n"
        "    n_gpu_layers: 35\n"
        "- id: llama-4-maverick\n"
        "  name: \"Llama 4 Maverick\"\n"
        "  model_path: /home/bigt/AI/llm/llama4/llama4-maverick-q4.gguf\n"
    )
    return manifest_file
class TestSwitchEndpoint:
    """Exercise POST /models/switch on the sidecar app."""

    @staticmethod
    def _request_switch(profile_id):
        """POST a switch request for *profile_id* and return the response.

        Must be called while the relevant ``sidecar.app`` patches are active.
        """
        http = TestClient(sidecar_app)
        return http.post("/models/switch", json={"profile_id": profile_id})

    def test_switch_to_new_profile(self, tmp_manifest):
        """Known profile + readiness poll returning True -> 200 and "ready"."""
        manifest_patch = patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest))
        # The start helper is replaced by an AsyncMock so no real process is
        # launched; the readiness poll is forced to report success.
        start_patch = patch(
            "sidecar.app._start_llama_server", new_callable=AsyncMock
        )
        ready_patch = patch(
            "sidecar.app._poll_llama_server_ready", return_value=True
        )
        with manifest_patch, start_patch, ready_patch:
            resp = self._request_switch("qwen-3-8b")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["status"] == "ready"
        assert payload["active_profile"] == "qwen-3-8b"

    def test_switch_profile_not_found(self, tmp_manifest):
        """A profile id absent from the manifest -> 404 with an error body."""
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)):
            resp = self._request_switch("nonexistent")

        assert resp.status_code == 404
        payload = resp.json()
        assert payload["status"] == "error"
        assert "not found" in payload["message"]

    def test_switch_returns_error_when_unready(self, tmp_manifest):
        """Readiness poll returning False -> 500 with an error status."""
        manifest_patch = patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest))
        start_patch = patch(
            "sidecar.app._start_llama_server", new_callable=AsyncMock
        )
        # Same setup as the success case, except the poll reports failure.
        ready_patch = patch(
            "sidecar.app._poll_llama_server_ready", return_value=False
        )
        with manifest_patch, start_patch, ready_patch:
            resp = self._request_switch("qwen-3-8b")

        assert resp.status_code == 500
        payload = resp.json()
        assert payload["status"] == "error"

    def test_switch_when_already_running_same_profile(self, tmp_manifest):
        """Requesting the profile already marked active -> 200 and "ready"."""
        manifest_patch = patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest))
        # Pretend the requested profile is the one currently active.
        active_patch = patch("sidecar.app._active_profile", "qwen-3-8b")
        with manifest_patch, active_patch:
            resp = self._request_switch("qwen-3-8b")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["status"] == "ready"
        assert payload["active_profile"] == "qwen-3-8b"
class TestStatusEndpoint:
    """Exercise GET /models/status against patched sidecar module state."""

    def test_status_reflects_running_server(self, tmp_manifest):
        """Patched-in profile and process show up in the status payload.

        The module state is patched directly (no switch request is made): the
        active profile is set to ``qwen-3-8b`` and the server process is a
        mock whose ``returncode`` is ``None``. The endpoint is then expected
        to report that profile and ``llama_server_running`` as ``True``.
        """
        # presumably a ``None`` returncode is what the app reads as "still
        # running" -- confirm against sidecar.app.
        fake_process = MagicMock(returncode=None)

        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)):
            with patch("sidecar.app._llama_server_process", fake_process):
                with patch("sidecar.app._active_profile", "qwen-3-8b"):
                    http = TestClient(sidecar_app)
                    resp = http.get("/models/status")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["active_profile"] == "qwen-3-8b"
        assert payload["llama_server_running"] is True