# intelligence-router/tests/test_sidecar_switch.py
#
# Repository viewer metadata: 106 lines, 4.5 KiB, Python.

"""Tests for sidecar model switch — Issue #3."""
import pytest
from unittest.mock import patch, AsyncMock, MagicMock
from httpx import Response
from fastapi.testclient import TestClient
from sidecar.app import app as sidecar_app
@pytest.fixture(autouse=True)
def reset_sidecar_state():
    """Reset shared sidecar state between tests.

    Before each test, ``sidecar.app._active_profile`` and
    ``sidecar.app._llama_server_process`` are set to ``None``; after the
    test the values that were present beforehand are put back.
    """
    import sidecar.app

    # Read and write through the module attribute only. Names pulled in with
    # ``from sidecar.app import ...`` would be local copies, and rebinding
    # them would leave the module-level globals untouched.
    old_active = sidecar.app._active_profile
    old_proc = sidecar.app._llama_server_process

    sidecar.app._active_profile = None
    sidecar.app._llama_server_process = None

    yield

    sidecar.app._active_profile = old_active
    sidecar.app._llama_server_process = old_proc
@pytest.fixture
def tmp_manifest(tmp_path):
    """Write a two-profile model manifest to a temp file and return its path.

    The manifest is a YAML list with the profiles ``qwen-3-8b`` (which
    carries a ``flags`` mapping) and ``llama-4-maverick`` (which has none).

    Args:
        tmp_path: pytest's built-in per-test temporary directory.

    Returns:
        Path to the written ``manifest.yaml``.
    """
    manifest_file = tmp_path / "manifest.yaml"
    # Keys of a list item must line up with the first key after "- " (two
    # spaces), and the entries of ``flags`` must be indented deeper than
    # ``flags:`` itself; otherwise the document is not valid YAML.
    manifest_file.write_text(
        "- id: qwen-3-8b\n"
        "  name: \"Qwen 3 8B\"\n"
        "  model_path: /home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf\n"
        "  flags:\n"
        "    n_ctx: 8192\n"
        "    n_gpu_layers: 35\n"
        "- id: llama-4-maverick\n"
        "  name: \"Llama 4 Maverick\"\n"
        "  model_path: /home/bigt/AI/llm/llama4/llama4-maverick-q4.gguf\n",
        encoding="utf-8",
    )
    return manifest_file
class TestSwitchEndpoint:
    """Tests for POST /models/switch."""

    def test_switch_to_new_profile(self, tmp_manifest):
        """Switching to a new profile starts llama-server and waits for readiness."""
        # Both helpers are replaced with AsyncMock explicitly, the same way
        # the unready-path test below does, so the two tests patch the
        # readiness poll identically.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
                patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
                patch(
                    "sidecar.app._poll_llama_server_ready",
                    new_callable=AsyncMock,
                    return_value=True,
                ):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"

    def test_switch_profile_not_found(self, tmp_manifest):
        """Switching to a non-existent profile returns 404."""
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "nonexistent"})
            assert response.status_code == 404
            data = response.json()
            assert data["status"] == "error"
            assert "not found" in data["message"]

    def test_switch_returns_error_when_unready(self, tmp_manifest):
        """If llama-server doesn't become ready, switch returns error."""
        # The readiness poll reports False, so the endpoint is expected to
        # answer with HTTP 500 and an error status.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
                patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
                patch(
                    "sidecar.app._poll_llama_server_ready",
                    new_callable=AsyncMock,
                    return_value=False,
                ):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 500
            data = response.json()
            assert data["status"] == "error"

    def test_switch_when_already_running_same_profile(self, tmp_manifest):
        """Already running this profile — returns ready immediately."""
        # The active profile is pre-set to the requested one; no server
        # helpers are patched in this test.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
                patch("sidecar.app._active_profile", "qwen-3-8b"):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})
            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"
class TestStatusEndpoint:
    """Tests for GET /models/status after switch."""

    def test_status_reflects_running_server(self, tmp_manifest):
        """Status reports the active profile and a running llama-server."""
        # Stand-in process handle whose returncode is None; with it in place
        # the endpoint is expected to report the server as running.
        running_proc = MagicMock(returncode=None)

        manifest_patch = patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest))
        process_patch = patch("sidecar.app._llama_server_process", running_proc)
        profile_patch = patch("sidecar.app._active_profile", "qwen-3-8b")

        with manifest_patch, process_patch, profile_patch:
            status = TestClient(sidecar_app).get("/models/status")
            assert status.status_code == 200
            payload = status.json()
            assert payload["active_profile"] == "qwen-3-8b"
            assert payload["llama_server_running"] is True