Epic: Model Switching via Sidecar — Issues #2-#3
Issue #2: Manifest schema + Sidecar foundation
- sidecar/manifest.py: YAML manifest loading and profile validation
- sidecar/app.py: FastAPI sidecar service with /models/available, /models/status endpoints
- Router GET /v1/models: proxies to sidecar, returns OpenAI-compatible model list
- Tests: 12 manifest tests, 6 sidecar endpoint tests, 3 router tests (21 total)
Issue #3: Sidecar model switch + Router request queue
- Sidecar POST /models/switch: stops current llama-server, starts new one, polls for readiness
- Switch lock prevents concurrent switches (threading.Lock for TestClient compatibility)
- Router request queue: max 10 requests, 120s hard timeout, 429 when full
- Router automatic model detection: extracts model from chat body, matches against sidecar status
- Full proxy endpoint with Sidecar → Main PC routing and fallback chain
- Tests: 5 sidecar switch tests, 4 queue tests, 3 router integration tests (12 total)
Total: 33 tests, all passing
2026-06-15 03:49:24 +03:00
|
|
|
"""Tests for sidecar model switch — Issue #3."""
|
|
|
|
|
import pytest
|
|
|
|
|
from unittest.mock import patch, AsyncMock, MagicMock
|
|
|
|
|
from httpx import Response
|
|
|
|
|
from fastapi.testclient import TestClient
|
|
|
|
|
|
|
|
|
|
from sidecar.app import app as sidecar_app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def reset_sidecar_state():
    """Reset shared sidecar state between tests.

    Before each test, ``sidecar.app._active_profile`` and
    ``sidecar.app._llama_server_process`` are saved and set to ``None``.
    After the test the saved values are written back.
    """
    # The state is read and written through the module object on purpose:
    # a ``from sidecar.app import ...`` would only bind local copies, and
    # rebinding those would not be visible to code inside sidecar.app.
    import sidecar.app

    old_active = sidecar.app._active_profile
    old_proc = sidecar.app._llama_server_process

    sidecar.app._active_profile = None
    sidecar.app._llama_server_process = None

    yield

    # Teardown: restore whatever was there before the test ran.
    sidecar.app._active_profile = old_active
    sidecar.app._llama_server_process = old_proc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def tmp_manifest(tmp_path):
    """Write a two-profile YAML manifest into ``tmp_path`` and return its path.

    The manifest is a YAML list with the profiles ``qwen-3-8b`` (which has a
    nested ``flags`` mapping) and ``llama-4-maverick`` (no flags).
    """
    manifest_file = tmp_path / "manifest.yaml"
    # Indentation inside the literals is significant YAML structure:
    # two spaces place a key inside its list item, four spaces place a key
    # inside that item's ``flags`` mapping.
    manifest_file.write_text(
        "- id: qwen-3-8b\n"
        "  name: \"Qwen 3 8B\"\n"
        "  model_path: /home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf\n"
        "  flags:\n"
        "    n_ctx: 8192\n"
        "    n_gpu_layers: 35\n"
        "- id: llama-4-maverick\n"
        "  name: \"Llama 4 Maverick\"\n"
        "  model_path: /home/bigt/AI/llm/llama4/llama4-maverick-q4.gguf\n"
    )
    return manifest_file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestSwitchEndpoint:
    """Tests for POST /models/switch."""

    def test_switch_to_new_profile(self, tmp_manifest):
        """Switching to a new profile starts llama-server and waits for readiness."""
        # The readiness poll is patched with an explicit AsyncMock, the same
        # way test_switch_returns_error_when_unready patches it, so both
        # tests replace the helper with the same kind of mock.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
                patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
                patch("sidecar.app._poll_llama_server_ready", new_callable=AsyncMock, return_value=True):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})

            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"

    def test_switch_profile_not_found(self, tmp_manifest):
        """Switching to a non-existent profile returns 404."""
        # Only the manifest path is patched: the request is expected to be
        # rejected, so the server start/poll helpers are left untouched.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "nonexistent"})

            assert response.status_code == 404
            data = response.json()
            assert data["status"] == "error"
            assert "not found" in data["message"]

    def test_switch_returns_error_when_unready(self, tmp_manifest):
        """If llama-server doesn't become ready, switch returns error."""
        # The poll mock resolves to False to simulate a server that never
        # reports ready.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
                patch("sidecar.app._start_llama_server", new_callable=AsyncMock), \
                patch("sidecar.app._poll_llama_server_ready", new_callable=AsyncMock, return_value=False):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})

            assert response.status_code == 500
            data = response.json()
            assert data["status"] == "error"

    def test_switch_when_already_running_same_profile(self, tmp_manifest):
        """Already running this profile — returns ready immediately."""
        # The active profile is pre-set to the requested one; the start and
        # poll helpers are deliberately not patched in this test.
        with patch("sidecar.app.MANIFEST_PATH", str(tmp_manifest)), \
                patch("sidecar.app._active_profile", "qwen-3-8b"):
            client = TestClient(sidecar_app)
            response = client.post("/models/switch", json={"profile_id": "qwen-3-8b"})

            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "ready"
            assert data["active_profile"] == "qwen-3-8b"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestStatusEndpoint:
    """Tests for GET /models/status after switch."""

    def test_status_reflects_running_server(self, tmp_manifest):
        """After a successful switch, status shows active_profile and running server."""
        # Stand-in for the server process; returncode is None on the mock.
        fake_server = MagicMock(returncode=None)

        # Swap all three module attributes of sidecar.app in one go.
        patched_state = patch.multiple(
            "sidecar.app",
            MANIFEST_PATH=str(tmp_manifest),
            _llama_server_process=fake_server,
            _active_profile="qwen-3-8b",
        )
        with patched_state:
            status_response = TestClient(sidecar_app).get("/models/status")

            assert status_response.status_code == 200
            payload = status_response.json()
            assert payload["active_profile"] == "qwen-3-8b"
            assert payload["llama_server_running"] is True
|