Epic: Model Switching via Sidecar — Issues #2-#3
Issue #2: Manifest schema + Sidecar foundation
- sidecar/manifest.py: YAML manifest loading and profile validation
- sidecar/app.py: FastAPI sidecar service with /models/available, /models/status endpoints
- Router GET /v1/models: proxies to sidecar, returns OpenAI-compatible model list
- Tests: 12 manifest tests, 6 sidecar endpoint tests, 3 router tests (21 total)
Issue #3: Sidecar model switch + Router request queue
- Sidecar POST /models/switch: stops current llama-server, starts new one, polls for readiness
- Switch lock prevents concurrent switches (threading.Lock for TestClient compatibility)
- Router request queue: max 10 requests, 120s hard timeout, 429 when full
- Router automatic model detection: extracts model from chat body, matches against sidecar status
- Full proxy endpoint with Sidecar → Main PC routing and fallback chain
- Tests: 5 sidecar switch tests, 4 queue tests, 3 router integration tests (12 total)
Total: 33 tests, all passing
2026-06-15 03:49:24 +03:00
|
|
|
"""Tests for router /v1/models endpoint — Issue #2."""
|
|
|
|
|
import json
|
|
|
|
|
import asyncio
|
|
|
|
|
import pytest
|
|
|
|
|
from unittest.mock import patch
|
|
|
|
|
from httpx import Response, ASGITransport, AsyncClient
|
|
|
|
|
|
|
|
|
|
from main import app as router_app
|
|
|
|
|
|
2026-06-15 16:16:47 +03:00
|
|
|
# Sidecar base URL used by every test in this module: it is patched over
# main.SIDECAR_URL and used as the prefix of the respx-mocked sidecar routes.
SIDECAR_URL = "http://localhost:8080"
|
Epic: Model Switching via Sidecar — Issues #2-#3
Issue #2: Manifest schema + Sidecar foundation
- sidecar/manifest.py: YAML manifest loading and profile validation
- sidecar/app.py: FastAPI sidecar service with /models/available, /models/status endpoints
- Router GET /v1/models: proxies to sidecar, returns OpenAI-compatible model list
- Tests: 12 manifest tests, 6 sidecar endpoint tests, 3 router tests (21 total)
Issue #3: Sidecar model switch + Router request queue
- Sidecar POST /models/switch: stops current llama-server, starts new one, polls for readiness
- Switch lock prevents concurrent switches (threading.Lock for TestClient compatibility)
- Router request queue: max 10 requests, 120s hard timeout, 429 when full
- Router automatic model detection: extracts model from chat body, matches against sidecar status
- Full proxy endpoint with Sidecar → Main PC routing and fallback chain
- Tests: 5 sidecar switch tests, 4 queue tests, 3 router integration tests (12 total)
Total: 33 tests, all passing
2026-06-15 03:49:24 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_v1_models_returns_profiles_from_sidecar():
    """Router /v1/models proxies to sidecar /models/available.

    The sidecar's GET /models/available is stubbed to return a single
    profile; the router must answer 200 with a "data" list that holds
    exactly one entry whose id is that profile's id and whose "object"
    field is "model".
    """
    available_profiles = [
        {"id": "qwen-3-8b", "name": "Qwen 3 8B", "model_path": "/path/model.gguf", "flags": {"n_ctx": 8192}},
    ]

    async def exercise_router():
        import respx

        with respx.mock:
            # Stub the sidecar endpoint the router is expected to call.
            sidecar_route = respx.get(f"{SIDECAR_URL}/models/available")
            sidecar_route.mock(return_value=Response(200, json=available_profiles))

            with patch("main.SIDECAR_URL", SIDECAR_URL):
                # Drive the router in-process through its ASGI interface.
                client = AsyncClient(
                    transport=ASGITransport(app=router_app),
                    base_url="http://test",
                )
                async with client:
                    reply = await client.get("/v1/models")

                    assert reply.status_code == 200
                    payload = reply.json()
                    assert "data" in payload
                    assert len(payload["data"]) == 1
                    first_model = payload["data"][0]
                    assert first_model["id"] == "qwen-3-8b"
                    assert first_model["object"] == "model"

    asyncio.run(exercise_router())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_v1_models_returns_empty_list_when_sidecar_empty():
    """Router /v1/models returns empty list when sidecar has no profiles.

    The sidecar's GET /models/available is stubbed to return an empty JSON
    array; the router must still answer 200, with "data" equal to [].
    """

    async def exercise_router():
        import respx

        with respx.mock:
            # Sidecar reports no profiles at all.
            sidecar_route = respx.get(f"{SIDECAR_URL}/models/available")
            sidecar_route.mock(return_value=Response(200, json=[]))

            with patch("main.SIDECAR_URL", SIDECAR_URL):
                # Drive the router in-process through its ASGI interface.
                client = AsyncClient(
                    transport=ASGITransport(app=router_app),
                    base_url="http://test",
                )
                async with client:
                    reply = await client.get("/v1/models")

                    assert reply.status_code == 200
                    payload = reply.json()
                    assert payload["data"] == []

    asyncio.run(exercise_router())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_v1_models_returns_503_when_sidecar_down():
    """Router /v1/models returns 503 when sidecar is unreachable.

    The sidecar's GET /models/available is stubbed to fail with a
    connection error instead of returning a response; the router must
    translate that failure into a 503 for its own caller.
    """

    async def run_test():
        import respx
        from httpx import ConnectError

        with respx.mock:
            # Raise the error httpx itself uses for a failed connection rather
            # than a bare Exception, so the test keeps passing if the router
            # narrows its handler to httpx errors. ConnectError is still an
            # Exception, so a broad handler catches it as well.
            respx.get(f"{SIDECAR_URL}/models/available").mock(
                side_effect=ConnectError("connection refused")
            )
            with patch("main.SIDECAR_URL", SIDECAR_URL):
                transport = ASGITransport(app=router_app)
                async with AsyncClient(transport=transport, base_url="http://test") as ac:
                    resp = await ac.get("/v1/models")
                    assert resp.status_code == 503

    asyncio.run(run_test())
|