intelligence-router/tests/test_router_v1_models.py
root c491779248 Epic: Model Switching via Sidecar — Issues #2-#3
Issue #2: Manifest schema + Sidecar foundation
- sidecar/manifest.py: YAML manifest loading and profile validation
- sidecar/app.py: FastAPI sidecar service with /models/available, /models/status endpoints
- Router GET /v1/models: proxies to sidecar, returns OpenAI-compatible model list
- Tests: 12 manifest tests, 6 sidecar endpoint tests, 3 router tests (21 total)

Issue #3: Sidecar model switch + Router request queue
- Sidecar POST /models/switch: stops current llama-server, starts new one, polls for readiness
- Switch lock prevents concurrent switches (threading.Lock for TestClient compatibility)
- Router request queue: max 10 requests, 120s hard timeout, 429 when full
- Router automatic model detection: extracts model from chat body, matches against sidecar status
- Full proxy endpoint with Sidecar → Main PC routing and fallback chain
- Tests: 5 sidecar switch tests, 4 queue tests, 3 router integration tests (12 total)

Total: 33 tests, all passing
2026-06-15 00:49:24 +00:00

73 lines
2.6 KiB
Python

"""Tests for router /v1/models endpoint — Issue #2."""
import json
import asyncio
import pytest
from unittest.mock import patch
from httpx import Response, ASGITransport, AsyncClient
from main import app as router_app
SIDECAR_URL = "http://localhost:8081"
def test_v1_models_returns_profiles_from_sidecar():
    """GET /v1/models on the router exposes the sidecar's profiles.

    The sidecar's ``/models/available`` endpoint is mocked to return a single
    profile; the router response must carry exactly one entry under ``data``
    with the profile's ``id`` and ``object == "model"``.
    """
    sidecar_profiles = [
        {
            "id": "qwen-3-8b",
            "name": "Qwen 3 8B",
            "model_path": "/path/model.gguf",
            "flags": {"n_ctx": 8192},
        },
    ]

    async def run_test():
        # respx is imported lazily, so it is only needed when the test runs.
        import respx

        with respx.mock:
            # Routes must be registered while the mock router is active.
            available_route = respx.get(f"{SIDECAR_URL}/models/available")
            available_route.mock(
                return_value=Response(200, json=sidecar_profiles)
            )
            # Point the router at the mocked sidecar address.
            with patch("main.SIDECAR_URL", SIDECAR_URL):
                asgi = ASGITransport(app=router_app)
                async with AsyncClient(
                    transport=asgi, base_url="http://test"
                ) as client:
                    reply = await client.get("/v1/models")
                    assert reply.status_code == 200
                    payload = reply.json()
                    assert "data" in payload
                    models = payload["data"]
                    assert len(models) == 1
                    only_model = models[0]
                    assert only_model["id"] == "qwen-3-8b"
                    assert only_model["object"] == "model"

    # The test is synchronous for pytest; drive the coroutine explicitly.
    asyncio.run(run_test())
def test_v1_models_returns_empty_list_when_sidecar_empty():
    """GET /v1/models yields an empty ``data`` list when the sidecar has none.

    The sidecar's ``/models/available`` endpoint is mocked to return ``[]``;
    the router must still answer 200 with ``data == []``.
    """

    async def run_test():
        # respx is imported lazily, so it is only needed when the test runs.
        import respx

        with respx.mock:
            # Routes must be registered while the mock router is active.
            available_route = respx.get(f"{SIDECAR_URL}/models/available")
            available_route.mock(return_value=Response(200, json=[]))
            # Point the router at the mocked sidecar address.
            with patch("main.SIDECAR_URL", SIDECAR_URL):
                asgi = ASGITransport(app=router_app)
                async with AsyncClient(
                    transport=asgi, base_url="http://test"
                ) as client:
                    reply = await client.get("/v1/models")
                    assert reply.status_code == 200
                    payload = reply.json()
                    assert payload["data"] == []

    # The test is synchronous for pytest; drive the coroutine explicitly.
    asyncio.run(run_test())
def test_v1_models_returns_503_when_sidecar_down():
    """Router /v1/models returns 503 when sidecar is unreachable.

    The sidecar outage is simulated with ``httpx.ConnectError``, the error
    httpx raises when a connection cannot be established. A bare
    ``Exception`` would only exercise a catch-all handler in the router and
    would make this test fail spuriously if that handler were narrowed to
    httpx's own exception types.
    """

    async def run_test():
        # Local imports, matching the file's existing lazy ``import respx``.
        import httpx
        import respx

        with respx.mock:
            # Routes must be registered while the mock router is active.
            respx.get(f"{SIDECAR_URL}/models/available").mock(
                side_effect=httpx.ConnectError("connection refused")
            )
            # Point the router at the mocked sidecar address.
            with patch("main.SIDECAR_URL", SIDECAR_URL):
                transport = ASGITransport(app=router_app)
                async with AsyncClient(
                    transport=transport, base_url="http://test"
                ) as ac:
                    resp = await ac.get("/v1/models")
                    assert resp.status_code == 503

    # The test is synchronous for pytest; drive the coroutine explicitly.
    asyncio.run(run_test())