intelligence-router/tests/test_router_v1_models.py

"""Tests for router /v1/models endpoint — Issue #2."""
import json
import asyncio
import pytest
from unittest.mock import patch
from httpx import Response, ASGITransport, AsyncClient

from main import app as router_app

SIDECAR_URL = "http://localhost:8080"


def test_v1_models_returns_profiles_from_sidecar():
    """GET /v1/models exposes the sidecar's profiles as a model list.

    The sidecar's ``/models/available`` endpoint is mocked to return a single
    profile. The router must answer 200 with a ``data`` list holding exactly
    one entry whose ``id`` is that profile's id and whose ``object`` is
    ``"model"``.
    """
    available_profiles = [
        {"id": "qwen-3-8b", "name": "Qwen 3 8B", "model_path": "/path/model.gguf", "flags": {"n_ctx": 8192}},
    ]

    async def _scenario():
        import respx

        with respx.mock:
            sidecar_route = respx.get(f"{SIDECAR_URL}/models/available")
            sidecar_route.mock(return_value=Response(200, json=available_profiles))

            # The router's sidecar URL is overridden only while the request runs.
            with patch("main.SIDECAR_URL", SIDECAR_URL):
                async with AsyncClient(
                    transport=ASGITransport(app=router_app),
                    base_url="http://test",
                ) as client:
                    response = await client.get("/v1/models")

            # Assertions run with the respx mock still active, patch undone.
            assert response.status_code == 200
            payload = response.json()
            assert "data" in payload
            models = payload["data"]
            assert len(models) == 1
            only_model = models[0]
            assert only_model["id"] == "qwen-3-8b"
            assert only_model["object"] == "model"

    asyncio.run(_scenario())


def test_v1_models_returns_empty_list_when_sidecar_empty():
    """GET /v1/models yields an empty ``data`` list for an empty sidecar.

    The sidecar's ``/models/available`` endpoint is mocked to return ``[]``;
    the router must still answer 200, with ``data`` equal to ``[]``.
    """
    async def _scenario():
        import respx

        with respx.mock:
            sidecar_route = respx.get(f"{SIDECAR_URL}/models/available")
            sidecar_route.mock(return_value=Response(200, json=[]))

            # The router's sidecar URL is overridden only while the request runs.
            with patch("main.SIDECAR_URL", SIDECAR_URL):
                async with AsyncClient(
                    transport=ASGITransport(app=router_app),
                    base_url="http://test",
                ) as client:
                    response = await client.get("/v1/models")

            # Assertions run with the respx mock still active, patch undone.
            assert response.status_code == 200
            payload = response.json()
            assert payload["data"] == []

    asyncio.run(_scenario())


def test_v1_models_returns_503_when_sidecar_down():
    """GET /v1/models answers 503 when the sidecar call raises.

    The mocked ``/models/available`` route raises a generic ``Exception``
    instead of returning a response, standing in for an unreachable sidecar.
    """
    async def _scenario():
        import respx

        with respx.mock:
            sidecar_route = respx.get(f"{SIDECAR_URL}/models/available")
            # Any request hitting this route raises rather than responding.
            sidecar_route.mock(side_effect=Exception("connection refused"))

            # The router's sidecar URL is overridden only while the request runs.
            with patch("main.SIDECAR_URL", SIDECAR_URL):
                async with AsyncClient(
                    transport=ASGITransport(app=router_app),
                    base_url="http://test",
                ) as client:
                    response = await client.get("/v1/models")

            # Assertion runs with the respx mock still active, patch undone.
            assert response.status_code == 503

    asyncio.run(_scenario())
Epic: Model Switching via Sidecar — Issues #2-#3 Issue #2: Manifest schema + Sidecar foundation - sidecar/manifest.py: YAML manifest loading and profile validation - sidecar/app.py: FastAPI sidecar service with /models/available, /models/status endpoints - Router GET /v1/models: proxies to sidecar, returns OpenAI-compatible model list - Tests: 12 manifest tests, 6 sidecar endpoint tests, 3 router tests (21 total) Issue #3: Sidecar model switch + Router request queue - Sidecar POST /models/switch: stops current llama-server, starts new one, polls for readiness - Switch lock prevents concurrent switches (threading.Lock for TestClient compatibility) - Router request queue: max 10 requests, 120s hard timeout, 429 when full - Router automatic model detection: extracts model from chat body, matches against sidecar status - Full proxy endpoint with Sidecar → Main PC routing and fallback chain - Tests: 5 sidecar switch tests, 4 queue tests, 3 router integration tests (12 total) Total: 33 tests, all passing 2026-06-15 03:49:24 +03:00			`"""Tests for router /v1/models endpoint — Issue #2."""`
			`import json`
			`import asyncio`
			`import pytest`
			`from unittest.mock import patch`
			`from httpx import Response, ASGITransport, AsyncClient`

			`from main import app as router_app`

fix: change sidecar port from 8081 to 8080 The sidecar is deployed on port 8080 instead of 8081. Update all: - Default SIDECAR_PORT in sidecar/app.py - Default SIDECAR_URL in main.py (router) - deploy/llm-sidecar.service Environment - deploy/README.md (.env example + config table) - All 7 test files (conftest, circuit-breaker, fallback, queue, model-detection, sse-progress, v1-models) 2026-06-15 16:16:47 +03:00			`SIDECAR_URL = "http://localhost:8080"`
Epic: Model Switching via Sidecar — Issues #2-#3 Issue #2: Manifest schema + Sidecar foundation - sidecar/manifest.py: YAML manifest loading and profile validation - sidecar/app.py: FastAPI sidecar service with /models/available, /models/status endpoints - Router GET /v1/models: proxies to sidecar, returns OpenAI-compatible model list - Tests: 12 manifest tests, 6 sidecar endpoint tests, 3 router tests (21 total) Issue #3: Sidecar model switch + Router request queue - Sidecar POST /models/switch: stops current llama-server, starts new one, polls for readiness - Switch lock prevents concurrent switches (threading.Lock for TestClient compatibility) - Router request queue: max 10 requests, 120s hard timeout, 429 when full - Router automatic model detection: extracts model from chat body, matches against sidecar status - Full proxy endpoint with Sidecar → Main PC routing and fallback chain - Tests: 5 sidecar switch tests, 4 queue tests, 3 router integration tests (12 total) Total: 33 tests, all passing 2026-06-15 03:49:24 +03:00

			`def test_v1_models_returns_profiles_from_sidecar():`
			`"""Router /v1/models proxies to sidecar /models/available."""`
			`sidecar_profiles = [`
			`{"id": "qwen-3-8b", "name": "Qwen 3 8B", "model_path": "/path/model.gguf", "flags": {"n_ctx": 8192}},`
			`]`

			`async def run_test():`
			`import respx`
			`with respx.mock:`
			`respx.get(f"{SIDECAR_URL}/models/available").mock(`
			`return_value=Response(200, json=sidecar_profiles)`
			`)`
			`with patch("main.SIDECAR_URL", SIDECAR_URL):`
			`transport = ASGITransport(app=router_app)`
			`async with AsyncClient(transport=transport, base_url="http://test") as ac:`
			`resp = await ac.get("/v1/models")`
			`assert resp.status_code == 200`
			`data = resp.json()`
			`assert "data" in data`
			`assert len(data["data"]) == 1`
			`assert data["data"][0]["id"] == "qwen-3-8b"`
			`assert data["data"][0]["object"] == "model"`

			`asyncio.run(run_test())`


			`def test_v1_models_returns_empty_list_when_sidecar_empty():`
			`"""Router /v1/models returns empty list when sidecar has no profiles."""`
			`async def run_test():`
			`import respx`
			`with respx.mock:`
			`respx.get(f"{SIDECAR_URL}/models/available").mock(`
			`return_value=Response(200, json=[])`
			`)`
			`with patch("main.SIDECAR_URL", SIDECAR_URL):`
			`transport = ASGITransport(app=router_app)`
			`async with AsyncClient(transport=transport, base_url="http://test") as ac:`
			`resp = await ac.get("/v1/models")`
			`assert resp.status_code == 200`
			`data = resp.json()`
			`assert data["data"] == []`

			`asyncio.run(run_test())`


			`def test_v1_models_returns_503_when_sidecar_down():`
			`"""Router /v1/models returns 503 when sidecar is unreachable."""`
			`async def run_test():`
			`import respx`
			`with respx.mock:`
			`respx.get(f"{SIDECAR_URL}/models/available").mock(`
			`side_effect=Exception("connection refused")`
			`)`
			`with patch("main.SIDECAR_URL", SIDECAR_URL):`
			`transport = ASGITransport(app=router_app)`
			`async with AsyncClient(transport=transport, base_url="http://test") as ac:`
			`resp = await ac.get("/v1/models")`
			`assert resp.status_code == 503`

			`asyncio.run(run_test())`