The sidecar is deployed on port 8080, not 8081. Update every reference to the old port: - Default SIDECAR_PORT in sidecar/app.py - Default SIDECAR_URL in main.py (router) - deploy/llm-sidecar.service Environment - deploy/README.md (.env example + config table) - All 7 test files (conftest, circuit-breaker, fallback, queue, model-detection, sse-progress, v1-models)
102 lines
4.4 KiB
Python
102 lines
4.4 KiB
Python
"""Tests for LXC fallback chain — Issue #7.
|
|
|
|
Full fallback: Main PC → OpenRouter → LXC. 503 when all backends down.
|
|
Uses conftest.py patches for URL mocking.
|
|
"""
|
|
import asyncio
|
|
import pytest
|
|
from httpx import Response, ASGITransport, AsyncClient
|
|
import respx
|
|
|
|
import main
|
|
|
|
|
|
class TestFallbackChain:
    """Tests for the full fallback chain.

    Each test posts a chat completion to the router app in-process (via
    ``ASGITransport``) while respx intercepts the outbound backend requests.
    The URLs below are the endpoints these tests mock; per the module
    docstring, conftest.py is what points the router at them.
    """

    # Mocked backend endpoints, kept in one place so that a port change is a
    # single-line edit instead of a search-and-replace across every test.
    SIDECAR_STATUS_URL = "http://localhost:8080/models/status"
    OPENROUTER_CHAT_URL = "https://openrouter.ai/v1/chat/completions"
    LXC_MODELS_URL = "http://localhost:9999/v1/models"
    LXC_CHAT_URL = "http://localhost:9999/v1/chat/completions"

    # Request body shared by every test; never mutated.
    CHAT_REQUEST = {"model": "qwen-3-8b", "messages": [{"role": "user", "content": "hi"}]}

    async def _post_chat(self):
        """POST the shared chat request to the router app and return the response.

        Must be awaited while the respx mock context is active so the router's
        outbound requests hit the mocked routes.
        """
        transport = ASGITransport(app=main.app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            return await client.post("/v1/chat/completions", json=self.CHAT_REQUEST)

    def test_openrouter_failure_triggers_lxc(self):
        """When OpenRouter fails with network error, router falls back to LXC."""

        async def run_test():
            with respx.mock:
                # Sidecar is down — triggers fallback chain
                respx.get(self.SIDECAR_STATUS_URL).mock(
                    return_value=Response(503, json={"status": "error", "message": "not ready"})
                )
                # OpenRouter fails with network error
                respx.post(self.OPENROUTER_CHAT_URL).mock(
                    side_effect=Exception("Connection refused")
                )
                # LXC health check passes
                respx.get(self.LXC_MODELS_URL).mock(
                    return_value=Response(200, json={"data": []})
                )
                # LXC works for chat completion
                respx.post(self.LXC_CHAT_URL).mock(
                    return_value=Response(
                        200, json={"choices": [{"message": {"content": "Hello from LXC"}}]}
                    )
                )

                resp = await self._post_chat()

                assert resp.status_code == 200
                # The body proves the answer came from the LXC mock specifically.
                assert resp.json()["choices"][0]["message"]["content"] == "Hello from LXC"

        asyncio.run(run_test())

    def test_all_backends_down_returns_503(self):
        """When all backends are down, router returns 503."""

        async def run_test():
            with respx.mock:
                # Sidecar down
                respx.get(self.SIDECAR_STATUS_URL).mock(
                    side_effect=Exception("connection refused")
                )
                # OpenRouter down
                respx.post(self.OPENROUTER_CHAT_URL).mock(
                    side_effect=Exception("timeout")
                )
                # LXC down
                respx.get(self.LXC_MODELS_URL).mock(
                    side_effect=Exception("connection refused")
                )

                resp = await self._post_chat()

                assert resp.status_code == 503

        asyncio.run(run_test())

    def test_lxc_health_check_before_routing(self):
        """Router checks LXC health before routing to it."""

        async def run_test():
            with respx.mock:
                # Sidecar down, OpenRouter down
                respx.get(self.SIDECAR_STATUS_URL).mock(
                    side_effect=Exception("connection refused")
                )
                respx.post(self.OPENROUTER_CHAT_URL).mock(
                    side_effect=Exception("timeout")
                )
                # LXC health check passes; keep the route so the probe can be asserted
                lxc_health = respx.get(self.LXC_MODELS_URL).mock(
                    return_value=Response(200, json={"data": []})
                )
                # Then the actual chat completion
                respx.post(self.LXC_CHAT_URL).mock(
                    return_value=Response(200, json={"choices": [{"message": {"content": "LXC"}}]})
                )

                resp = await self._post_chat()

                assert resp.status_code == 200
                # A 200 alone would also pass if the router skipped the probe,
                # so assert the health endpoint was actually requested. This
                # must run inside the mock context, while call history exists.
                # NOTE(review): this confirms the probe happened, not its
                # ordering relative to the chat-completion call.
                assert lxc_health.called, "router never probed the LXC health endpoint"

        asyncio.run(run_test())