fix: /api/show GET support, /v1 root handler, and proxy debug logging

Three changes to debug and fix Hermes Desktop integration:

1. /api/show: Added GET handler alongside existing POST handler.
   Hermes Desktop probes with GET ?model=xxx, not POST body.
   Refactored shared lookup logic into _ollama_show_lookup().

2. /v1 root: Added a handler that returns an empty OpenAI-style list
   object ({"object": "list", "data": []}). Hermes Desktop probes this
   URL, and the failed probe (ERR_CONNECTION_REFUSED) was blocking
   full provider validation.

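3. Proxy execute(): Added debug logging for non-200 responses.
   The non-streaming path prints the request method, backend URL,
   status code, and first 500 bytes of the body; the SSE streaming
   path prints only the backend URL and status code. This helps
   diagnose why llama-server returns 400 on /v1/chat/completions.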
This commit is contained in:
root 2026-06-16 21:16:45 +00:00
parent d935339280
commit f2e62f60e6

32
main.py
View File

@ -153,6 +153,12 @@ app = FastAPI(lifespan=lifespan)
# ─── GET /v1/models — Issue #2 ──────────────────────────────────────────────
@app.get("/v1")
async def v1_root():
    """Serve the OpenAI API root so the Hermes Desktop WebUI probe succeeds.

    Returns:
        A list-shaped envelope (``object`` set to ``"list"``) whose ``data``
        array is empty.
    """
    empty_listing = {"object": "list", "data": []}
    return empty_listing
@app.get("/v1/models")
async def get_models():
"""OpenAI-compatible /v1/models endpoint proxying to Sidecar."""
@ -226,13 +232,26 @@ async def ollama_tags():
return {"models": models}
@app.get("/api/show")
async def ollama_show_get(model: str = ""):
    """Ollama-compatible model info for Hermes Desktop discovery (GET variant).

    Some Hermes Desktop versions probe /api/show with a GET request carrying
    the model name in a ``?model=`` query parameter.

    Args:
        model: Model name taken from the query string; defaults to an empty
            string when the parameter is absent.

    Returns:
        Whatever ``_ollama_show_lookup`` produces for ``model``.
    """
    model_info = await _ollama_show_lookup(model)
    return model_info
@app.post("/api/show")
async def ollama_show_post(request: Request):
    """Ollama-compatible model info for Hermes Desktop discovery (POST variant).

    Reads the model name from the ``model`` key of the JSON request body and
    delegates to ``_ollama_show_lookup``. An empty body, or a JSON object
    without a ``model`` key, is looked up with an empty model name.

    Args:
        request: Incoming request whose raw body is parsed as JSON.

    Returns:
        Whatever ``_ollama_show_lookup`` produces for the requested model.

    Raises:
        HTTPException: 400 when the body is not valid JSON or is valid JSON
            but not an object; previously both cases escaped as unhandled
            exceptions (HTTP 500).
    """
    # Imported locally so this handler does not depend on the file's
    # top-level import list, which is not fully visible from this hunk.
    from fastapi import HTTPException

    raw_body = await request.body()
    if not raw_body:
        return await _ollama_show_lookup("")

    try:
        payload = json.loads(raw_body)
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError, so this covers malformed JSON and undecodable bytes.
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from exc

    if not isinstance(payload, dict):
        # A list, string, number or null has no .get(); reject it explicitly
        # instead of failing with AttributeError.
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object"
        )

    return await _ollama_show_lookup(payload.get("model", ""))
async def _ollama_show_lookup(model_name: str):
"""Shared logic for Ollama /api/show model info lookup."""
async with httpx.AsyncClient(timeout=5.0) as client:
try:
resp = await client.get(f"{SIDECAR_URL}/models/available")
@ -507,8 +526,11 @@ async def proxy(
request.method, target,
content=body, headers=headers,
) as resp:
if resp.status_code != 200:
print(f"PROXY: {target} returned {resp.status_code} during SSE stream", flush=True)
async for chunk in resp.aiter_bytes():
yield chunk
return StreamingResponse(gen(), status_code=200)
resp = await client.request(
@ -517,6 +539,12 @@ async def proxy(
content=body,
headers=headers,
)
if resp.status_code != 200:
body_preview = resp.content[:500].decode("utf-8", errors="replace")
print(
f"PROXY: {request.method} {target} returned {resp.status_code}: {body_preview}",
flush=True,
)
return Response(
content=resp.content,
status_code=resp.status_code,