fix: /api/show GET support, /v1 root handler, and proxy debug logging

Three changes to debug and fix Hermes Desktop integration:
1. /api/show: Added a GET handler alongside the existing POST handler. Hermes Desktop probes with GET /api/show?model=xxx rather than a POST body. The shared lookup logic was refactored into _ollama_show_lookup().
2. /v1 root: Added a handler that returns an empty list object. Hermes Desktop probes this URL, and the failed probe (ERR_CONNECTION_REFUSED) was blocking full provider validation.
3. Proxy execute(): Added debug logging for non-200 responses. For regular requests it prints the method, backend URL, status code, and first 500 bytes of the body; for SSE streams it prints only the backend URL and status code. This helps diagnose why llama-server returns 400 on /v1/chat/completions.
2026-06-16 21:16:45 +00:00 · 2026-06-16 21:16:45 +00:00 · f2e62f60e6
commit f2e62f60e6
parent d935339280
1 changed files with 30 additions and 2 deletions
--- a/main.py
+++ b/main.py
@ -153,6 +153,12 @@ app = FastAPI(lifespan=lifespan)
 # ─── GET /v1/models — Issue #2 ──────────────────────────────────────────────
@app.get("/v1")
async def v1_root():
    """Answer the Hermes Desktop WebUI probe of the OpenAI API root.

    Returns an empty list object (``object`` set to ``"list"`` with no
    ``data`` entries).
    """
    empty_listing = {"object": "list", "data": []}
    return empty_listing
@app.get("/v1/models")
 async def get_models():
    """OpenAI-compatible /v1/models endpoint proxying to Sidecar."""
@ -226,13 +232,26 @@ async def ollama_tags():
    return {"models": models}
@app.get("/api/show")
async def ollama_show_get(model: str = ""):
    """Serve Ollama-compatible model info for Hermes Desktop discovery via GET.

    Some Hermes Desktop versions probe /api/show with GET and a ?model=
    parameter, so this variant forwards ``model`` (an empty string when it is
    omitted) to the shared ``_ollama_show_lookup`` helper and returns its
    result unchanged.
    """
    lookup_result = await _ollama_show_lookup(model)
    return lookup_result
@app.post("/api/show")
-async def ollama_show(request: Request):
+async def ollama_show_post(request: Request):
-    """Ollama-compatible model info for Hermes Desktop discovery."""
+    """Ollama-compatible model info for Hermes Desktop discovery (POST variant)."""
    body = await request.body()
    body_data = json.loads(body) if body else {}
    model_name = body_data.get("model", "")
    return await _ollama_show_lookup(model_name)
 async def _ollama_show_lookup(model_name: str):
    """Shared logic for Ollama /api/show model info lookup."""
    async with httpx.AsyncClient(timeout=5.0) as client:
        try:
            resp = await client.get(f"{SIDECAR_URL}/models/available")
@ -507,8 +526,11 @@ async def proxy(
                        request.method, target,
                        content=body, headers=headers,
                    ) as resp:
                        if resp.status_code != 200:
                            print(f"PROXY: {target} returned {resp.status_code} during SSE stream", flush=True)
                        async for chunk in resp.aiter_bytes():
                            yield chunk
                return StreamingResponse(gen(), status_code=200)
            resp = await client.request(
@ -517,6 +539,12 @@ async def proxy(
                content=body,
                headers=headers,
            )
            if resp.status_code != 200:
                body_preview = resp.content[:500].decode("utf-8", errors="replace")
                print(
                    f"PROXY: {request.method} {target} returned {resp.status_code}: {body_preview}",
                    flush=True,
                )
            return Response(
                content=resp.content,
                status_code=resp.status_code,