From f2e62f60e68fb16f3df9a9434cbae04f8b9733bc Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2026 21:16:45 +0000 Subject: [PATCH] fix: /api/show GET support, /v1 root handler, and proxy debug logging Three changes to debug and fix Hermes Desktop integration: 1. /api/show: Added GET handler alongside existing POST handler. Hermes Desktop probes with GET ?model=xxx, not POST body. Refactored shared lookup logic into _ollama_show_lookup(). 2. /v1 root: Added handler returning basic info. Hermes Desktop probes this URL and ERR_CONNECTION_REFUSED was blocking full provider validation. 3. Proxy execute(): Added debug logging for non-200 responses. Prints the backend URL, status code, and first 500 bytes of body to help diagnose why llama-server returns 400 on /v1/chat/completions. --- main.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 1e371fc..53d2b3f 100644 --- a/main.py +++ b/main.py @@ -153,6 +153,12 @@ app = FastAPI(lifespan=lifespan) # ─── GET /v1/models — Issue #2 ────────────────────────────────────────────── +@app.get("/v1") +async def v1_root(): + """OpenAI API root — return basic info for Hermes Desktop WebUI probe.""" + return {"object": "list", "data": []} + + @app.get("/v1/models") async def get_models(): """OpenAI-compatible /v1/models endpoint proxying to Sidecar.""" @@ -226,13 +232,26 @@ async def ollama_tags(): return {"models": models} +@app.get("/api/show") +async def ollama_show_get(model: str = ""): + """Ollama-compatible model info for Hermes Desktop discovery (GET variant). + + Some Hermes Desktop versions probe /api/show via GET with a ?model= parameter. + """ + return await _ollama_show_lookup(model) + + @app.post("/api/show") -async def ollama_show(request: Request): - """Ollama-compatible model info for Hermes Desktop discovery.""" +async def ollama_show_post(request: Request): + """Ollama-compatible model info for Hermes Desktop discovery (POST variant).""" body = await request.body() body_data = json.loads(body) if body else {} model_name = body_data.get("model", "") + return await _ollama_show_lookup(model_name) + +async def _ollama_show_lookup(model_name: str): + """Shared logic for Ollama /api/show model info lookup.""" async with httpx.AsyncClient(timeout=5.0) as client: try: resp = await client.get(f"{SIDECAR_URL}/models/available") @@ -507,8 +526,11 @@ async def proxy( request.method, target, content=body, headers=headers, ) as resp: + if resp.status_code != 200: + print(f"PROXY: {target} returned {resp.status_code} during SSE stream", flush=True) async for chunk in resp.aiter_bytes(): yield chunk + return StreamingResponse(gen(), status_code=200) resp = await client.request( @@ -517,6 +539,12 @@ async def proxy( content=body, headers=headers, ) + if resp.status_code != 200: + body_preview = resp.content[:500].decode("utf-8", errors="replace") + print( + f"PROXY: {request.method} {target} returned {resp.status_code}: {body_preview}", + flush=True, + ) return Response( content=resp.content, status_code=resp.status_code,