fix: /api/show GET support, /v1 root handler, and proxy debug logging
Three changes to debug and fix Hermes Desktop integration: 1. /api/show: Added GET handler alongside existing POST handler. Hermes Desktop probes with GET ?model=xxx, not POST body. Refactored shared lookup logic into _ollama_show_lookup(). 2. /v1 root: Added handler returning basic info. Hermes Desktop probes this URL and ERR_CONNECTION_REFUSED was blocking full provider validation. 3. Proxy execute(): Added debug logging for non-200 responses. Prints the backend URL, status code, and first 500 bytes of body to help diagnose why llama-server returns 400 on /v1/chat/completions.
This commit is contained in:
parent
d935339280
commit
f2e62f60e6
32
main.py
32
main.py
@ -153,6 +153,12 @@ app = FastAPI(lifespan=lifespan)
|
||||
|
||||
|
||||
# ─── GET /v1/models — Issue #2 ──────────────────────────────────────────────
|
||||
@app.get("/v1")
async def v1_root():
    """Answer a probe of the bare ``/v1`` OpenAI API root.

    Hermes Desktop's WebUI requests this URL, so the route responds with a
    minimal, empty OpenAI-style list object rather than leaving the path
    unhandled.
    """
    payload = {"object": "list", "data": []}
    return payload
|
||||
|
||||
|
||||
@app.get("/v1/models")
|
||||
async def get_models():
|
||||
"""OpenAI-compatible /v1/models endpoint proxying to Sidecar."""
|
||||
@ -226,13 +232,26 @@ async def ollama_tags():
|
||||
return {"models": models}
|
||||
|
||||
|
||||
@app.get("/api/show")
async def ollama_show_get(model: str = ""):
    """GET flavour of the Ollama-compatible ``/api/show`` model-info route.

    Some Hermes Desktop versions probe this endpoint with a ``?model=``
    query parameter instead of a POST body. The value (an empty string
    when the parameter is omitted) is handed to the shared lookup helper
    and its result is returned unchanged.
    """
    model_info = await _ollama_show_lookup(model)
    return model_info
|
||||
|
||||
|
||||
@app.post("/api/show")
async def ollama_show_post(request: Request):
    """POST flavour of the Ollama-compatible ``/api/show`` model-info route.

    Reads the raw request body, extracts the ``"model"`` field from its JSON
    object and delegates to the shared lookup helper.

    An empty body, a body that is not valid JSON, a JSON value that is not
    an object, or a non-string ``"model"`` value are all treated as
    "no model given" (empty name) instead of raising and surfacing as a
    500 to the probing client.
    """
    raw_body = await request.body()

    try:
        body_data = json.loads(raw_body) if raw_body else {}
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; handle a malformed probe like an empty body.
        print("OLLAMA: /api/show received a malformed JSON body", flush=True)
        body_data = {}

    # Only a JSON object can carry a "model" key; lists, strings, numbers
    # and null have no .get().
    model_name = body_data.get("model", "") if isinstance(body_data, dict) else ""
    if not isinstance(model_name, str):
        model_name = ""

    return await _ollama_show_lookup(model_name)
|
||||
|
||||
|
||||
async def _ollama_show_lookup(model_name: str):
|
||||
"""Shared logic for Ollama /api/show model info lookup."""
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
try:
|
||||
resp = await client.get(f"{SIDECAR_URL}/models/available")
|
||||
@ -507,8 +526,11 @@ async def proxy(
|
||||
request.method, target,
|
||||
content=body, headers=headers,
|
||||
) as resp:
|
||||
if resp.status_code != 200:
|
||||
print(f"PROXY: {target} returned {resp.status_code} during SSE stream", flush=True)
|
||||
async for chunk in resp.aiter_bytes():
|
||||
yield chunk
|
||||
|
||||
return StreamingResponse(gen(), status_code=200)
|
||||
|
||||
resp = await client.request(
|
||||
@ -517,6 +539,12 @@ async def proxy(
|
||||
content=body,
|
||||
headers=headers,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
body_preview = resp.content[:500].decode("utf-8", errors="replace")
|
||||
print(
|
||||
f"PROXY: {request.method} {target} returned {resp.status_code}: {body_preview}",
|
||||
flush=True,
|
||||
)
|
||||
return Response(
|
||||
content=resp.content,
|
||||
status_code=resp.status_code,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user