fix: /api/show GET support, /v1 root handler, and proxy debug logging
Three changes to debug and fix the Hermes Desktop integration:

1. /api/show: Added a GET handler alongside the existing POST handler. Hermes Desktop probes with GET ?model=xxx rather than a POST body. The shared lookup logic was refactored into _ollama_show_lookup(), which both handlers now call.
2. /v1 root: Added a handler that returns basic info. Hermes Desktop probes this URL, and ERR_CONNECTION_REFUSED was blocking full provider validation.
3. Proxy execute(): Added debug logging for non-200 responses. On the non-streaming path it prints the request method, backend URL, status code, and the first 500 bytes of the body; on the SSE streaming path it prints only the backend URL and status code. This helps diagnose why llama-server returns 400 on /v1/chat/completions.
This commit is contained in:
parent
d935339280
commit
f2e62f60e6
32
main.py
32
main.py
@ -153,6 +153,12 @@ app = FastAPI(lifespan=lifespan)
|
|||||||
|
|
||||||
|
|
||||||
# ─── GET /v1/models — Issue #2 ──────────────────────────────────────────────
|
# ─── GET /v1/models — Issue #2 ──────────────────────────────────────────────
|
||||||
|
@app.get("/v1")
|
||||||
|
async def v1_root():
|
||||||
|
"""OpenAI API root — return basic info for Hermes Desktop WebUI probe."""
|
||||||
|
return {"object": "list", "data": []}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/v1/models")
|
@app.get("/v1/models")
|
||||||
async def get_models():
|
async def get_models():
|
||||||
"""OpenAI-compatible /v1/models endpoint proxying to Sidecar."""
|
"""OpenAI-compatible /v1/models endpoint proxying to Sidecar."""
|
||||||
@ -226,13 +232,26 @@ async def ollama_tags():
|
|||||||
return {"models": models}
|
return {"models": models}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/show")
|
||||||
|
async def ollama_show_get(model: str = ""):
|
||||||
|
"""Ollama-compatible model info for Hermes Desktop discovery (GET variant).
|
||||||
|
|
||||||
|
Some Hermes Desktop versions probe /api/show via GET with a ?model= parameter.
|
||||||
|
"""
|
||||||
|
return await _ollama_show_lookup(model)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/show")
|
@app.post("/api/show")
|
||||||
async def ollama_show(request: Request):
|
async def ollama_show_post(request: Request):
|
||||||
"""Ollama-compatible model info for Hermes Desktop discovery."""
|
"""Ollama-compatible model info for Hermes Desktop discovery (POST variant)."""
|
||||||
body = await request.body()
|
body = await request.body()
|
||||||
body_data = json.loads(body) if body else {}
|
body_data = json.loads(body) if body else {}
|
||||||
model_name = body_data.get("model", "")
|
model_name = body_data.get("model", "")
|
||||||
|
return await _ollama_show_lookup(model_name)
|
||||||
|
|
||||||
|
|
||||||
|
async def _ollama_show_lookup(model_name: str):
|
||||||
|
"""Shared logic for Ollama /api/show model info lookup."""
|
||||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||||
try:
|
try:
|
||||||
resp = await client.get(f"{SIDECAR_URL}/models/available")
|
resp = await client.get(f"{SIDECAR_URL}/models/available")
|
||||||
@ -507,8 +526,11 @@ async def proxy(
|
|||||||
request.method, target,
|
request.method, target,
|
||||||
content=body, headers=headers,
|
content=body, headers=headers,
|
||||||
) as resp:
|
) as resp:
|
||||||
|
if resp.status_code != 200:
|
||||||
|
print(f"PROXY: {target} returned {resp.status_code} during SSE stream", flush=True)
|
||||||
async for chunk in resp.aiter_bytes():
|
async for chunk in resp.aiter_bytes():
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
return StreamingResponse(gen(), status_code=200)
|
return StreamingResponse(gen(), status_code=200)
|
||||||
|
|
||||||
resp = await client.request(
|
resp = await client.request(
|
||||||
@ -517,6 +539,12 @@ async def proxy(
|
|||||||
content=body,
|
content=body,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
)
|
)
|
||||||
|
if resp.status_code != 200:
|
||||||
|
body_preview = resp.content[:500].decode("utf-8", errors="replace")
|
||||||
|
print(
|
||||||
|
f"PROXY: {request.method} {target} returned {resp.status_code}: {body_preview}",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
return Response(
|
return Response(
|
||||||
content=resp.content,
|
content=resp.content,
|
||||||
status_code=resp.status_code,
|
status_code=resp.status_code,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user