From d935339280b4c7409aa73774c0c5adc3656a9515 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2026 21:04:40 +0000 Subject: [PATCH] fix: report actual profile context size in /api/show probe endpoint Hermes Desktop reads the context size from /api/show's 'parameters' field. This was hardcoded to 'num_ctx 4096' for every model, causing 'context too small' errors when the user's system prompt + conversation exceeded 4K tokens. Now extracts the actual ctx-size from the profile's flags and returns the correct value (e.g. 'num_ctx 131072' for the 128K profiles). --- main.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 2713399..1e371fc 100644 --- a/main.py +++ b/main.py @@ -242,14 +242,17 @@ async def ollama_show(request: Request): for p in profiles: if p.get("id") == model_name: + # Extract actual context size from the profile's flags + flags = p.get("flags", {}) + ctx_size = str(flags.get("ctx-size", flags.get("n_ctx", "4096"))) return { "modelfile": "", - "parameters": "num_ctx 4096", + "parameters": f"num_ctx {ctx_size}", "template": "", "details": { "format": "gguf", "family": p.get("name", "llm"), - "parameter_size": p.get("flags", {}).get("--num-ctx", "4096"), + "parameter_size": ctx_size, }, "model_info": {"id": p.get("id", "")}, }