fix: report actual profile context size in /api/show probe endpoint

Hermes Desktop reads the context size from the 'parameters' field of the /api/show response. That field was hardcoded to 'num_ctx 4096' for every model, causing 'context too small' errors whenever the user's system prompt plus conversation exceeded 4K tokens. The endpoint now extracts the actual context size from the profile's flags ('ctx-size', falling back to 'n_ctx', then to 4096) and returns the correct value (e.g. 'num_ctx 131072' for the 128K profiles).
2026-06-16 21:04:40 +00:00 · 2026-06-16 21:04:40 +00:00 · d935339280
commit d935339280
parent 4ee85972ec
1 changed file with 5 additions and 2 deletions
--- a/main.py
+++ b/main.py
@@ -242,14 +242,17 @@ async def ollama_show(request: Request):

    for p in profiles:
        if p.get("id") == model_name:
+            # Extract actual context size from the profile's flags
+            flags = p.get("flags", {})
+            ctx_size = str(flags.get("ctx-size", flags.get("n_ctx", "4096")))
            return {
                "modelfile": "",
-                "parameters": "num_ctx 4096",
+                "parameters": f"num_ctx {ctx_size}",
                "template": "",
                "details": {
                    "format": "gguf",
                    "family": p.get("name", "llm"),
-                    "parameter_size": p.get("flags", {}).get("--num-ctx", "4096"),
+                    "parameter_size": ctx_size,
                },
                "model_info": {"id": p.get("id", "")},
            }