From d935339280b4c7409aa73774c0c5adc3656a9515 Mon Sep 17 00:00:00 2001
From: root <root@hermes.chiabur.xyz>
Date: Tue, 16 Jun 2026 21:04:40 +0000
Subject: [PATCH] fix: report actual profile context size in /api/show probe
 endpoint

Hermes Desktop reads the context size from /api/show's 'parameters'
field.  This was hardcoded to 'num_ctx 4096' for every model, causing
'context too small' errors when the user's system prompt + conversation
exceeded 4K tokens.

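Now extracts the actual ctx-size from the profile's flags and returns
the correct value (e.g. 'num_ctx 131072' for the 128K profiles).  The
value is read from the 'ctx-size' flag, falling back to 'n_ctx' and
then to 4096 when neither is set.  'details.parameter_size', which
previously looked up a '--num-ctx' flag, now reports the same value.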
---
 main.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/main.py b/main.py
index 2713399..1e371fc 100644
--- a/main.py
+++ b/main.py
@@ -242,14 +242,17 @@ async def ollama_show(request: Request):
 
     for p in profiles:
         if p.get("id") == model_name:
+            # Extract actual context size from the profile's flags
+            flags = p.get("flags", {})
+            ctx_size = str(flags.get("ctx-size", flags.get("n_ctx", "4096")))
             return {
                 "modelfile": "",
-                "parameters": "num_ctx 4096",
+                "parameters": f"num_ctx {ctx_size}",
                 "template": "",
                 "details": {
                     "format": "gguf",
                     "family": p.get("name", "llm"),
-                    "parameter_size": p.get("flags", {}).get("--num-ctx", "4096"),
+                    "parameter_size": ctx_size,
                 },
                 "model_info": {"id": p.get("id", "")},
             }