fix: report actual profile context size in /api/show probe endpoint

Hermes Desktop reads the context size from the 'parameters' field of the
/api/show response. That field was hardcoded to 'num_ctx 4096' for every
model, causing 'context too small' errors whenever the user's system
prompt plus conversation exceeded 4K tokens.

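The endpoint now extracts the actual context size from the profile's flags
('ctx-size', falling back to 'n_ctx', then to '4096' if neither is set) and
returns the correct value (e.g. 'num_ctx 131072' for the 128K profiles).
The same value is also reported in 'details.parameter_size', replacing the
previous lookup of a '--num-ctx' flag.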
This commit is contained in:
root 2026-06-16 21:04:40 +00:00
parent 4ee85972ec
commit d935339280

View File

@ -242,14 +242,17 @@ async def ollama_show(request: Request):
for p in profiles:
if p.get("id") == model_name:
# Extract actual context size from the profile's flags
flags = p.get("flags", {})
ctx_size = str(flags.get("ctx-size", flags.get("n_ctx", "4096")))
return {
"modelfile": "",
"parameters": "num_ctx 4096",
"parameters": f"num_ctx {ctx_size}",
"template": "",
"details": {
"format": "gguf",
"family": p.get("name", "llm"),
"parameter_size": p.get("flags", {}).get("--num-ctx", "4096"),
"parameter_size": ctx_size,
},
"model_info": {"id": p.get("id", "")},
}