fix: report actual profile context size in /api/show probe endpoint
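Hermes Desktop reads the context size from the 'parameters' field of the /api/show response. This field was hardcoded to 'num_ctx 4096' for every model, causing 'context too small' errors whenever the user's system prompt plus conversation exceeded 4K tokens. The endpoint now extracts the actual context size from the profile's flags ('ctx-size', falling back to 'n_ctx', then to '4096') and returns the correct value (e.g. 'num_ctx 131072' for the 128K profiles). The same value is also reported in the response's details.parameter_size field.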
This commit is contained in:
parent
4ee85972ec
commit
d935339280
7
main.py
7
main.py
@ -242,14 +242,17 @@ async def ollama_show(request: Request):
|
||||
|
||||
for p in profiles:
|
||||
if p.get("id") == model_name:
|
||||
# Extract actual context size from the profile's flags
|
||||
flags = p.get("flags", {})
|
||||
ctx_size = str(flags.get("ctx-size", flags.get("n_ctx", "4096")))
|
||||
return {
|
||||
"modelfile": "",
|
||||
"parameters": "num_ctx 4096",
|
||||
"parameters": f"num_ctx {ctx_size}",
|
||||
"template": "",
|
||||
"details": {
|
||||
"format": "gguf",
|
||||
"family": p.get("name", "llm"),
|
||||
"parameter_size": p.get("flags", {}).get("--num-ctx", "4096"),
|
||||
"parameter_size": ctx_size,
|
||||
},
|
||||
"model_info": {"id": p.get("id", "")},
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user