fix: report actual profile context size in /api/show probe endpoint
Hermes Desktop reads the context size from the 'parameters' field of the /api/show response. That field was hardcoded to 'num_ctx 4096' for every model, causing 'context too small' errors whenever the user's system prompt plus conversation exceeded 4K tokens. The endpoint now extracts the actual context size from the profile's flags ('ctx-size', falling back to 'n_ctx', then to 4096) and returns the correct value (e.g. 'num_ctx 131072' for the 128K profiles).
This commit is contained in:
parent
4ee85972ec
commit
d935339280
7
main.py
7
main.py
@@ -242,14 +242,17 @@ async def ollama_show(request: Request):
|
|||||||
|
|
||||||
for p in profiles:
|
for p in profiles:
|
||||||
if p.get("id") == model_name:
|
if p.get("id") == model_name:
|
||||||
|
# Extract actual context size from the profile's flags
|
||||||
|
flags = p.get("flags", {})
|
||||||
|
ctx_size = str(flags.get("ctx-size", flags.get("n_ctx", "4096")))
|
||||||
return {
|
return {
|
||||||
"modelfile": "",
|
"modelfile": "",
|
||||||
"parameters": "num_ctx 4096",
|
"parameters": f"num_ctx {ctx_size}",
|
||||||
"template": "",
|
"template": "",
|
||||||
"details": {
|
"details": {
|
||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"family": p.get("name", "llm"),
|
"family": p.get("name", "llm"),
|
||||||
"parameter_size": p.get("flags", {}).get("--num-ctx", "4096"),
|
"parameter_size": ctx_size,
|
||||||
},
|
},
|
||||||
"model_info": {"id": p.get("id", "")},
|
"model_info": {"id": p.get("id", "")},
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user