---
# LLM Model Manifest
# Each profile defines a named model configuration for llama-server.
# The sidecar reads this file on every request — no restart needed.
#
# Usage:
#   1. Edit this file with available GGUFs and desired parameters
#   2. The sidecar automatically picks up changes
#   3. Use the Hermes model picker to switch models
#
# Fields used by the profiles below:
#   id          profile identifier (each entry below uses a distinct one)
#   name        human-readable label for the profile
#   model_path  absolute path to the GGUF file
#   flags       per-profile parameters (n_ctx, n_gpu_layers)
#
# NOTE(review): how the sidecar translates `flags` into llama-server options
# is not visible from this file — confirm before adding new keys.

- id: qwen-3-8b
  name: "Qwen 3 8B"
  model_path: "/home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf"
  flags:
    n_ctx: 8192
    n_gpu_layers: 35

# Same GGUF as qwen-3-8b, with a larger n_ctx and fewer n_gpu_layers.
# NOTE(review): presumably the lower layer count leaves GPU memory for the
# bigger context — confirm on the target machine.
- id: qwen-3-8b-long
  name: "Qwen 3 8B (Long Context)"
  model_path: "/home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf"
  flags:
    n_ctx: 32768
    n_gpu_layers: 20

- id: llama-4-maverick
  name: "Llama 4 Maverick"
  model_path: "/home/bigt/AI/llm/llama4/llama4-maverick-q4.gguf"
  flags:
    n_ctx: 8192
    n_gpu_layers: 35