30 lines
800 B
YAML
30 lines
800 B
YAML
# LLM Model Manifest
# Each profile defines a named model configuration for llama-server.
# The sidecar reads this file on every request — no restart needed.
#
# Usage:
# 1. Edit this file with available GGUFs and desired parameters
# 2. The sidecar automatically picks up changes
# 3. Use the Hermes model picker to switch models

- id: qwen-3-8b
  name: "Qwen 3 8B"
  model_path: "/home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf"
  flags:
    n_ctx: 8192
    n_gpu_layers: 35

- id: qwen-3-8b-long
  name: "Qwen 3 8B (Long Context)"
  model_path: "/home/bigt/AI/llm/qwen/qwen3-8b-q4.gguf"
  flags:
    n_ctx: 32768
    n_gpu_layers: 20

- id: llama-4-maverick
  name: "Llama 4 Maverick"
  model_path: "/home/bigt/AI/llm/llama4/llama4-maverick-q4.gguf"
  flags:
    n_ctx: 8192
    n_gpu_layers: 35