From bcf45129f15cb336e1e7cd720382c94a72bfe00c Mon Sep 17 00:00:00 2001
From: root
Date: Tue, 16 Jun 2026 21:46:07 +0000
Subject: [PATCH] fix: add --host 0.0.0.0 to llama-server command

llama-server defaults to binding on 127.0.0.1 (localhost only).
When the router runs on a separate Docker host (10.0.4.100), all
chat completion requests fail with:

  PROXY EXCEPTION on primary http://10.0.4.11:8081/v1/chat/completions:
  ConnectError: All connection attempts failed

Added --host 0.0.0.0 after --port so llama-server listens on all
network interfaces, reachable from the Docker host.
---
 sidecar/app.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sidecar/app.py b/sidecar/app.py
index 1a82437..734ba9b 100644
--- a/sidecar/app.py
+++ b/sidecar/app.py
@@ -111,6 +111,7 @@ async def _start_llama_server(profile: dict):
     cmd = ["/home/bigt/AI/llama.cpp/build/bin/llama-server"]
     cmd += ["--model", profile["model_path"]]
     cmd += ["--port", str(LLAMA_SERVER_PORT)]
+    cmd += ["--host", "0.0.0.0"]
     for key, value in profile.get("flags", {}).items():
         cmd += ["--" + _flag_key(key), _flag_value(value)]