From bcf45129f15cb336e1e7cd720382c94a72bfe00c Mon Sep 17 00:00:00 2001
From: root <root@hermes.chiabur.xyz>
Date: Tue, 16 Jun 2026 21:46:07 +0000
Subject: [PATCH] fix: add --host 0.0.0.0 to llama-server command

By default, llama-server binds to 127.0.0.1 (localhost only).
When the router runs on a separate Docker host (10.0.4.100), all
chat completion requests fail with:

  PROXY EXCEPTION on primary http://10.0.4.11:8081/v1/chat/completions:
    ConnectError: All connection attempts failed

Add --host 0.0.0.0 after --port so that llama-server listens on all
network interfaces and is reachable from the router's Docker host.
---
 sidecar/app.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sidecar/app.py b/sidecar/app.py
index 1a82437..734ba9b 100644
--- a/sidecar/app.py
+++ b/sidecar/app.py
@@ -111,6 +111,7 @@ async def _start_llama_server(profile: dict):
     cmd = ["/home/bigt/AI/llama.cpp/build/bin/llama-server"]
     cmd += ["--model", profile["model_path"]]
     cmd += ["--port", str(LLAMA_SERVER_PORT)]
+    cmd += ["--host", "0.0.0.0"]
     for key, value in profile.get("flags", {}).items():
         cmd += ["--" + _flag_key(key), _flag_value(value)]