Skip to content

Commit 4915499

Browse files
tmaCopilot
andauthored
Restore HTTP-based health server, make it opt-in (#6)
* fix: restore HTTP-based health server, make it opt-in Revert the direct-upstream health probe introduced in #5, which opened a second TCP connection to the Modbus device on every check. Many Modbus devices only accept one concurrent connection, so the probe interfered with normal proxy operation in production. Restore the original passive HTTP health server that checks internal connection state without touching upstream. Make it opt-in via HEALTH_LISTEN so bare-binary users running multiple instances (the root cause of #4) no longer hit port conflicts. The Dockerfile sets HEALTH_LISTEN=:8080 so Docker users get health checks automatically. Closes #4 * fix: normalize wildcard hosts in CheckHealth, isolate config test env Co-authored-by: tma <4719+tma@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent 9b12f3f commit 4915499

File tree

7 files changed

+118
-85
lines changed

7 files changed

+118
-85
lines changed

Dockerfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ FROM scratch
3131

3232
COPY --from=builder /app/mbproxy /mbproxy
3333

34-
HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
34+
ENV HEALTH_LISTEN=:8080
35+
EXPOSE 8080
36+
37+
HEALTHCHECK --interval=5s --timeout=3s --start-period=10s --retries=3 \
3538
CMD ["/mbproxy", "-health"]
3639

3740
ENTRYPOINT ["/mbproxy"]

cmd/mbproxy/main.go

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ import (
88
"os"
99
"os/signal"
1010
"syscall"
11+
"time"
1112

1213
"github.com/tma/mbproxy/internal/config"
14+
"github.com/tma/mbproxy/internal/health"
1315
"github.com/tma/mbproxy/internal/logging"
14-
"github.com/tma/mbproxy/internal/modbus"
1516
"github.com/tma/mbproxy/internal/proxy"
1617
)
1718

@@ -20,7 +21,12 @@ func main() {
2021
flag.Parse()
2122

2223
if *healthCheck {
23-
if err := runHealthCheck(); err != nil {
24+
addr := os.Getenv("HEALTH_LISTEN")
25+
if addr == "" {
26+
fmt.Fprintln(os.Stderr, "HEALTH_LISTEN is not set")
27+
os.Exit(1)
28+
}
29+
if err := health.CheckHealth(addr); err != nil {
2430
fmt.Fprintln(os.Stderr, err)
2531
os.Exit(1)
2632
}
@@ -48,6 +54,22 @@ func main() {
4854
os.Exit(1)
4955
}
5056

57+
// Start health server if configured
58+
var hs *health.Server
59+
if cfg.HealthListen != "" {
60+
hs = health.NewServer(cfg.HealthListen, p, logger)
61+
hsLn, err := hs.Listen()
62+
if err != nil {
63+
logger.Error("failed to start health server", "error", err)
64+
os.Exit(1)
65+
}
66+
go func() {
67+
if err := hs.Serve(hsLn); err != nil {
68+
logger.Error("health server error", "error", err)
69+
}
70+
}()
71+
}
72+
5173
// Start proxy in background
5274
errCh := make(chan error, 1)
5375
go func() {
@@ -68,31 +90,18 @@ func main() {
6890
// Graceful shutdown
6991
cancel()
7092

93+
if hs != nil {
94+
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second)
95+
defer shutdownCancel()
96+
if err := hs.Shutdown(shutdownCtx); err != nil {
97+
logger.Error("health server shutdown error", "error", err)
98+
}
99+
}
100+
71101
if err := p.Shutdown(cfg.ShutdownTimeout); err != nil {
72102
logger.Error("shutdown error", "error", err)
73103
os.Exit(1)
74104
}
75105

76106
logger.Info("shutdown complete")
77107
}
78-
79-
func runHealthCheck() error {
80-
cfg, err := config.Load()
81-
if err != nil {
82-
return err
83-
}
84-
85-
return checkUpstreamHealth(cfg, logging.New(cfg.LogLevel))
86-
}
87-
88-
func checkUpstreamHealth(cfg *config.Config, logger *slog.Logger) (err error) {
89-
client := modbus.NewClient(cfg.Upstream, cfg.Timeout, cfg.RequestDelay, cfg.ConnectDelay, logger)
90-
defer func() {
91-
closeErr := client.Close()
92-
if err == nil && closeErr != nil {
93-
err = closeErr
94-
}
95-
}()
96-
97-
return client.Connect()
98-
}

cmd/mbproxy/main_test.go

Lines changed: 0 additions & 59 deletions
This file was deleted.

internal/config/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type Config struct {
3030
RequestDelay time.Duration
3131
ConnectDelay time.Duration
3232
ShutdownTimeout time.Duration
33+
HealthListen string
3334
LogLevel string
3435
}
3536

@@ -46,6 +47,7 @@ func Load() (*Config, error) {
4647
RequestDelay: 0,
4748
ConnectDelay: 0,
4849
ShutdownTimeout: 30 * time.Second,
50+
HealthListen: os.Getenv("HEALTH_LISTEN"),
4951
LogLevel: GetEnv("LOG_LEVEL", "INFO"),
5052
}
5153

internal/config/config_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ func TestLoad_Defaults(t *testing.T) {
1919
os.Unsetenv("MODBUS_READONLY")
2020
os.Unsetenv("MODBUS_TIMEOUT")
2121
os.Unsetenv("MODBUS_SHUTDOWN_TIMEOUT")
22+
os.Unsetenv("HEALTH_LISTEN")
2223
os.Unsetenv("LOG_LEVEL")
2324

2425
cfg, err := Load()
@@ -56,6 +57,9 @@ func TestLoad_Defaults(t *testing.T) {
5657
if cfg.ShutdownTimeout != 30*time.Second {
5758
t.Errorf("expected 30s shutdown timeout, got %v", cfg.ShutdownTimeout)
5859
}
60+
if cfg.HealthListen != "" {
61+
t.Errorf("expected empty health listen, got %s", cfg.HealthListen)
62+
}
5963
if cfg.LogLevel != "INFO" {
6064
t.Errorf("expected INFO log level, got %s", cfg.LogLevel)
6165
}
@@ -192,3 +196,27 @@ func TestLoad_InvalidDuration(t *testing.T) {
192196
os.Unsetenv(envVar)
193197
}
194198
}
199+
200+
func TestLoad_HealthListenCustom(t *testing.T) {
201+
// Ensure optional env vars that Load() may read do not inherit
202+
// potentially invalid values from the surrounding environment.
203+
t.Setenv("MODBUS_LISTEN", "")
204+
t.Setenv("MODBUS_READONLY", "")
205+
t.Setenv("MODBUS_CACHE_TTL", "")
206+
t.Setenv("MODBUS_TIMEOUT", "")
207+
t.Setenv("MODBUS_REQUEST_DELAY", "")
208+
t.Setenv("MODBUS_CONNECT_DELAY", "")
209+
t.Setenv("MODBUS_SHUTDOWN_TIMEOUT", "")
210+
211+
// Set required and explicitly tested env vars.
212+
t.Setenv("MODBUS_UPSTREAM", "localhost:502")
213+
t.Setenv("HEALTH_LISTEN", ":9090")
214+
215+
cfg, err := Load()
216+
if err != nil {
217+
t.Fatalf("unexpected error: %v", err)
218+
}
219+
if cfg.HealthListen != ":9090" {
220+
t.Errorf("expected :9090, got %s", cfg.HealthListen)
221+
}
222+
}

internal/health/health.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,21 @@ func (s *Server) Shutdown(ctx context.Context) error {
107107

108108
// CheckHealth performs an HTTP health check against the given address.
109109
// It returns nil if the endpoint responds with 200 OK.
110+
// Wildcard listen addresses (e.g. ":8080", "0.0.0.0:8080", "[::]:8080") are
111+
// normalized to localhost so they can be used as dial targets. IPv6 addresses
112+
// are handled correctly via net.JoinHostPort.
110113
func CheckHealth(addr string) error {
111114
// Resolve the address so we can build a proper URL.
112115
host, port, err := net.SplitHostPort(addr)
113116
if err != nil {
114117
return fmt.Errorf("invalid address %q: %w", addr, err)
115118
}
116-
if host == "" {
119+
// Normalize wildcard and empty hosts to localhost.
120+
if host == "" || host == "0.0.0.0" || host == "::" {
117121
host = "localhost"
118122
}
119123

120-
url := fmt.Sprintf("http://%s:%s/health", host, port)
124+
url := fmt.Sprintf("http://%s/health", net.JoinHostPort(host, port))
121125

122126
client := &http.Client{Timeout: 3 * time.Second}
123127
resp, err := client.Get(url)

internal/health/health_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,49 @@ func TestCheckHealth_ConnectionRefused(t *testing.T) {
149149
t.Error("expected error for connection refused")
150150
}
151151
}
152+
153+
func TestCheckHealth_WildcardAddresses(t *testing.T) {
154+
// Start a test server bound to localhost so wildcard addresses can reach it.
155+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
156+
w.WriteHeader(http.StatusOK)
157+
json.NewEncoder(w).Encode(Response{Status: "ok"})
158+
}))
159+
defer ts.Close()
160+
161+
_, port, err := net.SplitHostPort(ts.Listener.Addr().String())
162+
if err != nil {
163+
t.Fatalf("failed to parse test server address: %v", err)
164+
}
165+
166+
// Each of these listen-style addresses should be normalized to localhost.
167+
wildcards := []string{
168+
":" + port, // empty host (":8080")
169+
"0.0.0.0:" + port, // IPv4 wildcard
170+
"[::]:" + port, // IPv6 wildcard
171+
}
172+
for _, addr := range wildcards {
173+
if err := CheckHealth(addr); err != nil {
174+
t.Errorf("CheckHealth(%q) expected success, got: %v", addr, err)
175+
}
176+
}
177+
}
178+
179+
func TestCheckHealth_IPv6Loopback(t *testing.T) {
180+
// Start a test server bound to the IPv6 loopback address.
181+
ln, err := net.Listen("tcp6", "[::1]:0")
182+
if err != nil {
183+
t.Skip("IPv6 loopback not available:", err)
184+
}
185+
ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
186+
w.WriteHeader(http.StatusOK)
187+
json.NewEncoder(w).Encode(Response{Status: "ok"})
188+
}))
189+
ts.Listener = ln
190+
ts.Start()
191+
defer ts.Close()
192+
193+
addr := ts.Listener.Addr().String() // "[::1]:PORT"
194+
if err := CheckHealth(addr); err != nil {
195+
t.Errorf("CheckHealth(%q) expected success for IPv6 loopback, got: %v", addr, err)
196+
}
197+
}

0 commit comments

Comments
 (0)