From 5b7e52a74fa77e8cf1a8e6f525236e1dd35c5f1b Mon Sep 17 00:00:00 2001 From: Wong Chieh Yie Date: Sat, 2 May 2026 11:15:53 +0800 Subject: [PATCH] fix(pool): add connect timeout to prevent pool worker stall on slow TCP dial MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pool.getConn() called conn.Connect() synchronously, blocking the single pool worker goroutine for the full OS TCP timeout (~20 s on Windows) when a connection attempt hung. This stalled ALL concurrent HTTP requests. Root cause: upgrading go.mod from go 1.17 to go 1.25.0 activates the Go 1.21+ DNS behaviour, where net.Dialer tries IPv6 (::1) before IPv4 for localhost. If IPv6 packets are dropped rather than refused, the SYN times out silently before falling back to IPv4 – blocking the pool worker for the entire duration. Fix: run Connect() in a goroutine and select on its result, a configurable ConnectTimeout (default 10 s, overridable via --connect-timeout / POOL_CONNECT_TIMEOUT; the timer is only armed when the value is positive), and cancellation of the request context, so the pool worker waits at most ConnectTimeout. When the wait is abandoned on timeout or cancellation, a background goroutine waits for the orphaned Connect() call to return and then closes that connection. 
--- cmd/storm/server.go | 3 ++- pool.go | 41 ++++++++++++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/cmd/storm/server.go b/cmd/storm/server.go index a2ab958..2b3fdee 100644 --- a/cmd/storm/server.go +++ b/cmd/storm/server.go @@ -118,6 +118,7 @@ type DelugeOptions struct { MaxConnections int `long:"max-connections" env:"POOL_MAX_CONNECTIONS" required:"true" default:"5" description:"Maximum concurrent Deluge RPC connections"` IdleTime *Duration `long:"idle-time" env:"POOL_IDLE_TIME" required:"true" default:"30s" description:"Close idle Deluge RPC connections after this duration"` + ConnectTimeout *Duration `long:"connect-timeout" env:"POOL_CONNECT_TIMEOUT" required:"true" default:"10s" description:"Timeout for establishing new Deluge RPC connections"` } func (options *DelugeOptions) Client() storm.DelugeProvider { @@ -143,7 +144,7 @@ func (options *DelugeOptions) Client() storm.DelugeProvider { } func (options *DelugeOptions) Pool(log *zap.Logger) *storm.ConnectionPool { - return storm.NewConnectionPool(log, options.MaxConnections, options.IdleTime.Duration, options.Client()) + return storm.NewConnectionPool(log, options.MaxConnections, options.IdleTime.Duration, options.ConnectTimeout.Duration, options.Client()) } type Options struct { diff --git a/pool.go b/pool.go index d1c63ea..9396b65 100644 --- a/pool.go +++ b/pool.go @@ -57,11 +57,12 @@ func (nullTimer) Stop() bool { return true } -func NewConnectionPool(log *zap.Logger, maxConnections int, idleConnectionTime time.Duration, provider DelugeProvider) *ConnectionPool { +func NewConnectionPool(log *zap.Logger, maxConnections int, idleConnectionTime time.Duration, connectTimeout time.Duration, provider DelugeProvider) *ConnectionPool { pool := &ConnectionPool{ Log: log, MaxConnections: maxConnections, IdleConnectionTime: idleConnectionTime, + ConnectTimeout: connectTimeout, Provider: provider, get: make(chan *poolReq), @@ -79,6 +80,7 @@ type ConnectionPool struct { 
Log *zap.Logger MaxConnections int IdleConnectionTime time.Duration + ConnectTimeout time.Duration Provider DelugeProvider get chan *poolReq @@ -209,13 +211,38 @@ func (pool *ConnectionPool) getConn(req *poolReq) { return } - // A new connection can be established - conn := pool.Provider() + // A new connection can be established. + // Connect() is run in a goroutine so the pool worker is not blocked indefinitely + // if the TCP dial times out (e.g. IPv6 unreachable with no RST on Go 1.21+ DNS changes). + newConn := pool.Provider() - err := conn.Connect() - if err != nil { - pool.Log.Error("Failed to establish Deluge RPC connection", zap.Error(err)) - conn = nil + type connectResult struct { + err error + } + connectCh := make(chan connectResult, 1) + go func() { connectCh <- connectResult{newConn.Connect()} }() + + var timeout <-chan time.Time + if pool.ConnectTimeout > 0 { + t := time.NewTimer(pool.ConnectTimeout) + defer t.Stop() + timeout = t.C + } + + var conn deluge.DelugeClient + select { + case r := <-connectCh: + if r.err != nil { + pool.Log.Error("Failed to establish Deluge RPC connection", zap.Error(r.err)) + } else { + conn = newConn + } + case <-timeout: + pool.Log.Error("Timed out establishing Deluge RPC connection", zap.Duration("timeout", pool.ConnectTimeout)) + go func() { <-connectCh; newConn.Close() }() + case <-req.ctx.Done(): + go func() { <-connectCh; newConn.Close() }() + return } ok := req.Send(conn)