From 69f1a98d261a4a27cc795042aa53cbbc35651d5d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 01:54:56 +0000 Subject: [PATCH 1/5] Initial plan From 4961736596c0b864a04a024743d570e6f1f699fd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 01:59:39 +0000 Subject: [PATCH 2/5] Fix graceful shutdown for WebSocket server Agent-Logs-Url: https://github.com/aceld/zinx/sessions/004c8faa-6151-4a22-bacc-73d7310cb665 Co-authored-by: aceld <7778936+aceld@users.noreply.github.com> --- znet/server.go | 70 ++++++++++++++++++++++++++++++++++++------- znet/server_test.go | 73 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 11 deletions(-) diff --git a/znet/server.go b/znet/server.go index a86bcc4e..9c54a640 100644 --- a/znet/server.go +++ b/znet/server.go @@ -1,6 +1,7 @@ package znet import ( + "context" "crypto/rand" "crypto/tls" "errors" @@ -9,6 +10,7 @@ import ( "net/http" "os" "os/signal" + "sync" "sync/atomic" "syscall" "time" @@ -71,6 +73,14 @@ type Server struct { // (异步捕获连接关闭状态) exitChan chan struct{} + // stopOnce ensures Stop() is idempotent and exitChan is closed only once + // (stopOnce 保证 Stop() 幂等,exitChan 只被关闭一次) + stopOnce sync.Once + + // WebSocket HTTP server instance, used for graceful shutdown + // (WebSocket HTTP 服务实例,用于优雅停服) + wsServer *http.Server + // Decoder for dealing with message fragmentation and reassembly // (断粘包解码器) decoder ziface.IDecoder @@ -315,7 +325,11 @@ func (s *Server) ListenTcpConn() { func (s *Server) ListenWebsocketConn() { zlog.Ins().InfoF("[START] WEBSOCKET Server name: %s,listener at IP: %s, Port %d, Path %s is starting", s.Name, s.IP, s.WsPort, s.WsPath) - http.HandleFunc(s.WsPath, func(w http.ResponseWriter, r *http.Request) { + + // Use a local ServeMux to avoid polluting the global http.DefaultServeMux + // (使用局部 ServeMux 避免污染全局 http.DefaultServeMux) + 
mux := http.NewServeMux() + mux.HandleFunc(s.WsPath, func(w http.ResponseWriter, r *http.Request) { // 1. Check if the server has reached the maximum allowed number of connections // (设置服务器最大连接控制,如果超过最大连接,则等待) if s.ConnMgr.Len() >= zconf.GlobalObject.MaxConn { @@ -357,18 +371,44 @@ func (s *Server) ListenWebsocketConn() { }) - if zconf.GlobalObject.CertFile != "" && zconf.GlobalObject.PrivateKeyFile != "" { - err := http.ListenAndServeTLS(fmt.Sprintf("%s:%d", s.IP, s.WsPort), zconf.GlobalObject.CertFile, zconf.GlobalObject.PrivateKeyFile, nil) - if err != nil { - panic(err) + // Create an explicit http.Server so we can shut it down gracefully later + // (显式创建 http.Server,以便后续能够优雅停服) + srv := &http.Server{ + Addr: fmt.Sprintf("%s:%d", s.IP, s.WsPort), + Handler: mux, + } + s.wsServer = srv + + // Start the HTTP server in a background goroutine + // (在后台 goroutine 中启动 HTTP Server) + go func() { + var err error + if zconf.GlobalObject.CertFile != "" && zconf.GlobalObject.PrivateKeyFile != "" { + err = srv.ListenAndServeTLS(zconf.GlobalObject.CertFile, zconf.GlobalObject.PrivateKeyFile) + } else { + err = srv.ListenAndServe() } - } else { - err := http.ListenAndServe(fmt.Sprintf("%s:%d", s.IP, s.WsPort), nil) - if err != nil { - panic(err) + // http.ErrServerClosed is returned after a successful Shutdown/Close call — + // this is not an error. 
(http.ErrServerClosed 是正常关闭后的返回值,不视为错误) + if err != nil && !errors.Is(err, http.ErrServerClosed) { + zlog.Ins().ErrorF("websocket server ListenAndServe err: %v", err) } - } + }() + // Block until Stop() signals exit via exitChan + // (阻塞等待 Stop() 通过 exitChan 发出退出信号) + <-s.exitChan + + // Gracefully shut down the WebSocket HTTP server with a timeout + // (带超时的优雅关闭 WebSocket HTTP Server) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := srv.Shutdown(ctx); err != nil { + zlog.Ins().ErrorF("websocket server shutdown err: %v", err) + // Fall back to forceful close if graceful shutdown fails + // (优雅关闭失败时兜底强制关闭) + _ = srv.Close() + } } func (s *Server) ListenKcpConn() { @@ -463,7 +503,15 @@ func (s *Server) Stop() { // Clear other connection information or other information that needs to be cleaned up // (将其他需要清理的连接信息或者其他信息 也要一并停止或者清理) s.ConnMgr.ClearConn() - close(s.exitChan) + + // Use sync.Once to ensure exitChan is closed only once, making Stop() safe to call + // multiple times without panicking. Closing exitChan signals all listeners + // (including ListenWebsocketConn) to shut down gracefully. + // (使用 sync.Once 确保 exitChan 只被关闭一次,使 Stop() 可重复调用不 panic。 + // 关闭 exitChan 会通知所有监听协程(包括 ListenWebsocketConn)执行优雅停服。) + s.stopOnce.Do(func() { + close(s.exitChan) + }) } // Serve runs the server (运行服务) diff --git a/znet/server_test.go b/znet/server_test.go index 65c1e069..bd31cbef 100644 --- a/znet/server_test.go +++ b/znet/server_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/aceld/zinx/zconf" "github.com/aceld/zinx/ziface" "github.com/aceld/zinx/zpack" ) @@ -212,3 +213,75 @@ func TestCloseConnectionBeforeSendMsg(t *testing.T) { wg.Wait() s.Stop() } + +// TestWebsocketServerGracefulStop verifies that calling Stop() on a WebSocket-only +// server does not block, and that the HTTP listener port is released after Stop returns. 
+// (验证 WebSocket-only 模式下 Stop() 不阻塞且端口被释放) +func TestWebsocketServerGracefulStop(t *testing.T) { + // Use a dedicated port to avoid conflicts with TCP tests (使用独立端口避免冲突) + const wsPort = 19990 + + config := &zconf.Config{ + Host: "127.0.0.1", + WsPort: wsPort, + WsPath: "/ws", + Mode: zconf.ServerModeWebsocket, + } + s := NewUserConfServer(config) + s.Start() + + // Give the server time to start listening (给服务端启动监听的时间) + time.Sleep(200 * time.Millisecond) + + // Stop() must return promptly — if it blocks, the test will time out. + // (Stop() 必须及时返回,否则测试会超时) + done := make(chan struct{}) + go func() { + s.Stop() + close(done) + }() + + select { + case <-done: + // Stop returned in time — good. + case <-time.After(3 * time.Second): + t.Fatal("Stop() blocked for more than 3s in websocket-only mode") + } + + // After Stop() returns, wait briefly for the background shutdown to complete, + // then verify the port has been released by binding to it. + // (Stop() 返回后等待后台关闭完成,然后验证端口已释放) + time.Sleep(200 * time.Millisecond) + ln, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", wsPort)) + if err != nil { + t.Fatalf("port %d not released after Stop(): %v", wsPort, err) + } + _ = ln.Close() +} + +// TestWebsocketServerStopIdempotent verifies that calling Stop() multiple times +// does not panic (thanks to sync.Once protecting the exitChan close). +// (验证多次调用 Stop() 不会 panic) +func TestWebsocketServerStopIdempotent(t *testing.T) { + const wsPort = 19991 + + config := &zconf.Config{ + Host: "127.0.0.1", + WsPort: wsPort, + WsPath: "/ws", + Mode: zconf.ServerModeWebsocket, + } + s := NewUserConfServer(config) + s.Start() + time.Sleep(200 * time.Millisecond) + + // Calling Stop() twice must not panic. 
+ // (两次调用 Stop() 不应 panic) + defer func() { + if r := recover(); r != nil { + t.Fatalf("Stop() panicked on second call: %v", r) + } + }() + s.Stop() + s.Stop() +} From f086fda2e9f420055f8c5065e8912d3b3a34ac81 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 02:01:07 +0000 Subject: [PATCH 3/5] Improve WebSocket tests to use retry loops instead of fixed sleeps Agent-Logs-Url: https://github.com/aceld/zinx/sessions/004c8faa-6151-4a22-bacc-73d7310cb665 Co-authored-by: aceld <7778936+aceld@users.noreply.github.com> --- znet/server_test.go | 56 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/znet/server_test.go b/znet/server_test.go index bd31cbef..1df3e9a9 100644 --- a/znet/server_test.go +++ b/znet/server_test.go @@ -214,6 +214,38 @@ func TestCloseConnectionBeforeSendMsg(t *testing.T) { s.Stop() } +// waitForPort retries binding to addr until it succeeds (port released) or the +// deadline is exceeded. It returns nil on success and an error otherwise. +// (重试绑定端口直到成功或超时,成功返回 nil) +func waitForPort(addr string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + ln, err := net.Listen("tcp", addr) + if err == nil { + _ = ln.Close() + return nil + } + time.Sleep(20 * time.Millisecond) + } + return fmt.Errorf("port %s not released within %v", addr, timeout) +} + +// waitForPortListening retries connecting to addr until a connection succeeds +// (server is ready) or the deadline is exceeded. 
+// (重试连接直到成功或超时,用于等待服务端就绪) +func waitForPortListening(addr string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + conn, err := net.DialTimeout("tcp", addr, 50*time.Millisecond) + if err == nil { + _ = conn.Close() + return nil + } + time.Sleep(20 * time.Millisecond) + } + return fmt.Errorf("port %s not listening within %v", addr, timeout) +} + // TestWebsocketServerGracefulStop verifies that calling Stop() on a WebSocket-only // server does not block, and that the HTTP listener port is released after Stop returns. // (验证 WebSocket-only 模式下 Stop() 不阻塞且端口被释放) @@ -230,8 +262,11 @@ func TestWebsocketServerGracefulStop(t *testing.T) { s := NewUserConfServer(config) s.Start() - // Give the server time to start listening (给服务端启动监听的时间) - time.Sleep(200 * time.Millisecond) + // Wait until the server is actually listening (等待服务端真正开始监听) + addr := fmt.Sprintf("127.0.0.1:%d", wsPort) + if err := waitForPortListening(addr, 3*time.Second); err != nil { + t.Fatalf("server did not start listening: %v", err) + } // Stop() must return promptly — if it blocks, the test will time out. // (Stop() 必须及时返回,否则测试会超时) @@ -248,15 +283,11 @@ func TestWebsocketServerGracefulStop(t *testing.T) { t.Fatal("Stop() blocked for more than 3s in websocket-only mode") } - // After Stop() returns, wait briefly for the background shutdown to complete, - // then verify the port has been released by binding to it. - // (Stop() 返回后等待后台关闭完成,然后验证端口已释放) - time.Sleep(200 * time.Millisecond) - ln, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", wsPort)) - if err != nil { + // Verify the port is released by retrying until the bind succeeds or timeout. 
+ // (重试绑定端口,验证端口已被释放) + if err := waitForPort(addr, 3*time.Second); err != nil { t.Fatalf("port %d not released after Stop(): %v", wsPort, err) } - _ = ln.Close() } // TestWebsocketServerStopIdempotent verifies that calling Stop() multiple times @@ -273,7 +304,12 @@ func TestWebsocketServerStopIdempotent(t *testing.T) { } s := NewUserConfServer(config) s.Start() - time.Sleep(200 * time.Millisecond) + + // Wait until the server is actually listening (等待服务端真正开始监听) + addr := fmt.Sprintf("127.0.0.1:%d", wsPort) + if err := waitForPortListening(addr, 3*time.Second); err != nil { + t.Fatalf("server did not start listening: %v", err) + } // Calling Stop() twice must not panic. // (两次调用 Stop() 不应 panic) From c7b07afaf73fec49fde6644766b2260f7b8b326c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 02:02:36 +0000 Subject: [PATCH 4/5] Extract shutdown timeout and retry interval as named constants Agent-Logs-Url: https://github.com/aceld/zinx/sessions/004c8faa-6151-4a22-bacc-73d7310cb665 Co-authored-by: aceld <7778936+aceld@users.noreply.github.com> --- znet/server.go | 7 ++++++- znet/server_test.go | 9 +++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/znet/server.go b/znet/server.go index 9c54a640..b6c88b95 100644 --- a/znet/server.go +++ b/znet/server.go @@ -28,6 +28,11 @@ import ( "github.com/aceld/zinx/zpack" ) +// wsShutdownTimeout bounds how long http.Server.Shutdown may wait for in-flight HTTP +// requests when the server stops. Per the net/http documentation, Shutdown neither 
+// closes nor waits for hijacked connections such as established WebSockets. +const wsShutdownTimeout = 5 * time.Second + // Server interface implementation, defines a Server service class // (接口实现,定义一个Server服务类) type Server struct { @@ -401,7 +406,7 @@ func (s *Server) ListenWebsocketConn() { // Gracefully shut down the WebSocket HTTP server with a timeout // (带超时的优雅关闭 WebSocket HTTP Server) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), wsShutdownTimeout) defer cancel() if err := srv.Shutdown(ctx); err != nil { zlog.Ins().ErrorF("websocket server shutdown err: %v", err) diff --git a/znet/server_test.go b/znet/server_test.go index 1df3e9a9..6581a0c6 100644 --- a/znet/server_test.go +++ b/znet/server_test.go @@ -13,6 +13,11 @@ import ( "github.com/aceld/zinx/zpack" ) +// retryInterval is the polling interval used by test helper functions that wait +// for a port to become available or unavailable. +// (retryInterval 是测试辅助函数轮询端口状态时的间隔时间) +const retryInterval = 20 * time.Millisecond + // run in terminal: // go test -v ./znet -run=TestServer @@ -225,7 +230,7 @@ func waitForPort(addr string, timeout time.Duration) error { _ = ln.Close() return nil } - time.Sleep(20 * time.Millisecond) + time.Sleep(retryInterval) } return fmt.Errorf("port %s not released within %v", addr, timeout) } @@ -241,7 +246,7 @@ func waitForPortListening(addr string, timeout time.Duration) error { _ = conn.Close() return nil } - time.Sleep(20 * time.Millisecond) + time.Sleep(retryInterval) } return fmt.Errorf("port %s not listening within %v", addr, timeout) } From 33af8e5fffd1af39940ed3efe0b8e2ff1b5b9a13 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 02:03:51 +0000 Subject: [PATCH 5/5] Extract dial timeout as named constant in tests Agent-Logs-Url: https://github.com/aceld/zinx/sessions/004c8faa-6151-4a22-bacc-73d7310cb665 Co-authored-by: aceld 
<7778936+aceld@users.noreply.github.com> --- znet/server_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/znet/server_test.go b/znet/server_test.go index 6581a0c6..273a9a79 100644 --- a/znet/server_test.go +++ b/znet/server_test.go @@ -18,6 +18,11 @@ import ( // (retryInterval 是测试辅助函数轮询端口状态时的间隔时间) const retryInterval = 20 * time.Millisecond +// dialTimeout is the per-attempt TCP dial timeout used when checking if a port +// is accepting connections. +// (dialTimeout 是检测端口是否在监听时每次 TCP 拨号的超时时间) +const dialTimeout = 50 * time.Millisecond + // run in terminal: // go test -v ./znet -run=TestServer @@ -241,7 +246,7 @@ func waitForPort(addr string, timeout time.Duration) error { func waitForPortListening(addr string, timeout time.Duration) error { deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { - conn, err := net.DialTimeout("tcp", addr, 50*time.Millisecond) + conn, err := net.DialTimeout("tcp", addr, dialTimeout) if err == nil { _ = conn.Close() return nil