From b3f737000830dca39e4aa62c14e1ca26a04afa68 Mon Sep 17 00:00:00 2001 From: Dawei Huang Date: Wed, 18 Mar 2026 02:18:36 +0000 Subject: [PATCH] Add async command execution for long-running SetPackage operations Add RunHostCommandAsync to pkg/exec which starts commands decoupled from the caller's context. This is critical for SetPackage where sonic-installer can run for minutes - if the gRPC client disconnects, the installation continues safely in the background instead of being killed. Changes: - Add AsyncCommandHandle struct with Wait() and Done() methods - Add RunHostCommandAsync() using background context with timeout - Add NewCompletedAsyncHandle() helper for testing - Refactor installPackage/activatePackage to use async execution - Remove ctx parameter since the command manages its own lifecycle - Update all tests for the new signatures Related: #619 Related: sonic-net/sonic-mgmt#23054 Signed-off-by: Dawei Huang --- pkg/exec/command.go | 97 +++++++++++++++++++++++++++ pkg/exec/command_test.go | 94 ++++++++++++++++++++++++++ pkg/gnoi/system/system.go | 23 ++++--- pkg/gnoi/system/system_monkey_test.go | 76 ++++++++++----------- pkg/gnoi/system/system_test.go | 12 ++-- 5 files changed, 245 insertions(+), 57 deletions(-) diff --git a/pkg/exec/command.go b/pkg/exec/command.go index bc8e913a0..416ea88bf 100644 --- a/pkg/exec/command.go +++ b/pkg/exec/command.go @@ -160,6 +160,103 @@ func buildNsenterArgs(opts *RunHostCommandOptions, command string, args []string return nsenterArgs } +// AsyncCommandHandle represents a handle to a running asynchronous command. +// The underlying process is decoupled from any caller context so it will +// continue running even if the originating gRPC stream is cancelled. +type AsyncCommandHandle struct { + done chan struct{} + result *CommandResult +} + +// Wait blocks until the command completes and returns the result. +func (h *AsyncCommandHandle) Wait() *CommandResult { + <-h.done + return h.result +} + +// Done returns a channel that is closed when the command completes. +func (h *AsyncCommandHandle) Done() <-chan struct{} { + return h.done +} + +// RunHostCommandAsync starts a command on the host using nsenter, but unlike +// RunHostCommand the process is NOT tied to any caller-supplied context. +// This is critical for long-running operations (e.g. sonic-installer) that +// must survive gRPC client disconnects. +// +// The Timeout field in opts is still honoured via an internal background +// context. If Timeout is zero the defaultTimeout is used. +func RunHostCommandAsync(command string, args []string, opts *RunHostCommandOptions) (*AsyncCommandHandle, error) { + if command == "" { + return nil, fmt.Errorf("command cannot be empty") + } + + if opts == nil { + opts = &RunHostCommandOptions{} + } + + timeout := opts.Timeout + if timeout == 0 { + timeout = defaultTimeout + } + + // Use a background context so the process is never killed by the + // caller's context cancellation (e.g. gRPC stream teardown). + ctx, cancel := context.WithTimeout(context.Background(), timeout) + + nsenterArgs := buildNsenterArgs(opts, command, args) + cmd := exec.CommandContext(ctx, "nsenter", nsenterArgs...) + + if opts.WorkingDir != "" { + cmd.Dir = opts.WorkingDir + } + if len(opts.Environment) > 0 { + cmd.Env = append(cmd.Env, opts.Environment...) + } + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Start(); err != nil { + cancel() + return nil, fmt.Errorf("failed to start command: %v", err) + } + + handle := &AsyncCommandHandle{ + done: make(chan struct{}), + } + + go func() { + defer cancel() + defer close(handle.done) + + err := cmd.Wait() + result := &CommandResult{ + Stdout: stdout.String(), + Stderr: stderr.String(), + Error: err, + } + if exitError, ok := err.(*exec.ExitError); ok { + result.ExitCode = exitError.ExitCode() + } + handle.result = result + }() + + return handle, nil +} + +// NewCompletedAsyncHandle creates an AsyncCommandHandle that is already +// completed with the given result. This is useful for testing. +func NewCompletedAsyncHandle(result *CommandResult) *AsyncCommandHandle { + h := &AsyncCommandHandle{ + done: make(chan struct{}), + result: result, + } + close(h.done) + return h +} + // IsNsenterAvailable checks if nsenter is available in the system func IsNsenterAvailable() bool { cmd := exec.Command("which", "nsenter") diff --git a/pkg/exec/command_test.go b/pkg/exec/command_test.go index 19ffb33dd..74921a21c 100644 --- a/pkg/exec/command_test.go +++ b/pkg/exec/command_test.go @@ -312,6 +312,100 @@ func TestParseCommand(t *testing.T) { } } +func TestRunHostCommandAsync(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("nsenter tests can only run on Linux") + } + if !IsNsenterAvailable() { + t.Skip("nsenter is not available on this system") + } + + testResult, _ := RunHostCommand(context.Background(), "true", nil, nil) + if testResult != nil && testResult.Error != nil && strings.Contains(testResult.Stderr, "Permission denied") { + t.Skip("Insufficient permissions to run nsenter tests") + } + + t.Run("empty command", func(t *testing.T) { + _, err := RunHostCommandAsync("", nil, nil) + if err == nil { + t.Error("RunHostCommandAsync() with empty command should return error") + } + }) + + t.Run("simple echo", func(t *testing.T) { + handle, err := RunHostCommandAsync("echo", []string{"async-test"}, &RunHostCommandOptions{ + Timeout: 5 * time.Second, + }) + if err != nil { + t.Fatalf("RunHostCommandAsync() failed to start: %v", err) + } + + result := handle.Wait() + if result.Error != nil { + t.Errorf("command failed: %v", result.Error) + } + if !strings.Contains(result.Stdout, "async-test") { + t.Errorf("expected stdout to contain 'async-test', got %q", result.Stdout) + } + }) + + t.Run("done channel closes on completion", func(t *testing.T) { + handle, err := RunHostCommandAsync("true", nil, &RunHostCommandOptions{ + Timeout: 5 * time.Second, + }) + if err != nil { + t.Fatalf("RunHostCommandAsync() failed to start: %v", err) + } + + select { + case <-handle.Done(): + // expected + case <-time.After(5 * time.Second): + t.Fatal("timed out waiting for Done() channel") + } + + result := handle.Wait() + if result.ExitCode != 0 { + t.Errorf("expected exit code 0, got %d", result.ExitCode) + } + }) + + t.Run("captures non-zero exit code", func(t *testing.T) { + handle, err := RunHostCommandAsync("false", nil, &RunHostCommandOptions{ + Timeout: 5 * time.Second, + }) + if err != nil { + t.Fatalf("RunHostCommandAsync() failed to start: %v", err) + } + + result := handle.Wait() + if result.ExitCode == 0 { + t.Error("expected non-zero exit code for 'false' command") + } + }) +} + +func TestNewCompletedAsyncHandle(t *testing.T) { + expected := &CommandResult{ + Stdout: "test output", + ExitCode: 0, + } + handle := NewCompletedAsyncHandle(expected) + + // Done channel should already be closed + select { + case <-handle.Done(): + // expected + default: + t.Fatal("Done() channel should be closed for completed handle") + } + + result := handle.Wait() + if result != expected { + t.Error("Wait() should return the provided result") + } +} + func TestIsNsenterAvailable(t *testing.T) { // This test just verifies the function runs without error // The actual result depends on the system diff --git a/pkg/gnoi/system/system.go b/pkg/gnoi/system/system.go index b14764971..248e44004 100644 --- a/pkg/gnoi/system/system.go +++ b/pkg/gnoi/system/system.go @@ -107,8 +107,8 @@ func HandleSetPackage(ctx context.Context, req *syspb.SetPackageRequest) (*syspb return nil, status.Errorf(codes.InvalidArgument, "filename must be an absolute path") } - // Install the package using sonic-installer - if err := installPackage(ctx, pkg.Package.Filename); err != nil { + // Install the package using sonic-installer (async, survives gRPC disconnect) + if err := installPackage(pkg.Package.Filename); err != nil { log.Errorf("Failed to install package %s: %v", pkg.Package.Filename, err) return nil, status.Errorf(codes.Internal, "failed to install package: %v", err) } @@ -117,7 +117,7 @@ func HandleSetPackage(ctx context.Context, req *syspb.SetPackageRequest) (*syspb // If activate is requested, set as next boot image if pkg.Package.Activate { - if err := activatePackage(ctx, pkg.Package.Version); err != nil { + if err := activatePackage(pkg.Package.Version); err != nil { log.Errorf("Failed to activate package %s: %v", pkg.Package.Version, err) return nil, status.Errorf(codes.Internal, "failed to activate package: %v", err) } @@ -128,7 +128,9 @@ func HandleSetPackage(ctx context.Context, req *syspb.SetPackageRequest) (*syspb } // installPackage installs a SONiC image using sonic-installer install command. -func installPackage(ctx context.Context, filename string) error { +// The command is started asynchronously and decoupled from any caller context +// so that it survives gRPC client disconnects. +func installPackage(filename string) error { log.V(1).Infof("Installing package: %s", filename) // Execute sonic-installer install command with -y flag for non-interactive installation @@ -136,11 +138,12 @@ func installPackage(ctx context.Context, filename string) error { opts := &exec.RunHostCommandOptions{ Timeout: 10 * time.Minute, // Allow up to 10 minutes for installation } - result, err := exec.RunHostCommand(ctx, "sonic-installer", []string{"install", "-y", filename}, opts) + handle, err := exec.RunHostCommandAsync("sonic-installer", []string{"install", "-y", filename}, opts) if err != nil { - return fmt.Errorf("failed to run sonic-installer install: %v", err) + return fmt.Errorf("failed to start sonic-installer install: %v", err) } + result := handle.Wait() if result.Error != nil { return fmt.Errorf("sonic-installer install failed with exit code %d: %s", result.ExitCode, result.Stderr) @@ -151,7 +154,8 @@ func installPackage(ctx context.Context, filename string) error { } // activatePackage sets a SONiC image as the next boot image using sonic-installer set-default. -func activatePackage(ctx context.Context, version string) error { +// Like installPackage, the command is decoupled from any caller context. +func activatePackage(version string) error { log.V(1).Infof("Activating package version: %s", version) // Execute sonic-installer set-default command @@ -159,11 +163,12 @@ func activatePackage(ctx context.Context, version string) error { opts := &exec.RunHostCommandOptions{ Timeout: 2 * time.Minute, // Allow up to 2 minutes for setting default } - result, err := exec.RunHostCommand(ctx, "sonic-installer", []string{"set-default", version}, opts) + handle, err := exec.RunHostCommandAsync("sonic-installer", []string{"set-default", version}, opts) if err != nil { - return fmt.Errorf("failed to run sonic-installer set-default: %v", err) + return fmt.Errorf("failed to start sonic-installer set-default: %v", err) } + result := handle.Wait() if result.Error != nil { return fmt.Errorf("sonic-installer set-default failed with exit code %d: %s", result.ExitCode, result.Stderr) diff --git a/pkg/gnoi/system/system_monkey_test.go b/pkg/gnoi/system/system_monkey_test.go index aa65f32b7..116ae72ec 100644 --- a/pkg/gnoi/system/system_monkey_test.go +++ b/pkg/gnoi/system/system_monkey_test.go @@ -14,15 +14,15 @@ func TestHandleSetPackage_SuccessWithoutActivation(t *testing.T) { patches := gomonkey.NewPatches() defer patches.Reset() - // Mock exec.RunHostCommand to return successful installation - patches.ApplyFunc(exec.RunHostCommand, func(ctx context.Context, cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.CommandResult, error) { + // Mock exec.RunHostCommandAsync to return successful installation + patches.ApplyFunc(exec.RunHostCommandAsync, func(cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.AsyncCommandHandle, error) { if cmd == "sonic-installer" && len(args) >= 1 && args[0] == "install" { - return &exec.CommandResult{ + return exec.NewCompletedAsyncHandle(&exec.CommandResult{ Stdout: "Installation completed successfully", Stderr: "", ExitCode: 0, Error: nil, - }, nil + }), nil } return nil, fmt.Errorf("unexpected command: %s %v", cmd, args) }) @@ -52,24 +52,24 @@ func TestHandleSetPackage_SuccessWithActivation(t *testing.T) { patches := gomonkey.NewPatches() defer patches.Reset() - // Mock exec.RunHostCommand to handle both install and set-default commands - patches.ApplyFunc(exec.RunHostCommand, func(ctx context.Context, cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.CommandResult, error) { + // Mock exec.RunHostCommandAsync to handle both install and set-default commands + patches.ApplyFunc(exec.RunHostCommandAsync, func(cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.AsyncCommandHandle, error) { if cmd == "sonic-installer" { if len(args) >= 1 && args[0] == "install" { - return &exec.CommandResult{ + return exec.NewCompletedAsyncHandle(&exec.CommandResult{ Stdout: "Installation completed successfully", Stderr: "", ExitCode: 0, Error: nil, - }, nil + }), nil } if len(args) >= 1 && args[0] == "set-default" { - return &exec.CommandResult{ + return exec.NewCompletedAsyncHandle(&exec.CommandResult{ Stdout: "Default image set successfully", Stderr: "", ExitCode: 0, Error: nil, - }, nil + }), nil } } return nil, fmt.Errorf("unexpected command: %s %v", cmd, args) @@ -100,8 +100,8 @@ func TestHandleSetPackage_InstallCommandError(t *testing.T) { patches := gomonkey.NewPatches() defer patches.Reset() - // Mock exec.RunHostCommand to return command execution error - patches.ApplyFunc(exec.RunHostCommand, func(ctx context.Context, cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.CommandResult, error) { + // Mock exec.RunHostCommandAsync to return command start error + patches.ApplyFunc(exec.RunHostCommandAsync, func(cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.AsyncCommandHandle, error) { return nil, fmt.Errorf("permission denied") }) @@ -130,16 +130,16 @@ func TestHandleSetPackage_ActivateCommandError(t *testing.T) { patches := gomonkey.NewPatches() defer patches.Reset() - // Mock exec.RunHostCommand: install succeeds, set-default fails - patches.ApplyFunc(exec.RunHostCommand, func(ctx context.Context, cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.CommandResult, error) { + // Mock exec.RunHostCommandAsync: install succeeds, set-default fails + patches.ApplyFunc(exec.RunHostCommandAsync, func(cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.AsyncCommandHandle, error) { if cmd == "sonic-installer" { if len(args) >= 1 && args[0] == "install" { - return &exec.CommandResult{ + return exec.NewCompletedAsyncHandle(&exec.CommandResult{ Stdout: "Installation completed successfully", Stderr: "", ExitCode: 0, Error: nil, - }, nil + }), nil } if len(args) >= 1 && args[0] == "set-default" { return nil, fmt.Errorf("set-default command failed") @@ -173,18 +173,17 @@ func TestInstallPackage_Success(t *testing.T) { patches := gomonkey.NewPatches() defer patches.Reset() - // Mock exec.RunHostCommand to return successful installation - patches.ApplyFunc(exec.RunHostCommand, func(ctx context.Context, cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.CommandResult, error) { - return &exec.CommandResult{ + // Mock exec.RunHostCommandAsync to return successful installation + patches.ApplyFunc(exec.RunHostCommandAsync, func(cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.AsyncCommandHandle, error) { + return exec.NewCompletedAsyncHandle(&exec.CommandResult{ Stdout: "Package installed successfully", Stderr: "", ExitCode: 0, Error: nil, - }, nil + }), nil }) - ctx := context.Background() - err := installPackage(ctx, "/tmp/test-image.bin") + err := installPackage("/tmp/test-image.bin") if err != nil { t.Fatalf("installPackage() returned error: %v", err) } @@ -194,19 +193,18 @@ func TestInstallPackage_CommandError(t *testing.T) { patches := gomonkey.NewPatches() defer patches.Reset() - // Mock exec.RunHostCommand to return command execution error - patches.ApplyFunc(exec.RunHostCommand, func(ctx context.Context, cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.CommandResult, error) { + // Mock exec.RunHostCommandAsync to return command start error + patches.ApplyFunc(exec.RunHostCommandAsync, func(cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.AsyncCommandHandle, error) { return nil, fmt.Errorf("command not found") }) - ctx := context.Background() - err := installPackage(ctx, "/tmp/test-image.bin") + err := installPackage("/tmp/test-image.bin") if err == nil { t.Fatal("installPackage() should return error when command fails") } - if !containsSubstring(err.Error(), "failed to run sonic-installer install") { - t.Errorf("installPackage() error = %v, should contain 'failed to run sonic-installer install'", err) + if !containsSubstring(err.Error(), "failed to start sonic-installer install") { + t.Errorf("installPackage() error = %v, should contain 'failed to start sonic-installer install'", err) } } @@ -214,18 +212,17 @@ func TestActivatePackage_Success(t *testing.T) { patches := gomonkey.NewPatches() defer patches.Reset() - // Mock exec.RunHostCommand to return successful activation - patches.ApplyFunc(exec.RunHostCommand, func(ctx context.Context, cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.CommandResult, error) { - return &exec.CommandResult{ + // Mock exec.RunHostCommandAsync to return successful activation + patches.ApplyFunc(exec.RunHostCommandAsync, func(cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.AsyncCommandHandle, error) { + return exec.NewCompletedAsyncHandle(&exec.CommandResult{ Stdout: "Default image set successfully", Stderr: "", ExitCode: 0, Error: nil, - }, nil + }), nil }) - ctx := context.Background() - err := activatePackage(ctx, "test-version-1.0.0") + err := activatePackage("test-version-1.0.0") if err != nil { t.Fatalf("activatePackage() returned error: %v", err) } @@ -235,19 +232,18 @@ func TestActivatePackage_CommandError(t *testing.T) { patches := gomonkey.NewPatches() defer patches.Reset() - // Mock exec.RunHostCommand to return command execution error - patches.ApplyFunc(exec.RunHostCommand, func(ctx context.Context, cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.CommandResult, error) { + // Mock exec.RunHostCommandAsync to return command start error + patches.ApplyFunc(exec.RunHostCommandAsync, func(cmd string, args []string, opts *exec.RunHostCommandOptions) (*exec.AsyncCommandHandle, error) { return nil, fmt.Errorf("permission denied") }) - ctx := context.Background() - err := activatePackage(ctx, "test-version-1.0.0") + err := activatePackage("test-version-1.0.0") if err == nil { t.Fatal("activatePackage() should return error when command fails") } - if !containsSubstring(err.Error(), "failed to run sonic-installer set-default") { - t.Errorf("activatePackage() error = %v, should contain 'failed to run sonic-installer set-default'", err) + if !containsSubstring(err.Error(), "failed to start sonic-installer set-default") { + t.Errorf("activatePackage() error = %v, should contain 'failed to start sonic-installer set-default'", err) } } diff --git a/pkg/gnoi/system/system_test.go b/pkg/gnoi/system/system_test.go index 2006e3d13..647328a49 100644 --- a/pkg/gnoi/system/system_test.go +++ b/pkg/gnoi/system/system_test.go @@ -133,16 +133,14 @@ func TestHandleSetPackageValidation(t *testing.T) { } func TestInstallPackageValidation(t *testing.T) { - ctx := context.Background() - // Test empty filename - err := installPackage(ctx, "") + err := installPackage("") if err == nil { t.Error("installPackage() with empty filename should fail") } // Test with non-existent file (will fail but tests the command execution path) - err = installPackage(ctx, "/nonexistent/path/image.bin") + err = installPackage("/nonexistent/path/image.bin") if err == nil { t.Error("installPackage() with non-existent file should fail") } @@ -150,16 +148,14 @@ func TestInstallPackageValidation(t *testing.T) { } func TestActivatePackageValidation(t *testing.T) { - ctx := context.Background() - // Test empty version - err := activatePackage(ctx, "") + err := activatePackage("") if err == nil { t.Error("activatePackage() with empty version should fail") } // Test with non-existent version (will fail but tests the command execution path) - err = activatePackage(ctx, "non-existent-version") + err = activatePackage("non-existent-version") if err == nil { t.Error("activatePackage() with non-existent version should fail") }