diff --git a/internal/shell/dispatch.go b/internal/shell/dispatch.go index 2acff082e7..77b43aea75 100644 --- a/internal/shell/dispatch.go +++ b/internal/shell/dispatch.go @@ -197,6 +197,7 @@ func dispatchShebang(ctx context.Context, scriptPath string, probe []byte, args cmd.Stdin = hc.Stdin cmd.Stdout = hc.Stdout cmd.Stderr = hc.Stderr + isolateProcess(cmd) if err := cmd.Run(); err != nil { var exitErr *exec.ExitError @@ -392,7 +393,7 @@ func runShellSource(ctx context.Context, path string, args []string, blockFuncs interp.Interactive(false), interp.Env(hc.Env), interp.Dir(hc.Dir), - interp.ExecHandlers(standardHandlers(blockFuncs)...), + execHandlerOption(blockFuncs), } if len(args) > 1 { // Params with a leading "--" avoids any of args[1:] being diff --git a/internal/shell/exec_unix.go b/internal/shell/exec_unix.go new file mode 100644 index 0000000000..b3488c2457 --- /dev/null +++ b/internal/shell/exec_unix.go @@ -0,0 +1,108 @@ +//go:build !windows + +package shell + +import ( + "context" + "fmt" + "io" + "os/exec" + "syscall" + "time" + + "mvdan.cc/sh/v3/interp" +) + +// defaultKillTimeout matches mvdan's DefaultExecHandler default. Extracted +// so the coupling to upstream is explicit rather than a buried literal. +const defaultKillTimeout = 2 * time.Second + +// isolateProcess sets SysProcAttr so the child runs in its own session, +// fully detached from Crush's controlling terminal. This prevents shells +// like zsh from grabbing the TTY for job control, which would cause +// "suspended (tty input)" and corrupt Bubble Tea's terminal rendering. +// It also prevents child processes from sending signals to Crush's process +// group. +func isolateProcess(cmd *exec.Cmd) { + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.SysProcAttr.Setsid = true +} + +// processGroupExecHandler returns an ExecHandlerFunc that replaces +// interp.DefaultExecHandler with one that fully isolates child processes +// from Crush's session and controlling terminal. +// +// Without this, shells like zsh that enable job control when sourcing +// framework files will attempt to take over the TTY, causing SIGTTIN/SIGTTOU +// signals and corrupting the parent terminal state. +// +// The implementation mirrors interp.DefaultExecHandler with two additions: +// isolateProcess(&cmd) after construction, and negative-PID signal targeting +// in the cancellation callback so the entire child process group is killed. +func processGroupExecHandler(killTimeout time.Duration) interp.ExecHandlerFunc { + return func(ctx context.Context, args []string) error { + hc := interp.HandlerCtx(ctx) + path, err := interp.LookPathDir(hc.Dir, hc.Env, args[0]) + if err != nil { + fmt.Fprintln(hc.Stderr, err) + return interp.ExitStatus(127) + } + + cmd := exec.Cmd{ + Path: path, + Args: args, + Env: execEnvList(hc.Env), + Dir: hc.Dir, + Stdin: hc.Stdin, + Stdout: hc.Stdout, + Stderr: hc.Stderr, + } + isolateProcess(&cmd) + + err = cmd.Start() + if err == nil { + stopf := context.AfterFunc(ctx, func() { + if killTimeout <= 0 { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + return + } + // Signal the child's process group (negative PID) so + // grandchildren also receive it. + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGINT) + time.Sleep(killTimeout) + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + }) + defer stopf() + + err = cmd.Wait() + } + + return exitStatusFromError(ctx, hc.Stderr, err) + } +} + +// exitStatusFromError translates an exec error into an interp exit status, +// matching the conventions of interp.DefaultExecHandler. Extracted so the +// platform-specific exec handler stays focused on isolation mechanics. +func exitStatusFromError(ctx context.Context, stderr io.Writer, err error) error { + if err == nil { + return nil + } + switch err := err.(type) { + case *exec.ExitError: + if status, ok := err.Sys().(syscall.WaitStatus); ok && status.Signaled() { + if ctx.Err() != nil { + return ctx.Err() + } + return interp.ExitStatus(128 + uint8(status.Signal())) + } + return interp.ExitStatus(uint8(err.ExitCode())) + case *exec.Error: + fmt.Fprintf(stderr, "%v\n", err) + return interp.ExitStatus(127) + default: + return err + } +} diff --git a/internal/shell/exec_windows.go b/internal/shell/exec_windows.go new file mode 100644 index 0000000000..3609c68e3b --- /dev/null +++ b/internal/shell/exec_windows.go @@ -0,0 +1,23 @@ +//go:build windows + +package shell + +import ( + "os/exec" + "time" + + "mvdan.cc/sh/v3/interp" +) + +// defaultKillTimeout matches mvdan's DefaultExecHandler default. +const defaultKillTimeout = 2 * time.Second + +// isolateProcess is a no-op on Windows. Session isolation via Setsid is a +// Unix-only concept; Windows uses CREATE_NEW_PROCESS_GROUP which mvdan's +// default handler already handles adequately. +func isolateProcess(_ *exec.Cmd) {} + +// processGroupExecHandler returns interp.DefaultExecHandler on Windows. +func processGroupExecHandler(killTimeout time.Duration) interp.ExecHandlerFunc { + return interp.DefaultExecHandler(killTimeout) +} diff --git a/internal/shell/expand.go b/internal/shell/expand.go index bef83310bf..b809a03860 100644 --- a/internal/shell/expand.go +++ b/internal/shell/expand.go @@ -93,7 +93,7 @@ func ExpandValue(ctx context.Context, value string, env []string) (string, error interp.Interactive(false), interp.Env(expand.ListEnviron(env...)), interp.Dir(s.cwd), - interp.ExecHandlers(standardHandlers(s.blockFuncs)...), + execHandlerOption(s.blockFuncs), } if strict { // Match the outer NoUnset: an unset $VAR inside diff --git a/internal/shell/isolation_unix_test.go b/internal/shell/isolation_unix_test.go new file mode 100644 index 0000000000..e8d6da7e35 --- /dev/null +++ b/internal/shell/isolation_unix_test.go @@ -0,0 +1,143 @@ +//go:build !windows + +package shell + +import ( + "bytes" + "context" + "os" + "os/exec" + "os/signal" + "strings" + "syscall" + "testing" + "time" +) + +// TestProcessIsolation_SignalDoesNotReachParent verifies that a child +// sending SIGINT to its own process group does not deliver the signal to +// the parent (test) process. Without Setsid isolation, the child shares +// the parent's process group and the signal would kill the test. +func TestProcessIsolation_SignalDoesNotReachParent(t *testing.T) { + t.Parallel() + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT) + defer signal.Stop(sigCh) + + var stderr bytes.Buffer + err := Run(t.Context(), RunOptions{ + Command: `sh -c 'kill -INT -$$'`, + Cwd: t.TempDir(), + Env: os.Environ(), + Stderr: &stderr, + }) + + if err == nil { + t.Log("child exited 0 after self-signal (unexpected but not a failure)") + } + + select { + case sig := <-sigCh: + t.Fatalf("SIGINT leaked to parent process: %v (stderr=%q)", sig, stderr.String()) + case <-time.After(200 * time.Millisecond): + } +} + +// TestProcessIsolation_TTYAccessDoesNotBlock verifies that a child trying +// to read from /dev/tty does not block or suspend the parent. With Setsid, +// the child has no controlling terminal, so /dev/tty access fails immediately +// with ENXIO. +func TestProcessIsolation_TTYAccessDoesNotBlock(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), 3*time.Second) + defer cancel() + + var stderr bytes.Buffer + done := make(chan error, 1) + go func() { + done <- Run(ctx, RunOptions{ + Command: `sh -c 'cat < /dev/tty'`, + Cwd: t.TempDir(), + Env: os.Environ(), + Stderr: &stderr, + }) + }() + + select { + case err := <-done: + if err == nil { + t.Fatal("expected error from /dev/tty access in detached session, got nil") + } + case <-time.After(3 * time.Second): + t.Fatalf("/dev/tty access blocked for >3s; session isolation may be broken (stderr=%q)", stderr.String()) + } +} + +// TestProcessIsolation_ZshJobControlDoesNotSuspend simulates the exact +// scenario that caused the original bug: running zsh with job control +// enabled. Without session isolation, zsh tries to become the foreground +// process group leader on the controlling terminal, gets SIGTTIN, and +// suspends. Skips if zsh is not installed. +func TestProcessIsolation_ZshJobControlDoesNotSuspend(t *testing.T) { + t.Parallel() + + if _, err := exec.LookPath("zsh"); err != nil { + t.Skip("zsh not installed") + } + + ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second) + defer cancel() + + var stdout, stderr bytes.Buffer + done := make(chan error, 1) + go func() { + done <- Run(ctx, RunOptions{ + Command: `zsh -i -c 'echo ok'`, + Cwd: t.TempDir(), + Env: os.Environ(), + Stdout: &stdout, + Stderr: &stderr, + }) + }() + + select { + case err := <-done: + if err != nil { + t.Logf("zsh exited with error (may be expected): %v", err) + } + out := strings.TrimSpace(stdout.String()) + if !strings.Contains(out, "ok") { + t.Errorf("expected 'ok' in stdout, got %q (stderr=%q)", out, stderr.String()) + } + case <-time.After(5 * time.Second): + t.Fatalf("zsh -i -c did not complete within 5s; likely suspended on TTY (stderr=%q)", stderr.String()) + } +} + +// TestProcessIsolation_ChildProcessGroupKill verifies that when context +// is cancelled, the signal reaches the entire child process group (not +// just the direct child). +func TestProcessIsolation_ChildProcessGroupKill(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond) + defer cancel() + + start := time.Now() + err := Run(ctx, RunOptions{ + Command: `sh -c 'sleep 60 & wait'`, + Cwd: t.TempDir(), + Env: os.Environ(), + }) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected error from cancelled context") + } + // Allow up to 4s: 500ms context timeout + 2s kill timeout + margin. + if elapsed > 4*time.Second { + t.Fatalf("cancellation took %v; expected prompt process group kill", elapsed) + } +} diff --git a/internal/shell/run.go b/internal/shell/run.go index 01986a6fd5..cafab786d8 100644 --- a/internal/shell/run.go +++ b/internal/shell/run.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "slices" "strings" "mvdan.cc/sh/moreinterp/coreutils" @@ -95,10 +96,29 @@ func newRunner(cwd string, env []string, stdin io.Reader, stdout, stderr io.Writ interp.Interactive(false), interp.Env(expand.ListEnviron(env...)), interp.Dir(cwd), - interp.ExecHandlers(standardHandlers(blockFuncs)...), + execHandlerOption(blockFuncs), ) } +// execHandlerOption returns an interp.RunnerOption that installs the +// standard Crush middleware chain (builtins, script dispatch, block list) +// on top of a process-group-isolated base exec handler. +// +// We use interp.ExecHandler (singular) with a manually-built chain rather +// than interp.ExecHandlers because the latter always appends +// interp.DefaultExecHandler as the final handler, which lacks process group +// isolation. Without isolation, shells like zsh that set up job control +// when sourcing framework files can send SIGINT/SIGTERM to Crush's process +// group and crash the parent. +func execHandlerOption(blockFuncs []BlockFunc) interp.RunnerOption { + base := processGroupExecHandler(defaultKillTimeout) + handler := base + for _, mw := range slices.Backward(standardHandlers(blockFuncs)) { + handler = mw(handler) + } + return interp.ExecHandler(handler) //nolint:staticcheck // ExecHandlers always appends DefaultExecHandler which lacks process isolation. +} + // nonInteractiveEnvVars are forced on every shell execution to prevent // commands from hanging on a nonexistent TTY. These are always applied // regardless of the caller's environment because Crush shells are never