From 23a4f94a4e9aee5dbdb2919477a7fab956b11082 Mon Sep 17 00:00:00 2001 From: Kieran Klukas Date: Mon, 8 Jun 2026 12:04:45 -0400 Subject: [PATCH] fix(shell): isolate child processes from Crush's session and controlling terminal Child processes spawned by the shell interpreter shared Crush's session, allowing shells like zsh to grab the TTY for job control. This caused "suspended (tty input)" hangs and corrupted Bubble Tea's rendering via unexpected escape sequences. Replace mvdan's DefaultExecHandler with a process-isolated variant that sets Setsid on Unix, and target signals at the child's process group so grandchildren are cleaned up on cancellation. --- internal/shell/dispatch.go | 3 +- internal/shell/exec_unix.go | 108 +++++++++++++++++++ internal/shell/exec_windows.go | 23 +++++ internal/shell/expand.go | 2 +- internal/shell/isolation_unix_test.go | 143 ++++++++++++++++++++++++++ internal/shell/run.go | 22 +++- 6 files changed, 298 insertions(+), 3 deletions(-) create mode 100644 internal/shell/exec_unix.go create mode 100644 internal/shell/exec_windows.go create mode 100644 internal/shell/isolation_unix_test.go diff --git a/internal/shell/dispatch.go b/internal/shell/dispatch.go index 2acff082e7..77b43aea75 100644 --- a/internal/shell/dispatch.go +++ b/internal/shell/dispatch.go @@ -197,6 +197,7 @@ func dispatchShebang(ctx context.Context, scriptPath string, probe []byte, args cmd.Stdin = hc.Stdin cmd.Stdout = hc.Stdout cmd.Stderr = hc.Stderr + isolateProcess(cmd) if err := cmd.Run(); err != nil { var exitErr *exec.ExitError @@ -392,7 +393,7 @@ func runShellSource(ctx context.Context, path string, args []string, blockFuncs interp.Interactive(false), interp.Env(hc.Env), interp.Dir(hc.Dir), - interp.ExecHandlers(standardHandlers(blockFuncs)...), + execHandlerOption(blockFuncs), } if len(args) > 1 { // Params with a leading "--" avoids any of args[1:] being diff --git a/internal/shell/exec_unix.go b/internal/shell/exec_unix.go new file mode 100644 index 0000000000..b3488c2457 --- /dev/null +++ b/internal/shell/exec_unix.go @@ -0,0 +1,108 @@ +//go:build !windows + +package shell + +import ( + "context" + "fmt" + "io" + "os/exec" + "syscall" + "time" + + "mvdan.cc/sh/v3/interp" +) + +// defaultKillTimeout matches mvdan's DefaultExecHandler default. Extracted +// so the coupling to upstream is explicit rather than a buried literal. +const defaultKillTimeout = 2 * time.Second + +// isolateProcess sets SysProcAttr so the child runs in its own session, +// fully detached from Crush's controlling terminal. This prevents shells +// like zsh from grabbing the TTY for job control, which would cause +// "suspended (tty input)" and corrupt Bubble Tea's terminal rendering. +// It also prevents child processes from sending signals to Crush's process +// group. +func isolateProcess(cmd *exec.Cmd) { + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.SysProcAttr.Setsid = true +} + +// processGroupExecHandler returns an ExecHandlerFunc that replaces +// interp.DefaultExecHandler with one that fully isolates child processes +// from Crush's session and controlling terminal. +// +// Without this, shells like zsh that enable job control when sourcing +// framework files will attempt to take over the TTY, causing SIGTTIN/SIGTTOU +// signals and corrupting the parent terminal state. +// +// The implementation mirrors interp.DefaultExecHandler with two additions: +// isolateProcess(&cmd) after construction, and negative-PID signal targeting +// in the cancellation callback so the entire child process group is killed. +func processGroupExecHandler(killTimeout time.Duration) interp.ExecHandlerFunc { + return func(ctx context.Context, args []string) error { + hc := interp.HandlerCtx(ctx) + path, err := interp.LookPathDir(hc.Dir, hc.Env, args[0]) + if err != nil { + fmt.Fprintln(hc.Stderr, err) + return interp.ExitStatus(127) + } + + cmd := exec.Cmd{ + Path: path, + Args: args, + Env: execEnvList(hc.Env), + Dir: hc.Dir, + Stdin: hc.Stdin, + Stdout: hc.Stdout, + Stderr: hc.Stderr, + } + isolateProcess(&cmd) + + err = cmd.Start() + if err == nil { + stopf := context.AfterFunc(ctx, func() { + if killTimeout <= 0 { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + return + } + // Signal the child's process group (negative PID) so + // grandchildren also receive it. + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGINT) + time.Sleep(killTimeout) + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + }) + defer stopf() + + err = cmd.Wait() + } + + return exitStatusFromError(ctx, hc.Stderr, err) + } +} + +// exitStatusFromError translates an exec error into an interp exit status, +// matching the conventions of interp.DefaultExecHandler. Extracted so the +// platform-specific exec handler stays focused on isolation mechanics. +func exitStatusFromError(ctx context.Context, stderr io.Writer, err error) error { + if err == nil { + return nil + } + switch err := err.(type) { + case *exec.ExitError: + if status, ok := err.Sys().(syscall.WaitStatus); ok && status.Signaled() { + if ctx.Err() != nil { + return ctx.Err() + } + return interp.ExitStatus(128 + uint8(status.Signal())) + } + return interp.ExitStatus(uint8(err.ExitCode())) + case *exec.Error: + fmt.Fprintf(stderr, "%v\n", err) + return interp.ExitStatus(127) + default: + return err + } +} diff --git a/internal/shell/exec_windows.go b/internal/shell/exec_windows.go new file mode 100644 index 0000000000..3609c68e3b --- /dev/null +++ b/internal/shell/exec_windows.go @@ -0,0 +1,23 @@ +//go:build windows + +package shell + +import ( + "os/exec" + "time" + + "mvdan.cc/sh/v3/interp" +) + +// defaultKillTimeout matches mvdan's DefaultExecHandler default. +const defaultKillTimeout = 2 * time.Second + +// isolateProcess is a no-op on Windows. Session isolation via Setsid is a +// Unix-only concept; Windows uses CREATE_NEW_PROCESS_GROUP which mvdan's +// default handler already handles adequately. +func isolateProcess(_ *exec.Cmd) {} + +// processGroupExecHandler returns interp.DefaultExecHandler on Windows. +func processGroupExecHandler(killTimeout time.Duration) interp.ExecHandlerFunc { + return interp.DefaultExecHandler(killTimeout) +} diff --git a/internal/shell/expand.go b/internal/shell/expand.go index bef83310bf..b809a03860 100644 --- a/internal/shell/expand.go +++ b/internal/shell/expand.go @@ -93,7 +93,7 @@ func ExpandValue(ctx context.Context, value string, env []string) (string, error interp.Interactive(false), interp.Env(expand.ListEnviron(env...)), interp.Dir(s.cwd), - interp.ExecHandlers(standardHandlers(s.blockFuncs)...), + execHandlerOption(s.blockFuncs), } if strict { // Match the outer NoUnset: an unset $VAR inside diff --git a/internal/shell/isolation_unix_test.go b/internal/shell/isolation_unix_test.go new file mode 100644 index 0000000000..e8d6da7e35 --- /dev/null +++ b/internal/shell/isolation_unix_test.go @@ -0,0 +1,143 @@ +//go:build !windows + +package shell + +import ( + "bytes" + "context" + "os" + "os/exec" + "os/signal" + "strings" + "syscall" + "testing" + "time" +) + +// TestProcessIsolation_SignalDoesNotReachParent verifies that a child +// sending SIGINT to its own process group does not deliver the signal to +// the parent (test) process. Without Setsid isolation, the child shares +// the parent's process group and the signal would kill the test. +func TestProcessIsolation_SignalDoesNotReachParent(t *testing.T) { + t.Parallel() + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT) + defer signal.Stop(sigCh) + + var stderr bytes.Buffer + err := Run(t.Context(), RunOptions{ + Command: `sh -c 'kill -INT -$$'`, + Cwd: t.TempDir(), + Env: os.Environ(), + Stderr: &stderr, + }) + + if err == nil { + t.Log("child exited 0 after self-signal (unexpected but not a failure)") + } + + select { + case sig := <-sigCh: + t.Fatalf("SIGINT leaked to parent process: %v (stderr=%q)", sig, stderr.String()) + case <-time.After(200 * time.Millisecond): + } +} + +// TestProcessIsolation_TTYAccessDoesNotBlock verifies that a child trying +// to read from /dev/tty does not block or suspend the parent. With Setsid, +// the child has no controlling terminal, so /dev/tty access fails immediately +// with ENXIO. +func TestProcessIsolation_TTYAccessDoesNotBlock(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), 3*time.Second) + defer cancel() + + var stderr bytes.Buffer + done := make(chan error, 1) + go func() { + done <- Run(ctx, RunOptions{ + Command: `sh -c 'cat < /dev/tty'`, + Cwd: t.TempDir(), + Env: os.Environ(), + Stderr: &stderr, + }) + }() + + select { + case err := <-done: + if err == nil { + t.Fatal("expected error from /dev/tty access in detached session, got nil") + } + case <-time.After(3 * time.Second): + t.Fatalf("/dev/tty access blocked for >3s; session isolation may be broken (stderr=%q)", stderr.String()) + } +} + +// TestProcessIsolation_ZshJobControlDoesNotSuspend simulates the exact +// scenario that caused the original bug: running zsh with job control +// enabled. Without session isolation, zsh tries to become the foreground +// process group leader on the controlling terminal, gets SIGTTIN, and +// suspends. Skips if zsh is not installed. +func TestProcessIsolation_ZshJobControlDoesNotSuspend(t *testing.T) { + t.Parallel() + + if _, err := exec.LookPath("zsh"); err != nil { + t.Skip("zsh not installed") + } + + ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second) + defer cancel() + + var stdout, stderr bytes.Buffer + done := make(chan error, 1) + go func() { + done <- Run(ctx, RunOptions{ + Command: `zsh -i -c 'echo ok'`, + Cwd: t.TempDir(), + Env: os.Environ(), + Stdout: &stdout, + Stderr: &stderr, + }) + }() + + select { + case err := <-done: + if err != nil { + t.Logf("zsh exited with error (may be expected): %v", err) + } + out := strings.TrimSpace(stdout.String()) + if !strings.Contains(out, "ok") { + t.Errorf("expected 'ok' in stdout, got %q (stderr=%q)", out, stderr.String()) + } + case <-time.After(5 * time.Second): + t.Fatalf("zsh -i -c did not complete within 5s; likely suspended on TTY (stderr=%q)", stderr.String()) + } +} + +// TestProcessIsolation_ChildProcessGroupKill verifies that when context +// is cancelled, the signal reaches the entire child process group (not +// just the direct child). +func TestProcessIsolation_ChildProcessGroupKill(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond) + defer cancel() + + start := time.Now() + err := Run(ctx, RunOptions{ + Command: `sh -c 'sleep 60 & wait'`, + Cwd: t.TempDir(), + Env: os.Environ(), + }) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected error from cancelled context") + } + // Allow up to 4s: 500ms context timeout + 2s kill timeout + margin. + if elapsed > 4*time.Second { + t.Fatalf("cancellation took %v; expected prompt process group kill", elapsed) + } +} diff --git a/internal/shell/run.go b/internal/shell/run.go index 01986a6fd5..cafab786d8 100644 --- a/internal/shell/run.go +++ b/internal/shell/run.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "slices" "strings" "mvdan.cc/sh/moreinterp/coreutils" @@ -95,10 +96,29 @@ func newRunner(cwd string, env []string, stdin io.Reader, stdout, stderr io.Writ interp.Interactive(false), interp.Env(expand.ListEnviron(env...)), interp.Dir(cwd), - interp.ExecHandlers(standardHandlers(blockFuncs)...), + execHandlerOption(blockFuncs), ) } +// execHandlerOption returns an interp.RunnerOption that installs the +// standard Crush middleware chain (builtins, script dispatch, block list) +// on top of a process-group-isolated base exec handler. +// +// We use interp.ExecHandler (singular) with a manually-built chain rather +// than interp.ExecHandlers because the latter always appends +// interp.DefaultExecHandler as the final handler, which lacks process group +// isolation. Without isolation, shells like zsh that set up job control +// when sourcing framework files can send SIGINT/SIGTERM to Crush's process +// group and crash the parent. +func execHandlerOption(blockFuncs []BlockFunc) interp.RunnerOption { + base := processGroupExecHandler(defaultKillTimeout) + handler := base + for _, mw := range slices.Backward(standardHandlers(blockFuncs)) { + handler = mw(handler) + } + return interp.ExecHandler(handler) //nolint:staticcheck // ExecHandlers always appends DefaultExecHandler which lacks process isolation. +} + // nonInteractiveEnvVars are forced on every shell execution to prevent // commands from hanging on a nonexistent TTY. These are always applied // regardless of the caller's environment because Crush shells are never