diff --git a/AGENTS.md b/AGENTS.md index 2578041..f41139b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -32,7 +32,8 @@ Hard rules, in priority order: The one standing, captain-authorized relaxation is a project's `yolo` flag (section 7): with `yolo` on, firstmate makes routine approval decisions itself, but anything destructive, irreversible, or security-sensitive still escalates to the captain. 3. **Never tear down a worktree that holds unlanded work.** `bin/fm-teardown.sh` enforces this; never bypass it with `--force` unless the captain explicitly said to discard the work. - The work is "landed" once `HEAD` is reachable from any remote-tracking branch (a fork counts as a remote - upstream-contribution PRs pushed to a fork satisfy this in any mode); for `local-only` ship tasks with no remote at all, the work may instead be merged into the local default branch. + The work is "landed" once `HEAD` is reachable from any remote-tracking branch (a fork counts as a remote - upstream-contribution PRs pushed to a fork satisfy this in any mode); for a normal ship task whose commits are not so reachable, it is also landed when its PR is merged and GitHub reports the current worktree HEAD as that PR's head (which covers the common squash-merge-then-delete-branch flow, where the branch's commits live nowhere on a remote yet the recorded work merged) or when its content is already present in the up-to-date default branch; for `local-only` ship tasks with no remote at all, the work may instead be merged into the local default branch. + Uncommitted changes are never landed. The scout carve-out: a scout task's worktree is declared scratch from the start - its deliverable is the report, and teardown lets the worktree go once that report exists (section 7). 4. **Crewmates never address the captain.** All crewmate communication flows through you. @@ -81,7 +82,7 @@ projects/ cloned repos; gitignored; READ-ONLY for you state/ volatile runtime signals; gitignored .status appended by crewmates: ": " lines .turn-ended touched by turn-end hooks - .meta written by fm-spawn: window=, worktree=, project=, harness=, kind=, mode=, yolo=; kind=secondmate also records home= and projects= (fm-pr-check appends pr=) + .meta written by fm-spawn: window=, worktree=, project=, harness=, kind=, mode=, yolo=; kind=secondmate also records home= and projects= (fm-pr-check appends pr= and verified pr_head= when available) .check.sh optional slow poll you write per task (e.g. merged-PR check) .wake-queue durable queued wakes: epochseqkindkeypayload .afk durable away-mode flag; present = sub-supervisor may inject escalations (set by /afk, cleared on user return) @@ -356,7 +357,7 @@ Because `fm-send` to a `kind=secondmate` target marks the request as from-firstm A ship task's path from `done` to landed on `main` is set by the project's `mode` (recorded in meta; section 6); `yolo` decides who approves. The Validate / PR ready / Ship teardown stages below are written for the `no-mistakes` path; the other modes diverge: - **no-mistakes** - the stages below as written: no-mistakes validation pipeline -> PR -> captain merge. -- **direct-PR** - no pipeline. The crewmate pushes and opens the PR itself (its brief says so) and reports `done: PR `. Skip the Validate step and go straight to PR ready (run `fm-pr-check`, relay the PR). Teardown uses the normal pushed-branch check. +- **direct-PR** - no pipeline. The crewmate pushes and opens the PR itself (its brief says so) and reports `done: PR `. Skip the Validate step and go straight to PR ready (run `fm-pr-check`, relay the PR). Teardown uses the normal landed-work check. - **local-only** - no remote, no PR. The crewmate stops at `done: ready in branch fm/`. Review the diff with `bin/fm-review-diff.sh `, relay a one-paragraph summary to the captain, and on approval run `bin/fm-merge-local.sh ` to fast-forward local `main` (it refuses anything but a clean fast-forward - if it does, have the crewmate rebase). No `fm-pr-check`. Then teardown, whose safety check requires the branch already merged into local `main`, OR the work pushed to any remote (a fork counts - relevant for upstream-contribution PRs on a local-only-registered project). When reviewing any crewmate branch diff, use `bin/fm-review-diff.sh ` rather than `git diff ...branch` directly. @@ -377,7 +378,7 @@ Use chat for yes/no decisions; use lavish-axi when there are multiple findings o ### PR ready For PR-based ship tasks, the ready signal depends on mode: `no-mistakes` reports `done: PR checks green` after CI is green, while `direct-PR` reports `done: PR ` after opening the PR. -Run `bin/fm-pr-check.sh ` - it records `pr=` in the task's meta and arms the watcher's merge poll. +Run `bin/fm-pr-check.sh ` - it records `pr=` and a verified `pr_head=` when available in the task's meta and arms the watcher's merge poll. Tell the captain: the PR's full URL (always the complete `https://...` link, never a bare `#number` - the captain's terminal makes a full URL clickable), a one-paragraph summary, and, for `no-mistakes`, the risk level it emitted. (The check contract, for any custom `state/.check.sh` you write yourself: print one line only when firstmate should wake, print nothing otherwise, and finish before `FM_CHECK_TIMEOUT`.) @@ -389,7 +390,10 @@ If the captain says "merge it", run `gh-axi pr merge` yourself; that instruction bin/fm-teardown.sh ``` -The script refuses if the worktree holds unpushed work; treat a refusal as a stop-and-investigate, not an obstacle. +The script refuses if the worktree holds uncommitted changes or committed work that has not landed; treat a refusal as a stop-and-investigate, not an obstacle. +"Landed" is broader than remote-reachable: for a normal ship task whose commits are not reachable from any remote-tracking branch, the script also accepts the work when its PR is merged and GitHub reports the current worktree HEAD as that PR's head, or when its content is already present in the up-to-date default branch. +This recognizes the common squash-merge-then-delete-branch flow, where the branch's own commits live nowhere on a remote yet the change is fully in `main`; a merged-and-deleted branch now tears down cleanly instead of false-refusing. +Genuinely unlanded work (no matching merged PR head and content not in the default branch) and dirty worktrees still refuse, and a gh lookup error falls back to the content check rather than silently allowing. Known benign case: after an external-PR task, a squash merge leaves the branch commits reachable only on the contributor's fork; add the fork as a remote and fetch (`git remote add fork && git fetch fork`), then retry - never reach for `--force`. After a successful PR-based teardown, it also runs `bin/fm-fleet-sync.sh` for that project, best-effort, so the clone's local default catches up to the merge and the just-merged branch, now gone on the remote and free of its worktree, is pruned immediately. Then update the backlog using the teardown reminder: run `tasks-axi done` when the compatible tool is available, otherwise move the task to Done in `data/backlog.md` manually with the full `https://...` PR URL or local merge note and date and keep Done to the 10 most recent. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 17d306c..0d0a72b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -74,7 +74,7 @@ tests/fm-update.test.sh # fast-forward-only self-update, rerea tests/fm-secondmate-sync.test.sh # local-HEAD secondmate sync, no-fetch, bootstrap nudge gating, and spawn hook tests tests/fm-secondmate-lifecycle-e2e.test.sh # persistent secondmate routing, seeding, backlog handoff, spawn, recovery, teardown, and FM_HOME flow tests tests/fm-secondmate-safety.test.sh # secondmate home safety, idle charter, handoff validation, and teardown boundary tests -tests/fm-teardown.test.sh # fm-teardown.sh safety and reminder checks: local-only fork-remote allow, truly-unpushed refuse, merged-to-main allow, no-mistakes regression, tasks-axi reminder, --force override +tests/fm-teardown.test.sh # fm-teardown.sh landed-work safety and reminder checks: fork-remote allow, squash/content landings, dirty and unlanded refusals, PR-head metadata, tasks-axi reminder, --force override [ "$(readlink CLAUDE.md)" = "AGENTS.md" ] [ "$(readlink .claude/skills)" = "../.agents/skills" ] FM_HEARTBEAT=2 FM_POLL=1 bin/fm-watch-arm.sh # watcher re-arm smoke test (prints arm status, then "heartbeat") diff --git a/bin/fm-pr-check.sh b/bin/fm-pr-check.sh index 928226e..4271654 100755 --- a/bin/fm-pr-check.sh +++ b/bin/fm-pr-check.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash -# Record a PR-ready task: appends pr= to state/.meta and arms the -# watcher's merge poll by writing state/.check.sh, which prints one line iff -# the PR is merged (the watcher's check contract: output = wake firstmate, -# silence = keep sleeping). +# Record a PR-ready task: appends pr= and a verified pr_head= to +# state/.meta when available, then arms the watcher's merge poll by writing +# state/.check.sh, which prints one line iff the PR is merged (the watcher's +# check contract: output = wake firstmate, silence = keep sleeping). # Usage: fm-pr-check.sh set -eu @@ -15,8 +15,26 @@ ID=$1 URL=$2 META="$STATE/$ID.meta" -if [ -f "$META" ] && ! grep -qxF "pr=$URL" "$META"; then - echo "pr=$URL" >> "$META" +if [ -f "$META" ]; then + WT=$(grep '^worktree=' "$META" | tail -1 | cut -d= -f2- || true) + LOCAL_HEAD= + PR_HEAD= + if [ -n "$WT" ] && [ -d "$WT" ]; then + LOCAL_HEAD=$(git -C "$WT" rev-parse --verify HEAD 2>/dev/null || true) + if [ -n "$LOCAL_HEAD" ] && command -v gh >/dev/null 2>&1; then + if REMOTE_HEAD=$(cd "$WT" && gh pr view "$URL" --json headRefOid -q .headRefOid 2>/dev/null); then + if [ "$LOCAL_HEAD" = "$REMOTE_HEAD" ]; then + PR_HEAD=$LOCAL_HEAD + fi + fi + fi + fi + if ! grep -qxF "pr=$URL" "$META"; then + echo "pr=$URL" >> "$META" + fi + if [ -n "$PR_HEAD" ] && ! grep -qxF "pr_head=$PR_HEAD" "$META"; then + echo "pr_head=$PR_HEAD" >> "$META" + fi fi cat > "$STATE/$ID.check.sh" <.meta so fm-teardown.sh applies the full unpushed-work protection +# state/.meta so fm-teardown.sh applies the full ship-task teardown protection # again. After promoting, send the crewmate its ship instructions via fm-send.sh # (inventory scratch state, reset to a clean default-branch base, carry over only # intended fix changes, create branch fm/, implement, then report done diff --git a/bin/fm-teardown.sh b/bin/fm-teardown.sh index ddd0a6d..e08e448 100755 --- a/bin/fm-teardown.sh +++ b/bin/fm-teardown.sh @@ -3,9 +3,18 @@ # secondmate home, kill the tmux window, clear volatile state, refresh/prune # the project's clone for PR-based ship tasks, then print a backlog-refresh # reminder. -# REFUSES if the worktree holds work not on any remote, because treehouse return -# hard-resets the worktree and kills its processes. A fork counts as a remote, -# so upstream-contribution PRs pushed to a fork satisfy this in any mode. +# REFUSES if the worktree holds work that has not LANDED, because treehouse return +# hard-resets the worktree and kills its processes. Work has landed when it is +# reachable from any remote-tracking branch (a fork counts as a remote, so +# upstream-contribution PRs pushed to a fork satisfy this in any mode), OR - for a +# normal ship task whose commits are not so reachable - when its PR is merged and +# GitHub reports the current HEAD as that PR's head, or its content is already +# present in the up-to-date default branch. This recognizes the common +# squash-merge-then-delete-branch flow, where the branch's own commits live nowhere +# on a remote yet the change is fully in main. +# A gh lookup error falls back to the content check; if that is also inconclusive, +# teardown refuses rather than risk discarding unlanded work. +# Uncommitted changes are never landed. # local-only projects additionally accept work merged into the local default # branch (firstmate performs that merge on the captain's approval) as a fallback # for the common case where there is no remote at all. @@ -20,9 +29,9 @@ # never left leased forever. If the treehouse return fails, teardown leaves the # leased home and state in place instead of hiding a still-held lease. # Usage: fm-teardown.sh [--force] -# --force skips the unpushed-work check for ordinary tasks and discards -# secondmate child work for kind=secondmate. Only use it when the captain has -# explicitly said to discard the work. +# --force skips ordinary-task dirty and landed-work checks, skips scout report +# checks, and discards secondmate child work for kind=secondmate. Only use it +# when the captain has explicitly said to discard the work. set -eu SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -72,6 +81,79 @@ meta_value() { grep "^$key=" "$meta" | cut -d= -f2- || true } +# Resolve the PR number for a worktree branch via gh-axi. Echoes the number on a +# single match and returns 0; returns non-zero on no match or any lookup failure, +# so the caller treats it as "no PR found" (fail-safe). +pr_number_from_branch() { + local branch=$1 out n + [ -n "$branch" ] && [ "$branch" != HEAD ] || return 1 + out=$( cd "$WT" && gh-axi pr list --state all --head "$branch" --limit 1 2>/dev/null ) || return 1 + n=$(printf '%s\n' "$out" | sed -n 's/^[[:space:]]*\([0-9][0-9]*\),.*/\1/p' | head -1) + [ -n "$n" ] || return 1 + printf '%s' "$n" +} + +# Is the worktree's PR merged for this exact HEAD? Resolves the PR from the +# recorded pr= URL first, then from the branch name, and asks GitHub for both the +# PR state and head. Returns non-zero when the PR is not merged, the current HEAD +# is not the PR head, no PR is found, or any gh error occurs - the caller then +# falls back to the content check. +pr_is_merged() { + local branch=$1 target view state head current + if [ -n "$PR_URL" ]; then + target=$PR_URL + else + target=$(pr_number_from_branch "$branch") || return 1 + fi + [ -n "$target" ] || return 1 + view=$(cd "$WT" && gh pr view "$target" --json state,headRefOid -q '.state + "\t" + .headRefOid' 2>/dev/null) || return 1 + state=${view%%$'\t'*} + head=${view#*$'\t'} + [ "$state" != "$view" ] || return 1 + case "$state" in + MERGED|merged) ;; + *) return 1 ;; + esac + [ -n "$head" ] || return 1 + current=$(git -C "$WT" rev-parse --verify HEAD 2>/dev/null) || return 1 + [ "$current" = "$head" ] +} + +# Is the branch's content already present in the up-to-date default branch? Fetches +# first, then 3-way merges the default branch with HEAD: when HEAD introduces nothing +# the default branch does not already contain (e.g. its change landed via squash) the +# merged tree equals the default branch's tree. This isolates branch-only changes, so +# unrelated commits the default branch gained past the merge-base do not count as +# "added". Returns non-zero when inconclusive (no default ref, or a merge conflict), +# so the caller refuses rather than guesses. +content_in_default() { + local name ref default_tree merged_tree + name=$(default_branch) || return 1 + if git -C "$WT" remote get-url origin >/dev/null 2>&1; then + git -C "$WT" fetch --quiet origin "+refs/heads/$name:refs/remotes/origin/$name" >/dev/null 2>&1 || return 1 + ref="refs/remotes/origin/$name" + elif git -C "$WT" rev-parse --quiet --verify "refs/heads/$name" >/dev/null 2>&1; then + ref="refs/heads/$name" + else + return 1 + fi + default_tree=$(git -C "$WT" rev-parse --quiet --verify "$ref^{tree}" 2>/dev/null) || return 1 + [ -n "$default_tree" ] || return 1 + merged_tree=$(git -C "$WT" merge-tree --write-tree "$ref" HEAD 2>/dev/null) || return 1 + merged_tree=$(printf '%s\n' "$merged_tree" | head -1) + [ "$merged_tree" = "$default_tree" ] +} + +# Has the worktree's committed work actually LANDED, though its commits are not +# reachable from any remote-tracking branch? True when a merged PR proves the +# current HEAD, OR the content is already in the default branch (fallback, which +# also covers the no-PR and gh-error paths). False only for genuinely unlanded work. +work_is_landed() { + local branch=$1 + pr_is_merged "$branch" && return 0 + content_in_default +} + backlog_refresh_reminder() { local pr done_cmd report_path if fm_tasks_axi_compatible; then @@ -429,9 +511,14 @@ if [ -d "$WT" ] && [ "$FORCE" != "--force" ]; then else # The fm-spawn hook file is ours, never work product; ignore it in the dirty check. dirty=$(git -C "$WT" status --porcelain 2>/dev/null | grep -vE '^\?\? \.claude/' | head -1 || true) - # A worktree's work is "safely on a remote" once HEAD is reachable from ANY - # remote-tracking branch (empty result here). A fork is a remote too, so - # upstream-contribution PRs pushed to a fork satisfy this regardless of mode. + # Reachability test: is HEAD reachable from ANY remote-tracking branch? Empty + # means the work is already pushed (a fork is a remote too, so upstream- + # contribution PRs pushed to a fork pass here). Non-empty does NOT prove the work + # is unlanded: a squash or rebase merge rewrites the branch into a new commit on + # the default branch, and a repo that auto-deletes the head branch on merge also + # drops its remote-tracking ref - so a merged-and-deleted branch trips this test + # while being fully landed. We therefore treat reachability as a fast accept, not + # the sole verdict, and fall through to a landed-work check before refusing. unpushed=$(git -C "$WT" log --oneline HEAD --not --remotes -- 2>/dev/null | head -5 || true) if [ -n "$unpushed" ] && [ "$MODE" = local-only ]; then # local-only ships have no remote in the common case, so the "on a remote" @@ -447,12 +534,26 @@ if [ -d "$WT" ] && [ "$FORCE" != "--force" ]; then echo "Merge the branch into local $DEFAULT first (bin/fm-merge-local.sh after the captain approves), or push to a fork/remote, or get the captain's explicit OK to discard, then --force." >&2 exit 1 fi - elif [ -n "$dirty" ] || [ -n "$unpushed" ]; then - echo "REFUSED: worktree $WT has work not on any remote." >&2 - [ -n "$dirty" ] && echo "uncommitted changes present" >&2 - [ -n "$unpushed" ] && printf 'unpushed commits:\n%s\n' "$unpushed" >&2 - echo "Push the branch (or get the captain's explicit OK to discard, then --force)." >&2 + elif [ -n "$dirty" ]; then + # Uncommitted changes are never landed and the reset would discard them; always + # refuse, regardless of whether the committed work itself has landed. + echo "REFUSED: worktree $WT has uncommitted changes." >&2 + echo "uncommitted changes present" >&2 + echo "Commit them (or get the captain's explicit OK to discard, then --force)." >&2 exit 1 + elif [ -n "$unpushed" ]; then + # Commits not reachable from any remote. Before refusing, recognize LANDED work: + # a merged PR for the current HEAD or content already in the up-to-date default + # branch. On a gh lookup error work_is_landed falls back to the content check, + # and if that is also inconclusive it returns false - so we never silently allow + # teardown of possibly-unlanded work; only genuinely unlanded work is refused. + branch=$(git -C "$WT" rev-parse --abbrev-ref HEAD 2>/dev/null || echo HEAD) + if ! work_is_landed "$branch"; then + echo "REFUSED: worktree $WT has work not on any remote and not landed." >&2 + printf 'unpushed commits:\n%s\n' "$unpushed" >&2 + echo "Push the branch, land its PR, or get the captain's explicit OK to discard, then --force." >&2 + exit 1 + fi fi fi fi diff --git a/docs/architecture.md b/docs/architecture.md index 0a891f3..82e7d84 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -68,6 +68,9 @@ The `data/secondmates.md` line schema and the secondmate environment variables a `data/projects.md` records each project's delivery mode and optional `+yolo` autonomy flag. `no-mistakes` projects run the full validation pipeline, `direct-PR` projects open PRs without that pipeline, and `local-only` projects stay local until firstmate performs an approved fast-forward merge. +Teardown is fail-closed for ship worktrees: dirty worktrees refuse, and committed work must be landed before the worktree is returned. +Landed work is accepted when `HEAD` is reachable from any remote-tracking branch, when a PR for the current `HEAD` is merged, or when the worktree content is already present in the freshly fetched default branch. +That content check lets a squash-merged PR whose head branch was deleted tear down cleanly without using `--force`; `local-only` work instead tears down after the approved local default-branch merge or after the branch is pushed to any remote. ## Project memory belongs to projects diff --git a/docs/scripts.md b/docs/scripts.md index 5aa092a..6aa0007 100644 --- a/docs/scripts.md +++ b/docs/scripts.md @@ -29,8 +29,8 @@ Each file also starts with a short header comment. | `fm-send.sh` | Send one verified literal line (or `--key Escape`) to a direct-report window; exits non-zero on confirmed swallowed Enter; bare `kind=secondmate` targets are marked as from-firstmate; text sends pause `FM_SEND_SETTLE` seconds after success | | `fm-tmux-lib.sh` | Shared tmux pane primitives for busy detection, dim-ghost-aware and border-aware composer detection, and verified submit retry | | `fm-peek.sh` | Print a bounded tail of a crewmate pane | -| `fm-pr-check.sh` | Record a PR-ready task and arm the watcher's merge poll | +| `fm-pr-check.sh` | Record `pr=` and a verified `pr_head=` when available for a PR-ready task, then arm the watcher's merge poll | | `fm-promote.sh` | Promote a scout task in place so it becomes a protected ship task | -| `fm-teardown.sh` | Return the worktree or retire/release a secondmate home; protects ship work, requires scout reports, checks child work, and prints the backlog reminder | +| `fm-teardown.sh` | Return a clean, landed ship worktree or retire/release a secondmate home; requires scout reports, checks child work, and prints the backlog reminder | | `fm-harness.sh` | Detect the running harness; resolve the effective crewmate harness | | `fm-lock.sh` | Per-home firstmate session lock | diff --git a/tests/fm-teardown.test.sh b/tests/fm-teardown.test.sh index f985b55..e5cb135 100755 --- a/tests/fm-teardown.test.sh +++ b/tests/fm-teardown.test.sh @@ -1,24 +1,42 @@ #!/usr/bin/env bash -# Tests for bin/fm-teardown.sh's unpushed-work safety check. +# Tests for bin/fm-teardown.sh's landed-work safety check. # -# Covers the local-only fork-remote fix: a local-only-registered project whose -# task pushes its work to a fork (upstream-contribution PRs) must be teardown- -# eligible because a fork IS a remote. The pre-fix code short-circuited to a -# strict local-main check and false-refused legitimate fork-pushed work. +# The check refuses to tear down a worktree whose work has not LANDED, because +# treehouse return hard-resets the worktree. "Landed" means reachable from a remote +# OR - for a normal ship task whose commits are not so reachable - its PR is merged +# and GitHub reports the current HEAD as that PR's head, or its content is already +# in the up-to-date default branch. +# +# Covers two fixes: +# - local-only fork-remote: a fork IS a remote, so fork-pushed upstream- +# contribution PRs are teardown-eligible (the pre-fix code false-refused them). +# - squash-merge-then-delete-branch: the branch's own commits live nowhere on a +# remote after a squash merge deletes the head branch, yet the change is fully in +# main. Reachability alone false-refused this common GitHub flow; the check now +# recognizes the matching merged PR head (or the content already in main) as +# landed. # # Matrix: -# (a) local-only + HEAD on a fork remote-tracking branch -> ALLOW (the fix) +# (a) local-only + HEAD on a fork remote-tracking branch -> ALLOW (fork fix) # (b) local-only + truly unpushed work (no remote, not main) -> REFUSE (safety) # (c) local-only + merged into local main, no remote -> ALLOW (no regression) -# (d) no-mistakes + HEAD on origin remote-tracking branch -> ALLOW (no regression) -# (e) no-mistakes + truly unpushed work -> REFUSE (no regression) +# (d) no-mistakes + HEAD on origin remote-tracking branch -> ALLOW (no regression) +# (e) no-mistakes + unpushed, no PR, content not in default -> REFUSE (safety) # (f) local-only + truly unpushed + --force -> ALLOW (escape hatch) +# (g) no-mistakes + squash-merged PR, branch-deleted -> ALLOW (squash fix) +# (h) no-mistakes + no PR but content already in default -> ALLOW (content fallback) +# (i) no-mistakes + dirty worktree, even when work landed -> REFUSE (dirty wins) +# (j) no-mistakes + gh lookup errors + content not in default -> REFUSE (fail-safe) +# (k) no-mistakes + merged PR but HEAD moved afterward -> REFUSE (stale PR) +# (l) no-mistakes + stale origin/main but fetched content -> ALLOW (fresh fetch) +# (m) fm-pr-check rerun after HEAD moved -> no stale pr_head set -u # shellcheck source=tests/lib.sh . "$(dirname "${BASH_SOURCE[0]}")/lib.sh" TEARDOWN="$ROOT/bin/fm-teardown.sh" +PR_CHECK="$ROOT/bin/fm-pr-check.sh" TMP_ROOT=$(fm_test_tmproot fm-teardown-tests) # Build a fresh sandbox for one test case. Sets up: @@ -46,7 +64,26 @@ SH # tmux kill-window etc.: succeed silently. exit 0 SH - chmod +x "$fakebin/treehouse" "$fakebin/tmux" + # Default gh-axi mock: no PR is associated with the branch, and viewing any PR + # number fails. This keeps the landed-work check hermetic (never reaching the real + # gh-axi) and represents the common "no GitHub PR" baseline. Tests that need a + # merged PR or a lookup error override this file with the helpers below. + cat > "$fakebin/gh-axi" <<'SH' +#!/usr/bin/env bash +case "${1:-} ${2:-}" in + "pr list") printf '%s\n' "count: 0 (showing first 0)" "pull_requests[]: []" ; exit 0 ;; + "pr view") echo "error: pull request not found" >&2 ; exit 1 ;; +esac +exit 0 +SH + cat > "$fakebin/gh" <<'SH' +#!/usr/bin/env bash +case "${1:-} ${2:-}" in + "pr view") echo "error: pull request not found" >&2 ; exit 1 ;; +esac +exit 0 +SH + chmod +x "$fakebin/treehouse" "$fakebin/tmux" "$fakebin/gh-axi" "$fakebin/gh" # Bare origin so the clone has an `origin` remote and origin/HEAD. git init -q --bare "$case_dir/origin.git" @@ -112,6 +149,84 @@ add_fork_with_pushed_branch() { git -C "$case_dir/project" fetch -q fork } +# Commit a real file change on the worktree's task branch (unlike wt_commit, which +# makes an empty commit). A non-empty tree is what the content-in-default check +# inspects. Args: case_dir file content [message] +wt_commit_file() { + local case_dir=$1 file=$2 content=$3 msg=${4:-add $2} + printf '%s\n' "$content" > "$case_dir/wt/$file" + git -C "$case_dir/wt" add -- "$file" + git -C "$case_dir/wt" -c user.email=t@t -c user.name=t commit -q -m "$msg" +} + +# Land = as a single commit on origin's default branch, simulating a +# squash merge whose net change matches the task branch but whose commit differs. +# After this, the branch's content is in origin/main even though the branch's own +# commits are not reachable from it. Args: case_dir file content +land_on_origin_main() { + local case_dir=$1 file=$2 content=$3 tmp + tmp="$case_dir/_land" + git clone -q "$case_dir/origin.git" "$tmp" + printf '%s\n' "$content" > "$tmp/$file" + git -C "$tmp" add -- "$file" + git -C "$tmp" -c user.email=t@t -c user.name=t commit -q -m "squash $file" + git -C "$tmp" push -q origin HEAD:main + rm -rf "$tmp" +} + +# Override GitHub lookups to report PR 7 as merged with the supplied head. +add_gh_pr_merged_for_head() { + local case_dir=$1 head=$2 + cat > "$case_dir/fakebin/gh-axi" <<'SH' +#!/usr/bin/env bash +case "${1:-} ${2:-}" in + "pr list") + printf '%s\n' "count: 1 (showing first 1)" "pull_requests[1]{number,state}:" " 7,merged" ; exit 0 ;; + "pr view") + printf '%s\n' "pull_request:" " number: 7" " state: merged" ' merged: "2026-06-26T00:00:00Z"' ; exit 0 ;; +esac +exit 0 +SH + cat > "$case_dir/fakebin/gh" <&2 +exit 1 +SH + chmod +x "$case_dir/fakebin/gh-axi" "$case_dir/fakebin/gh" +} + +append_pr_meta_for_current_head() { + local case_dir=$1 head + head=$(git -C "$case_dir/wt" rev-parse HEAD) + printf '%s\n' \ + 'pr=https://github.com/example/repo/pull/7' \ + "pr_head=$head" >> "$case_dir/state/task-x1.meta" +} + +# Override gh-axi so every call fails, simulating an API/network error. +add_gh_axi_error() { + local case_dir=$1 + cat > "$case_dir/fakebin/gh-axi" <<'SH' +#!/usr/bin/env bash +echo "error: gh-axi unavailable" >&2 +exit 1 +SH + cat > "$case_dir/fakebin/gh" <<'SH' +#!/usr/bin/env bash +echo "error: gh unavailable" >&2 +exit 1 +SH + chmod +x "$case_dir/fakebin/gh-axi" "$case_dir/fakebin/gh" +} + # Run teardown with PATH mocking. Args: case_dir [extra args...] run_teardown() { local case_dir=$1; shift @@ -221,7 +336,9 @@ test_no_mistakes_truly_unpushed_refuses() { local case_dir rc case_dir=$(make_case nm-unpushed) write_meta "$case_dir" no-mistakes ship - wt_commit "$case_dir" "unpushed work" + # Real content that is not pushed, has no PR (default gh-axi mock), and never + # landed on origin/main: genuinely unlanded work that must still refuse. + wt_commit_file "$case_dir" feature.txt hello "unpushed work" set +e run_teardown "$case_dir" > "$case_dir/stdout" 2> "$case_dir/stderr" @@ -230,7 +347,170 @@ test_no_mistakes_truly_unpushed_refuses() { expect_code 1 "$rc" "nm-unpushed: teardown should refuse" grep -q REFUSED "$case_dir/stderr" || fail "nm-unpushed: no REFUSED line in stderr" - pass "no-mistakes worktree with truly unpushed work is refused (no regression)" + pass "no-mistakes worktree with genuinely unlanded work is refused (safety preserved)" +} + +test_squash_merged_branch_deleted_allows() { + local case_dir rc pr_head + case_dir=$(make_case squash-merged) + write_meta "$case_dir" no-mistakes ship + # Real branch content that is NOT pushed and NOT on origin/main: a squash merge + # rewrote it into a different commit on main and auto-deleted the head branch, so + # HEAD is unreachable from every remote-tracking branch. The matching merged PR is + # the only signal that the work landed. + wt_commit_file "$case_dir" feature.txt hello "add feature" + append_pr_meta_for_current_head "$case_dir" + pr_head=$(git -C "$case_dir/wt" rev-parse HEAD) + add_gh_pr_merged_for_head "$case_dir" "$pr_head" + + set +e + run_teardown "$case_dir" > "$case_dir/stdout" 2> "$case_dir/stderr" + rc=$? + set -e + + expect_code 0 "$rc" "squash-merged: teardown should succeed when the PR is merged" + ! grep -q REFUSED "$case_dir/stderr" || fail "squash-merged: teardown printed a REFUSED line" + pass "squash-merged + deleted-branch worktree (PR merged) is torn down (the fix)" +} + +test_merged_pr_with_later_local_commit_refuses() { + local case_dir rc pr_head + case_dir=$(make_case stale-pr-head) + write_meta "$case_dir" no-mistakes ship + wt_commit_file "$case_dir" feature.txt hello "add feature" + append_pr_meta_for_current_head "$case_dir" + pr_head=$(git -C "$case_dir/wt" rev-parse HEAD) + wt_commit_file "$case_dir" later.txt local-only "local follow-up" + add_gh_pr_merged_for_head "$case_dir" "$pr_head" + + set +e + run_teardown "$case_dir" > "$case_dir/stdout" 2> "$case_dir/stderr" + rc=$? + set -e + + expect_code 1 "$rc" "stale-pr-head: teardown should refuse when HEAD moved after PR recording" + grep -q REFUSED "$case_dir/stderr" || fail "stale-pr-head: no REFUSED line in stderr" + pass "merged PR does not allow teardown after a later local commit" +} + +test_pr_check_does_not_refresh_stale_pr_head() { + local case_dir rc pr_head new_head count + case_dir=$(make_case pr-check-stale) + write_meta "$case_dir" no-mistakes ship + wt_commit_file "$case_dir" feature.txt hello "add feature" + pr_head=$(git -C "$case_dir/wt" rev-parse HEAD) + add_gh_pr_merged_for_head "$case_dir" "$pr_head" + + FM_ROOT_OVERRIDE="$ROOT" \ + FM_STATE_OVERRIDE="$case_dir/state" \ + PATH="$case_dir/fakebin:$PATH" \ + "$PR_CHECK" task-x1 https://github.com/example/repo/pull/7 >/dev/null + + wt_commit_file "$case_dir" later.txt local-only "local follow-up" + new_head=$(git -C "$case_dir/wt" rev-parse HEAD) + + FM_ROOT_OVERRIDE="$ROOT" \ + FM_STATE_OVERRIDE="$case_dir/state" \ + PATH="$case_dir/fakebin:$PATH" \ + "$PR_CHECK" task-x1 https://github.com/example/repo/pull/7 >/dev/null + + count=$(grep -c '^pr_head=' "$case_dir/state/task-x1.meta" || true) + expect_code 1 "$count" "pr-check-stale: stale rerun should not append a second pr_head" + ! grep -qxF "pr_head=$new_head" "$case_dir/state/task-x1.meta" \ + || fail "pr-check-stale: stale rerun recorded the later local HEAD" + + set +e + run_teardown "$case_dir" > "$case_dir/stdout" 2> "$case_dir/stderr" + rc=$? + set -e + + expect_code 1 "$rc" "pr-check-stale: teardown should refuse after a later local commit" + grep -q REFUSED "$case_dir/stderr" || fail "pr-check-stale: no REFUSED line in stderr" + pass "fm-pr-check does not refresh PR head after HEAD moves" +} + +test_content_in_default_fallback_allows() { + local case_dir rc + case_dir=$(make_case content-landed) + write_meta "$case_dir" no-mistakes ship + # No pr= recorded and the default gh-axi mock reports no PR, so the merged-PR path + # cannot fire and the content check must carry it. The branch adds feature.txt, and + # the same net change has independently landed on origin/main via a squash commit. + wt_commit_file "$case_dir" feature.txt hello "add feature" + land_on_origin_main "$case_dir" feature.txt hello + + set +e + run_teardown "$case_dir" > "$case_dir/stdout" 2> "$case_dir/stderr" + rc=$? + set -e + + expect_code 0 "$rc" "content-landed: teardown should succeed when content is already in the default branch" + ! grep -q REFUSED "$case_dir/stderr" || fail "content-landed: teardown printed a REFUSED line" + pass "worktree whose content already landed in the default branch is torn down (content fallback)" +} + +test_content_fallback_refreshes_stale_origin_ref() { + local case_dir rc + case_dir=$(make_case content-stale-ref) + write_meta "$case_dir" no-mistakes ship + wt_commit_file "$case_dir" feature.txt hello "add feature" + git -C "$case_dir/project" config --unset-all remote.origin.fetch + git -C "$case_dir/project" config --add remote.origin.fetch '+refs/heads/not-main:refs/remotes/origin/not-main' + land_on_origin_main "$case_dir" feature.txt hello + + set +e + run_teardown "$case_dir" > "$case_dir/stdout" 2> "$case_dir/stderr" + rc=$? + set -e + + expect_code 0 "$rc" "content-stale-ref: teardown should use the freshly fetched default branch" + ! grep -q REFUSED "$case_dir/stderr" || fail "content-stale-ref: teardown printed a REFUSED line" + pass "content fallback refreshes origin default before comparing trees" +} + +test_dirty_worktree_refuses() { + local case_dir rc pr_head + case_dir=$(make_case dirty-wt) + write_meta "$case_dir" no-mistakes ship + printf '%s\n' 'pr=https://github.com/example/repo/pull/7' >> "$case_dir/state/task-x1.meta" + # The committed work has fully landed (merged PR + content in default), but an + # uncommitted edit remains. Dirtiness must refuse regardless: the reset would + # discard those changes. + wt_commit_file "$case_dir" feature.txt hello "add feature" + land_on_origin_main "$case_dir" feature.txt hello + pr_head=$(git -C "$case_dir/wt" rev-parse HEAD) + add_gh_pr_merged_for_head "$case_dir" "$pr_head" + printf '%s\n' "uncommitted edit" > "$case_dir/wt/feature.txt" + + set +e + run_teardown "$case_dir" > "$case_dir/stdout" 2> "$case_dir/stderr" + rc=$? + set -e + + expect_code 1 "$rc" "dirty-wt: teardown should refuse a dirty worktree even when the committed work has landed" + grep -q REFUSED "$case_dir/stderr" || fail "dirty-wt: no REFUSED line in stderr" + grep -q "uncommitted changes" "$case_dir/stderr" || fail "dirty-wt: refusal did not cite uncommitted changes" + pass "dirty worktree is refused even when its committed work has landed (dirty always wins)" +} + +test_gh_error_and_content_absent_refuses() { + local case_dir rc + case_dir=$(make_case gh-error) + write_meta "$case_dir" no-mistakes ship + printf '%s\n' 'pr=https://github.com/example/repo/pull/7' >> "$case_dir/state/task-x1.meta" + # Real content not pushed, the PR lookup errors, and origin/main never gained the + # content. The fail-safe must refuse rather than allow on a transient gh failure. + wt_commit_file "$case_dir" feature.txt hello "add feature" + add_gh_axi_error "$case_dir" + + set +e + run_teardown "$case_dir" > "$case_dir/stdout" 2> "$case_dir/stderr" + rc=$? + set -e + + expect_code 1 "$rc" "gh-error: teardown should refuse when the PR lookup errors and content is not landed" + grep -q REFUSED "$case_dir/stderr" || fail "gh-error: no REFUSED line in stderr" + pass "gh lookup error with content not in default refuses (fail-safe)" } test_local_only_force_overrides_unpushed() { @@ -256,3 +536,10 @@ test_local_only_merged_to_local_main_allows test_no_mistakes_origin_remote_allows test_no_mistakes_truly_unpushed_refuses test_local_only_force_overrides_unpushed +test_squash_merged_branch_deleted_allows +test_merged_pr_with_later_local_commit_refuses +test_pr_check_does_not_refresh_stale_pr_head +test_content_in_default_fallback_allows +test_content_fallback_refreshes_stale_origin_ref +test_dirty_worktree_refuses +test_gh_error_and_content_absent_refuses