From 7376c553c9d7526a89b648d104e4e714146d05b9 Mon Sep 17 00:00:00 2001 From: tommy230 Date: Wed, 24 Jun 2026 04:02:56 -0400 Subject: [PATCH 1/3] fix(watcher): preserve check wakes before suppression --- AGENTS.md | 6 +-- README.md | 1 + bin/fm-pr-check.sh | 7 +-- bin/fm-teardown.sh | 19 +++++++- bin/fm-watch.sh | 41 +++++++++++++++- tests/fm-teardown.test.sh | 26 ++++++++++ tests/fm-wake-queue.test.sh | 95 +++++++++++++++++++++++++++++++++++++ 7 files changed, 186 insertions(+), 9 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index f268e4e..6e9f6a1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -87,7 +87,7 @@ state/ volatile runtime signals; gitignored .wake-queue durable queued wakes: epochseqkindkeypayload .afk durable away-mode flag; present = sub-supervisor may inject escalations (set by /afk, cleared on user return) .watch.lock .wake-queue.lock watcher singleton and queue serialization locks - .hash-* .count-* .stale-* .seen-* .last-* .heartbeat-streak watcher internals; never touch + .hash-* .count-* .stale-* .seen-* .babysit-* .escalated-* .last-* .heartbeat-streak watcher internals; never touch .last-watcher-beat watcher liveness beacon, touched every poll; fm-guard.sh reads it .subsuper-* .supervise-daemon.* sub-supervisor internals (stale markers, escalation buffer, inject-wedged marker, seen-status dedup, log, lock, pid); never touch .no-mistakes/ local validation state and evidence; gitignored @@ -439,7 +439,7 @@ Use chat for yes/no decisions; use lavish-axi when there are multiple findings o For PR-based ship tasks, the ready signal depends on mode: `no-mistakes` reports `done: PR checks green` after CI is green, while `direct-PR` reports `done: PR ` after opening the PR. Run `bin/fm-pr-check.sh ` - it records `pr=` in the task's meta and arms the watcher's merge poll. Tell the captain: the PR's full URL (always the complete `https://...` link, never a bare `#number` - the captain's terminal makes a full URL clickable), a one-paragraph summary, and, for `no-mistakes`, the risk level it emitted. -(The check contract, for any custom `state/.check.sh` you write yourself: print one line only when firstmate should wake, print nothing otherwise, and finish before `FM_CHECK_TIMEOUT`.) +(The check contract, for any custom `state/.check.sh` you write yourself: prefer printing the current terminal state every run, such as `echo "merged"` while merged. The watcher dedups repeated output with `.seen-check-` and enqueues to the durable queue before advancing that marker, so a lost stdout or crashed watcher cannot swallow a wake. Edge-triggered checks that self-suppress with `.babysit-*.seen` still work, but a swallowed terminal transition then relies on the watcher catch-all backstop. Finish before `FM_CHECK_TIMEOUT`.) If the captain says "merge it", run `gh-axi pr merge` yourself; that instruction is the explicit approval. If `yolo=on`, merge a green/approved PR yourself and post the required FYI. @@ -485,7 +485,7 @@ From there the task is an ordinary ship task through its mode-specific validatio The watcher is the backbone. Whenever at least one task is in flight, `bin/fm-watch.sh` must be running as a background task. It costs zero tokens while running and exits with one reason line when something needs you. -It also writes each detected wake to the durable queue at `state/.wake-queue` before advancing suppression markers such as `.seen-*`, `.stale-*`, `.last-check`, or `.last-heartbeat`. +It also writes each detected wake to the durable queue at `state/.wake-queue` before advancing suppression markers such as `.seen-*`, `.stale-*`, `.seen-check-*`, `.escalated-*`, `.last-check`, or `.last-heartbeat`. At the start of every wake-handling turn and every recovery turn, run `bin/fm-wake-drain.sh` before peeking panes, reading status files beyond the reason line, or starting new work. The printed one-shot reason line is still useful, but the drained queue is the lossless backlog. After handling drained wakes, re-arm `bin/fm-watch.sh` before you end the turn. diff --git a/README.md b/README.md index 9ab98ff..1eca291 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ firstmate works from any terminal - outside tmux, crewmates land in a detached ` - **Event-driven supervision** - a zero-token bash watcher (`bin/fm-watch.sh`) sleeps on the fleet and wakes the first mate only when a crewmate reports, stalls, a PR merges, or an internal heartbeat review is due. Detected wakes are also written to a durable local queue (`state/.wake-queue`) before detector state advances, so a missed one-shot process exit can be recovered by draining the queue. + Custom slow checks should print their current terminal state idempotently; the watcher dedups repeated output and keeps a catch-all backstop for legacy self-suppressing `.babysit-*.seen` checks. Routine watcher polling, restarts, elapsed waiting time, and unchanged heartbeat reviews stay silent; an idle crew costs you nothing. A pull-based guard (`bin/fm-guard.sh`) warns through supervision tool output if tasks are in flight and that watcher stops running or queued wakes are waiting to be drained. A presence-gated sub-supervisor (`bin/fm-supervise-daemon.sh`) extends this for walk-away supervision: the `/afk` skill activates it, after which it self-handles routine wakes in bash and escalates only captain-relevant events as one batched, single-line digest (prefixed with an in-band sentinel marker so firstmate can tell daemon injections apart from real messages). diff --git a/bin/fm-pr-check.sh b/bin/fm-pr-check.sh index 928226e..71823fe 100755 --- a/bin/fm-pr-check.sh +++ b/bin/fm-pr-check.sh @@ -1,8 +1,9 @@ #!/usr/bin/env bash # Record a PR-ready task: appends pr= to state/.meta and arms the -# watcher's merge poll by writing state/.check.sh, which prints one line iff -# the PR is merged (the watcher's check contract: output = wake firstmate, -# silence = keep sleeping). +# watcher's merge poll by writing state/.check.sh. Once the PR is merged, +# the check prints the current terminal state every run; the watcher dedups the +# repeated output and enqueues the first delta before advancing suppression. +# Silence means "no current terminal state; keep sleeping." # Usage: fm-pr-check.sh set -eu diff --git a/bin/fm-teardown.sh b/bin/fm-teardown.sh index ddd0a6d..ea1f8f1 100755 --- a/bin/fm-teardown.sh +++ b/bin/fm-teardown.sh @@ -72,6 +72,23 @@ meta_value() { grep "^$key=" "$meta" | cut -d= -f2- || true } +sanitize_state_name() { printf '%s' "$1" | LC_ALL=C tr -c 'A-Za-z0-9._-' '_'; } + +cleanup_task_state() { + local state=$1 id=$2 check_name sidecar_name + check_name=$(sanitize_state_name "$id.check.sh") + sidecar_name=$(sanitize_state_name ".babysit-$id.seen") + rm -f \ + "$state/$id.status" \ + "$state/$id.turn-ended" \ + "$state/$id.check.sh" \ + "$state/$id.meta" \ + "$state/$id.pi-ext.ts" \ + "$state/.seen-check-$check_name" \ + "$state/.babysit-$id.seen" \ + "$state/.escalated-$sidecar_name" +} + backlog_refresh_reminder() { local pr done_cmd report_path if fm_tasks_axi_compatible; then @@ -479,7 +496,7 @@ if [ "$KIND" = secondmate ]; then remove_firstmate_home "$HOME_PATH" "secondmate home" "$ID" remove_secondmate_registry_entry "$ID" fi -rm -f "$STATE/$ID.status" "$STATE/$ID.turn-ended" "$STATE/$ID.check.sh" "$STATE/$ID.meta" "$STATE/$ID.pi-ext.ts" +cleanup_task_state "$STATE" "$ID" if [ "$KIND" != scout ] && [ "$KIND" != secondmate ] && [ "$MODE" != local-only ]; then "$FM_ROOT/bin/fm-fleet-sync.sh" "$PROJ" || true fi diff --git a/bin/fm-watch.sh b/bin/fm-watch.sh index daa4356..077138e 100755 --- a/bin/fm-watch.sh +++ b/bin/fm-watch.sh @@ -4,7 +4,9 @@ # signal: ... a crewmate wrote a status line or a turn-end hook fired; signals # landing within FM_SIGNAL_GRACE of each other coalesce into one wake # stale: a crewmate pane stopped changing and shows no busy signature -# check: