From f8e6ddd7125ea054293a0550a312e564d6bbd7e0 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 05:02:08 +0000 Subject: [PATCH 01/16] feat(supervision): GitHub events watcher (comments/CI/reviews) with CLI filter knobs --- bin/fm-github-watch.sh | 424 ++++++++++++++++++++++++++++++++++ tests/fm-github-watch.test.sh | 300 ++++++++++++++++++++++++ 2 files changed, 724 insertions(+) create mode 100755 bin/fm-github-watch.sh create mode 100755 tests/fm-github-watch.test.sh diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh new file mode 100755 index 0000000..b235ec4 --- /dev/null +++ b/bin/fm-github-watch.sh @@ -0,0 +1,424 @@ +#!/usr/bin/env bash +# fm-github-watch.sh — GitHub events watcher for the fleet's open PRs. +# +# Discovers all of a contributor's open PRs and surfaces new maintainer +# comments, CI status changes, reviews, and merge/close transitions as +# one-line events on stdout. Built to run as a watcher check script: it +# prints iff firstmate should wake, and stays silent otherwise. +# +# Wire it in with a check script the existing watcher already sweeps, e.g.: +# ln -s ../bin/fm-github-watch.sh state/github-events.check.sh +# bin/fm-watch.sh runs state/*.check.sh every FM_CHECK_INTERVAL (default +# 300s); any stdout is captured, classified as a `check` wake, escalated. +# +# Usage: +# fm-github-watch.sh # one poll cycle (same as --once) +# fm-github-watch.sh --once # one poll cycle +# fm-github-watch.sh --daemon # loop, polling every poll_interval +# fm-github-watch.sh filter list # show active filters +# fm-github-watch.sh filter on|off +# fm-github-watch.sh contributor # show configured contributor +# fm-github-watch.sh contributor +# fm-github-watch.sh status # show config + seen-state summary +# +# Filter names: comments, ci, reviews, merge. +# Config: state/.github-watch-config (key=value lines). +# Seen: state/.github-watch-seen/-- (key=value lines). +# +# Losslessness: seen markers are written ONLY as the last action of a poll, +# after every event line has already been emitted. A crash between print and +# the seen write at worst causes a redundant re-detect next cycle, never a +# permanent swallow. A failing seen write leaves the old marker in place, so +# the same event fires again next cycle. +set -u + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +FM_ROOT="${FM_ROOT_OVERRIDE:-$(cd "$SCRIPT_DIR/.." && pwd)}" +STATE="${FM_STATE_OVERRIDE:-$FM_ROOT/state}" +CONFIG="$STATE/.github-watch-config" +SEEN_DIR="$STATE/.github-watch-seen" +DEFAULT_CONTRIBUTOR="${FM_GH_CONTRIBUTOR:-e-jung}" +ALL_FILTERS="comments,ci,reviews,merge" +DEFAULT_POLL_SECS="${FM_GH_POLL_SECS:-300}" + +mkdir -p "$STATE" "$SEEN_DIR" + +# ---- small helpers ---- + +is_int() { case "${1:-}" in ''|*[!0-9]*) return 1 ;; *) return 0 ;; esac; } + +valid_filter() { + case "$1" in comments|ci|reviews|merge) return 0 ;; *) return 1 ;; esac +} + +# Run gh, swallowing errors and stderr so a missing gh or a transient API +# failure never kills the poll (output is simply empty for that call). +ghc() { command gh "$@" 2>/dev/null || true; } + +# cfg_read -> prints value (empty if missing/unset) +cfg_read() { + local key=$1 + [ -f "$CONFIG" ] || return 0 + awk -F= -v k="$key" '$1==k { sub(/^[^=]*=/, ""); print; exit }' "$CONFIG" +} + +# cfg_write (upsert a single key=value line) +cfg_write() { + local key=$1 val=$2 tmp + val=$(printf '%s' "$val" | tr '\n' ' ') + if [ -f "$CONFIG" ] && grep -q "^${key}=" "$CONFIG"; then + tmp="${CONFIG}.tmp.$$" + awk -F= -v k="$key" -v v="$val" '$1==k { print k"="v; next } { print }' \ + "$CONFIG" > "$tmp" && mv "$tmp" "$CONFIG" + else + printf '%s=%s\n' "$key" "$val" >> "$CONFIG" + fi +} + +get_contributor() { + local v + v=$(cfg_read contributor) + printf '%s' "${v:-$DEFAULT_CONTRIBUTOR}" +} + +get_filters() { + local v + v=$(cfg_read filters) + [ -n "$v" ] || v=$ALL_FILTERS + printf '%s' "$v" +} + +filter_enabled() { + case ",$(get_filters)," in *",$1,"*) return 0 ;; *) return 1 ;; esac +} + +get_poll() { + local v + v=$(cfg_read poll_interval) + case "${v:-}" in ''|*[!0-9]*) printf '%s' "$DEFAULT_POLL_SECS" ;; *) printf '%s' "$v" ;; esac +} + +# seen_file -> path to that PR's seen-state file +seen_file() { printf '%s/%s-%s-%s\n' "$SEEN_DIR" "$1" "$2" "$3"; } + +# seen_get -> value (empty if missing) +seen_get() { + local f=$1 key=$2 + [ -f "$f" ] || return 0 + awk -F= -v k="$key" '$1==k { sub(/^[^=]*=/, ""); print; exit }' "$f" +} + +# ---- comma-list set ops ---- + +# list_contains "a,b,c" "b" -> 0 if present +list_contains() { + case ",$1," in *",$2,"*) return 0 ;; *) return 1 ;; esac +} + +# list_add "a,b,c" "d" -> "a,b,c,d" (dedup; preserves order) +list_add() { + local list=$1 item=$2 + if list_contains "$list" "$item"; then + printf '%s' "$list" + else + if [ -z "$list" ]; then printf '%s' "$item"; else printf '%s,%s' "$list" "$item"; fi + fi +} + +# list_remove "a,b,c" "b" -> "a,c" +list_remove() { + local list=$1 item=$2 out="" i + local IFS=, + for i in $list; do + [ "$i" = "$item" ] && continue + if [ -z "$out" ]; then out=$i; else out="$out,$i"; fi + done + printf '%s' "$out" +} + +# ---- discovery + per-PR probes (each fails open: empty output, no crash) ---- + +# Prints "owner/reponumber" per open PR by the contributor. +discover_prs() { + local contributor + contributor=$(get_contributor) + ghc search prs --author="$contributor" --state=open \ + --json repository,number \ + --jq '.[] | [.repository.nameWithOwner, .number] | @tsv' +} + +# count_comments +count_comments() { + CONTRIB_WATCH="$4" ghc api "repos/$1/$2/issues/$3/comments" \ + --jq '[.[] | select(.user.login != env.CONTRIB_WATCH)] | length' +} + +# count_reviews +count_reviews() { + ghc api "repos/$1/$2/pulls/$3/reviews" --jq 'length' +} + +# pr_state -> OPEN|MERGED|CLOSED (empty on failure) +pr_state() { + ghc pr view "$3" -R "$1/$2" --json state -q .state +} + +# head_sha +head_sha() { + ghc pr view "$3" -R "$1/$2" --json headRefOid -q .headRefOid +} + +# ci_signature -> sorted multiset of check conclusions/statuses +ci_signature() { + [ -n "$3" ] || return 0 + ghc api "repos/$1/$2/commits/$3/check-runs" \ + --jq '[.check_runs[] | (.conclusion // .status)] | sort | join(",")' +} + +# ---- the poll ---- + +# Emit one poll cycle. Events for ALL prs are accumulated in memory, printed in +# a single burst, and only then are seen markers advanced — so a crash can never +# advance a marker past an event that was not yet emitted. +poll_once() { + local contributor prs EVENTS="" pending + contributor=$(get_contributor) + prs=$(discover_prs) + + # Staged seen updates: written to a scratch dir during the loop, moved into + # place only AFTER events are printed (the last action of the poll). + pending=$(mktemp -d "${TMPDIR:-/tmp}/fm-ghwatch.XXXXXX") + + local fullname pr owner repo sf basename initialized + local c_count r_count p_state sha ci_sig + local seen_c seen_r seen_state seen_ci new_c new_r + local open_basenames="" + local block + + while IFS=$'\t' read -r fullname pr; do + [ -n "${fullname:-}" ] || continue + owner=${fullname%%/*} + repo=${fullname#*/} + { [ -n "$owner" ] && [ -n "$repo" ] && [ "$owner" != "$fullname" ] && [ -n "${pr:-}" ]; } || continue + + sf=$(seen_file "$owner" "$repo" "$pr") + basename=${sf##*/} + open_basenames="$open_basenames $basename" + + # Gather fresh data only for enabled filters. + c_count="" r_count="" p_state="" sha="" ci_sig="" + filter_enabled comments && c_count=$(count_comments "$owner" "$repo" "$pr" "$contributor") + filter_enabled reviews && r_count=$(count_reviews "$owner" "$repo" "$pr") + filter_enabled merge && p_state=$(pr_state "$owner" "$repo" "$pr") + if filter_enabled ci; then + sha=$(head_sha "$owner" "$repo" "$pr") + ci_sig=$(ci_signature "$owner" "$repo" "$sha") + fi + + initialized=$(seen_get "$sf" initialized) + seen_c="" seen_r="" seen_state="" seen_ci="" + block=$(printf 'owner=%s\nrepo=%s\npr=%s\ninitialized=1\n' "$owner" "$repo" "$pr") + + if [ -z "$initialized" ]; then + # First sight of this PR: baseline silently (no events). + : + else + seen_c=$(seen_get "$sf" comments) + seen_r=$(seen_get "$sf" reviews) + seen_state=$(seen_get "$sf" state) + seen_ci=$(seen_get "$sf" ci) + + # comments (high-water): event on increase only. + if is_int "$c_count" && is_int "$seen_c" && [ "$c_count" -gt "$seen_c" ]; then + EVENTS="${EVENTS}COMMENT: ${fullname}#${pr} has $((c_count - seen_c)) new maintainer comment(s) +" + fi + # reviews (high-water): event on increase only. + if is_int "$r_count" && is_int "$seen_r" && [ "$r_count" -gt "$seen_r" ]; then + EVENTS="${EVENTS}REVIEW: ${fullname}#${pr} has $((r_count - seen_r)) new review(s) +" + fi + # ci: event on any signature change. + if [ -n "$ci_sig" ] && [ -n "$seen_ci" ] && [ "$seen_ci" != "$ci_sig" ]; then + EVENTS="${EVENTS}CI: ${fullname}#${pr} checks changed +" + fi + # merge: event on open -> merged/closed transition. + if [ -n "$p_state" ] && [ "$p_state" != "$seen_state" ]; then + case "$p_state" in + MERGED) [ "${seen_state:-OPEN}" = "OPEN" ] && EVENTS="${EVENTS}MERGED: ${fullname}#${pr} +" ;; + CLOSED) [ "${seen_state:-OPEN}" = "OPEN" ] && EVENTS="${EVENTS}CLOSED: ${fullname}#${pr} +" ;; + esac + fi + fi + + # Build the staged seen block: high-water for counts, current for ci/state. + new_c=$seen_c; new_r=$seen_r + if is_int "$c_count"; then + if is_int "$seen_c"; then new_c=$((seen_c > c_count ? seen_c : c_count)); else new_c=$c_count; fi + fi + if is_int "$r_count"; then + if is_int "$seen_r"; then new_r=$((seen_r > r_count ? seen_r : r_count)); else new_r=$r_count; fi + fi + is_int "$new_c" && block=$(printf '%s\ncomments=%s' "$block" "$new_c") + is_int "$new_r" && block=$(printf '%s\nreviews=%s' "$block" "$new_r") + [ -n "$ci_sig" ] && block=$(printf '%s\nci=%s' "$block" "$ci_sig") + [ -n "$sha" ] && block=$(printf '%s\nsha=%s' "$block" "$sha") + [ -n "$p_state" ] && block=$(printf '%s\nstate=%s' "$block" "$p_state") + printf '%s\n' "$block" > "$pending/$basename" + done < +detect_left_open() { + local pending=$1 open_basenames=$2 f base owner repo pr seen_state p_state + [ -d "$SEEN_DIR" ] || return 0 + for f in "$SEEN_DIR"/*; do + [ -e "$f" ] || continue + base=${f##*/} + case "$open_basenames" in *" $base "*) continue ;; esac + # Only re-check PRs we last recorded as OPEN; merged/closed are settled. + seen_state=$(seen_get "$f" state) + [ -n "$(seen_get "$f" initialized)" ] || continue + { [ -z "$seen_state" ] || [ "$seen_state" = "OPEN" ]; } || continue + owner=$(seen_get "$f" owner) + repo=$(seen_get "$f" repo) + pr=$(seen_get "$f" pr) + [ -n "$owner" ] && [ -n "$repo" ] && [ -n "$pr" ] || continue + p_state=$(pr_state "$owner" "$repo" "$pr") + case "$p_state" in + MERGED) EVENTS="${EVENTS:-}MERGED: ${owner}/${repo}#${pr} +" ;; + CLOSED) EVENTS="${EVENTS:-}CLOSED: ${owner}/${repo}#${pr} +" ;; + *) continue ;; + esac + # Carry forward prior seen fields, updating only state. + carry_seen_forward "$pending" "$base" "$f" "$p_state" + done +} + +# carry_seen_forward +# Reproduce the PR's current seen block with the state field replaced, into the +# pending dir, so a left-open PR's marker advances atomically with the rest. +carry_seen_forward() { + local pending=$1 base=$2 real=$3 newstate=$4 + awk -F= -v s="$newstate" '$1!="state" { print } END { print "state=" s }' \ + "$real" > "$pending/$base" +} + +# apply_pending — atomically move each staged seen file into place. +# A failed move (e.g. read-only state dir) is non-fatal: the seen marker simply +# stays at its prior value and the event re-fires next cycle (lossless). +apply_pending() { + local dir=$1 f + [ -d "$dir" ] || return 0 + for f in "$dir"/*; do + [ -e "$f" ] || continue + mv -f "$f" "$SEEN_DIR/${f##*/}" 2>/dev/null || true + done +} + +# ---- daemon ---- + +poll_daemon() { + local interval + interval=$(get_poll) + trap 'exit 0' INT TERM + while :; do + poll_once + sleep "$interval" + done +} + +# ---- CLI subcommands ---- + +cmd_filter() { + # filter list -> show active filters + # filter on|off -> toggle a filter + local name="${1:-}" state="${2:-}" + if [ -z "$name" ] || [ "$name" = "list" ]; then + local IFS=, + for f in $(get_filters); do printf '%s\n' "$f"; done + return + fi + valid_filter "$name" || { echo "error: unknown filter '$name' (comments|ci|reviews|merge)" >&2; exit 2; } + case "$state" in + on|off) ;; + *) echo "usage: fm-github-watch.sh filter [list | on|off]" >&2; exit 2 ;; + esac + local cur new + cur=$(get_filters) + if [ "$state" = "on" ]; then + new=$(list_add "$cur" "$name") + else + new=$(list_remove "$cur" "$name") + fi + cfg_write filters "$new" + echo "filters=$new" +} + +cmd_contributor() { + if [ "$#" -gt 0 ]; then + cfg_write contributor "$1" + echo "contributor=$1" + else + get_contributor + fi +} + +cmd_status() { + local contributor filters f on seen_count + contributor=$(get_contributor) + filters=$(get_filters) + printf 'contributor: %s\n' "$contributor" + printf 'filters:\n' + for f in comments ci reviews merge; do + if list_contains "$filters" "$f"; then on=on; else on=off; fi + printf ' %s: %s\n' "$f" "$on" + done + printf 'poll interval: %ss\n' "$(get_poll)" + seen_count=0 + if [ -d "$SEEN_DIR" ]; then + seen_count=$(find "$SEEN_DIR" -type f 2>/dev/null | wc -l | tr -d ' ') + fi + printf 'seen PRs: %s\n' "$seen_count" +} + +usage() { + sed -n '2,/^$/p' < "$0" | sed 's/^# \{0,1\}//' +} + +# ---- entry ---- + +case "${1:-}" in + --help|-h) usage; exit 0 ;; + --once|"") poll_once ;; + --daemon) poll_daemon ;; + filter) shift; cmd_filter "$@" ;; + contributor) shift; cmd_contributor "$@" ;; + status) cmd_status ;; + *) + echo "error: unknown command '${1:-}'" >&2 + usage >&2 + exit 2 + ;; +esac diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh new file mode 100755 index 0000000..54bbc5d --- /dev/null +++ b/tests/fm-github-watch.test.sh @@ -0,0 +1,300 @@ +#!/usr/bin/env bash +# Behavior tests for fm-github-watch.sh. +# A fake `gh` on PATH serves canned, file-driven responses so each test can +# mutate fixture state between poll cycles and assert on emitted events. +set -u + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +GH_WATCH="$ROOT/bin/fm-github-watch.sh" + +fail() { + printf 'not ok - %s\n' "$1" >&2 + exit 1 +} + +pass() { + printf 'ok - %s\n' "$1" +} + +TMP_ROOT= +cleanup() { + [ -n "${TMP_ROOT:-}" ] && rm -rf "$TMP_ROOT" +} +trap cleanup EXIT + +TMP_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/fm-ghwatch-tests.XXXXXX") + +# Build an isolated case dir with its own state/ + fakebin/gh, and echo its root. +# The fake gh reads fixtures from $GH_FIXTURE (one PR's data per set of files). +make_case() { + local name=$1 dir fakebin + dir="$TMP_ROOT/$name" + fakebin="$dir/fakebin" + mkdir -p "$dir/state" "$dir/fixture" "$fakebin" + cat > "$fakebin/gh" <<'GH' +#!/usr/bin/env bash +# Minimal, file-driven gh stand-in for fm-github-watch tests. +set -u +FX="${GH_FIXTURE:?no fixture}" +emit_default() { :; } # most commands print nothing by default + +sub="${1:-}" +shift || true + +case "$sub" in + search) + # gh search prs ... : print "owner/reponum" lines. + [ -f "$FX/prs" ] && cat "$FX/prs" + exit 0 + ;; + api) + # gh api --jq ... : find the repos/... path argument. + path="" + for a in "$@"; do + case "$a" in repos/*) path=$a ;; esac + done + # repos/OWNER/REPO/issues/NUM/comments -> comments-OWNER-REPO-NUM + # repos/OWNER/REPO/pulls/NUM/reviews -> reviews-OWNER-REPO-NUM + # repos/OWNER/REPO/commits/SHA/check-runs -> ci-SHA + case "$path" in + */issues/*/comments) + rest=${path#repos/} # OWNER/REPO/issues/NUM/comments + owner=${rest%%/*}; rest=${rest#*/} + repo=${rest%%/*}; rest=${rest#*/} + num=${rest#issues/}; num=${num%/comments} + f="$FX/comments-$owner-$repo-$num" + [ -f "$f" ] && { cat "$f"; exit 0; } + echo 0; exit 0 + ;; + */pulls/*/reviews) + rest=${path#repos/} + owner=${rest%%/*}; rest=${rest#*/} + repo=${rest%%/*}; rest=${rest#*/} + num=${rest#pulls/}; num=${num%/reviews} + f="$FX/reviews-$owner-$repo-$num" + [ -f "$f" ] && { cat "$f"; exit 0; } + echo 0; exit 0 + ;; + */commits/*/check-runs) + sha=${path##*/commits/}; sha=${sha%/check-runs} + f="$FX/ci-$sha" + [ -f "$f" ] && { cat "$f"; exit 0; } + exit 0 + ;; + esac + exit 0 + ;; + pr) + # gh pr view -R owner/repo --json -q ... + num=""; repo=""; field="" + prev="" + for a in "$@"; do + if [ "$prev" = "-R" ]; then repo=$a; fi + if [ "$prev" = "--json" ]; then field=$a; fi + case "$a" in [0-9]*) num=$a ;; esac + prev=$a + done + owner=${repo%%/*}; rn=${repo#*/} + case "$field" in + state) + f="$FX/state-$owner-$rn-$num" + [ -f "$f" ] && { cat "$f"; exit 0; } + echo "OPEN"; exit 0 + ;; + headRefOid) + f="$FX/sha-$owner-$rn-$num" + [ -f "$f" ] && { cat "$f"; exit 0; } + echo "deadbeef"; exit 0 + ;; + esac + exit 0 + ;; +esac +exit 0 +GH + chmod +x "$fakebin/gh" + printf '%s\n' "$dir" +} + +# run_poll : invoke one poll cycle with the fake gh on PATH. +run_poll() { + local dir=$1 + PATH="$dir/fakebin:$PATH" GH_FIXTURE="$dir/fixture" \ + FM_STATE_OVERRIDE="$dir/state" \ + bash "$GH_WATCH" --once +} + +# Seed the open-PR list a fake gh search returns. +seed_prs() { + local dir=$1 + shift + : > "$dir/fixture/prs" + local ln + for ln in "$@"; do printf '%s\n' "$ln" >> "$dir/fixture/prs"; done +} + +test_filter_toggling() { + local dir + dir=$(make_case filter-toggle) + local cfg="$dir/state/.github-watch-config" + + run_poll "$dir" >/dev/null 2>&1 # ensure default config materializes + # Default: all four filters active. + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter list > "$dir/list.out" + grep -Fxq comments "$dir/list.out" || fail "comments filter not on by default" + grep -Fxq ci "$dir/list.out" || fail "ci filter not on by default" + grep -Fxq merge "$dir/list.out" || fail "merge filter not on by default" + + # Turn comments off -> persisted in config, absent from list. + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter comments off > "$dir/off.out" + grep -Eq '^filters=ci,reviews,merge$' "$dir/off.out" || fail "turning comments off gave unexpected result" + ! grep -Fxq comments "$cfg" || fail "comments should be removed from config when toggled off" + + # Turn comments back on. + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter comments on > "$dir/on.out" + grep -Eq '^filters=ci,reviews,merge,comments$' "$dir/on.out" || fail "turning comments on gave unexpected result" + + # Disabling then re-enabling is idempotent (no dupes). + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter ci off >/dev/null + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter ci on >/dev/null + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter list > "$dir/list2.out" + [ "$(grep -Fc ci "$dir/list2.out")" -eq 1 ] || fail "filter toggling duplicated the ci filter" + + pass "filter on/off toggles persist in config without duplicates" +} + +test_first_run_baselines_silently() { + local dir out + dir=$(make_case baseline) + seed_prs "$dir" $'kunchenguid/firstmate\t30' + printf '5\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" + printf '2\n' > "$dir/fixture/reviews-kunchenguid-firstmate-30" + + out=$(run_poll "$dir") + [ -z "$out" ] || fail "first poll should baseline silently, but printed: $out" + # Seen file exists with the baselined high-water marks. + local sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-30" + [ -f "$sf" ] || fail "baseline seen file was not written" + grep -Fxq "comments=5" "$sf" || fail "comments high-water not baselined" + grep -Fxq "reviews=2" "$sf" || fail "reviews high-water not baselined" + grep -Fxq "initialized=1" "$sf" || fail "initialized marker missing" + + pass "first run for a PR baselines silently with no event" +} + +test_comment_detection_advances_seen_after_print() { + local dir out sf + dir=$(make_case comment) + seed_prs "$dir" $'kunchenguid/firstmate\t30' + printf '5\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" + sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-30" + + # Cycle 1: baseline. + run_poll "$dir" >/dev/null + grep -Fxq "comments=5" "$sf" || fail "baseline comments not set" + + # Cycle 2: two new maintainer comments. + printf '7\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new maintainer comment(s)" \ + || fail "comment increase did not emit event; got: $out" + # Seen marker advanced to the new high-water (after the print). + grep -Fxq "comments=7" "$sf" || fail "seen marker not advanced after event" + + # Cycle 3: no change -> silence. + out=$(run_poll "$dir") + [ -z "$out" ] || fail "steady-state poll should be silent; got: $out" + + pass "comment increase emits event and advances seen after the print" +} + +test_losslessness_redetects_when_seen_write_fails() { + local dir out sf + dir=$(make_case lossless) + seed_prs "$dir" $'kunchenguid/firstmate\t30' + printf '5\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" + sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-30" + + # Cycle 1: baseline (writes the seen file while dir is writable). + run_poll "$dir" >/dev/null + grep -Fxq "comments=5" "$sf" || fail "baseline did not write seen" + + # New comment arrives. + printf '7\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" + + # Simulate a failing seen write: make the seen dir read-only so the mv in + # apply_pending cannot advance the marker. The event must STILL print this + # cycle (print happens before the seen write). + chmod a-w "$dir/state/.github-watch-seen" + out=$(run_poll "$dir") + chmod u+w "$dir/state/.github-watch-seen" + printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new maintainer comment(s)" \ + || fail "event did not print when seen write failed; got: $out" + # Marker must NOT have advanced (the whole point). + grep -Fxq "comments=5" "$sf" || fail "seen marker advanced despite failing write (permanent swallow)" + + # Next cycle (writable again) re-detects the same event: lossless. + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new maintainer comment(s)" \ + || fail "event was not re-detected after failed seen write; got: $out" + + pass "failed seen write leaves the event re-detectable (lossless)" +} + +test_merge_detection_on_left_open() { + local dir out sf + dir=$(make_case merge) + seed_prs "$dir" $'kunchenguid/firstmate\t42' + printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-42" + + # Cycle 1: baseline the open PR. + run_poll "$dir" >/dev/null + grep -Fxq "state=OPEN" "$sf" || fail "baseline state not recorded as OPEN" + + # PR merges: it leaves the open search, and its state becomes MERGED. + : > "$dir/fixture/prs" # no longer open + printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "MERGED: kunchenguid/firstmate#42" \ + || fail "open->merged transition did not emit event; got: $out" + grep -Fxq "state=MERGED" "$sf" || fail "seen state not advanced to MERGED" + + # A later cycle does not re-report the merge (state no longer OPEN). + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "MERGED" && fail "merge event re-reported after settling" || true + + pass "PR leaving the open set as merged emits MERGED once" +} + +test_config_roundtrip() { + local dir + dir=$(make_case config) + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" contributor captain-ej >/dev/null + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter reviews off >/dev/null + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter ci off >/dev/null + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" contributor > "$dir/c.out" + [ "$(cat "$dir/c.out")" = "captain-ej" ] || fail "contributor did not roundtrip" + + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter list > "$dir/f.out" + # comments + merge remain; ci + reviews disabled. + grep -Fxq comments "$dir/f.out" || fail "comments should remain on" + grep -Fxq merge "$dir/f.out" || fail "merge should remain on" + ! grep -Fxq ci "$dir/f.out" || fail "ci should be off" + ! grep -Fxq reviews "$dir/f.out" || fail "reviews should be off" + + # status reflects the persisted config. + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" status > "$dir/s.out" + grep -Eq '^contributor: captain-ej$' "$dir/s.out" || fail "status did not show contributor" + grep -Eq '^ ci: off$' "$dir/s.out" || fail "status did not show ci off" + + pass "config writes round-trip across contributor + filter subcommands" +} + +test_filter_toggling +test_first_run_baselines_silently +test_comment_detection_advances_seen_after_print +test_losslessness_redetects_when_seen_write_fails +test_merge_detection_on_left_open +test_config_roundtrip From 030ac715af546965cce19f87f8eccf32873f16cb Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 05:21:56 +0000 Subject: [PATCH 02/16] fix(ghwatch): per-PR emission, atomic seen writes, usage + test coverage Address review findings from the no-mistakes pipeline: - Emit events per-PR (print then advance that PR's seen) instead of buffering all-at-end: a watcher 30s timeout now surfaces partial progress rather than killing the poll with zero output every cycle. - atomic_write (temp + rename) so a read-only/crashing state dir never leaves a partial seen file; bash builtin printf write()s immediately, keeping the per-PR ordering lossless even under SIGKILL. - Fix open_basenames membership (space-padded) so the last open PR is skipped in detect_left_open instead of always re-checked. - usage() stops before set -u so --help no longer leaks code. - Add review-detection and ci-detection tests. --- bin/fm-github-watch.sh | 259 +++++++++++++++++----------------- tests/fm-github-watch.test.sh | 46 ++++++ 2 files changed, 177 insertions(+), 128 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index b235ec4..f22bd33 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -25,11 +25,12 @@ # Config: state/.github-watch-config (key=value lines). # Seen: state/.github-watch-seen/-- (key=value lines). # -# Losslessness: seen markers are written ONLY as the last action of a poll, -# after every event line has already been emitted. A crash between print and -# the seen write at worst causes a redundant re-detect next cycle, never a -# permanent swallow. A failing seen write leaves the old marker in place, so -# the same event fires again next cycle. +# Losslessness: for each PR, events are emitted BEFORE its seen marker advances +# (and bash's builtin printf write()s to the capture pipe immediately, so an +# emitted event survives even a SIGKILL). A crash between the print and the seen +# write at worst causes a redundant re-detect next cycle, never a permanent +# swallow. A failing seen write leaves the old marker in place, so the same +# event fires again next cycle. set -u SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -177,119 +178,141 @@ ci_signature() { # ---- the poll ---- -# Emit one poll cycle. Events for ALL prs are accumulated in memory, printed in -# a single burst, and only then are seen markers advanced — so a crash can never -# advance a marker past an event that was not yet emitted. -poll_once() { - local contributor prs EVENTS="" pending - contributor=$(get_contributor) - prs=$(discover_prs) - - # Staged seen updates: written to a scratch dir during the loop, moved into - # place only AFTER events are printed (the last action of the poll). - pending=$(mktemp -d "${TMPDIR:-/tmp}/fm-ghwatch.XXXXXX") +# atomic_write — write seen state via temp + rename so a crash +# or a read-only state dir can never leave a partial file. On any failure the +# prior file is left untouched, so the event re-fires next cycle (lossless). +atomic_write() { + local file=$1 content=$2 tmp + tmp="$file.tmp.$$" + # Redirect fd 2 to /dev/null BEFORE the output redirect so a failure to open + # the temp (read-only dir) is reported to /dev/null, not the terminal. + if printf '%s\n' "$content" 2>/dev/null > "$tmp"; then + mv -f "$tmp" "$file" 2>/dev/null || rm -f "$tmp" 2>/dev/null + else + rm -f "$tmp" 2>/dev/null + fi +} - local fullname pr owner repo sf basename initialized - local c_count r_count p_state sha ci_sig - local seen_c seen_r seen_state seen_ci new_c new_r - local open_basenames="" - local block +# build_seen +# Compose the seen-state block: high-water marks for counts, current value for +# ci/state. Fields with no fresh value this cycle are carried forward from the +# prior block, so toggling a filter off never wipes its remembered high-water. +build_seen() { + local sf=$1 owner=$2 repo=$3 pr=$4 c_count=$5 r_count=$6 ci_sig=$7 sha=$8 p_state=$9 + local seen_c seen_r new_c new_r block + seen_c=$(seen_get "$sf" comments) + seen_r=$(seen_get "$sf" reviews) + new_c=$seen_c; new_r=$seen_r + if is_int "$c_count"; then + if is_int "$seen_c"; then new_c=$((seen_c > c_count ? seen_c : c_count)); else new_c=$c_count; fi + fi + if is_int "$r_count"; then + if is_int "$seen_r"; then new_r=$((seen_r > r_count ? seen_r : r_count)); else new_r=$r_count; fi + fi + block=$(printf 'owner=%s\nrepo=%s\npr=%s\ninitialized=1' "$owner" "$repo" "$pr") + is_int "$new_c" && block=$(printf '%s\ncomments=%s' "$block" "$new_c") + is_int "$new_r" && block=$(printf '%s\nreviews=%s' "$block" "$new_r") + [ -n "$ci_sig" ] && block=$(printf '%s\nci=%s' "$block" "$ci_sig") + [ -n "$sha" ] && block=$(printf '%s\nsha=%s' "$block" "$sha") + [ -n "$p_state" ] && block=$(printf '%s\nstate=%s' "$block" "$p_state") + printf '%s' "$block" +} - while IFS=$'\t' read -r fullname pr; do - [ -n "${fullname:-}" ] || continue - owner=${fullname%%/*} - repo=${fullname#*/} - { [ -n "$owner" ] && [ -n "$repo" ] && [ "$owner" != "$fullname" ] && [ -n "${pr:-}" ]; } || continue +# process_pr +# Gather fresh data for the enabled filters, EMIT any new events for this PR, +# then advance this PR's seen marker. Per-PR ordering (print before seen) plus +# bash's immediate write() to the capture pipe make this lossless even if the +# poll is killed mid-cycle: an emitted event is already in the pipe, and a PR +# whose marker never advanced simply re-fires next cycle. Emitting per-PR (not +# all-at-end) also means a watcher timeout surfaces partial progress instead of +# nothing — important because the watcher wraps check scripts in a 30s timeout +# and a full fleet poll of serial gh calls can approach that budget. +process_pr() { + local owner=$1 repo=$2 pr=$3 contributor=$4 + local sf c_count r_count p_state sha ci_sig + local initialized seen_c seen_r seen_state seen_ci ev="" + sf=$(seen_file "$owner" "$repo" "$pr") + + c_count="" r_count="" p_state="" sha="" ci_sig="" + filter_enabled comments && c_count=$(count_comments "$owner" "$repo" "$pr" "$contributor") + filter_enabled reviews && r_count=$(count_reviews "$owner" "$repo" "$pr") + filter_enabled merge && p_state=$(pr_state "$owner" "$repo" "$pr") + if filter_enabled ci; then + sha=$(head_sha "$owner" "$repo" "$pr") + ci_sig=$(ci_signature "$owner" "$repo" "$sha") + fi - sf=$(seen_file "$owner" "$repo" "$pr") - basename=${sf##*/} - open_basenames="$open_basenames $basename" - - # Gather fresh data only for enabled filters. - c_count="" r_count="" p_state="" sha="" ci_sig="" - filter_enabled comments && c_count=$(count_comments "$owner" "$repo" "$pr" "$contributor") - filter_enabled reviews && r_count=$(count_reviews "$owner" "$repo" "$pr") - filter_enabled merge && p_state=$(pr_state "$owner" "$repo" "$pr") - if filter_enabled ci; then - sha=$(head_sha "$owner" "$repo" "$pr") - ci_sig=$(ci_signature "$owner" "$repo" "$sha") - fi + initialized=$(seen_get "$sf" initialized) + if [ -n "$initialized" ]; then + seen_c=$(seen_get "$sf" comments) + seen_r=$(seen_get "$sf" reviews) + seen_state=$(seen_get "$sf" state) + seen_ci=$(seen_get "$sf" ci) - initialized=$(seen_get "$sf" initialized) - seen_c="" seen_r="" seen_state="" seen_ci="" - block=$(printf 'owner=%s\nrepo=%s\npr=%s\ninitialized=1\n' "$owner" "$repo" "$pr") - - if [ -z "$initialized" ]; then - # First sight of this PR: baseline silently (no events). - : - else - seen_c=$(seen_get "$sf" comments) - seen_r=$(seen_get "$sf" reviews) - seen_state=$(seen_get "$sf" state) - seen_ci=$(seen_get "$sf" ci) - - # comments (high-water): event on increase only. - if is_int "$c_count" && is_int "$seen_c" && [ "$c_count" -gt "$seen_c" ]; then - EVENTS="${EVENTS}COMMENT: ${fullname}#${pr} has $((c_count - seen_c)) new maintainer comment(s) + # comments (high-water): event on increase only. + if is_int "$c_count" && is_int "$seen_c" && [ "$c_count" -gt "$seen_c" ]; then + ev="${ev}COMMENT: ${owner}/${repo}#${pr} has $((c_count - seen_c)) new maintainer comment(s) " - fi - # reviews (high-water): event on increase only. - if is_int "$r_count" && is_int "$seen_r" && [ "$r_count" -gt "$seen_r" ]; then - EVENTS="${EVENTS}REVIEW: ${fullname}#${pr} has $((r_count - seen_r)) new review(s) + fi + # reviews (high-water): event on increase only. + if is_int "$r_count" && is_int "$seen_r" && [ "$r_count" -gt "$seen_r" ]; then + ev="${ev}REVIEW: ${owner}/${repo}#${pr} has $((r_count - seen_r)) new review(s) " - fi - # ci: event on any signature change. - if [ -n "$ci_sig" ] && [ -n "$seen_ci" ] && [ "$seen_ci" != "$ci_sig" ]; then - EVENTS="${EVENTS}CI: ${fullname}#${pr} checks changed + fi + # ci: event on any signature change. + if [ -n "$ci_sig" ] && [ -n "$seen_ci" ] && [ "$seen_ci" != "$ci_sig" ]; then + ev="${ev}CI: ${owner}/${repo}#${pr} checks changed " - fi - # merge: event on open -> merged/closed transition. - if [ -n "$p_state" ] && [ "$p_state" != "$seen_state" ]; then - case "$p_state" in - MERGED) [ "${seen_state:-OPEN}" = "OPEN" ] && EVENTS="${EVENTS}MERGED: ${fullname}#${pr} + fi + # merge: event on open -> merged/closed transition. + if [ -n "$p_state" ] && [ "$p_state" != "$seen_state" ]; then + case "$p_state" in + MERGED) [ "${seen_state:-OPEN}" = "OPEN" ] && ev="${ev}MERGED: ${owner}/${repo}#${pr} " ;; - CLOSED) [ "${seen_state:-OPEN}" = "OPEN" ] && EVENTS="${EVENTS}CLOSED: ${fullname}#${pr} + CLOSED) [ "${seen_state:-OPEN}" = "OPEN" ] && ev="${ev}CLOSED: ${owner}/${repo}#${pr} " ;; - esac - fi + esac fi + fi - # Build the staged seen block: high-water for counts, current for ci/state. - new_c=$seen_c; new_r=$seen_r - if is_int "$c_count"; then - if is_int "$seen_c"; then new_c=$((seen_c > c_count ? seen_c : c_count)); else new_c=$c_count; fi - fi - if is_int "$r_count"; then - if is_int "$seen_r"; then new_r=$((seen_r > r_count ? seen_r : r_count)); else new_r=$r_count; fi - fi - is_int "$new_c" && block=$(printf '%s\ncomments=%s' "$block" "$new_c") - is_int "$new_r" && block=$(printf '%s\nreviews=%s' "$block" "$new_r") - [ -n "$ci_sig" ] && block=$(printf '%s\nci=%s' "$block" "$ci_sig") - [ -n "$sha" ] && block=$(printf '%s\nsha=%s' "$block" "$sha") - [ -n "$p_state" ] && block=$(printf '%s\nstate=%s' "$block" "$p_state") - printf '%s\n' "$block" > "$pending/$basename" + # --- LOSSLESSNESS BOUNDARY (per-PR) --- + # Emit this PR's events first (bash's printf write()s to the pipe at once), + # then advance its seen marker. A crash between the two leaves the event + # delivered and the marker stale -> a redundant re-detect, never a swallow. + [ -n "$ev" ] && printf '%s' "$ev" + local block + block=$(build_seen "$sf" "$owner" "$repo" "$pr" "$c_count" "$r_count" "$ci_sig" "$sha" "$p_state") + atomic_write "$sf" "$block" +} + +# Emit one poll cycle. +poll_once() { + local contributor prs fullname pr owner repo basename + local open_basenames=" " + contributor=$(get_contributor) + prs=$(discover_prs) + + while IFS=$'\t' read -r fullname pr; do + [ -n "${fullname:-}" ] || continue + owner=${fullname%%/*} + repo=${fullname#*/} + { [ -n "$owner" ] && [ -n "$repo" ] && [ "$owner" != "$fullname" ] && [ -n "${pr:-}" ]; } || continue + process_pr "$owner" "$repo" "$pr" "$contributor" + basename=$(seen_file "$owner" "$repo" "$pr"); basename=${basename##*/} + open_basenames="${open_basenames}${basename} " done < +# Detect PRs previously seen as OPEN that no longer appear in the open search +# (they merged or closed). For each, emit the transition and update its state. +# detect_left_open (space-padded: " key1 key2 " so the last +# entry matches too). detect_left_open() { - local pending=$1 open_basenames=$2 f base owner repo pr seen_state p_state + local open_basenames=$1 f base owner repo pr seen_state p_state block [ -d "$SEEN_DIR" ] || return 0 for f in "$SEEN_DIR"/*; do [ -e "$f" ] || continue @@ -305,35 +328,13 @@ detect_left_open() { [ -n "$owner" ] && [ -n "$repo" ] && [ -n "$pr" ] || continue p_state=$(pr_state "$owner" "$repo" "$pr") case "$p_state" in - MERGED) EVENTS="${EVENTS:-}MERGED: ${owner}/${repo}#${pr} -" ;; - CLOSED) EVENTS="${EVENTS:-}CLOSED: ${owner}/${repo}#${pr} -" ;; + MERGED|CLOSED) ;; *) continue ;; esac - # Carry forward prior seen fields, updating only state. - carry_seen_forward "$pending" "$base" "$f" "$p_state" - done -} - -# carry_seen_forward -# Reproduce the PR's current seen block with the state field replaced, into the -# pending dir, so a left-open PR's marker advances atomically with the rest. -carry_seen_forward() { - local pending=$1 base=$2 real=$3 newstate=$4 - awk -F= -v s="$newstate" '$1!="state" { print } END { print "state=" s }' \ - "$real" > "$pending/$base" -} - -# apply_pending — atomically move each staged seen file into place. -# A failed move (e.g. read-only state dir) is non-fatal: the seen marker simply -# stays at its prior value and the event re-fires next cycle (lossless). -apply_pending() { - local dir=$1 f - [ -d "$dir" ] || return 0 - for f in "$dir"/*; do - [ -e "$f" ] || continue - mv -f "$f" "$SEEN_DIR/${f##*/}" 2>/dev/null || true + # Emit, then advance state (same per-PR losslessness ordering). + printf '%s: %s/%s#%s\n' "$p_state" "$owner" "$repo" "$pr" + block=$(awk -F= -v s="$p_state" '$1!="state" { print } END { print "state=" s }' "$f") + atomic_write "$f" "$block" done } @@ -404,7 +405,9 @@ cmd_status() { } usage() { - sed -n '2,/^$/p' < "$0" | sed 's/^# \{0,1\}//' + # Print the leading `#` header comment (lines 2..) up to the first non-comment + # line, stripping the `# ` prefix. Stops before `set -u` so no code leaks. + awk 'NR==1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0" } # ---- entry ---- diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index 54bbc5d..15d4f8e 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -292,9 +292,55 @@ test_config_roundtrip() { pass "config writes round-trip across contributor + filter subcommands" } +test_review_detection() { + local dir out sf + dir=$(make_case review) + seed_prs "$dir" $'kunchenguid/no-mistakes\t310' + printf '1\n' > "$dir/fixture/reviews-kunchenguid-no-mistakes-310" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-310" + + run_poll "$dir" >/dev/null + grep -Fxq "reviews=1" "$sf" || fail "baseline reviews not set" + + printf '3\n' > "$dir/fixture/reviews-kunchenguid-no-mistakes-310" + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "REVIEW: kunchenguid/no-mistakes#310 has 2 new review(s)" \ + || fail "review increase did not emit event; got: $out" + grep -Fxq "reviews=3" "$sf" || fail "review high-water not advanced" + + pass "review count increase emits REVIEW event" +} + +test_ci_detection() { + local dir out sf + dir=$(make_case ci) + seed_prs "$dir" $'kunchenguid/no-mistakes\t310' + printf 'abcdef1\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-310" + printf 'success,success,success\n' > "$dir/fixture/ci-abcdef1" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-310" + + run_poll "$dir" >/dev/null + grep -Fxq "ci=success,success,success" "$sf" || fail "baseline ci signature not set" + + # A check goes red: signature changes. + printf 'failure,success,success\n' > "$dir/fixture/ci-abcdef1" + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#310 checks changed" \ + || fail "ci signature change did not emit event; got: $out" + grep -Fxq "ci=failure,success,success" "$sf" || fail "ci signature not advanced" + + # Steady state again: silence. + out=$(run_poll "$dir") + [ -z "$out" ] || fail "steady-state ci poll should be silent; got: $out" + + pass "CI signature change emits CI event" +} + test_filter_toggling test_first_run_baselines_silently test_comment_detection_advances_seen_after_print test_losslessness_redetects_when_seen_write_fails test_merge_detection_on_left_open test_config_roundtrip +test_review_detection +test_ci_detection From 85d1fd0760d1b36f2f3d64511ba2fb7038b5f003 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 05:35:11 +0000 Subject: [PATCH 03/16] fix(ghwatch): respect merge filter, carry-forward CI sig, doc/test fixes Address the second review pass: - detect_left_open now honors the merge filter (filter merge off suppresses merge/close events instead of always emitting them). - build_seen carries the CI signature forward across a transiently-empty fetch (new commit whose check-runs have not populated), so a later status change still fires instead of being silently dropped. - Header: note --daemon for large fleets (check-script timeout) and that ci reads the Checks API only. - Tests: fix a vacuous config assertion; add regression tests for the merge-filter suppression and the CI carry-forward window. --- bin/fm-github-watch.sh | 26 +++++++++++++---- tests/fm-github-watch.test.sh | 53 ++++++++++++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 7 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index f22bd33..c51f5f9 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -10,6 +10,10 @@ # ln -s ../bin/fm-github-watch.sh state/github-events.check.sh # bin/fm-watch.sh runs state/*.check.sh every FM_CHECK_INTERVAL (default # 300s); any stdout is captured, classified as a `check` wake, escalated. +# A full poll issues up to 5 serial gh calls per open PR; for a large fleet +# that can approach the watcher's 30s check-script timeout. Events emit +# per-PR (not all-at-end), so a timeout still surfaces partial progress, +# but for very large fleets prefer --daemon, which is not timeout-bound. # # Usage: # fm-github-watch.sh # one poll cycle (same as --once) @@ -25,6 +29,10 @@ # Config: state/.github-watch-config (key=value lines). # Seen: state/.github-watch-seen/-- (key=value lines). # +# The ci filter reads the Checks API (check-runs); CI providers that report +# only via the legacy commit status API (some older Travis/Coveralls setups) +# are not covered. Use `gh pr checks` directly for a unified view. +# # Losslessness: for each PR, events are emitted BEFORE its seen marker advances # (and bash's builtin printf write()s to the capture pipe immediately, so an # emitted event survives even a SIGKILL). A crash between the print and the seen @@ -197,11 +205,14 @@ atomic_write() { # Compose the seen-state block: high-water marks for counts, current value for # ci/state. Fields with no fresh value this cycle are carried forward from the # prior block, so toggling a filter off never wipes its remembered high-water. +# CI is carried forward across a transiently-empty fetch (a new commit whose +# check-runs have not populated yet) so a later status change still fires. build_seen() { local sf=$1 owner=$2 repo=$3 pr=$4 c_count=$5 r_count=$6 ci_sig=$7 sha=$8 p_state=$9 - local seen_c seen_r new_c new_r block + local seen_c seen_r seen_ci new_c new_r ci_val block seen_c=$(seen_get "$sf" comments) seen_r=$(seen_get "$sf" reviews) + seen_ci=$(seen_get "$sf" ci) new_c=$seen_c; new_r=$seen_r if is_int "$c_count"; then if is_int "$seen_c"; then new_c=$((seen_c > c_count ? seen_c : c_count)); else new_c=$c_count; fi @@ -209,12 +220,14 @@ build_seen() { if is_int "$r_count"; then if is_int "$seen_r"; then new_r=$((seen_r > r_count ? seen_r : r_count)); else new_r=$r_count; fi fi + ci_val=$ci_sig + [ -n "$ci_val" ] || ci_val=$seen_ci block=$(printf 'owner=%s\nrepo=%s\npr=%s\ninitialized=1' "$owner" "$repo" "$pr") - is_int "$new_c" && block=$(printf '%s\ncomments=%s' "$block" "$new_c") - is_int "$new_r" && block=$(printf '%s\nreviews=%s' "$block" "$new_r") - [ -n "$ci_sig" ] && block=$(printf '%s\nci=%s' "$block" "$ci_sig") - [ -n "$sha" ] && block=$(printf '%s\nsha=%s' "$block" "$sha") - [ -n "$p_state" ] && block=$(printf '%s\nstate=%s' "$block" "$p_state") + is_int "$new_c" && block=$(printf '%s\ncomments=%s' "$block" "$new_c") + is_int "$new_r" && block=$(printf '%s\nreviews=%s' "$block" "$new_r") + [ -n "$ci_val" ] && block=$(printf '%s\nci=%s' "$block" "$ci_val") + [ -n "$sha" ] && block=$(printf '%s\nsha=%s' "$block" "$sha") + [ -n "$p_state" ] && block=$(printf '%s\nstate=%s' "$block" "$p_state") printf '%s' "$block" } @@ -313,6 +326,7 @@ EOF # entry matches too). detect_left_open() { local open_basenames=$1 f base owner repo pr seen_state p_state block + filter_enabled merge || return 0 [ -d "$SEEN_DIR" ] || return 0 for f in "$SEEN_DIR"/*; do [ -e "$f" ] || continue diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index 15d4f8e..ce2101a 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -148,7 +148,8 @@ test_filter_toggling() { # Turn comments off -> persisted in config, absent from list. FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter comments off > "$dir/off.out" grep -Eq '^filters=ci,reviews,merge$' "$dir/off.out" || fail "turning comments off gave unexpected result" - ! grep -Fxq comments "$cfg" || fail "comments should be removed from config when toggled off" + ! awk -F= '/^filters=/{print $2}' "$cfg" | grep -qw comments \ + || fail "comments should be absent from filters= when toggled off" # Turn comments back on. FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter comments on > "$dir/on.out" @@ -336,6 +337,54 @@ test_ci_detection() { pass "CI signature change emits CI event" } +test_merge_filter_suppresses_merge_event() { + local dir out + dir=$(make_case merge-off) + seed_prs "$dir" $'kunchenguid/firstmate\t42' + printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + run_poll "$dir" >/dev/null # baseline + + # Disable the merge filter; the PR then merges (leaves the open set). + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter merge off >/dev/null + : > "$dir/fixture/prs" + printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "MERGED"; then + fail "merge event fired despite merge filter being off; got: $out" + fi + pass "merge filter off suppresses merge/close events" +} + +test_ci_carry_forward_across_empty_window() { + local dir out sf + dir=$(make_case ci-carry) + seed_prs "$dir" $'kunchenguid/no-mistakes\t310' + printf 'sha1\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-310" + printf 'success,success\n' > "$dir/fixture/ci-sha1" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-310" + + # Baseline: CI passing for sha1. + run_poll "$dir" >/dev/null + grep -Fxq "ci=success,success" "$sf" || fail "baseline ci not recorded" + + # New commit: sha changes, check-runs not populated yet (empty ci_sig). + printf 'sha2\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-310" + rm -f "$dir/fixture/ci-sha1" + # No ci-sha2 fixture yet -> ci_signature returns empty. + out=$(run_poll "$dir") + [ -z "$out" ] || fail "transient empty ci window should be silent; got: $out" + # seen_ci must be carried forward (not dropped) so a later change still fires. + grep -Fxq "ci=success,success" "$sf" || fail "ci signature was dropped during empty window" + + # CI completes for sha2 and FAILS: signature differs from carried-forward. + printf 'failure,success\n' > "$dir/fixture/ci-sha2" + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#310 checks changed" \ + || fail "ci completion after empty window did not fire; got: $out" + + pass "CI signature carries forward across an empty window and fires on change" +} + test_filter_toggling test_first_run_baselines_silently test_comment_detection_advances_seen_after_print @@ -344,3 +393,5 @@ test_merge_detection_on_left_open test_config_roundtrip test_review_detection test_ci_detection +test_merge_filter_suppresses_merge_event +test_ci_carry_forward_across_empty_window From 299316109c2e7c0a19deca3ed8e78a7c439d090f Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 05:52:36 +0000 Subject: [PATCH 04/16] fix(ghwatch): empty-filters, contributor derivation, reviews filter, docs Address the third review pass: - Distinguish a configured-empty filters= (all filters off -> watcher muted) from a missing key (defaults to all on); previously 'all off' reset to defaults, so the captain could not actually mute the watcher. - Derive the contributor from `gh auth` when unset instead of hardcoding a username in a shared public-repo script; FM_GH_CONTRIBUTOR and the configured value still take precedence. - count_reviews now excludes the contributor's own reviews (keeps bot and maintainer reviews), matching count_comments. - Document state/.github-watch-config and state/.github-watch-seen/ in the AGENTS.md state layout; add a README toolbelt row + env knobs. - Fix a stale test comment (apply_pending -> atomic_write); add a regression test for the all-filters-off mute. --- AGENTS.md | 2 ++ README.md | 3 +++ bin/fm-github-watch.sh | 34 +++++++++++++++++++++++++--------- tests/fm-github-watch.test.sh | 28 +++++++++++++++++++++++++--- 4 files changed, 55 insertions(+), 12 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5d4da3d..94743a5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -75,6 +75,8 @@ state/ volatile runtime signals; gitignored .watch.lock .wake-queue.lock watcher singleton and queue serialization locks .hash-* .count-* .stale-* .seen-* .last-* .heartbeat-streak watcher internals; never touch .last-watcher-beat watcher liveness beacon, touched every poll; fm-guard.sh reads it + .github-watch-config fm-github-watch.sh filter/contributor config (key=value); never touch unless driving that tool + .github-watch-seen/ fm-github-watch.sh per-PR seen state (high-water marks); owned by that script .no-mistakes/ local validation state and evidence; gitignored ``` diff --git a/README.md b/README.md index 5f0df7f..4f50699 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,7 @@ The first mate drives these; you rarely need to, but they work by hand too. | `fm-merge-local.sh` | Fast-forward a `local-only` project's local default branch after approval | | `fm-review-diff.sh` | Review a crewmate branch against the authoritative base, with optional `--stat` output | | `fm-watch.sh` | Singleton-safe one-shot watcher; blocks until supervision work is due, queues it durably, then exits with one reason line | +| `fm-github-watch.sh` | Poll the contributor's open PRs for new comments/CI/reviews/merges; toggle filters via `filter on\|off` | | `fm-wake-drain.sh` | Atomically drain queued watcher wakes before handling supervision work | | `fm-send.sh` | Send one literal line (or `--key Escape`) to a crewmate window | | `fm-peek.sh` | Print a bounded tail of a crewmate pane | @@ -168,6 +169,8 @@ FM_SIGNAL_GRACE=30 # seconds to coalesce nearby status and turn-end signals FM_FLEET_SYNC_BOOTSTRAP_TIMEOUT=20 # seconds allowed for bootstrap's best-effort clone refresh FM_FLEET_PRUNE=1 # set to 0 to skip pruning local branches whose upstream is gone FM_BUSY_REGEX='esc (to )?interrupt|Working\.\.\.' # busy-pane signatures, extend per harness +FM_GH_CONTRIBUTOR= # GitHub user whose open PRs fm-github-watch.sh polls; unset = derive from `gh auth` +FM_GH_POLL_SECS=300 # seconds between polls when fm-github-watch.sh runs as --daemon ``` ## Development diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index c51f5f9..e2462c0 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -46,7 +46,6 @@ FM_ROOT="${FM_ROOT_OVERRIDE:-$(cd "$SCRIPT_DIR/.." && pwd)}" STATE="${FM_STATE_OVERRIDE:-$FM_ROOT/state}" CONFIG="$STATE/.github-watch-config" SEEN_DIR="$STATE/.github-watch-seen" -DEFAULT_CONTRIBUTOR="${FM_GH_CONTRIBUTOR:-e-jung}" ALL_FILTERS="comments,ci,reviews,merge" DEFAULT_POLL_SECS="${FM_GH_POLL_SECS:-300}" @@ -71,6 +70,13 @@ cfg_read() { awk -F= -v k="$key" '$1==k { sub(/^[^=]*=/, ""); print; exit }' "$CONFIG" } +# cfg_has -> 0 if the key exists in the config (distinguishes a configured +# empty value, e.g. `filters=`, from a missing key so "all filters off" sticks). +cfg_has() { + local key=$1 + [ -f "$CONFIG" ] && grep -q "^${key}=" "$CONFIG" +} + # cfg_write (upsert a single key=value line) cfg_write() { local key=$1 val=$2 tmp @@ -85,16 +91,23 @@ cfg_write() { } get_contributor() { + # Precedence: configured value > FM_GH_CONTRIBUTOR env > authenticated gh user. + # No hardcoded default: a shared tool should poll whoever is logged in. local v v=$(cfg_read contributor) - printf '%s' "${v:-$DEFAULT_CONTRIBUTOR}" + if [ -n "$v" ]; then printf '%s' "$v"; return; fi + if [ -n "${FM_GH_CONTRIBUTOR:-}" ]; then printf '%s' "$FM_GH_CONTRIBUTOR"; return; fi + ghc api user -q .login 2>/dev/null | tr -d '\n' } get_filters() { - local v - v=$(cfg_read filters) - [ -n "$v" ] || v=$ALL_FILTERS - printf '%s' "$v" + # A configured value (even empty = all filters off) is respected; only a + # never-configured key falls back to the full default set. + if cfg_has filters; then + cfg_read filters + else + printf '%s' "$ALL_FILTERS" + fi } filter_enabled() { @@ -162,9 +175,12 @@ count_comments() { --jq '[.[] | select(.user.login != env.CONTRIB_WATCH)] | length' } -# count_reviews +# count_reviews +# Excludes the contributor's own reviews (self-reviews) but keeps maintainer and +# bot reviews (Greptile, coderabbit, etc. have distinct logins). count_reviews() { - ghc api "repos/$1/$2/pulls/$3/reviews" --jq 'length' + CONTRIB_WATCH="$4" ghc api "repos/$1/$2/pulls/$3/reviews" \ + --jq '[.[] | select(.user.login != env.CONTRIB_WATCH)] | length' } # pr_state -> OPEN|MERGED|CLOSED (empty on failure) @@ -248,7 +264,7 @@ process_pr() { c_count="" r_count="" p_state="" sha="" ci_sig="" filter_enabled comments && c_count=$(count_comments "$owner" "$repo" "$pr" "$contributor") - filter_enabled reviews && r_count=$(count_reviews "$owner" "$repo" "$pr") + filter_enabled reviews && r_count=$(count_reviews "$owner" "$repo" "$pr" "$contributor") filter_enabled merge && p_state=$(pr_state "$owner" "$repo" "$pr") if filter_enabled ci; then sha=$(head_sha "$owner" "$repo" "$pr") diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index ce2101a..7346cc1 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -223,9 +223,9 @@ test_losslessness_redetects_when_seen_write_fails() { # New comment arrives. printf '7\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" - # Simulate a failing seen write: make the seen dir read-only so the mv in - # apply_pending cannot advance the marker. The event must STILL print this - # cycle (print happens before the seen write). + # Simulate a failing seen write: make the seen dir read-only so atomic_write + # cannot advance the marker. The event must STILL print this cycle (print + # happens before the seen write). chmod a-w "$dir/state/.github-watch-seen" out=$(run_poll "$dir") chmod u+w "$dir/state/.github-watch-seen" @@ -385,6 +385,27 @@ test_ci_carry_forward_across_empty_window() { pass "CI signature carries forward across an empty window and fires on change" } +test_all_filters_off_mutes_watcher() { + local dir out + dir=$(make_case all-off) + seed_prs "$dir" $'kunchenguid/firstmate\t30' + printf '5\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" + run_poll "$dir" >/dev/null # baseline + + # Turn every filter off; the persisted config must keep filters empty (not + # fall back to defaults). + for f in comments ci reviews merge; do + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter "$f" off >/dev/null + done + grep -Fxq 'filters=' "$dir/state/.github-watch-config" || fail "all-off should write filters= (empty), not default" + + # A new comment must NOT fire (every filter is muted). + printf '9\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" + out=$(run_poll "$dir") + [ -z "$out" ] || fail "muted watcher emitted events; got: $out" + pass "all filters off (empty filters=) mutes the watcher instead of resetting to defaults" +} + test_filter_toggling test_first_run_baselines_silently test_comment_detection_advances_seen_after_print @@ -395,3 +416,4 @@ test_review_detection test_ci_detection test_merge_filter_suppresses_merge_event test_ci_carry_forward_across_empty_window +test_all_filters_off_mutes_watcher From 9e99407add1b29ff47bdd36bde781dc2c1408883 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 06:10:06 +0000 Subject: [PATCH 05/16] fix(ghwatch): per_page=100 on list endpoints; accurate comment label Address the fourth review pass: - Append ?per_page=100 to the comments, reviews, and check-runs API calls. GitHub list endpoints default to 30 items per page, so counts and the CI signature silently capped at 30 on active PRs; per_page=100 lifts that. - The comment event said 'new maintainer comment(s)' but the count includes bot comments (coderabbit, greptile) by design, so relabel to 'comment(s)'. - Update the fake gh in tests to strip the query string before matching. --- bin/fm-github-watch.sh | 17 +++++++++-------- tests/fm-github-watch.test.sh | 9 +++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index e2462c0..65ec02a 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -1,10 +1,11 @@ #!/usr/bin/env bash # fm-github-watch.sh — GitHub events watcher for the fleet's open PRs. # -# Discovers all of a contributor's open PRs and surfaces new maintainer -# comments, CI status changes, reviews, and merge/close transitions as -# one-line events on stdout. Built to run as a watcher check script: it -# prints iff firstmate should wake, and stays silent otherwise. +# Discovers all of a contributor's open PRs and surfaces new comments (from +# maintainers, reviewers, or bots), CI status changes, reviews, and +# merge/close transitions as one-line events on stdout. Built to run as a +# watcher check script: it prints iff firstmate should wake, and stays +# silent otherwise. # # Wire it in with a check script the existing watcher already sweeps, e.g.: # ln -s ../bin/fm-github-watch.sh state/github-events.check.sh @@ -171,7 +172,7 @@ discover_prs() { # count_comments count_comments() { - CONTRIB_WATCH="$4" ghc api "repos/$1/$2/issues/$3/comments" \ + CONTRIB_WATCH="$4" ghc api "repos/$1/$2/issues/$3/comments?per_page=100" \ --jq '[.[] | select(.user.login != env.CONTRIB_WATCH)] | length' } @@ -179,7 +180,7 @@ count_comments() { # Excludes the contributor's own reviews (self-reviews) but keeps maintainer and # bot reviews (Greptile, coderabbit, etc. have distinct logins). count_reviews() { - CONTRIB_WATCH="$4" ghc api "repos/$1/$2/pulls/$3/reviews" \ + CONTRIB_WATCH="$4" ghc api "repos/$1/$2/pulls/$3/reviews?per_page=100" \ --jq '[.[] | select(.user.login != env.CONTRIB_WATCH)] | length' } @@ -196,7 +197,7 @@ head_sha() { # ci_signature -> sorted multiset of check conclusions/statuses ci_signature() { [ -n "$3" ] || return 0 - ghc api "repos/$1/$2/commits/$3/check-runs" \ + ghc api "repos/$1/$2/commits/$3/check-runs?per_page=100" \ --jq '[.check_runs[] | (.conclusion // .status)] | sort | join(",")' } @@ -280,7 +281,7 @@ process_pr() { # comments (high-water): event on increase only. if is_int "$c_count" && is_int "$seen_c" && [ "$c_count" -gt "$seen_c" ]; then - ev="${ev}COMMENT: ${owner}/${repo}#${pr} has $((c_count - seen_c)) new maintainer comment(s) + ev="${ev}COMMENT: ${owner}/${repo}#${pr} has $((c_count - seen_c)) new comment(s) " fi # reviews (high-water): event on increase only. diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index 7346cc1..192474b 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -53,6 +53,7 @@ case "$sub" in for a in "$@"; do case "$a" in repos/*) path=$a ;; esac done + path="${path%%\?*}" # strip any ?per_page=... query before matching # repos/OWNER/REPO/issues/NUM/comments -> comments-OWNER-REPO-NUM # repos/OWNER/REPO/pulls/NUM/reviews -> reviews-OWNER-REPO-NUM # repos/OWNER/REPO/commits/SHA/check-runs -> ci-SHA @@ -194,10 +195,10 @@ test_comment_detection_advances_seen_after_print() { run_poll "$dir" >/dev/null grep -Fxq "comments=5" "$sf" || fail "baseline comments not set" - # Cycle 2: two new maintainer comments. + # Cycle 2: two new comments. printf '7\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" out=$(run_poll "$dir") - printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new maintainer comment(s)" \ + printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new comment(s)" \ || fail "comment increase did not emit event; got: $out" # Seen marker advanced to the new high-water (after the print). grep -Fxq "comments=7" "$sf" || fail "seen marker not advanced after event" @@ -229,14 +230,14 @@ test_losslessness_redetects_when_seen_write_fails() { chmod a-w "$dir/state/.github-watch-seen" out=$(run_poll "$dir") chmod u+w "$dir/state/.github-watch-seen" - printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new maintainer comment(s)" \ + printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new comment(s)" \ || fail "event did not print when seen write failed; got: $out" # Marker must NOT have advanced (the whole point). grep -Fxq "comments=5" "$sf" || fail "seen marker advanced despite failing write (permanent swallow)" # Next cycle (writable again) re-detects the same event: lossless. out=$(run_poll "$dir") - printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new maintainer comment(s)" \ + printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new comment(s)" \ || fail "event was not re-detected after failed seen write; got: $out" pass "failed seen write leaves the event re-detectable (lossless)" From 754fca4b449d7c03b7914e1d43ed631d7bf6c511 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 06:31:13 +0000 Subject: [PATCH 06/16] fix(ghwatch): empty-contributor flood guard; isolate atomic_write temps Address the fifth review pass: - discover_prs skips discovery when the contributor resolves empty (gh missing/unauthed), so an empty --author is never passed to the search (an empty author qualifier would match open PRs across every repo). - atomic_write now stages its temp in a hidden SEEN_DIR/.tmp subdir (same filesystem, so the rename stays atomic) so a crash-leaked temp never matches detect_left_open's glob and cause a duplicate merge/close event. - Tests pin the contributor via FM_GH_CONTRIBUTOR so they no longer depend on the fake gh implementing `api user`. --- bin/fm-github-watch.sh | 13 +++++++++++-- tests/fm-github-watch.test.sh | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index 65ec02a..fca4246 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -165,6 +165,10 @@ list_remove() { discover_prs() { local contributor contributor=$(get_contributor) + # An empty contributor (gh missing/unauthed) must NOT pass --author="" to the + # search: GitHub treats an empty author qualifier as no filter, which would + # match open PRs across every repo and flood the seen state. + [ -n "$contributor" ] || return 0 ghc search prs --author="$contributor" --state=open \ --json repository,number \ --jq '.[] | [.repository.nameWithOwner, .number] | @tsv' @@ -206,9 +210,14 @@ ci_signature() { # atomic_write — write seen state via temp + rename so a crash # or a read-only state dir can never leave a partial file. On any failure the # prior file is left untouched, so the event re-fires next cycle (lossless). +# The temp lives in a hidden .tmp subdir of the seen dir (same filesystem, so +# the rename is atomic) so a crash-leaked temp never matches detect_left_open's +# `"$SEEN_DIR"/*` glob and cause a double-fire. atomic_write() { - local file=$1 content=$2 tmp - tmp="$file.tmp.$$" + local file=$1 content=$2 tmp stagedir + stagedir="$SEEN_DIR/.tmp" + tmp="$stagedir/$(basename "$file").$$" + mkdir -p "$stagedir" 2>/dev/null || true # Redirect fd 2 to /dev/null BEFORE the output redirect so a failure to open # the temp (read-only dir) is reported to /dev/null, not the terminal. if printf '%s\n' "$content" 2>/dev/null > "$tmp"; then diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index 192474b..ccffa01 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -118,9 +118,12 @@ GH } # run_poll : invoke one poll cycle with the fake gh on PATH. +# A known contributor is pinned via env so discovery proceeds even though the +# fake gh does not implement `api user`. run_poll() { local dir=$1 PATH="$dir/fakebin:$PATH" GH_FIXTURE="$dir/fixture" \ + FM_GH_CONTRIBUTOR=e-jung \ FM_STATE_OVERRIDE="$dir/state" \ bash "$GH_WATCH" --once } From abc8c592f7653d75a395bdcfb726f13e62541037 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 06:45:42 +0000 Subject: [PATCH 07/16] fix(ghwatch): --limit 1000 on PR search; document 100-item count cap Address the sixth review pass: - discover_prs passes --limit 1000 so open PRs beyond the gh search default of 30 are still discovered (the header advertises large-fleet use). - Document that comment/review/check-run counts cap at 100 per type per PR (per_page=100, no pagination) alongside the existing Checks-API caveat. --- bin/fm-github-watch.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index fca4246..51ead7a 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -33,6 +33,8 @@ # The ci filter reads the Checks API (check-runs); CI providers that report # only via the legacy commit status API (some older Travis/Coveralls setups) # are not covered. Use `gh pr checks` directly for a unified view. +# Comment, review, and check-run counts fetch up to 100 items per type per PR +# (per_page=100, no pagination); a single PR with >100 of one kind would cap. # # Losslessness: for each PR, events are emitted BEFORE its seen marker advances # (and bash's builtin printf write()s to the capture pipe immediately, so an @@ -169,7 +171,7 @@ discover_prs() { # search: GitHub treats an empty author qualifier as no filter, which would # match open PRs across every repo and flood the seen state. [ -n "$contributor" ] || return 0 - ghc search prs --author="$contributor" --state=open \ + ghc search prs --author="$contributor" --state=open --limit 1000 \ --json repository,number \ --jq '.[] | [.repository.nameWithOwner, .number] | @tsv' } From 869882de36fed63ee554baf7b599a1ccf15b8e7f Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 07:02:37 +0000 Subject: [PATCH 08/16] fix(ghwatch): treat CLOSED as non-terminal; exclude .tmp from status count Address the seventh review pass: - detect_left_open now treats only MERGED as terminal. CLOSED PRs are re-probed each cycle, so a close->reopen->merge between polls still emits MERGED instead of being swallowed. Repeat CLOSED events are suppressed by skipping when p_state equals seen_state. - cmd_status excludes the .tmp staging subdir so leaked temps never inflate the 'seen PRs' count; detect_left_open also skips *.tmp.* defensively. - Add a regression test for the close->merge path. --- bin/fm-github-watch.sh | 29 +++++++++++++++++++---------- tests/fm-github-watch.test.sh | 30 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index 51ead7a..36c062a 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -348,8 +348,11 @@ EOF detect_left_open "$open_basenames" } -# Detect PRs previously seen as OPEN that no longer appear in the open search -# (they merged or closed). For each, emit the transition and update its state. +# Detect PRs that left the open search (merged or closed) since the last poll. +# For each, emit a state transition and advance its seen state. Only MERGED is +# terminal: a CLOSED PR can be reopened and later merged, so CLOSED (and OPEN) +# PRs are re-probed each cycle until they settle to MERGED. Re-probing is +# bounded by the seen set (PRs once seen open, not yet merged). # detect_left_open (space-padded: " key1 key2 " so the last # entry matches too). detect_left_open() { @@ -359,22 +362,27 @@ detect_left_open() { for f in "$SEEN_DIR"/*; do [ -e "$f" ] || continue base=${f##*/} + case "$base" in *.tmp.*) continue ;; esac case "$open_basenames" in *" $base "*) continue ;; esac - # Only re-check PRs we last recorded as OPEN; merged/closed are settled. - seen_state=$(seen_get "$f" state) [ -n "$(seen_get "$f" initialized)" ] || continue - { [ -z "$seen_state" ] || [ "$seen_state" = "OPEN" ]; } || continue + seen_state=$(seen_get "$f" state) + [ "$seen_state" = "MERGED" ] && continue # merged is the only terminal state owner=$(seen_get "$f" owner) repo=$(seen_get "$f" repo) pr=$(seen_get "$f" pr) [ -n "$owner" ] && [ -n "$repo" ] && [ -n "$pr" ] || continue p_state=$(pr_state "$owner" "$repo" "$pr") + [ "$p_state" = "$seen_state" ] && continue # unchanged: no event, no rewrite case "$p_state" in - MERGED|CLOSED) ;; - *) continue ;; + MERGED|CLOSED) + # Emit, then advance state (same per-PR losslessness ordering). + printf '%s: %s/%s#%s\n' "$p_state" "$owner" "$repo" "$pr" + ;; + *) + # Reopened back to OPEN (or unknown): no event, but track the new state + # so a later merge still fires from the right baseline. + ;; esac - # Emit, then advance state (same per-PR losslessness ordering). - printf '%s: %s/%s#%s\n' "$p_state" "$owner" "$repo" "$pr" block=$(awk -F= -v s="$p_state" '$1!="state" { print } END { print "state=" s }' "$f") atomic_write "$f" "$block" done @@ -441,7 +449,8 @@ cmd_status() { printf 'poll interval: %ss\n' "$(get_poll)" seen_count=0 if [ -d "$SEEN_DIR" ]; then - seen_count=$(find "$SEEN_DIR" -type f 2>/dev/null | wc -l | tr -d ' ') + # Exclude the .tmp staging subdir so leaked temps never inflate the count. + seen_count=$(find "$SEEN_DIR" -type f -not -path '*/.tmp/*' 2>/dev/null | wc -l | tr -d ' ') fi printf 'seen PRs: %s\n' "$seen_count" } diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index ccffa01..c37b8b1 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -273,6 +273,35 @@ test_merge_detection_on_left_open() { pass "PR leaving the open set as merged emits MERGED once" } +test_closed_then_merged_is_not_swallowed() { + local dir out sf + dir=$(make_case close-merge) + seed_prs "$dir" $'kunchenguid/firstmate\t42' + printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-42" + run_poll "$dir" >/dev/null # baseline OPEN + + # PR is closed (leaves the open set): emit CLOSED once. + : > "$dir/fixture/prs" + printf 'CLOSED\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CLOSED: kunchenguid/firstmate#42" \ + || fail "open->closed did not emit; got: $out" + + # Steady closed: must NOT re-emit CLOSED every cycle. + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CLOSED" && fail "CLOSED re-emitted while settled" || true + + # Closed -> reopened -> merged all between polls: MERGED must still fire + # (CLOSED is not terminal; the watcher re-probes it). + printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "MERGED: kunchenguid/firstmate#42" \ + || fail "close->merge transition was swallowed; got: $out" + + pass "CLOSED is treated as non-terminal: close->merge still emits MERGED" +} + test_config_roundtrip() { local dir dir=$(make_case config) @@ -415,6 +444,7 @@ test_first_run_baselines_silently test_comment_detection_advances_seen_after_print test_losslessness_redetects_when_seen_write_fails test_merge_detection_on_left_open +test_closed_then_merged_is_not_swallowed test_config_roundtrip test_review_detection test_ci_detection From bd312726400220545b4100a3ad2e4d92bb832ff6 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 07:16:13 +0000 Subject: [PATCH 09/16] fix(ghwatch): carry forward state across transient pr_state failures Address the eighth review pass: - build_seen carries the prior state forward when pr_state returns empty (transient gh failure), matching the ci/comments/reviews carry-forward, so a single failed state fetch never erases the tracked OPEN/CLOSED/MERGED. - detect_left_open skips the rewrite entirely when p_state is empty, so it cannot clobber a real state with an empty value and trigger a duplicate. --- bin/fm-github-watch.sh | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index 36c062a..8129c17 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -237,10 +237,11 @@ atomic_write() { # check-runs have not populated yet) so a later status change still fires. build_seen() { local sf=$1 owner=$2 repo=$3 pr=$4 c_count=$5 r_count=$6 ci_sig=$7 sha=$8 p_state=$9 - local seen_c seen_r seen_ci new_c new_r ci_val block + local seen_c seen_r seen_ci seen_state new_c new_r ci_val state_val block seen_c=$(seen_get "$sf" comments) seen_r=$(seen_get "$sf" reviews) seen_ci=$(seen_get "$sf" ci) + seen_state=$(seen_get "$sf" state) new_c=$seen_c; new_r=$seen_r if is_int "$c_count"; then if is_int "$seen_c"; then new_c=$((seen_c > c_count ? seen_c : c_count)); else new_c=$c_count; fi @@ -250,12 +251,14 @@ build_seen() { fi ci_val=$ci_sig [ -n "$ci_val" ] || ci_val=$seen_ci + state_val=$p_state + [ -n "$state_val" ] || state_val=$seen_state block=$(printf 'owner=%s\nrepo=%s\npr=%s\ninitialized=1' "$owner" "$repo" "$pr") - is_int "$new_c" && block=$(printf '%s\ncomments=%s' "$block" "$new_c") - is_int "$new_r" && block=$(printf '%s\nreviews=%s' "$block" "$new_r") - [ -n "$ci_val" ] && block=$(printf '%s\nci=%s' "$block" "$ci_val") - [ -n "$sha" ] && block=$(printf '%s\nsha=%s' "$block" "$sha") - [ -n "$p_state" ] && block=$(printf '%s\nstate=%s' "$block" "$p_state") + is_int "$new_c" && block=$(printf '%s\ncomments=%s' "$block" "$new_c") + is_int "$new_r" && block=$(printf '%s\nreviews=%s' "$block" "$new_r") + [ -n "$ci_val" ] && block=$(printf '%s\nci=%s' "$block" "$ci_val") + [ -n "$sha" ] && block=$(printf '%s\nsha=%s' "$block" "$sha") + [ -n "$state_val" ] && block=$(printf '%s\nstate=%s' "$block" "$state_val") printf '%s' "$block" } @@ -372,6 +375,7 @@ detect_left_open() { pr=$(seen_get "$f" pr) [ -n "$owner" ] && [ -n "$repo" ] && [ -n "$pr" ] || continue p_state=$(pr_state "$owner" "$repo" "$pr") + [ -n "$p_state" ] || continue # transient gh failure: leave seen state untouched [ "$p_state" = "$seen_state" ] && continue # unchanged: no event, no rewrite case "$p_state" in MERGED|CLOSED) From a2dc84f52565e4c1fd01de9ba328a4f64ef597eb Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 07:29:49 +0000 Subject: [PATCH 10/16] fix(ghwatch): bound CLOSED re-probes with a close->settle window Address the ninth review pass: - A closed_at timestamp (set when CLOSED is emitted) bounds how long a CLOSED PR is re-probed for a close->reopen->merge. Past FM_GH_CLOSE_REPROBE_SECS (default 7200s) it is treated as settled, so accumulated closed PRs cannot push the fleet past GitHub's rate limit. The default window is generous enough that a prompt close->merge still fires. --- bin/fm-github-watch.sh | 32 +++++++++++++++++++++++++------- tests/fm-github-watch.test.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index 8129c17..eeb32a1 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -51,6 +51,10 @@ CONFIG="$STATE/.github-watch-config" SEEN_DIR="$STATE/.github-watch-seen" ALL_FILTERS="comments,ci,reviews,merge" DEFAULT_POLL_SECS="${FM_GH_POLL_SECS:-300}" +# How long after a PR closes to keep re-probing it for a close->reopen->merge. +# Bounds API cost: a closed PR is re-checked only within this window, then +# treated as settled. ~2h at the default 300s poll. +CLOSE_REPROBE_SECS="${FM_GH_CLOSE_REPROBE_SECS:-7200}" mkdir -p "$STATE" "$SEEN_DIR" @@ -353,15 +357,16 @@ EOF # Detect PRs that left the open search (merged or closed) since the last poll. # For each, emit a state transition and advance its seen state. Only MERGED is -# terminal: a CLOSED PR can be reopened and later merged, so CLOSED (and OPEN) -# PRs are re-probed each cycle until they settle to MERGED. Re-probing is -# bounded by the seen set (PRs once seen open, not yet merged). -# detect_left_open (space-padded: " key1 key2 " so the last -# entry matches too). +# terminal: a CLOSED PR can be reopened and later merged, so CLOSED PRs are +# re-probed within a bounded window (CLOSE_REPROBE_SECS after they closed) so a +# close->reopen->merge still fires, without an unbounded per-cycle API cost as +# closed PRs accumulate. detect_left_open (space-padded: +# " key1 key2 " so the last entry matches too). detect_left_open() { - local open_basenames=$1 f base owner repo pr seen_state p_state block + local open_basenames=$1 f base owner repo pr seen_state p_state block closed_at now filter_enabled merge || return 0 [ -d "$SEEN_DIR" ] || return 0 + now=$(date +%s) for f in "$SEEN_DIR"/*; do [ -e "$f" ] || continue base=${f##*/} @@ -370,6 +375,14 @@ detect_left_open() { [ -n "$(seen_get "$f" initialized)" ] || continue seen_state=$(seen_get "$f" state) [ "$seen_state" = "MERGED" ] && continue # merged is the only terminal state + # A CLOSED PR older than the re-probe window is settled: skip the API call + # so accumulated closed PRs cannot push the fleet past the rate limit. + if [ "$seen_state" = "CLOSED" ]; then + closed_at=$(seen_get "$f" closed_at) + if [ -n "$closed_at" ] && [ $((now - closed_at)) -ge "$CLOSE_REPROBE_SECS" ]; then + continue + fi + fi owner=$(seen_get "$f" owner) repo=$(seen_get "$f" repo) pr=$(seen_get "$f" pr) @@ -387,7 +400,12 @@ detect_left_open() { # so a later merge still fires from the right baseline. ;; esac - block=$(awk -F= -v s="$p_state" '$1!="state" { print } END { print "state=" s }' "$f") + # Rewrite state; stamp closed_at when entering CLOSED so the re-probe window + # can age it out, and clear it on any other transition. + local cat="" + [ "$p_state" = "CLOSED" ] && cat=$now + block=$(awk -F= -v s="$p_state" -v cat="$cat" \ + '$1!="state" && $1!="closed_at" { print } END { print "state=" s; if (cat != "") print "closed_at=" cat }' "$f") atomic_write "$f" "$block" done } diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index c37b8b1..678a6bb 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -302,6 +302,32 @@ test_closed_then_merged_is_not_swallowed() { pass "CLOSED is treated as non-terminal: close->merge still emits MERGED" } +test_closed_pr_reprobe_window_is_bounded() { + # A closed PR is re-probed only within CLOSE_REPROBE_SECS of closing, so + # accumulated closed PRs cannot push the fleet past the rate limit. With a + # zero window the PR is settled immediately: a later merge is intentionally + # not re-detected (the cost-bound tradeoff). The default window is generous. + local dir out sf + dir=$(make_case close-window) + seed_prs "$dir" $'kunchenguid/firstmate\t42' + printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-42" + run_poll "$dir" >/dev/null # baseline OPEN + : > "$dir/fixture/prs" + printf 'CLOSED\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + out=$(run_poll "$dir") # emits CLOSED, stamps closed_at + printf '%s\n' "$out" | grep -Fq "CLOSED: kunchenguid/firstmate#42" || fail "close not emitted" + grep -Fq "closed_at=" "$sf" || fail "closed_at not stamped on close" + + # Zero window: the aged-out CLOSED PR is not re-probed, so a merge is missed. + printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + out=$(PATH="$dir/fakebin:$PATH" GH_FIXTURE="$dir/fixture" FM_GH_CONTRIBUTOR=e-jung \ + FM_GH_CLOSE_REPROBE_SECS=0 FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" --once) + printf '%s\n' "$out" | grep -Fq "MERGED" \ + && fail "aged-out CLOSED PR was re-probed (cost not bounded)" || true + pass "closed PR past the re-probe window stops consuming an API call" +} + test_config_roundtrip() { local dir dir=$(make_case config) @@ -445,6 +471,7 @@ test_comment_detection_advances_seen_after_print test_losslessness_redetects_when_seen_write_fails test_merge_detection_on_left_open test_closed_then_merged_is_not_swallowed +test_closed_pr_reprobe_window_is_bounded test_config_roundtrip test_review_detection test_ci_detection From a7c1fd1d71511f81f6544a460570c9736c77c3a9 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:11:44 +0000 Subject: [PATCH 11/16] no-mistakes(document): docs(ghwatch): document FM_GH_CLOSE_REPROBE_SECS config knob --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4f50699..02c8b66 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,7 @@ FM_FLEET_PRUNE=1 # set to 0 to skip pruning local branches whose upstream FM_BUSY_REGEX='esc (to )?interrupt|Working\.\.\.' # busy-pane signatures, extend per harness FM_GH_CONTRIBUTOR= # GitHub user whose open PRs fm-github-watch.sh polls; unset = derive from `gh auth` FM_GH_POLL_SECS=300 # seconds between polls when fm-github-watch.sh runs as --daemon +FM_GH_CLOSE_REPROBE_SECS=7200 # seconds after a PR closes to keep re-probing for a reopen->merge; bounds API cost ``` ## Development From 7a8baa4d7efaee7cb3e0ea30eb296c57ad247e08 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:32:48 +0000 Subject: [PATCH 12/16] style(ghwatch): refactor A && B || C to if-statements for shellcheck SC2015 CI's shellcheck (v0.10.0) flags SC2015 on `A && B || C` patterns that local 0.11.0 missed; the existing repo scripts avoid the pattern. Rewrite the five guard/assert occurrences in the new files to explicit if-statements. --- bin/fm-github-watch.sh | 6 ++++-- tests/fm-github-watch.test.sh | 9 +++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index eeb32a1..80212f2 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -344,7 +344,9 @@ poll_once() { [ -n "${fullname:-}" ] || continue owner=${fullname%%/*} repo=${fullname#*/} - { [ -n "$owner" ] && [ -n "$repo" ] && [ "$owner" != "$fullname" ] && [ -n "${pr:-}" ]; } || continue + if [ -z "$owner" ] || [ -z "$repo" ] || [ "$owner" = "$fullname" ] || [ -z "${pr:-}" ]; then + continue + fi process_pr "$owner" "$repo" "$pr" "$contributor" basename=$(seen_file "$owner" "$repo" "$pr"); basename=${basename##*/} open_basenames="${open_basenames}${basename} " @@ -386,7 +388,7 @@ detect_left_open() { owner=$(seen_get "$f" owner) repo=$(seen_get "$f" repo) pr=$(seen_get "$f" pr) - [ -n "$owner" ] && [ -n "$repo" ] && [ -n "$pr" ] || continue + if [ -z "$owner" ] || [ -z "$repo" ] || [ -z "$pr" ]; then continue; fi p_state=$(pr_state "$owner" "$repo" "$pr") [ -n "$p_state" ] || continue # transient gh failure: leave seen state untouched [ "$p_state" = "$seen_state" ] && continue # unchanged: no event, no rewrite diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index 678a6bb..b30ec42 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -268,7 +268,7 @@ test_merge_detection_on_left_open() { # A later cycle does not re-report the merge (state no longer OPEN). out=$(run_poll "$dir") - printf '%s\n' "$out" | grep -Fq "MERGED" && fail "merge event re-reported after settling" || true + if printf '%s\n' "$out" | grep -Fq "MERGED"; then fail "merge event re-reported after settling"; fi pass "PR leaving the open set as merged emits MERGED once" } @@ -290,7 +290,7 @@ test_closed_then_merged_is_not_swallowed() { # Steady closed: must NOT re-emit CLOSED every cycle. out=$(run_poll "$dir") - printf '%s\n' "$out" | grep -Fq "CLOSED" && fail "CLOSED re-emitted while settled" || true + if printf '%s\n' "$out" | grep -Fq "CLOSED"; then fail "CLOSED re-emitted while settled"; fi # Closed -> reopened -> merged all between polls: MERGED must still fire # (CLOSED is not terminal; the watcher re-probes it). @@ -323,8 +323,9 @@ test_closed_pr_reprobe_window_is_bounded() { printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42" out=$(PATH="$dir/fakebin:$PATH" GH_FIXTURE="$dir/fixture" FM_GH_CONTRIBUTOR=e-jung \ FM_GH_CLOSE_REPROBE_SECS=0 FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" --once) - printf '%s\n' "$out" | grep -Fq "MERGED" \ - && fail "aged-out CLOSED PR was re-probed (cost not bounded)" || true + if printf '%s\n' "$out" | grep -Fq "MERGED"; then + fail "aged-out CLOSED PR was re-probed (cost not bounded)" + fi pass "closed PR past the re-probe window stops consuming an API call" } From 0a2f5e4fd19c2e15a48734ebd79b2603471fdba7 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Tue, 23 Jun 2026 19:04:41 +0000 Subject: [PATCH 13/16] perf(ghwatch): poll PRs concurrently to fit the 30s check-script budget A --once sweep polled each PR serially: up to 5 gh calls per PR, one PR at a time. At the captain's ~22 open PRs that cost ~47s, over the watcher's 30s check-script kill limit, so a daemon check-script plugin got killed every cycle and delivered nothing. Parallelize the per-PR loop in poll_once with a bounded counting semaphore (FM_GH_CONCURRENCY, default 8; >=1, 0/non-numeric falls back to 8). Each worker is a subshell running process_pr and owns its own per-PR seen file (seen_file is keyed by owner/repo/pr), so concurrent seen writes never collide. The losslessness invariant (print before seen) holds per-worker exactly, and each worker emits its whole event block in a single printf (atomic under PIPE_BUF), so stdout lines never interleave. A final wait settles all workers before detect_left_open scans the seen dir. Measured on the live fleet (~22 PRs): 47.6s -> ~8.6s (5.5x), comfortably under both the 15s target and the 30s kill limit. Adds a parallel-mode regression test asserting events still print before seen advances (via the read-only-seen-dir trick) across 12 concurrently-polled PRs, and that workers never cross-contaminate each other's seen files. --- bin/fm-github-watch.sh | 62 +++++++++++++++++++++++++++++------ tests/fm-github-watch.test.sh | 51 ++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 10 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index 80212f2..818f40b 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -11,10 +11,10 @@ # ln -s ../bin/fm-github-watch.sh state/github-events.check.sh # bin/fm-watch.sh runs state/*.check.sh every FM_CHECK_INTERVAL (default # 300s); any stdout is captured, classified as a `check` wake, escalated. -# A full poll issues up to 5 serial gh calls per open PR; for a large fleet -# that can approach the watcher's 30s check-script timeout. Events emit -# per-PR (not all-at-end), so a timeout still surfaces partial progress, -# but for very large fleets prefer --daemon, which is not timeout-bound. +# A full poll issues up to 5 gh calls per open PR, but PRs are polled +# concurrently (bounded by FM_GH_CONCURRENCY, default 8) so a sweep across the +# fleet finishes in well under the watcher's 30s check-script timeout. Events +# emit per-PR (not all-at-end), so a timeout still surfaces partial progress. # # Usage: # fm-github-watch.sh # one poll cycle (same as --once) @@ -41,7 +41,8 @@ # emitted event survives even a SIGKILL). A crash between the print and the seen # write at worst causes a redundant re-detect next cycle, never a permanent # swallow. A failing seen write leaves the old marker in place, so the same -# event fires again next cycle. +# event fires again next cycle. PRs are polled concurrently but each worker +# owns its own per-PR seen file, so this ordering holds per-worker exactly. set -u SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -55,6 +56,11 @@ DEFAULT_POLL_SECS="${FM_GH_POLL_SECS:-300}" # Bounds API cost: a closed PR is re-checked only within this window, then # treated as settled. ~2h at the default 300s poll. CLOSE_REPROBE_SECS="${FM_GH_CLOSE_REPROBE_SECS:-7200}" +# Max number of PRs polled concurrently in a single sweep. Bounded so a large +# fleet can't burst GitHub's rate limit or hammer the API. ~88 calls/sweep at +# the captain's ~22 PRs is well under the 5000/hr ceiling even at 12 sweeps/hr. +# Set FM_GH_CONCURRENCY to tune (>=1; 0/non-numeric falls back to the default 8). +DEFAULT_CONCURRENCY=8 mkdir -p "$STATE" "$SEEN_DIR" @@ -127,6 +133,13 @@ get_poll() { case "${v:-}" in ''|*[!0-9]*) printf '%s' "$DEFAULT_POLL_SECS" ;; *) printf '%s' "$v" ;; esac } +# Max concurrent per-PR workers in a sweep. FM_GH_CONCURRENCY overrides; a +# missing, empty, non-numeric, or zero value falls back to the sane default. +get_concurrency() { + local v="${FM_GH_CONCURRENCY:-}" + case "$v" in ''|*[!0-9]*|0) printf '%s' "$DEFAULT_CONCURRENCY" ;; *) printf '%s' "$v" ;; esac +} + # seen_file -> path to that PR's seen-state file seen_file() { printf '%s/%s-%s-%s\n' "$SEEN_DIR" "$1" "$2" "$3"; } @@ -271,10 +284,9 @@ build_seen() { # then advance this PR's seen marker. Per-PR ordering (print before seen) plus # bash's immediate write() to the capture pipe make this lossless even if the # poll is killed mid-cycle: an emitted event is already in the pipe, and a PR -# whose marker never advanced simply re-fires next cycle. Emitting per-PR (not -# all-at-end) also means a watcher timeout surfaces partial progress instead of -# nothing — important because the watcher wraps check scripts in a 30s timeout -# and a full fleet poll of serial gh calls can approach that budget. +# whose marker never advanced simply re-fires next cycle. Runs one worker per +# PR under poll_once's bounded concurrency; each worker writes only this PR's +# own seen file, so concurrent workers never contend on seen state. process_pr() { local owner=$1 repo=$2 pr=$3 contributor=$4 local sf c_count r_count p_state sha ci_sig @@ -337,9 +349,21 @@ process_pr() { poll_once() { local contributor prs fullname pr owner repo basename local open_basenames=" " + local max_jobs running + max_jobs=$(get_concurrency) + running=0 contributor=$(get_contributor) prs=$(discover_prs) + # Parallel per-PR polling. Each worker is a subshell running process_pr; each + # owns its own seen file (seen_file is keyed by owner/repo/pr), so concurrent + # seen writes never collide. Concurrency is bounded by FM_GH_CONCURRENCY + # (default 8) via a counting semaphore so a large fleet can't burst the GitHub + # rate limit. Each worker prints its whole event block in a single printf + # (one write() of a few hundred bytes, atomic under PIPE_BUF, so lines never + # interleave), and only then advances its own seen marker — the losslessness + # invariant (print before seen) holds per-worker exactly as in the serial + # model: a crash/timeout mid-sweep at worst re-detects, never swallows. while IFS=$'\t' read -r fullname pr; do [ -n "${fullname:-}" ] || continue owner=${fullname%%/*} @@ -347,13 +371,31 @@ poll_once() { if [ -z "$owner" ] || [ -z "$repo" ] || [ "$owner" = "$fullname" ] || [ -z "${pr:-}" ]; then continue fi - process_pr "$owner" "$repo" "$pr" "$contributor" basename=$(seen_file "$owner" "$repo" "$pr"); basename=${basename##*/} open_basenames="${open_basenames}${basename} " + + # Throttle: at capacity, wait for one worker to finish before launching the + # next. wait -n (bash >= 4.3) blocks until any child exits; the decrement + # keeps the running count honest (it can only under-count finished workers, + # which is conservative — concurrency never exceeds the cap). + while [ "$running" -ge "$max_jobs" ]; do + wait -n 2>/dev/null || wait + running=$((running - 1)) + done + + # "$dir/fixture/comments-org-r$i-1" + done + seed_prs "$dir" "${pr_lines[@]}" + run_poll "$dir" >/dev/null # baseline all n PRs (comments=5 each) + + # Each PR gains a DISTINCT count (PR i -> 5+i) so a worker that crossed wires + # would stamp another PR's count into the wrong seen file. + for i in $(seq 1 "$n"); do + printf '%d\n' "$((5 + i))" > "$dir/fixture/comments-org-r$i-1" + done + + # Losslessness under concurrency: make the seen dir read-only so every + # worker's seen write fails, then poll with concurrency well below n. Every + # PR's event must STILL print this cycle (each worker prints before its seen + # write, independent of the other workers). + chmod a-w "$dir/state/.github-watch-seen" + out=$(FM_GH_CONCURRENCY=4 run_poll "$dir") + chmod u+w "$dir/state/.github-watch-seen" + for i in $(seq 1 "$n"); do + printf '%s\n' "$out" | grep -Fq "COMMENT: org/r$i#1 has $i new comment(s)" \ + || fail "parallel poll did not emit PR r$i before its seen write; out: $out" + done + + # No cross-contamination: after a writable concurrent poll, each PR's seen + # file holds its OWN advanced count and its own identity (never another PR's + # values), even though workers ran concurrently with a shared .tmp stage. + out=$(FM_GH_CONCURRENCY=4 run_poll "$dir") + for i in $(seq 1 "$n"); do + sf="$dir/state/.github-watch-seen/org-r$i-1" + grep -Fxq "comments=$((5 + i))" "$sf" \ + || fail "r$i seen file has wrong count (cross-contamination?): $(cat "$sf")" + grep -Fxq "owner=org" "$sf" || fail "r$i seen file lost owner identity" + grep -Fxq "repo=r$i" "$sf" || fail "r$i seen file has wrong repo (cross-contamination?)" + grep -Fxq "pr=1" "$sf" || fail "r$i seen file lost pr identity" + done + + pass "parallel poll emits before each seen write and never cross-contaminates seen files" +} + test_filter_toggling test_first_run_baselines_silently test_comment_detection_advances_seen_after_print @@ -479,3 +529,4 @@ test_ci_detection test_merge_filter_suppresses_merge_event test_ci_carry_forward_across_empty_window test_all_filters_off_mutes_watcher +test_parallel_poll_is_lossless_and_does_not_cross_contaminate From 80429d7667b9917f868663bdf45551f7efa487f5 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Wed, 24 Jun 2026 03:45:09 +0000 Subject: [PATCH 14/16] fix(ghwatch): debounce CI filter to one event per overall-state transition The ci filter built a sorted multiset of every check-run conclusion and fired whenever that multiset changed, so a PR whose checks complete at staggered times fired once per check landing (observed live: no-mistakes#312 fired several times as its 7 check-runs trickled in). The captain wants "is the PR green or not", not per-check noise. Replace the multiset with a single rolled-up overall state per PR, computed in one jq pass over the check-runs: success every non-neutral check passed (success/skipped), none still running failure at least one non-neutral check failed (failure/timed_out/cancelled/ action_required/stale) pending at least one non-neutral check is still queued/in_progress neutral only neutral check-runs present (empty) no check-runs reported yet Fire one event only when this state flips vs the seen marker (`CI: # -> green|failure|pending|neutral`); stay silent while it is unchanged. Failure beats pending (a red check already settles the outcome), matching GitHub's own combined-status precedence. Losslessness is preserved exactly: the seen marker is still written AFTER the event prints, the empty-response carry-forward for a just-pushed SHA whose checks have not populated yet still holds (so a later transition fires), and per-PR seen files stay independently owned by each parallel worker. The bounded concurrent poll is untouched. The test fake gh now runs the watcher's real --jq roll-up over JSON check-run fixtures (via jq), so the roll-up logic itself is exercised, not just the comparison. New tests cover the staggered-checks debounce (7 checks trickling pending->success fires exactly once, not once per check), the pending->green / green->green / green->failure transitions, and roll-up precedence (failure beats pending; neutral checks ignored). Existing CI tests moved to the rolled-up state model; the parallel losslessness test is unchanged and still passes. Measured on the live fleet (23 open PRs): --once = 7.74s, well under the 15s target and the 30s check-script kill limit. --- bin/fm-github-watch.sh | 66 ++++++++---- tests/fm-github-watch.test.sh | 190 ++++++++++++++++++++++++++++++---- 2 files changed, 219 insertions(+), 37 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index 818f40b..0f8d6dd 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -30,9 +30,12 @@ # Config: state/.github-watch-config (key=value lines). # Seen: state/.github-watch-seen/-- (key=value lines). # -# The ci filter reads the Checks API (check-runs); CI providers that report -# only via the legacy commit status API (some older Travis/Coveralls setups) -# are not covered. Use `gh pr checks` directly for a unified view. +# The ci filter rolls the Checks API (check-runs) up to a single overall state +# per PR (green/failure/pending) and fires one event only when that state flips, +# not once per check landing — so a PR whose many checks trickle in reports a +# single transition, not a burst. CI providers that report only via the legacy +# commit status API (some older Travis/Coveralls setups) are not covered; use +# `gh pr checks` directly for a unified view. # Comment, review, and check-run counts fetch up to 100 items per type per PR # (per_page=100, no pagination); a single PR with >100 of one kind would cap. # @@ -217,11 +220,35 @@ head_sha() { ghc pr view "$3" -R "$1/$2" --json headRefOid -q .headRefOid } -# ci_signature -> sorted multiset of check conclusions/statuses -ci_signature() { +# ci_state -> the commit's rolled-up overall CI state: +# success every non-neutral check passed (conclusion success/skipped), none still running +# failure at least one non-neutral check failed (failure/timed_out/cancelled/action_required/stale) +# pending at least one non-neutral check is still queued/in_progress (no conclusion yet) +# neutral only neutral check-runs are present +# (empty) no check-runs reported yet; the caller carries forward the prior state +# Rolled up from the Checks API so a PR with many staggered checks surfaces a +# single green/red transition instead of one event per check landing. Failure +# beats pending (a red check already settles the PR's outcome), matching +# GitHub's own combined-status precedence. +ci_state() { [ -n "$3" ] || return 0 + # shellcheck disable=SC2016 # single quotes are deliberate: $all/$rel are jq bindings, not shell vars. ghc api "repos/$1/$2/commits/$3/check-runs?per_page=100" \ - --jq '[.check_runs[] | (.conclusion // .status)] | sort | join(",")' + --jq '(.check_runs // []) as $all + | ($all | map(select(.conclusion != "neutral"))) as $rel + | if ($all | length) == 0 then "" + elif ($rel | length) == 0 then "neutral" + elif any($rel[]; .conclusion != null and .conclusion != "success" and .conclusion != "skipped") then "failure" + elif any($rel[]; .conclusion == null) then "pending" + else "success" end' +} + +# ci_label -> the word printed in a CI event line (success -> green). +ci_label() { + case "${1:-}" in + success) printf 'green' ;; + *) printf '%s' "${1:-unknown}" ;; + esac } # ---- the poll ---- @@ -246,14 +273,15 @@ atomic_write() { fi } -# build_seen +# build_seen # Compose the seen-state block: high-water marks for counts, current value for # ci/state. Fields with no fresh value this cycle are carried forward from the # prior block, so toggling a filter off never wipes its remembered high-water. -# CI is carried forward across a transiently-empty fetch (a new commit whose -# check-runs have not populated yet) so a later status change still fires. +# CI is the rolled-up overall state; it is carried forward across a transiently +# empty fetch (a new commit whose check-runs have not populated yet) so a later +# state transition still fires. build_seen() { - local sf=$1 owner=$2 repo=$3 pr=$4 c_count=$5 r_count=$6 ci_sig=$7 sha=$8 p_state=$9 + local sf=$1 owner=$2 repo=$3 pr=$4 c_count=$5 r_count=$6 ci_st=$7 sha=$8 p_state=$9 local seen_c seen_r seen_ci seen_state new_c new_r ci_val state_val block seen_c=$(seen_get "$sf" comments) seen_r=$(seen_get "$sf" reviews) @@ -266,7 +294,7 @@ build_seen() { if is_int "$r_count"; then if is_int "$seen_r"; then new_r=$((seen_r > r_count ? seen_r : r_count)); else new_r=$r_count; fi fi - ci_val=$ci_sig + ci_val=$ci_st [ -n "$ci_val" ] || ci_val=$seen_ci state_val=$p_state [ -n "$state_val" ] || state_val=$seen_state @@ -289,17 +317,17 @@ build_seen() { # own seen file, so concurrent workers never contend on seen state. process_pr() { local owner=$1 repo=$2 pr=$3 contributor=$4 - local sf c_count r_count p_state sha ci_sig + local sf c_count r_count p_state sha ci_st local initialized seen_c seen_r seen_state seen_ci ev="" sf=$(seen_file "$owner" "$repo" "$pr") - c_count="" r_count="" p_state="" sha="" ci_sig="" + c_count="" r_count="" p_state="" sha="" ci_st="" filter_enabled comments && c_count=$(count_comments "$owner" "$repo" "$pr" "$contributor") filter_enabled reviews && r_count=$(count_reviews "$owner" "$repo" "$pr" "$contributor") filter_enabled merge && p_state=$(pr_state "$owner" "$repo" "$pr") if filter_enabled ci; then sha=$(head_sha "$owner" "$repo" "$pr") - ci_sig=$(ci_signature "$owner" "$repo" "$sha") + ci_st=$(ci_state "$owner" "$repo" "$sha") fi initialized=$(seen_get "$sf" initialized) @@ -319,9 +347,11 @@ process_pr() { ev="${ev}REVIEW: ${owner}/${repo}#${pr} has $((r_count - seen_r)) new review(s) " fi - # ci: event on any signature change. - if [ -n "$ci_sig" ] && [ -n "$seen_ci" ] && [ "$seen_ci" != "$ci_sig" ]; then - ev="${ev}CI: ${owner}/${repo}#${pr} checks changed + # ci: event on overall-state transition only (debounced). A PR with many + # staggered checks surfaces one event per green/red/pending flip, not one + # per check landing. No event while the rolled-up state is unchanged. + if [ -n "$ci_st" ] && [ -n "$seen_ci" ] && [ "$seen_ci" != "$ci_st" ]; then + ev="${ev}CI: ${owner}/${repo}#${pr} -> $(ci_label "$ci_st") " fi # merge: event on open -> merged/closed transition. @@ -341,7 +371,7 @@ process_pr() { # delivered and the marker stale -> a redundant re-detect, never a swallow. [ -n "$ev" ] && printf '%s' "$ev" local block - block=$(build_seen "$sf" "$owner" "$repo" "$pr" "$c_count" "$r_count" "$ci_sig" "$sha" "$p_state") + block=$(build_seen "$sf" "$owner" "$repo" "$pr" "$c_count" "$r_count" "$ci_st" "$sha" "$p_state") atomic_write "$sf" "$block" } diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index 0cdd08f..88dc86c 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -79,7 +79,22 @@ case "$sub" in */commits/*/check-runs) sha=${path##*/commits/}; sha=${sha%/check-runs} f="$FX/ci-$sha" - [ -f "$f" ] && { cat "$f"; exit 0; } + [ -f "$f" ] || exit 0 + # The watcher passes --jq to roll check-runs up into a single overall + # state; run that same filter against the JSON fixture so the real + # roll-up logic (success/failure/pending/neutral) is exercised, not + # just the comparison. Falls back to cat for any caller without --jq. + jq_expr="" + prev="" + for a in "$@"; do + if [ "$prev" = "--jq" ]; then jq_expr=$a; fi + prev=$a + done + if [ -n "$jq_expr" ]; then + jq -r "$jq_expr" "$f" + else + cat "$f" + fi exit 0 ;; esac @@ -137,6 +152,30 @@ seed_prs() { for ln in "$@"; do printf '%s\n' "$ln" >> "$dir/fixture/prs"; done } +# seed_ci -> write a JSON check-runs fixture the +# fake gh feeds through the watcher's real --jq roll-up. Each conclusion arg is +# a Checks-API value ("success","failure","neutral","skipped","timed_out",...) +# or the literal "pending" for a still-running check (status in_progress, +# conclusion null). The fake gh runs the watcher's --jq filter on this JSON, so +# the actual roll-up logic (not just the comparison) is what the tests exercise. +seed_ci() { + local f="$1/fixture/ci-$2" + shift 2 + printf '%s' '{"check_runs":[' > "$f" + local first=1 c status conclusion + for c in "$@"; do + [ "$first" = 1 ] || printf ',' >> "$f" + first=0 + if [ "$c" = "pending" ]; then + status="in_progress"; conclusion="null" + else + status="completed"; conclusion="\"$c\"" + fi + printf '{"status":"%s","conclusion":%s}' "$status" "$conclusion" >> "$f" + done + printf '%s' ']}' >> "$f" +} + test_filter_toggling() { local dir dir=$(make_case filter-toggle) @@ -377,24 +416,24 @@ test_ci_detection() { dir=$(make_case ci) seed_prs "$dir" $'kunchenguid/no-mistakes\t310' printf 'abcdef1\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-310" - printf 'success,success,success\n' > "$dir/fixture/ci-abcdef1" + seed_ci "$dir" abcdef1 success success success sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-310" run_poll "$dir" >/dev/null - grep -Fxq "ci=success,success,success" "$sf" || fail "baseline ci signature not set" + grep -Fxq "ci=success" "$sf" || fail "baseline ci state not rolled up to success" - # A check goes red: signature changes. - printf 'failure,success,success\n' > "$dir/fixture/ci-abcdef1" + # One check goes red: the overall state flips success -> failure (one event). + seed_ci "$dir" abcdef1 failure success success out=$(run_poll "$dir") - printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#310 checks changed" \ - || fail "ci signature change did not emit event; got: $out" - grep -Fxq "ci=failure,success,success" "$sf" || fail "ci signature not advanced" + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#310 -> failure" \ + || fail "ci state change did not emit event; got: $out" + grep -Fxq "ci=failure" "$sf" || fail "ci state not advanced to failure" # Steady state again: silence. out=$(run_poll "$dir") [ -z "$out" ] || fail "steady-state ci poll should be silent; got: $out" - pass "CI signature change emits CI event" + pass "overall CI state change emits a single CI event" } test_merge_filter_suppresses_merge_event() { @@ -420,29 +459,29 @@ test_ci_carry_forward_across_empty_window() { dir=$(make_case ci-carry) seed_prs "$dir" $'kunchenguid/no-mistakes\t310' printf 'sha1\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-310" - printf 'success,success\n' > "$dir/fixture/ci-sha1" + seed_ci "$dir" sha1 success success sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-310" - # Baseline: CI passing for sha1. + # Baseline: CI passing for sha1 (rolled up to success). run_poll "$dir" >/dev/null - grep -Fxq "ci=success,success" "$sf" || fail "baseline ci not recorded" + grep -Fxq "ci=success" "$sf" || fail "baseline ci state not recorded" - # New commit: sha changes, check-runs not populated yet (empty ci_sig). + # New commit: sha changes, check-runs not populated yet (empty ci_state). printf 'sha2\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-310" rm -f "$dir/fixture/ci-sha1" - # No ci-sha2 fixture yet -> ci_signature returns empty. + # No ci-sha2 fixture yet -> ci_state returns empty. out=$(run_poll "$dir") [ -z "$out" ] || fail "transient empty ci window should be silent; got: $out" # seen_ci must be carried forward (not dropped) so a later change still fires. - grep -Fxq "ci=success,success" "$sf" || fail "ci signature was dropped during empty window" + grep -Fxq "ci=success" "$sf" || fail "ci state was dropped during empty window" - # CI completes for sha2 and FAILS: signature differs from carried-forward. - printf 'failure,success\n' > "$dir/fixture/ci-sha2" + # CI completes for sha2 and FAILS: state differs from carried-forward success. + seed_ci "$dir" sha2 failure success out=$(run_poll "$dir") - printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#310 checks changed" \ + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#310 -> failure" \ || fail "ci completion after empty window did not fire; got: $out" - pass "CI signature carries forward across an empty window and fires on change" + pass "overall CI state carries forward across an empty window and fires on change" } test_all_filters_off_mutes_watcher() { @@ -516,6 +555,116 @@ test_parallel_poll_is_lossless_and_does_not_cross_contaminate() { pass "parallel poll emits before each seen write and never cross-contaminates seen files" } +test_ci_debounces_staggered_checks() { + # Reproduces the no-mistakes#312 chatter: a PR whose many check-runs complete + # at staggered times. Under the old per-multiset logic each completion changed + # the signature and fired (one event per check). The roll-up keeps the state + # at "pending" while ANY check is still running, then flips to green exactly + # once when the last one completes. + local dir out sf finished i + dir=$(make_case ci-debounce) + seed_prs "$dir" $'kunchenguid/no-mistakes\t312' + printf 'sha7\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-312" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-312" + + # Cycle 1: 7 checks, all pending -> baseline (no event, first run). + seed_ci "$dir" sha7 pending pending pending pending pending pending pending + run_poll "$dir" >/dev/null + grep -Fxq "ci=pending" "$sf" || fail "baseline should roll 7 pending checks up to pending" + + # Checks complete a few at a time: state stays pending, so every one of these + # cycles must stay silent (under the old logic each would have fired). + for finished in 1 3 6; do + local args=() + for i in $(seq 1 7); do + if [ "$i" -le "$finished" ]; then args+=(success); else args+=(pending); fi + done + seed_ci "$dir" sha7 "${args[@]}" + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "CI:"; then + fail "fired while still pending after $finished/7 checks done; got: $out" + fi + done + + # Last check completes: pending -> green fires exactly once. + seed_ci "$dir" sha7 success success success success success success success + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#312 -> green" \ + || fail "pending->success transition did not fire once; got: $out" + # No second fire on the next (steady) cycle. + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "CI:"; then + fail "steady success re-fired; got: $out" + fi + + pass "staggered checks debounce to a single overall-state transition" +} + +test_ci_state_transitions() { + # The three transitions the captain cares about, each firing exactly once: + # pending->green, green->green (silent), green->failure. + local dir out sf + dir=$(make_case ci-trans) + seed_prs "$dir" $'kunchenguid/no-mistakes\t320' + printf 'shat\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-320" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-320" + + seed_ci "$dir" shat pending + run_poll "$dir" >/dev/null # baseline pending + + # pending -> green fires once. + seed_ci "$dir" shat success + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#320 -> green" \ + || fail "pending->success did not fire; got: $out" + grep -Fxq "ci=success" "$sf" || fail "state not advanced to success" + + # green -> green does not fire. + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "CI:"; then fail "success->success re-fired; got: $out"; fi + + # green -> failure fires once. + seed_ci "$dir" shat success failure + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#320 -> failure" \ + || fail "success->failure did not fire; got: $out" + grep -Fxq "ci=failure" "$sf" || fail "state not advanced to failure" + + pass "pending->green fires once, green->green is silent, green->failure fires once" +} + +test_ci_rollup_precedence() { + # The rolled-up state follows GitHub's combined-status precedence: a red check + # settles failure even while others are still pending; neutral checks are + # ignored entirely (never red, never green, never block). + local dir out sf + dir=$(make_case ci-rollup) + seed_prs "$dir" $'kunchenguid/no-mistakes\t321' + printf 'shar\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-321" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-321" + + # Baseline: a passing check plus a neutral informational check rolls up to + # success (neutral ignored). + seed_ci "$dir" shar success neutral + run_poll "$dir" >/dev/null + grep -Fxq "ci=success" "$sf" || fail "success+neutral should roll up to success" + + # A failure landing while another check is still pending settles failure + # immediately (no transient pending event). + seed_ci "$dir" shar success failure pending + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#321 -> failure" \ + || fail "failure+pending should roll straight up to failure; got: $out" + grep -Fxq "ci=failure" "$sf" || fail "state not advanced to failure" + + # The pending check then succeeds: state stays failure (no second fire). + seed_ci "$dir" shar success failure success + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "CI:"; then fail "failure->failure re-fired; got: $out"; fi + + pass "roll-up precedence: failure beats pending, neutral checks are ignored" +} + test_filter_toggling test_first_run_baselines_silently test_comment_detection_advances_seen_after_print @@ -528,5 +677,8 @@ test_review_detection test_ci_detection test_merge_filter_suppresses_merge_event test_ci_carry_forward_across_empty_window +test_ci_debounces_staggered_checks +test_ci_state_transitions +test_ci_rollup_precedence test_all_filters_off_mutes_watcher test_parallel_poll_is_lossless_and_does_not_cross_contaminate From 9fe2029f85567dfae7e691e0a07c85c1e92a4d0e Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Wed, 24 Jun 2026 13:36:01 +0000 Subject: [PATCH 15/16] fix(ghwatch): no-deploy flood + correct fork-PR CI roll-up for daemon use Two refinements that make ghwatch safe to leave wired in as a daemon check-script plugin, plus strict-mode hardening. 1. Silent re-baseline on seen-schema migration. A SEEN_SCHEMA version is now stamped into each seen file. When a prior file's schema does not match the current version, the first poll rewrites it at the current schema with carried-forward values and emits NOTHING -- so deploying a schema change (e.g. the ci debounce) no longer floods once as every PR appears to "transition" off the old format. Applied in both process_pr (open PRs) and detect_left_open (PRs that left the open set). Only subsequent real transitions fire. 2. Exclude FM_GH_IGNORE_CHECKS names from the CI roll-up (default: the known fork-routing signature gap #293, "PR must be raised via no-mistakes"). A PR that fails only that check now rolls up to green when its real checks pass, instead of a false failure. Only the roll-up applies the filter; the raw check list and the other filters are unchanged. Set FM_GH_IGNORE_CHECKS to a custom regex, or empty to disable. The regex is embedded into the jq program (gh api has no --arg binding for --jq); a malformed regex fails open to empty (carried forward), never crashing the poll. Also adopt `set -euo pipefail`; the fail-open design is preserved via the existing `|| true` / `|| return 0` guards (full suite green under strict mode). Tests: silent-baseline-on-migration (no flood on schema change; a real transition still fires afterward); gap-excluded PR rolls up green while a real failure still surfaces. All 17 prior ghwatch tests stay green. --- bin/fm-github-watch.sh | 75 ++++++++++++++++++++----- tests/fm-github-watch.test.sh | 103 ++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+), 13 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index 0f8d6dd..221254f 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -46,7 +46,7 @@ # swallow. A failing seen write leaves the old marker in place, so the same # event fires again next cycle. PRs are polled concurrently but each worker # owns its own per-PR seen file, so this ordering holds per-worker exactly. -set -u +set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" FM_ROOT="${FM_ROOT_OVERRIDE:-$(cd "$SCRIPT_DIR/.." && pwd)}" @@ -64,6 +64,21 @@ CLOSE_REPROBE_SECS="${FM_GH_CLOSE_REPROBE_SECS:-7200}" # the captain's ~22 PRs is well under the 5000/hr ceiling even at 12 sweeps/hr. # Set FM_GH_CONCURRENCY to tune (>=1; 0/non-numeric falls back to the default 8). DEFAULT_CONCURRENCY=8 +# Seen-state schema version. Bump when a stored field's meaning or the field set +# changes in a way that would make a prior value miscompare (e.g. the ci roll-up +# changed `ci` from a multiset signature to a single state). On a schema mismatch +# the first poll silently re-baselines: it writes the new seen state and emits no +# event, so deploying a schema change never floods once as every PR appears to +# "transition" off the old format. Only subsequent real transitions fire. +SEEN_SCHEMA=2 +# Regex (Oniguruma) of check-run NAMES to drop from the CI roll-up before it is +# computed. Default: the known fork-routing signature gap #293 ("PR must be +# raised via no-mistakes"), which fails on kunchenguid fork-PRs even though the +# PR's real checks pass. With it excluded such PRs roll up to green when their +# real checks pass, instead of a false failure. Set FM_GH_IGNORE_CHECKS to a +# custom regex, or to empty to disable filtering entirely. Only the CI roll-up +# applies this; the raw check list and the other filters are unchanged. +IGNORE_CHECKS="${FM_GH_IGNORE_CHECKS-PR must be raised via no-mistakes}" mkdir -p "$STATE" "$SEEN_DIR" @@ -229,18 +244,36 @@ head_sha() { # Rolled up from the Checks API so a PR with many staggered checks surfaces a # single green/red transition instead of one event per check landing. Failure # beats pending (a red check already settles the PR's outcome), matching -# GitHub's own combined-status precedence. +# GitHub's own combined-status precedence. Check-runs whose NAME matches the +# IGNORE_CHECKS regex (default: the known fork-routing gap #293) are dropped +# before the roll-up, so a PR that fails ONLY that signature check still rolls +# up to green when its real checks pass. The regex is embedded into the jq +# program (escaped for a JSON string literal) because `gh api` has no --arg +# binding for its --jq filter; a malformed regex fails open to empty (carried +# forward), never crashing the poll. ci_state() { [ -n "$3" ] || return 0 - # shellcheck disable=SC2016 # single quotes are deliberate: $all/$rel are jq bindings, not shell vars. - ghc api "repos/$1/$2/commits/$3/check-runs?per_page=100" \ - --jq '(.check_runs // []) as $all - | ($all | map(select(.conclusion != "neutral"))) as $rel - | if ($all | length) == 0 then "" - elif ($rel | length) == 0 then "neutral" - elif any($rel[]; .conclusion != null and .conclusion != "success" and .conclusion != "skipped") then "failure" - elif any($rel[]; .conclusion == null) then "pending" - else "success" end' + local ignore_escaped jq_filter + ignore_escaped=${IGNORE_CHECKS//\\/\\\\} + ignore_escaped=${ignore_escaped//\"/\\\"} + # The regex is embedded into the jq program (escaped for a JSON string + # literal) because `gh api` has no --arg binding for its --jq filter. Every jq + # binding ($ignore/$raw/$all/$rel) is backslash-escaped so the heredoc leaves + # it literal; only $ignore_escaped expands. + # shellcheck disable=SC2016 + jq_filter=$(cat < -> the word printed in a CI event line (success -> green). @@ -298,7 +331,7 @@ build_seen() { [ -n "$ci_val" ] || ci_val=$seen_ci state_val=$p_state [ -n "$state_val" ] || state_val=$seen_state - block=$(printf 'owner=%s\nrepo=%s\npr=%s\ninitialized=1' "$owner" "$repo" "$pr") + block=$(printf 'owner=%s\nrepo=%s\npr=%s\nschema=%s\ninitialized=1' "$owner" "$repo" "$pr" "$SEEN_SCHEMA") is_int "$new_c" && block=$(printf '%s\ncomments=%s' "$block" "$new_c") is_int "$new_r" && block=$(printf '%s\nreviews=%s' "$block" "$new_r") [ -n "$ci_val" ] && block=$(printf '%s\nci=%s' "$block" "$ci_val") @@ -331,7 +364,12 @@ process_pr() { fi initialized=$(seen_get "$sf" initialized) - if [ -n "$initialized" ]; then + # A prior seen file whose schema does not match the current version is treated + # as a first-run baseline: emit nothing this cycle (so deploying a schema + # change never floods as every PR appears to "transition" off the old format) + # and let build_seen rewrite it at the current schema with carried-forward + # values. Only subsequent real transitions fire. + if [ -n "$initialized" ] && [ "$(seen_get "$sf" schema)" = "$SEEN_SCHEMA" ]; then seen_c=$(seen_get "$sf" comments) seen_r=$(seen_get "$sf" reviews) seen_state=$(seen_get "$sf" state) @@ -463,6 +501,17 @@ detect_left_open() { if [ -z "$owner" ] || [ -z "$repo" ] || [ -z "$pr" ]; then continue; fi p_state=$(pr_state "$owner" "$repo" "$pr") [ -n "$p_state" ] || continue # transient gh failure: leave seen state untouched + # Migration: a prior seen file whose schema does not match the current + # version is silently re-baselined — stamp the current schema + observed + # state, emit nothing — so a schema change never floods as every PR appears + # to "transition" off the old format. All other fields (closed_at, counts, + # ci) are preserved; only schema/state are re-stamped. + if [ "$(seen_get "$f" schema)" != "$SEEN_SCHEMA" ]; then + block=$(awk -F= -v sch="$SEEN_SCHEMA" -v s="$p_state" \ + '$1 != "schema" && $1 != "state" { print } END { print "schema=" sch; print "state=" s }' "$f") + atomic_write "$f" "$block" + continue + fi [ "$p_state" = "$seen_state" ] && continue # unchanged: no event, no rewrite case "$p_state" in MERGED|CLOSED) diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index 88dc86c..3e1ead6 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -176,6 +176,27 @@ seed_ci() { printf '%s' ']}' >> "$f" } +# seed_ci_named ... +# Like seed_ci but each check-run carries a name, so name-based ignore filters +# (FM_GH_IGNORE_CHECKS) can be exercised through the real --jq roll-up. The +# literal "pending" still means a running check (conclusion null). A name is +# embedded as a JSON string literal (backslash and double-quote escaped). +seed_ci_named() { + local f="$1/fixture/ci-$2" + shift 2 + printf '%s' '{"check_runs":[' > "$f" + local first=1 arg name c status conclusion esc + for arg in "$@"; do + name=${arg%%=*}; c=${arg#*=} + [ "$first" = 1 ] || printf ',' >> "$f" + first=0 + if [ "$c" = "pending" ]; then status="in_progress"; conclusion="null"; else status="completed"; conclusion="\"$c\""; fi + esc=${name//\\/\\\\}; esc=${esc//\"/\\\"} + printf '{"status":"%s","conclusion":%s,"name":"%s"}' "$status" "$conclusion" "$esc" >> "$f" + done + printf '%s' ']}' >> "$f" +} + test_filter_toggling() { local dir dir=$(make_case filter-toggle) @@ -665,6 +686,86 @@ test_ci_rollup_precedence() { pass "roll-up precedence: failure beats pending, neutral checks are ignored" } +test_silent_baseline_on_schema_migration() { + # Reproduces the debounce deploy flood: a seen file written by an OLDER + # watcher version (here, an old per-multiset ci signature, no schema= field). + # Without the schema guard, the first poll under the new code sees + # seen_ci="success:success:failure" != ci_st="success" and fires a spurious + # CI transition for EVERY migrated PR at once. The guard treats a schema + # mismatch as a silent re-baseline: write the new schema + correct values, + # emit nothing. A subsequent REAL transition still fires. + local dir out sf + dir=$(make_case ci-migrate) + seed_prs "$dir" $'kunchenguid/no-mistakes\t330' + printf 'sham\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-330" + printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-no-mistakes-330" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-330" + mkdir -p "$(dirname "$sf")" + + # An old-format seen file: initialized but no schema=, and a stale ci value + # that the new roll-up would read as "different" from the fresh success. + cat > "$sf" <<'OLD' +owner=kunchenguid +repo=no-mistakes +pr=330 +initialized=1 +ci=success:success:failure +state=OPEN +OLD + + # Fresh roll-up is plain success; under the old code this != the stale sig. + seed_ci "$dir" sham success success success + + # First poll after migration: SILENT (no flood), seen rewritten to new schema. + out=$(run_poll "$dir") + [ -z "$out" ] || fail "schema migration should baseline silently; got: $out" + grep -Fxq "schema=2" "$sf" || fail "seen file not stamped with current schema" + grep -Fxq "ci=success" "$sf" || fail "ci not re-baselined to the rolled-up success" + + # A subsequent REAL transition still fires (migration only silenced once). + seed_ci "$dir" sham success failure success + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#330 -> failure" \ + || fail "post-migration real transition did not fire; got: $out" + + pass "schema mismatch is silently re-baselined; real transitions still fire" +} + +test_ci_ignore_excludes_known_gap_check() { + # A kunchenguid fork-PR whose ONLY failing check is the known fork-routing + # signature gap (#293: "PR must be raised via no-mistakes") must roll up to + # green when its real checks pass, not a false failure. The default + # FM_GH_IGNORE_CHECKS regex drops that name from the roll-up. A REAL failure + # (different name) must still roll up to failure, so the filter is not just + # disabling failure detection. + local dir out sf + dir=$(make_case ci-ignore) + seed_prs "$dir" $'kunchenguid/firstmate\t38' + printf 'sha38\n' > "$dir/fixture/sha-kunchenguid-firstmate-38" + sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-38" + + # 3 real checks pass; the gap check fails by name. run_poll uses the default + # FM_GH_IGNORE_CHECKS, so the gap name is excluded -> rolls up to success. + seed_ci_named "$dir" sha38 \ + "build=success" "test=success" "lint=success" \ + "PR must be raised via no-mistakes=failure" + + run_poll "$dir" >/dev/null # baseline: gap excluded -> success, not failure + grep -Fxq "ci=success" "$sf" \ + || fail "gap-excluded PR should roll up to success, got: $(cat "$sf")" + + # A REAL check failing (different name) must still surface failure despite the + # gap check also failing: the ignore list is not a blanket failure suppressor. + seed_ci_named "$dir" sha38 \ + "build=success" "test=failure" "lint=success" \ + "PR must be raised via no-mistakes=failure" + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/firstmate#38 -> failure" \ + || fail "real check failure should still roll up to failure; got: $out" + + pass "known fork-routing gap check excluded from roll-up; real failures still surface" +} + test_filter_toggling test_first_run_baselines_silently test_comment_detection_advances_seen_after_print @@ -682,3 +783,5 @@ test_ci_state_transitions test_ci_rollup_precedence test_all_filters_off_mutes_watcher test_parallel_poll_is_lossless_and_does_not_cross_contaminate +test_silent_baseline_on_schema_migration +test_ci_ignore_excludes_known_gap_check From 8dbe6037da53c1088cae9c49f8bd8ed8dfa1ab17 Mon Sep 17 00:00:00 2001 From: e-jung <8334081+e-jung@users.noreply.github.com> Date: Wed, 24 Jun 2026 15:05:36 +0000 Subject: [PATCH 16/16] fix(ghwatch): skip a PR on transient GitHub API errors instead of parsing them as data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ghc() did `command gh "$@" 2>/dev/null || true`: it swallowed stderr and the exit code, but on an API error (e.g. a transient 401 "Bad credentials") gh api writes the error JSON to stdout — bypassing --jq — which the script then parsed as CI data and fired as an event (observed: "CI: manaflow-ai/cmux#6570 -> { \"message\": \"Bad credentials\" ... }"). Detect the error and SKIP that PR for the cycle: never surface an API error as an event. ghc() now captures gh's stdout and returns non-zero (suppressing the body) when gh exits non-zero OR its output is a GitHub error body (a JSON object with top-level "message" + "documentation_url"). process_pr treats any non-zero probe return as "skip this PR this cycle": emit nothing, do not write seen, so the next cycle re-evaluates from the same baseline (lossless). Also guards the discovery fetch (abort the cycle on failure instead of misreading an empty list as "everyone merged") and get_contributor's best-effort user lookup (no set -e trip on a blip). Tests: new test injects a 401 error JSON via the fake gh (verified it fails on the old ghc with the exact bogus CI event, passes with the fix) plus a recovery cycle proving the real transition still fires. shellcheck clean; all 20 green. --- bin/fm-github-watch.sh | 64 +++++++++++++++++++++++++++++------ tests/fm-github-watch.test.sh | 46 +++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 11 deletions(-) diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh index 221254f..db3be98 100755 --- a/bin/fm-github-watch.sh +++ b/bin/fm-github-watch.sh @@ -90,9 +90,36 @@ valid_filter() { case "$1" in comments|ci|reviews|merge) return 0 ;; *) return 1 ;; esac } -# Run gh, swallowing errors and stderr so a missing gh or a transient API -# failure never kills the poll (output is simply empty for that call). -ghc() { command gh "$@" 2>/dev/null || true; } +# A GitHub REST API error body is a JSON object carrying top-level "message" and +# "documentation_url" (e.g. {"message":"Bad credentials","documentation_url":"...","status":"401"}). +# On a transient API failure (401, 5xx, rate limit) gh writes that body to stdout +# — bypassing any --jq template — and exits non-zero. Every successful probe +# output is a scalar/number/TSV, never this shape, so the pair is a safe signal. +is_gh_error() { + case "$1" in + *'"message"'*) + case "$1" in *'"documentation_url"'*) return 0 ;; esac + ;; + esac + return 1 +} + +# Run gh, capturing its stdout. Returns non-zero if gh exited non-zero OR its +# output is a GitHub API error body; in either case the body is suppressed so a +# caller that ignores the exit status can never parse an error response as data +# (the bug: a 401 body reached stdout and was parsed as CI state, firing a bogus +# "CI: ... -> { \"message\": ... }" event). Probe callers treat a non-zero return +# as "skip this PR this cycle" so a transient blip never surfaces as an event. +# stderr is always swallowed so a missing gh or a transient failure never spams +# the watcher's own capture pipe. +ghc() { + local out rc + out=$(command gh "$@" 2>/dev/null); rc=$? + if [ "$rc" -ne 0 ] || is_gh_error "$out"; then + return 1 + fi + printf '%s' "$out" +} # cfg_read -> prints value (empty if missing/unset) cfg_read() { @@ -128,7 +155,7 @@ get_contributor() { v=$(cfg_read contributor) if [ -n "$v" ]; then printf '%s' "$v"; return; fi if [ -n "${FM_GH_CONTRIBUTOR:-}" ]; then printf '%s' "$FM_GH_CONTRIBUTOR"; return; fi - ghc api user -q .login 2>/dev/null | tr -d '\n' + ghc api user -q .login | tr -d '\n' || true } get_filters() { @@ -354,13 +381,23 @@ process_pr() { local initialized seen_c seen_r seen_state seen_ci ev="" sf=$(seen_file "$owner" "$repo" "$pr") + local api_err=0 c_count="" r_count="" p_state="" sha="" ci_st="" - filter_enabled comments && c_count=$(count_comments "$owner" "$repo" "$pr" "$contributor") - filter_enabled reviews && r_count=$(count_reviews "$owner" "$repo" "$pr" "$contributor") - filter_enabled merge && p_state=$(pr_state "$owner" "$repo" "$pr") + if filter_enabled comments; then c_count=$(count_comments "$owner" "$repo" "$pr" "$contributor") || api_err=1; fi + if filter_enabled reviews; then r_count=$(count_reviews "$owner" "$repo" "$pr" "$contributor") || api_err=1; fi + if filter_enabled merge; then p_state=$(pr_state "$owner" "$repo" "$pr") || api_err=1; fi if filter_enabled ci; then - sha=$(head_sha "$owner" "$repo" "$pr") - ci_st=$(ci_state "$owner" "$repo" "$sha") + sha=$(head_sha "$owner" "$repo" "$pr") || api_err=1 + if [ -n "$sha" ]; then ci_st=$(ci_state "$owner" "$repo" "$sha") || api_err=1; fi + fi + # If any enabled probe hit a GitHub API error this cycle, skip the whole PR: + # emit nothing and do not advance seen, so a transient blip can never surface + # as an event (e.g. an error JSON parsed as CI data). The next cycle + # re-evaluates from the same baseline — lossless, never a permanent swallow. + if [ "$api_err" -ne 0 ]; then + printf 'fm-github-watch: skipping %s/%s#%s this cycle (GitHub API error)\n' \ + "$owner" "$repo" "$pr" >&2 + return 0 fi initialized=$(seen_get "$sf" initialized) @@ -421,7 +458,12 @@ poll_once() { max_jobs=$(get_concurrency) running=0 contributor=$(get_contributor) - prs=$(discover_prs) + # If discovery itself failed (transient API blip), abort the cycle: an empty + # result would otherwise make detect_left_open think every open PR merged. + prs=$(discover_prs) || { + printf 'fm-github-watch: PR discovery failed this cycle; skipping\n' >&2 + return 0 + } # Parallel per-PR polling. Each worker is a subshell running process_pr; each # owns its own seen file (seen_file is keyed by owner/repo/pr), so concurrent @@ -499,7 +541,7 @@ detect_left_open() { repo=$(seen_get "$f" repo) pr=$(seen_get "$f" pr) if [ -z "$owner" ] || [ -z "$repo" ] || [ -z "$pr" ]; then continue; fi - p_state=$(pr_state "$owner" "$repo" "$pr") + p_state=$(pr_state "$owner" "$repo" "$pr") || continue [ -n "$p_state" ] || continue # transient gh failure: leave seen state untouched # Migration: a prior seen file whose schema does not match the current # version is silently re-baselined — stamp the current schema + observed diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh index 3e1ead6..40247fe 100755 --- a/tests/fm-github-watch.test.sh +++ b/tests/fm-github-watch.test.sh @@ -48,6 +48,14 @@ case "$sub" in exit 0 ;; api) + # Injectable transient API error: when $FX/api-error exists, emit a GitHub + # error body to stdout and exit non-zero — exactly how real gh behaves on a + # 401/5xx (the --jq template is bypassed on error responses). This is the + # bug surface: the raw error JSON reached stdout and was parsed as CI data. + if [ -f "$FX/api-error" ]; then + printf '{"message":"Bad credentials","documentation_url":"https://docs.github.com/rest","status":"401"}\n' + exit 1 + fi # gh api --jq ... : find the repos/... path argument. path="" for a in "$@"; do @@ -766,6 +774,43 @@ test_ci_ignore_excludes_known_gap_check() { pass "known fork-routing gap check excluded from roll-up; real failures still surface" } +test_api_error_skips_pr_without_event() { + # Reproduces the bug: a transient 401 makes `gh api` write the error body + # {"message":"Bad credentials",...} to stdout (bypassing --jq). The old ghc() + # swallowed stderr + the exit code, so the watcher parsed that JSON as CI state + # and fired a bogus "CI: ... -> { \"message\": ... }" event. The fix detects the + # API error (non-zero exit OR an error-body shape) and skips the PR for the + # cycle: no event, no crash, seen left untouched so the next (recovered) cycle + # still fires the real transition (lossless). + local dir out sf + dir=$(make_case api-error) + seed_prs "$dir" $'kunchenguid/no-mistakes\t500' + printf 'sha500\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-500" + seed_ci "$dir" sha500 success + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-500" + + # Baseline: CI green. + run_poll "$dir" >/dev/null + grep -Fxq "ci=success" "$sf" || fail "baseline ci not recorded as success" + + # Inject a transient 401 on every `gh api` call this cycle. + : > "$dir/fixture/api-error" + out=$(run_poll "$dir" 2>/dev/null) + [ -z "$out" ] || fail "transient API error must not surface as an event; got: $out" + # seen must be untouched (ci still the prior success, not the error JSON). + grep -Fxq "ci=success" "$sf" \ + || fail "seen state was clobbered during API error: $(cat "$sf")" + + # Recover: remove the blip and flip CI to failure. The real transition fires. + rm -f "$dir/fixture/api-error" + seed_ci "$dir" sha500 failure + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#500 -> failure" \ + || fail "post-blip real transition did not fire; got: $out" + + pass "transient GitHub API error skips the PR without emitting an event" +} + test_filter_toggling test_first_run_baselines_silently test_comment_detection_advances_seen_after_print @@ -785,3 +830,4 @@ test_all_filters_off_mutes_watcher test_parallel_poll_is_lossless_and_does_not_cross_contaminate test_silent_baseline_on_schema_migration test_ci_ignore_excludes_known_gap_check +test_api_error_skips_pr_without_event