From b8773b3ec71c7add4041f6bfcc0678e4df112076 Mon Sep 17 00:00:00 2001 From: JTInventory Date: Thu, 25 Jun 2026 19:44:20 +0000 Subject: [PATCH 1/4] feat: add deterministic route resolver --- bin/fm-route.sh | 289 +++++++++++++++++++++++++++++++++++++++++ docs/scripts.md | 1 + tests/fm-route.test.sh | 115 ++++++++++++++++ 3 files changed, 405 insertions(+) create mode 100755 bin/fm-route.sh create mode 100755 tests/fm-route.test.sh diff --git a/bin/fm-route.sh b/bin/fm-route.sh new file mode 100755 index 0000000..7dc3117 --- /dev/null +++ b/bin/fm-route.sh @@ -0,0 +1,289 @@ +#!/usr/bin/env bash +# Resolve a deterministic model route for a Firstmate task. +# +# This command is intentionally read-only. It classifies task text and prints a +# small key=value route record for later spawn/meta integration. +# +# Usage: +# fm-route.sh [--kind ship|scout|secondmate] +# [--task-file ] [--profile cheap|standard|deep|critical] +# [--harness claude|codex|opencode|pi] [--model ] +# [--effort ] [--captain-downgrade-ok] [--explain] +set -eu + +usage() { + cat >&2 <<'USAGE' +usage: fm-route.sh [--kind ship|scout|secondmate] [--task-file ] [--profile ] [--harness ] [--model ] [--effort ] [--captain-downgrade-ok] [--explain] +USAGE +} + +die() { + printf 'fm-route: %s\n' "$1" >&2 + exit 1 +} + +is_profile() { + case "$1" in cheap|standard|deep|critical) return 0 ;; *) return 1 ;; esac +} + +is_harness() { + case "$1" in claude|codex|opencode|pi) return 0 ;; *) return 1 ;; esac +} + +rank_profile() { + case "$1" in + cheap) echo 1 ;; + standard) echo 2 ;; + deep) echo 3 ;; + critical) echo 4 ;; + *) echo 0 ;; + esac +} + +append_unique() { + local item=$1 + if [ -z "$2" ]; then + printf '%s\n' "$item" + return + fi + local existing=",$2," + case "$existing" in + *",$item,"*) printf '%s\n' "$2" ;; + *) printf '%s,%s\n' "$2" "$item" ;; + esac +} + +contains_any() { + local haystack=$1 + shift + local needle + for needle in "$@"; do + case "$haystack" in + *"$needle"*) return 0 ;; + esac + done + return 1 +} + +join_reasons() { + local result= part + for part in "$@"; do + [ -n "$part" ] || continue + if [ -z "$result" ]; then + result=$part + else + result="$result and $part" + fi + done + printf '%s\n' "$result" +} + +if [ "$#" -lt 2 ]; then + usage + exit 2 +fi + +task_id=$1 +project=$2 +shift 2 + +kind=ship +task_file= +manual_profile= +harness=codex +manual_harness=0 +manual_model= +manual_effort= +captain_downgrade_ok=0 +explain=0 + +while [ "$#" -gt 0 ]; do + case "$1" in + --kind) + [ "$#" -ge 2 ] || die "--kind needs a value" + kind=$2 + case "$kind" in ship|scout|secondmate) ;; *) die "unknown kind: $kind" ;; esac + shift 2 + ;; + --task-file) + [ "$#" -ge 2 ] || die "--task-file needs a path" + task_file=$2 + shift 2 + ;; + --profile) + [ "$#" -ge 2 ] || die "--profile needs a value" + manual_profile=$2 + is_profile "$manual_profile" || die "unknown profile: $manual_profile" + shift 2 + ;; + --harness) + [ "$#" -ge 2 ] || die "--harness needs a value" + harness=$2 + is_harness "$harness" || die "unknown harness: $harness" + manual_harness=1 + shift 2 + ;; + --model) + [ "$#" -ge 2 ] || die "--model needs a value" + manual_model=$2 + shift 2 + ;; + --effort) + [ "$#" -ge 2 ] || die "--effort needs a value" + manual_effort=$2 + shift 2 + ;; + --captain-downgrade-ok) + captain_downgrade_ok=1 + shift + ;; + --explain) + explain=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + die "unknown option: $1" + ;; + esac +done + +task_text= +if [ -n "$task_file" ]; then + [ -f "$task_file" ] || die "task file not found: $task_file" + task_text=$(cat "$task_file") +fi + +raw_text="$task_id $project $kind $task_text" +text=$(printf '%s' "$raw_text" | tr '[:upper:]' '[:lower:]') + +risk_flags= +production_reason= +money_reason= +credentials_reason= +external_reason= +security_reason= +git_reason= +firstmate_reason= + +if contains_any "$text" production systemd timer cron service 4187 serve refresh follow-main deploy runtime; then + risk_flags=$(append_unique production "$risk_flags") + production_reason="production refresh/runtime" +fi + +if contains_any "$text" ppc "amazon ads" sellersnap repricing campaign budget "purchase order" margin revenue; then + risk_flags=$(append_unique money "$risk_flags") + money_reason="money/business operations" +fi + +if contains_any "$text" credential secret token auth gmail sp-api session mailbox; then + risk_flags=$(append_unique credentials "$risk_flags") + credentials_reason="credentials/auth" +fi + +if contains_any "$text" send delivery email "live run" restart push merge delete archive prune "return worktree"; then + risk_flags=$(append_unique external-side-effect "$risk_flags") + external_reason="external side effects" +fi + +if contains_any "$text" security vulnerability threat exploit pii "customer data" "customer-facing data"; then + risk_flags=$(append_unique security "$risk_flags") + security_reason="security/customer data" +fi + +if contains_any "$text" merge rebase reset clean force "history rewrite"; then + risk_flags=$(append_unique git-danger "$risk_flags") + git_reason="git history/destructive operations" +fi + +if contains_any "$text" fm-spawn fm-teardown fm-watch fm-guard fm-lock fm-route harness-adapters "state/meta" radar; then + risk_flags=$(append_unique firstmate-core "$risk_flags") + firstmate_reason="Firstmate core safety" +fi + +deep_flags= +deep_reason= +if contains_any "$text" architecture migration strategy multi-step lfg " ce " "broad audit" "unclear audit"; then + deep_flags=deep + deep_reason="architecture/migration/deep planning" +elif contains_any "$text" audit && [ -z "$risk_flags" ]; then + deep_flags=deep + deep_reason="broad audit" +fi + +safe_readonly=0 +if [ "$kind" = scout ] && contains_any "$text" read-only readonly "read only" inventory docs documentation report-only summarize summary; then + safe_readonly=1 +fi + +if [ -n "$risk_flags" ]; then + auto_profile=critical + reason="task touches $(join_reasons "$production_reason" "$money_reason" "$credentials_reason" "$external_reason" "$security_reason" "$git_reason" "$firstmate_reason")" +elif [ -n "$deep_flags" ]; then + auto_profile=deep + reason="task needs $deep_reason" +elif [ "$safe_readonly" = 1 ]; then + auto_profile=cheap + reason="task is read-only scout work with no high-risk signals" +else + auto_profile=standard + reason="task is routine or ambiguous without high-risk signals" +fi + +profile=$auto_profile +override=none +if [ -n "$manual_profile" ]; then + auto_rank=$(rank_profile "$auto_profile") + manual_rank=$(rank_profile "$manual_profile") + if [ "$manual_rank" -lt "$auto_rank" ] && [ "$captain_downgrade_ok" -ne 1 ]; then + die "refusing risky downgrade from $auto_profile to $manual_profile without --captain-downgrade-ok" + fi + profile=$manual_profile + if [ "$manual_rank" -lt "$auto_rank" ]; then + override=captain-downgrade + reason="captain explicitly allowed downgrade despite $auto_profile signals" + else + override=manual-profile + reason="captain requested $manual_profile profile" + fi +fi + +if [ "$override" = none ] && { [ "$manual_harness" -eq 1 ] || [ -n "$manual_model" ] || [ -n "$manual_effort" ]; }; then + override=manual +fi + +case "$profile" in + cheap) + model=default + effort=low + ;; + standard) + model=gpt-5.5 + effort=medium + ;; + deep) + model=gpt-5.5 + effort=high + ;; + critical) + model=gpt-5.5 + effort=medium + ;; +esac + +[ -z "$manual_model" ] || model=$manual_model +[ -z "$manual_effort" ] || effort=$manual_effort + +printf 'profile=%s\n' "$profile" +printf 'harness=%s\n' "$harness" +printf 'model=%s\n' "$model" +printf 'effort=%s\n' "$effort" +printf 'reason=%s\n' "$reason" +printf 'override=%s\n' "$override" +printf 'risk_flags=%s\n' "${risk_flags:-none}" +if [ "$explain" -eq 1 ]; then + printf 'route: %s because %s\n' "$profile" "$reason" +fi diff --git a/docs/scripts.md b/docs/scripts.md index fbf67d1..f34d8f0 100644 --- a/docs/scripts.md +++ b/docs/scripts.md @@ -15,6 +15,7 @@ Each file also starts with a short header comment. | `fm-home-seed.sh` | Lease/provision a secondmate home transactionally, clone projects, initialize gates, and maintain `data/secondmates.md` | | `fm-spawn.sh` | Spawn one task, several `id=repo` pairs, or a persistent secondmate with `--secondmate`; ship/scout spawns require an isolated treehouse worktree | | `fm-project-mode.sh` | Resolve a project's delivery mode and `+yolo` flag from `data/projects.md` | +| `fm-route.sh` | Classify a task into a deterministic route profile, harness, model, effort, reason, override, and risk flags without changing spawn behavior | | `fm-merge-local.sh` | Fast-forward a `local-only` project's local default branch after approval | | `fm-review-diff.sh` | Review a crewmate branch against the authoritative base, with optional `--stat` output | | `fm-watch-arm.sh` | Verified per-home watcher re-arm; reports `started`, `healthy`, or `FAILED`; `--restart` relaunches only this home's watcher | diff --git a/tests/fm-route.test.sh b/tests/fm-route.test.sh new file mode 100755 index 0000000..4abea42 --- /dev/null +++ b/tests/fm-route.test.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +# Behavior tests for bin/fm-route.sh deterministic route decisions. +set -u + +# shellcheck source=tests/lib.sh +. "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +ROUTE="$ROOT/bin/fm-route.sh" + +run_route() { + "$ROUTE" route-test alpha "$@" 2>&1 +} + +route_profile() { + printf '%s\n' "$1" | awk -F= '$1=="profile"{print $2; exit}' +} + +assert_profile() { + local expected=$1 text=$2 out profile + out=$(run_route --task-file "$text") || fail "route failed for $expected: $out" + profile=$(route_profile "$out") + [ "$profile" = "$expected" ] || fail "expected profile $expected, got $profile"$'\n'"$out" +} + +assert_profile_one_of() { + local allowed=$1 text=$2 out profile + out=$(run_route --task-file "$text") || fail "route failed for $allowed: $out" + profile=$(route_profile "$out") + case " $allowed " in + *" $profile "*) : ;; + *) fail "expected one of [$allowed], got $profile"$'\n'"$out" ;; + esac +} + +write_task() { + local dir=$1 name=$2 body=$3 file + file="$dir/$name.txt" + printf '%s\n' "$body" > "$file" + printf '%s\n' "$file" +} + +tmp=$(mktemp -d "${TMPDIR:-/tmp}/fm-route.XXXXXX") +trap 'rm -rf "$tmp"' EXIT + +prod=$(write_task "$tmp" prod 'Investigate production refresh on the 4187 follow-main serve lane.') +assert_profile critical "$prod" +pass "production refresh routes critical" + +auth=$(write_task "$tmp" auth 'Review Gmail auth token handling for mailbox proof.') +assert_profile critical "$auth" +pass "Gmail/auth/token routes critical" + +money=$(write_task "$tmp" money 'Audit PPC, SellerSnap, and repricing budget behavior.') +assert_profile critical "$money" +pass "PPC/SellerSnap/repricing routes critical" + +sec=$(write_task "$tmp" security 'Triage security vulnerability and PII exposure risk.') +assert_profile critical "$sec" +pass "security/vulnerability routes critical" + +git_danger=$(write_task "$tmp" git-danger 'Plan git reset, force push, merge, delete, and prune cleanup.') +assert_profile critical "$git_danger" +pass "git reset/force/merge/delete/prune routes critical" + +out=$("$ROUTE" route-test bin/fm-spawn.sh 2>&1) || fail "Firstmate core route failed: $out" +assert_contains "$out" "profile=critical" "Firstmate core script did not route critical" +assert_contains "$out" "risk_flags=firstmate-core" "Firstmate core script did not record core risk" +pass "Firstmate core safety routes critical" + +architecture=$(write_task "$tmp" architecture 'Create an architecture migration plan for the workflow.') +assert_profile deep "$architecture" +pass "architecture/migration routes deep" + +docs_inventory=$(write_task "$tmp" docs 'Read-only docs inventory scout; summarize files only.') +out=$(run_route --kind scout --task-file "$docs_inventory") || fail "docs inventory route failed: $out" +profile=$(route_profile "$out") +case "$profile" in + cheap|standard) : ;; + *) fail "read-only docs inventory should be cheap or standard, got $profile"$'\n'"$out" ;; +esac +[ "$profile" != critical ] || fail "read-only docs inventory must not be critical" +pass "read-only docs inventory scout avoids critical" + +ambiguous=$(write_task "$tmp" ambiguous 'Figure out what is going on here and make it better.') +assert_profile_one_of "standard deep" "$ambiguous" +pass "unknown ambiguous task is not cheap" + +out=$(run_route --profile critical --task-file "$docs_inventory") || fail "manual critical upgrade failed: $out" +assert_contains "$out" "profile=critical" "manual critical upgrade did not set critical" +assert_contains "$out" "override=manual-profile" "manual critical upgrade did not record override" +pass "manual critical upgrade works" + +if out=$(run_route --profile cheap --task-file "$prod"); then + fail "risky cheap downgrade unexpectedly succeeded"$'\n'"$out" +else + assert_contains "$out" "refusing risky downgrade" "risky cheap downgrade did not explain refusal" +fi +pass "risky cheap downgrade fails without captain override" + +out=$(run_route --profile cheap --captain-downgrade-ok --task-file "$prod") \ + || fail "captain-approved risky cheap downgrade failed: $out" +assert_contains "$out" "profile=cheap" "captain-approved downgrade did not set cheap" +assert_contains "$out" "override=captain-downgrade" "captain-approved downgrade did not record override" +pass "risky cheap downgrade succeeds with captain override" + +if out=$(run_route --harness unknown --task-file "$docs_inventory"); then + fail "unknown harness unexpectedly succeeded"$'\n'"$out" +else + assert_contains "$out" "unknown harness" "unknown harness failure was unclear" +fi +pass "unknown harness fails" + +out=$(run_route --explain --task-file "$prod") || fail "explain route failed: $out" +assert_contains "$out" "route: critical because" "explain output missing route line" +pass "--explain prints captain-facing route line" From c766d0a479f16b567f392a260f59043bb2fbf3af Mon Sep 17 00:00:00 2001 From: JTInventory Date: Thu, 25 Jun 2026 20:13:48 +0000 Subject: [PATCH 2/4] feat: record spawn route evidence --- bin/fm-spawn.sh | 81 +++++++++++++++++++++- tests/fm-spawn-route.test.sh | 128 +++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 3 deletions(-) create mode 100644 tests/fm-spawn-route.test.sh diff --git a/bin/fm-spawn.sh b/bin/fm-spawn.sh index 86bc38c..86916fb 100755 --- a/bin/fm-spawn.sh +++ b/bin/fm-spawn.sh @@ -140,24 +140,73 @@ launch_template() { esac } +HARNESS= +LAUNCH= +ROUTE_PROFILE=manual +ROUTE_HARNESS= +ROUTE_MODEL=default +ROUTE_EFFORT=default +ROUTE_REASON= +ROUTE_OVERRIDE=none +ROUTE_RISK_FLAGS=none case "$ARG3" in *' '*) # raw launch command (unverified-adapter escape hatch) LAUNCH=$ARG3 - HARNESS="" for word in $LAUNCH; do case "$word" in [A-Za-z_]*=*) continue ;; *) HARNESS=$(basename "$word"); break ;; esac done + ROUTE_HARNESS=${HARNESS:-raw} + ROUTE_REASON="raw launch command selected for adapter verification" + ROUTE_OVERRIDE=raw-launch ;; '') - HARNESS=$("$FM_ROOT/bin/fm-harness.sh" crew) - LAUNCH=$(launch_template "$HARNESS" "$KIND") || { echo "error: no launch template for harness '$HARNESS' (from config/crew-harness or detection); pass a raw launch command to use an unverified adapter" >&2; exit 1; } + # Deferred until BRIEF/PROJ_ABS are known, so the route can read task text. ;; *) HARNESS=$ARG3 LAUNCH=$(launch_template "$HARNESS" "$KIND") || { echo "error: unknown harness '$HARNESS'; pass a raw launch command to use an unverified adapter" >&2; exit 1; } + ROUTE_HARNESS=$HARNESS + ROUTE_REASON="manual harness override selected $HARNESS" + ROUTE_OVERRIDE=manual-harness ;; esac +parse_route_output() { + local line key value + while IFS= read -r line; do + key=${line%%=*} + value=${line#*=} + [ "$key" != "$line" ] || continue + case "$key" in + profile) ROUTE_PROFILE=$value ;; + harness) ROUTE_HARNESS=$value ;; + model) ROUTE_MODEL=$value ;; + effort) ROUTE_EFFORT=$value ;; + reason) ROUTE_REASON=$value ;; + override) ROUTE_OVERRIDE=$value ;; + risk_flags) ROUTE_RISK_FLAGS=$value ;; + esac + done +} + +append_route_block() { + [ "$KIND" != secondmate ] || return 0 + grep -qxF '' "$BRIEF" 2>/dev/null && return 0 + cat >> "$BRIEF" < +# Route + +route: $ROUTE_PROFILE because $ROUTE_REASON +Harness: $ROUTE_HARNESS +Model: $ROUTE_MODEL +Reasoning effort: $ROUTE_EFFORT +Override: $ROUTE_OVERRIDE +Risk flags: $ROUTE_RISK_FLAGS +Do not downgrade this route without an explicit firstmate override. +EOF +} + secondmate_registry_value() { local id=$1 key=$2 reg line value reg="$DATA/secondmates.md" @@ -314,6 +363,23 @@ else fi [ -f "$BRIEF" ] || { echo "error: no brief at $BRIEF" >&2; exit 1; } +if [ -z "$ARG3" ]; then + route_out= + if ! route_out=$("$FM_ROOT/bin/fm-route.sh" "$ID" "$PROJ_ABS" --kind "$KIND" --task-file "$BRIEF" 2>&1); then + printf '%s\n' "$route_out" >&2 + exit 1 + fi + parse_route_output <&2; exit 1; } +fi + # Same session when firstmate already runs inside tmux; dedicated session otherwise. if [ -n "${TMUX:-}" ]; then SES=$(tmux display-message -p '#S') @@ -441,6 +507,8 @@ $("$FM_ROOT/bin/fm-project-mode.sh" "$PROJ_NAME") EOF fi +append_route_block + mkdir -p "$STATE" { echo "window=$T" @@ -450,6 +518,13 @@ mkdir -p "$STATE" echo "kind=$KIND" echo "mode=$MODE" echo "yolo=$YOLO" + echo "route_profile=$ROUTE_PROFILE" + echo "route_harness=$ROUTE_HARNESS" + echo "route_model=$ROUTE_MODEL" + echo "route_effort=$ROUTE_EFFORT" + echo "route_reason=$ROUTE_REASON" + echo "route_override=$ROUTE_OVERRIDE" + echo "route_risk_flags=$ROUTE_RISK_FLAGS" if [ "$KIND" = secondmate ]; then echo "home=$PROJ_ABS" echo "projects=$SECONDMATE_PROJECTS" diff --git a/tests/fm-spawn-route.test.sh b/tests/fm-spawn-route.test.sh new file mode 100644 index 0000000..1e23f50 --- /dev/null +++ b/tests/fm-spawn-route.test.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# Behavior tests for fm-spawn.sh route evidence integration. +# +# The fake tmux reports a controlled pane cwd after `treehouse get`, so these +# tests exercise the real spawn success path without opening windows. +set -u + +# shellcheck source=tests/lib.sh +. "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +TMP_ROOT=$(fm_test_tmproot fm-spawn-route) +SPAWN="$ROOT/bin/fm-spawn.sh" + +make_spawn_fakebin() { + local dir=$1 fakebin + fakebin=$(fm_fakebin "$dir") + cat > "$fakebin/tmux" <<'SH' +#!/usr/bin/env bash +set -u +case "$*" in + *"#{pane_current_path}"*) printf '%s\n' "${FM_FAKE_PANE_PATH:-}"; exit 0 ;; +esac +case "${1:-}" in + display-message) printf 'firstmate\n'; exit 0 ;; + list-windows) exit 0 ;; + has-session|new-session|new-window|send-keys) exit 0 ;; +esac +exit 0 +SH + chmod +x "$fakebin/tmux" + fm_fake_exit0 "$fakebin" treehouse + printf '%s\n' "$fakebin" +} + +make_case() { + local label=$1 home proj wt fakebin + home="$TMP_ROOT/$label-home" + proj="$TMP_ROOT/$label-alpha" + wt="$TMP_ROOT/$label-wt" + fakebin=$(make_spawn_fakebin "$TMP_ROOT/$label-fake") + mkdir -p "$home/data" "$home/state" "$home/projects" "$home/config" + printf '%s\n' codex > "$home/config/crew-harness" + printf '%s\n' "- $(basename "$proj") [direct-PR] - alpha fixture (added 2026-06-25)" > "$home/data/projects.md" + fm_git_init_commit "$proj" + git -C "$proj" worktree add -q --detach "$wt" >/dev/null 2>&1 + printf '%s|%s|%s|%s\n' "$home" "$proj" "$wt" "$fakebin" +} + +run_spawn_case() { + local home=$1 id=$2 proj=$3 wt=$4 fakebin=$5 + shift 5 + FM_ROOT_OVERRIDE="$ROOT" FM_HOME="$home" \ + FM_STATE_OVERRIDE="$home/state" FM_DATA_OVERRIDE="$home/data" \ + FM_PROJECTS_OVERRIDE="$home/projects" FM_CONFIG_OVERRIDE="$home/config" \ + FM_SPAWN_NO_GUARD=1 FM_FAKE_PANE_PATH="$wt" TMUX="fake,1,0" \ + PATH="$fakebin:$PATH" \ + "$SPAWN" "$id" "$proj" "$@" 2>&1 +} + +test_ordinary_spawn_records_route_fields() { + local setup home proj wt fakebin id out status meta brief + IFS='|' read -r home proj wt fakebin < "$brief" + + out=$(run_spawn_case "$home" "$id" "$proj" "$wt" "$fakebin"); status=$? + expect_code 0 "$status" "ordinary routed spawn should succeed" + assert_contains "$out" "spawned $id harness=codex" "ordinary spawn should launch route harness" + meta="$home/state/$id.meta" + assert_grep "route_profile=critical" "$meta" "ordinary spawn did not record route profile" + assert_grep "route_harness=codex" "$meta" "ordinary spawn did not record route harness" + assert_grep "route_model=gpt-5.5" "$meta" "ordinary spawn did not record route model" + assert_grep "route_effort=medium" "$meta" "ordinary spawn did not record route effort" + assert_grep "route_override=none" "$meta" "ordinary spawn did not record route override" + assert_grep "route_risk_flags=production,firstmate-core" "$meta" "ordinary spawn did not record route risk flags" + assert_grep "# Route" "$brief" "ordinary spawn did not add route block to brief" + assert_grep "route: critical because" "$brief" "ordinary spawn brief route summary missing" + pass "ordinary spawn records route evidence and appends a brief route block" +} + +test_manual_harness_override_records_manual_route() { + local setup home proj wt fakebin id out status meta + IFS='|' read -r home proj wt fakebin < "$home/data/$id/brief.md" + + out=$(run_spawn_case "$home" "$id" "$proj" "$wt" "$fakebin" claude); status=$? + expect_code 0 "$status" "manual harness spawn should still succeed" + assert_contains "$out" "spawned $id harness=claude" "manual harness override did not launch selected harness" + meta="$home/state/$id.meta" + assert_grep "harness=claude" "$meta" "manual spawn did not preserve operational harness" + assert_grep "route_profile=manual" "$meta" "manual spawn did not record manual route profile" + assert_grep "route_harness=claude" "$meta" "manual spawn did not record selected harness" + assert_grep "route_override=manual-harness" "$meta" "manual spawn did not record manual override" + assert_no_grep "route_profile=cheap" "$meta" "manual override must not silently record a cheap route" + pass "manual harness override preserves behavior and records manual route evidence" +} + +test_raw_launch_command_records_raw_route() { + local setup home proj wt fakebin id out status meta + IFS='|' read -r home proj wt fakebin < "$home/data/$id/brief.md" + + out=$(run_spawn_case "$home" "$id" "$proj" "$wt" "$fakebin" 'CUSTOM=1 /bin/echo launch'); status=$? + expect_code 0 "$status" "raw launch command should still succeed" + assert_contains "$out" "spawned $id harness=echo" "raw launch did not preserve parsed harness" + meta="$home/state/$id.meta" + assert_grep "harness=echo" "$meta" "raw launch did not record operational harness" + assert_grep "route_profile=manual" "$meta" "raw launch did not record manual route profile" + assert_grep "route_harness=echo" "$meta" "raw launch did not record parsed route harness" + assert_grep "route_override=raw-launch" "$meta" "raw launch did not record raw override" + pass "raw launch command is not blocked and records raw route evidence" +} + +test_ordinary_spawn_records_route_fields +test_manual_harness_override_records_manual_route +test_raw_launch_command_records_raw_route From f4250d0f4cc4922e6999a0a12952d31175cd4dbe Mon Sep 17 00:00:00 2001 From: JTInventory Date: Fri, 26 Jun 2026 13:07:11 +0000 Subject: [PATCH 3/4] fix route cleanup boundary matching --- bin/fm-route.sh | 13 ++++++++++++- tests/fm-route.test.sh | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/bin/fm-route.sh b/bin/fm-route.sh index 7dc3117..5dd1ebe 100755 --- a/bin/fm-route.sh +++ b/bin/fm-route.sh @@ -65,6 +65,17 @@ contains_any() { return 1 } +contains_git_danger() { + local haystack=$1 + local normalized + normalized=$(printf '%s' "$haystack" | tr -c '[:alnum:]' ' ') + normalized=" $normalized " + case "$normalized" in + *" merge "*|*" rebase "*|*" reset "*|*" clean "*|*" force "*|*" history rewrite "*) return 0 ;; + esac + return 1 +} + join_reasons() { local result= part for part in "$@"; do @@ -194,7 +205,7 @@ if contains_any "$text" security vulnerability threat exploit pii "customer data security_reason="security/customer data" fi -if contains_any "$text" merge rebase reset clean force "history rewrite"; then +if contains_git_danger "$text"; then risk_flags=$(append_unique git-danger "$risk_flags") git_reason="git history/destructive operations" fi diff --git a/tests/fm-route.test.sh b/tests/fm-route.test.sh index 4abea42..afb1228 100755 --- a/tests/fm-route.test.sh +++ b/tests/fm-route.test.sh @@ -62,6 +62,24 @@ git_danger=$(write_task "$tmp" git-danger 'Plan git reset, force push, merge, de assert_profile critical "$git_danger" pass "git reset/force/merge/delete/prune routes critical" +git_clean=$(write_task "$tmp" git-clean 'Run git clean on the task worktree.') +assert_profile critical "$git_clean" +pass "git clean routes critical" + +docs_typo_cleanup=$(write_task "$tmp" docs-typo-cleanup 'read-only docs typo cleanup') +out=$(run_route --kind scout --task-file "$docs_typo_cleanup") || fail "docs typo cleanup route failed: $out" +profile=$(route_profile "$out") +[ "$profile" = cheap ] || fail "read-only docs typo cleanup should route cheap, got $profile"$'\n'"$out" +pass "read-only docs typo cleanup stays cheap" + +cleanup_notes=$(write_task "$tmp" cleanup-notes 'cleanup notes') +assert_profile standard "$cleanup_notes" +pass "cleanup notes does not match git clean" + +cleaner_wording=$(write_task "$tmp" cleaner-wording 'cleaner wording') +assert_profile standard "$cleaner_wording" +pass "cleaner wording does not match git clean" + out=$("$ROUTE" route-test bin/fm-spawn.sh 2>&1) || fail "Firstmate core route failed: $out" assert_contains "$out" "profile=critical" "Firstmate core script did not route critical" assert_contains "$out" "risk_flags=firstmate-core" "Firstmate core script did not record core risk" From 397cb80f400488f5454a12d8effbf41efdc1b740 Mon Sep 17 00:00:00 2001 From: JTInventory Date: Fri, 26 Jun 2026 13:34:55 +0000 Subject: [PATCH 4/4] fix: refuse stale task identity --- bin/fm-pr-check.sh | 15 +++- bin/fm-review-diff.sh | 11 ++- bin/fm-task-identity-lib.sh | 45 ++++++++++ bin/fm-teardown.sh | 6 ++ tests/fm-task-identity.test.sh | 150 +++++++++++++++++++++++++++++++++ 5 files changed, 220 insertions(+), 7 deletions(-) create mode 100644 bin/fm-task-identity-lib.sh create mode 100644 tests/fm-task-identity.test.sh diff --git a/bin/fm-pr-check.sh b/bin/fm-pr-check.sh index 928226e..0d71e91 100755 --- a/bin/fm-pr-check.sh +++ b/bin/fm-pr-check.sh @@ -10,12 +10,25 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" FM_ROOT="${FM_ROOT_OVERRIDE:-$(cd "$SCRIPT_DIR/.." && pwd)}" FM_HOME="${FM_HOME:-${FM_ROOT_OVERRIDE:-$FM_ROOT}}" STATE="${FM_STATE_OVERRIDE:-$FM_HOME/state}" +# shellcheck source=bin/fm-task-identity-lib.sh +. "$SCRIPT_DIR/fm-task-identity-lib.sh" "$FM_ROOT/bin/fm-guard.sh" || true ID=$1 URL=$2 META="$STATE/$ID.meta" -if [ -f "$META" ] && ! grep -qxF "pr=$URL" "$META"; then +fm_assert_task_branch_matches_meta "$ID" "$META" "error" || exit 1 + +EXPECTED_BRANCH=$(fm_task_expected_branch "$ID") +PR_BRANCH=$(gh pr view "$URL" --json headRefName -q .headRefName 2>/dev/null || true) +[ -n "$PR_BRANCH" ] || { echo "error: could not determine head branch for PR $URL" >&2; exit 1; } +if [ "$PR_BRANCH" != "$EXPECTED_BRANCH" ]; then + echo "error: task identity mismatch for $ID: PR $URL head branch is $PR_BRANCH; expected $EXPECTED_BRANCH." >&2 + echo "Use the matching task id or intentionally reconcile the metadata before continuing." >&2 + exit 1 +fi + +if ! grep -qxF "pr=$URL" "$META"; then echo "pr=$URL" >> "$META" fi diff --git a/bin/fm-review-diff.sh b/bin/fm-review-diff.sh index 23962e4..fc7b56c 100755 --- a/bin/fm-review-diff.sh +++ b/bin/fm-review-diff.sh @@ -12,6 +12,8 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" FM_ROOT="${FM_ROOT_OVERRIDE:-$(cd "$SCRIPT_DIR/.." && pwd)}" FM_HOME="${FM_HOME:-${FM_ROOT_OVERRIDE:-$FM_ROOT}}" STATE="${FM_STATE_OVERRIDE:-$FM_HOME/state}" +# shellcheck source=bin/fm-task-identity-lib.sh +. "$SCRIPT_DIR/fm-task-identity-lib.sh" "$FM_ROOT/bin/fm-guard.sh" || true usage() { @@ -35,6 +37,7 @@ esac META="$STATE/$ID.meta" [ -f "$META" ] || { echo "error: no meta for task $ID at $META" >&2; exit 1; } +fm_assert_task_branch_matches_meta "$ID" "$META" "error" || exit 1 WT=$(grep '^worktree=' "$META" | cut -d= -f2-) PROJ=$(grep '^project=' "$META" | cut -d= -f2-) @@ -61,12 +64,8 @@ default_branch() { DEFAULT=$(default_branch) || { echo "error: cannot determine default branch for $PROJ; expected origin/HEAD, main, or master" >&2; exit 1; } -BRANCH="fm/$ID" -if ! git -C "$WT" rev-parse --verify --quiet "refs/heads/$BRANCH" >/dev/null; then - BRANCH=$(git -C "$WT" symbolic-ref --quiet --short HEAD 2>/dev/null || true) - [ -n "$BRANCH" ] || { echo "error: branch fm/$ID does not exist and worktree $WT is detached" >&2; exit 1; } - git -C "$WT" rev-parse --verify --quiet "refs/heads/$BRANCH" >/dev/null || { echo "error: branch $BRANCH does not exist in $WT" >&2; exit 1; } -fi +BRANCH=$(fm_task_expected_branch "$ID") +git -C "$WT" rev-parse --verify --quiet "refs/heads/$BRANCH" >/dev/null || { echo "error: branch $BRANCH does not exist in $WT" >&2; exit 1; } if git -C "$PROJ" remote get-url origin >/dev/null 2>&1; then # Update the remote-tracking ref itself; a bare single-branch fetch can leave diff --git a/bin/fm-task-identity-lib.sh b/bin/fm-task-identity-lib.sh new file mode 100644 index 0000000..d4a94ae --- /dev/null +++ b/bin/fm-task-identity-lib.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Shared task identity checks for helpers that act on a task worktree. +# +# Ship tasks conventionally live on branch fm/. If a reused Treehouse +# pane keeps old state/.meta while the worktree has moved to another branch, +# helpers must refuse instead of recording PRs, reviewing diffs, or tearing down +# the wrong task. + +fm_meta_value() { + local meta=$1 key=$2 + grep "^$key=" "$meta" | tail -1 | cut -d= -f2- || true +} + +fm_task_expected_branch() { + printf 'fm/%s\n' "$1" +} + +fm_assert_task_branch_matches_meta() { + local id=$1 meta=$2 label=${3:-error} wt kind expected branch + [ -f "$meta" ] || { echo "$label: no meta for task $id at $meta" >&2; return 1; } + + kind=$(fm_meta_value "$meta" kind) + [ -n "$kind" ] || kind=ship + case "$kind" in + ship) ;; + *) return 0 ;; + esac + + wt=$(fm_meta_value "$meta" worktree) + [ -n "$wt" ] || { echo "$label: meta for task $id is missing worktree=" >&2; return 1; } + [ -d "$wt" ] || { echo "$label: worktree for task $id is missing: $wt" >&2; return 1; } + + expected=$(fm_task_expected_branch "$id") + branch=$(git -C "$wt" symbolic-ref --quiet --short HEAD 2>/dev/null || true) + if [ -z "$branch" ]; then + echo "$label: task identity mismatch for $id: worktree $wt is detached; expected branch $expected." >&2 + echo "Use the matching task id or intentionally reconcile the metadata before continuing." >&2 + return 1 + fi + if [ "$branch" != "$expected" ]; then + echo "$label: task identity mismatch for $id: meta $meta points at worktree $wt, but that worktree is on branch $branch; expected $expected." >&2 + echo "Use the matching task id or intentionally reconcile the metadata before continuing." >&2 + return 1 + fi +} diff --git a/bin/fm-teardown.sh b/bin/fm-teardown.sh index ddd0a6d..3466b1a 100755 --- a/bin/fm-teardown.sh +++ b/bin/fm-teardown.sh @@ -34,6 +34,8 @@ SECONDMATE_REG="$DATA/secondmates.md" SUB_HOME_MARKER=".fm-secondmate-home" # shellcheck source=bin/fm-tasks-axi-lib.sh . "$SCRIPT_DIR/fm-tasks-axi-lib.sh" +# shellcheck source=bin/fm-task-identity-lib.sh +. "$SCRIPT_DIR/fm-task-identity-lib.sh" "$FM_ROOT/bin/fm-guard.sh" || true ID=$1 FORCE=${2:-} @@ -51,6 +53,10 @@ KIND=$(grep '^kind=' "$META" | cut -d= -f2- || true) MODE=$(grep '^mode=' "$META" | cut -d= -f2- || true) [ -n "$MODE" ] || MODE=no-mistakes +if [ "$KIND" = ship ] && [ "$FORCE" != "--force" ]; then + fm_assert_task_branch_matches_meta "$ID" "$META" "REFUSED" || exit 1 +fi + default_branch() { local ref branch ref=$(git -C "$PROJ" symbolic-ref --quiet --short refs/remotes/origin/HEAD 2>/dev/null || true) diff --git a/tests/fm-task-identity.test.sh b/tests/fm-task-identity.test.sh new file mode 100644 index 0000000..9b49179 --- /dev/null +++ b/tests/fm-task-identity.test.sh @@ -0,0 +1,150 @@ +#!/usr/bin/env bash +# Behavior tests for task metadata/worktree branch identity checks. +# +# A reused Treehouse pane can keep state/.meta while the worktree has +# moved on to fm/. Helpers that record PRs, review diffs, or tear down +# work must refuse that stale identity instead of acting on the old task. +set -u + +# shellcheck source=tests/lib.sh +. "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +PR_CHECK="$ROOT/bin/fm-pr-check.sh" +REVIEW_DIFF="$ROOT/bin/fm-review-diff.sh" +TEARDOWN="$ROOT/bin/fm-teardown.sh" +TMP_ROOT=$(fm_test_tmproot fm-task-identity) + +make_case() { + local name=$1 current_id=$2 meta_id=$3 case_dir fakebin + case_dir="$TMP_ROOT/$name" + fakebin="$case_dir/fakebin" + mkdir -p "$case_dir/state" "$fakebin" + + cat > "$fakebin/gh" < "$fakebin/treehouse" <<'SH' +#!/usr/bin/env bash +exit 0 +SH + cat > "$fakebin/tmux" <<'SH' +#!/usr/bin/env bash +exit 0 +SH + chmod +x "$fakebin/gh" "$fakebin/treehouse" "$fakebin/tmux" + + git init -q --bare "$case_dir/origin.git" + git -C "$case_dir/origin.git" symbolic-ref HEAD refs/heads/main + git clone -q "$case_dir/origin.git" "$case_dir/_seed" 2>/dev/null + git -C "$case_dir/_seed" -c user.email=t@t -c user.name=t \ + commit -q --allow-empty -m "origin baseline" + git -C "$case_dir/_seed" push -q origin main + rm -rf "$case_dir/_seed" + + git clone -q "$case_dir/origin.git" "$case_dir/project" + git -C "$case_dir/project" remote set-head origin main 2>/dev/null || true + git -C "$case_dir/project" worktree add -q -b "fm/$current_id" "$case_dir/wt" main + git -C "$case_dir/wt" push -q origin "fm/$current_id" + git -C "$case_dir/project" fetch -q origin + + touch "$case_dir/state/.last-watcher-beat" + fm_write_meta "$case_dir/state/$meta_id.meta" \ + "window=fm-$meta_id" \ + "worktree=$case_dir/wt" \ + "project=$case_dir/project" \ + "kind=ship" \ + "mode=direct-PR" + + printf '%s\n' "$case_dir" +} + +run_pr_check() { + local case_dir=$1 id=$2 url=$3 + FM_ROOT_OVERRIDE="$ROOT" \ + FM_STATE_OVERRIDE="$case_dir/state" \ + PATH="$case_dir/fakebin:$PATH" \ + "$PR_CHECK" "$id" "$url" 2>&1 +} + +run_review_diff() { + local case_dir=$1 id=$2 + FM_ROOT_OVERRIDE="$ROOT" \ + FM_STATE_OVERRIDE="$case_dir/state" \ + "$REVIEW_DIFF" "$id" --stat 2>&1 +} + +run_teardown() { + local case_dir=$1 id=$2 + FM_ROOT_OVERRIDE="$ROOT" \ + FM_STATE_OVERRIDE="$case_dir/state" \ + PATH="$case_dir/fakebin:$PATH" \ + "$TEARDOWN" "$id" 2>&1 +} + +test_pr_check_refuses_stale_task_meta() { + local case_dir rc out url + case_dir=$(make_case pr-check new-task old-task) + url=https://github.com/example/repo/pull/12 + + set +e + out=$(run_pr_check "$case_dir" old-task "$url") + rc=$? + set -e + + expect_code 1 "$rc" "pr-check should refuse stale task metadata" + assert_contains "$out" "task identity mismatch" "pr-check did not explain the stale task identity" + assert_no_grep "pr=$url" "$case_dir/state/old-task.meta" "pr-check wrote PR URL to stale meta" + assert_absent "$case_dir/state/old-task.check.sh" "pr-check armed a merge poll for stale meta" + pass "fm-pr-check refuses stale task metadata before recording a PR" +} + +test_pr_check_records_matching_task_meta() { + local case_dir out url + case_dir=$(make_case pr-check-match same-task same-task) + url=https://github.com/example/repo/pull/13 + + out=$(run_pr_check "$case_dir" same-task "$url") || fail "pr-check should accept matching task metadata: $out" + + assert_contains "$out" "armed: state/same-task.check.sh polls $url" "pr-check did not arm the merge poll" + assert_grep "pr=$url" "$case_dir/state/same-task.meta" "pr-check did not record PR URL for matching meta" + assert_present "$case_dir/state/same-task.check.sh" "pr-check did not write check script for matching meta" + pass "fm-pr-check records a PR when task id, branch, and PR head match" +} + +test_review_diff_refuses_stale_task_meta() { + local case_dir rc out + case_dir=$(make_case review-diff new-task old-task) + + set +e + out=$(run_review_diff "$case_dir" old-task) + rc=$? + set -e + + expect_code 1 "$rc" "review-diff should refuse stale task metadata" + assert_contains "$out" "task identity mismatch" "review-diff did not explain the stale task identity" + pass "fm-review-diff refuses stale task metadata instead of reviewing the wrong branch" +} + +test_teardown_refuses_stale_task_meta() { + local case_dir rc out + case_dir=$(make_case teardown new-task old-task) + + set +e + out=$(run_teardown "$case_dir" old-task) + rc=$? + set -e + + expect_code 1 "$rc" "teardown should refuse stale task metadata" + assert_contains "$out" "task identity mismatch" "teardown did not explain the stale task identity" + assert_present "$case_dir/state/old-task.meta" "teardown removed stale meta after refusing" + pass "fm-teardown refuses stale task metadata before returning a reused worktree" +} + +test_pr_check_refuses_stale_task_meta +test_pr_check_records_matching_task_meta +test_review_diff_refuses_stale_task_meta +test_teardown_refuses_stale_task_meta