From fc9cddeb973e8494970809dd680ea85afc7f8769 Mon Sep 17 00:00:00 2001 From: JTInventory Date: Thu, 25 Jun 2026 18:42:14 +0000 Subject: [PATCH] feat: prepare supervision model consumers --- AGENTS.md | 2 + README.md | 3 + bin/fm-brief.sh | 28 +++- bin/fm-supervision-model.sh | 146 +++++++++++++++++- docs/configuration.md | 10 ++ docs/radar-supervision-contract.md | 44 ++++++ docs/scripts.md | 2 + docs/skill-roots.md | 44 ++++++ .../radar-consumes-supervision-model.md | 82 ++++++++++ tests/fm-supervision-model.test.sh | 3 + tests/fm-tangle-guard.test.sh | 6 + 11 files changed, 360 insertions(+), 10 deletions(-) create mode 100644 docs/radar-supervision-contract.md create mode 100644 docs/skill-roots.md create mode 100644 docs/solutions/architecture-patterns/radar-consumes-supervision-model.md diff --git a/AGENTS.md b/AGENTS.md index 87d6556..4ab3e9d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -69,6 +69,7 @@ README.md public overview and development notes .agents/skills/ shared skills, committed .claude/skills symlink to .agents/skills for claude compatibility bin/ helper scripts, committed; read each script's header before first use +docs/solutions/ documented solutions to past problems and patterns, organized by category with YAML frontmatter (`module`, `tags`, `problem_type`) config/crew-harness crewmate harness override; LOCAL, gitignored; absent or "default" = same as firstmate data/ personal fleet records; LOCAL, gitignored as a whole backlog.md task queue, dependencies, history @@ -138,6 +139,7 @@ Resolve `default` with `bin/fm-harness.sh`; resolve the active crewmate harness Each adapter splits into mechanics and knowledge. The mechanics (launch command, autonomy flag, turn-end hook) live in `bin/fm-spawn.sh`; the knowledge you need while supervising (busy signature, exit, interrupt, dialogs, quirks, skill invocation, resume) lives in the agent-only `harness-adapters` skill. 
+When instructing a crewmate or secondmate to invoke a skill, use the target harness's form: Claude uses `/`, Codex uses `$`, and other verified harnesses use their native skill surface or direct `SKILL.md` reading when no slash/dollar surface exists. **Never dispatch a crewmate on an unverified adapter.** If `config/crew-harness` names an unverified one, tell the captain and fall back to your own harness until it is verified. If the captain asks for a new harness, load `harness-adapters`, verify it empirically with a trivial supervised task, then commit the script and knowledge changes. diff --git a/README.md b/README.md index 8d1a7d1..9e22044 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,9 @@ Agent-only reference skills live under `.agents/skills/` and are loaded by first - [docs/architecture.md](docs/architecture.md) - how the crew, supervision, worktrees, secondmates, and project modes work. - [docs/configuration.md](docs/configuration.md) - environment variables, `FM_HOME`, the files you set, and harness support. - [docs/scripts.md](docs/scripts.md) - the `bin/` toolbelt reference. +- [docs/radar-supervision-contract.md](docs/radar-supervision-contract.md) - JSON consumer contract for Radar-style displays. +- [docs/skill-roots.md](docs/skill-roots.md) - Firstmate's canonical and compatibility skill roots. +- [docs/solutions/](docs/solutions/) - documented solutions to past problems and patterns, organized by category with YAML frontmatter. - [`AGENTS.md`](AGENTS.md) - firstmate's full operating manual for the orchestrator agent. - [CONTRIBUTING.md](CONTRIBUTING.md) - how to contribute, including the dev/test commands. 
diff --git a/bin/fm-brief.sh b/bin/fm-brief.sh index 3acc082..ec095f3 100755 --- a/bin/fm-brief.sh +++ b/bin/fm-brief.sh @@ -19,7 +19,7 @@ # For ship tasks, the definition of done is shaped by the project's delivery mode # (data/projects.md via fm-project-mode.sh; see AGENTS.md project management # and task lifecycle): -# no-mistakes implement -> /no-mistakes pipeline -> PR -> captain merge (default) +# no-mistakes implement -> no-mistakes pipeline -> PR -> captain merge (default) # direct-PR implement -> push + open PR via gh-axi (no pipeline) -> captain merge # local-only implement on branch, stop and report "ready in branch" (no push/PR); # firstmate reviews, captain approves, firstmate merges to local main @@ -58,6 +58,19 @@ shell_quote() { STATUS_FILE=$(shell_quote "$STATE/$ID.status") +cat_skill_invocation_contract() { +cat <<'EOF' +# Skill invocation +If this task requires a skill, read that skill's `SKILL.md` before acting. +Use the invocation form for the harness you are running: +- Claude: `/` +- Codex: `$` +- Other verified harnesses: use their native skill invocation surface, or follow the skill file directly when no slash/dollar surface exists. +EOF +} + +SKILL_INVOCATION_CONTRACT=$(cat_skill_invocation_contract) + if [ "$KIND" = secondmate ]; then SECONDMATE_PROJECTS="" idx=1 @@ -90,6 +103,8 @@ You do not generate your own work. Act only on tasks the main firstmate routes to you. Never start a survey, audit, or "find improvements" sweep on your own initiative; that is not your job and it is unwanted. +$SKILL_INVOCATION_CONTRACT + # Escalation to main firstmate Handle routine work yourself. Escalate only true captain-relevant outcomes by appending one line: @@ -142,6 +157,8 @@ The report is the only thing that survives, so anything worth keeping must be in 6. If a decision belongs to a human (product choices, destructive actions), append \`needs-decision: {summary of options}\` and stop. Firstmate will reply with the decision. 
+$SKILL_INVOCATION_CONTRACT + # Definition of done Write your findings to \`$DATA/$ID/report.md\`. The report must stand alone: what you did, what you found, the evidence (commands run, output, file:line references), and what you recommend. @@ -167,7 +184,7 @@ case "$MODE" in This project ships **direct-PR**: you raise the PR yourself, without the no-mistakes pipeline. The task is complete only when committed on your branch. When it is implemented and committed, push your branch and open a PR with \`gh-axi\`, then append \`done: PR {url}\` to the status file and stop. -Do NOT run /no-mistakes. The captain reviews and merges the PR; firstmate relays it. +Do NOT run the no-mistakes skill. The captain reviews and merges the PR; firstmate relays it. EOF ) ;; @@ -192,7 +209,8 @@ EOF # Definition of done The task is complete only when committed on your branch. When you believe it is complete, append \`done: {summary}\` to the status file and stop. -Firstmate will then instruct you to run /no-mistakes to validate and ship a PR. +Firstmate will then instruct you to run the no-mistakes skill to validate and ship a PR. +Use the skill invocation form from this brief: Claude uses \`/no-mistakes\`; Codex uses \`\$no-mistakes\`. During validation you drive the gates while the pipeline owns the fixes. Run it in the foreground and follow this contract: - Never edit or \`git commit\` code yourself while a run is active; the pipeline applies every fix in its own worktree. @@ -200,7 +218,7 @@ During validation you drive the gates while the pipeline owns the fixes. Run it - \`no-mistakes axi run\` and \`axi respond\` block synchronously for many minutes (test and CI especially); the pipeline often fixes findings itself with no gate, so when a call returns no \`gate:\` object that is normal - just let it return. - Never cancel, abort, re-run, or background the run, and never idle-wait for a background notification: the call is in the foreground and returns on its own. 
-After /no-mistakes reports CI green, append \`done: PR {url} checks green\` and stop. You are finished. +After no-mistakes reports CI green, append \`done: PR {url} checks green\` and stop. You are finished. EOF ) ;; @@ -236,6 +254,8 @@ $RULE1 6. If a decision belongs to a human (product choices, destructive actions, ask-user findings), append \`needs-decision: {summary of options}\` and stop. Firstmate will reply with the decision. +$SKILL_INVOCATION_CONTRACT + # Project memory If \`AGENTS.md\` or \`CLAUDE.md\` already exists, or if this task produced durable project-intrinsic knowledge, run \`$FM_ROOT/bin/fm-ensure-agents-md.sh .\` in the worktree. If this task produced durable project-intrinsic knowledge, record it in \`AGENTS.md\` as part of your change. diff --git a/bin/fm-supervision-model.sh b/bin/fm-supervision-model.sh index 7b7f47f..587f0fa 100755 --- a/bin/fm-supervision-model.sh +++ b/bin/fm-supervision-model.sh @@ -33,12 +33,146 @@ fm_supervision_schema_json() { "generated_at": { "type": "string", "format": "date-time" }, "home": { "type": "string" }, "read_only": { "const": true }, - "sources": { "type": "object" }, - "summary": { "type": "object" }, - "checklist": { "type": "array" }, - "tasks": { "type": "array" }, - "worktrees": { "type": "array" }, - "external_reminders": { "type": "array" } + "sources": { + "type": "object", + "additionalProperties": { + "type": "object", + "required": ["ok", "detail"], + "properties": { + "ok": { "type": "boolean" }, + "detail": { "type": "string" } + }, + "additionalProperties": false + } + }, + "summary": { + "type": "object", + "required": ["level", "tasks_total", "actions_total", "high_total", "medium_total", "github_state"], + "properties": { + "level": { "enum": ["ok", "watch", "action"] }, + "tasks_total": { "type": "integer", "minimum": 0 }, + "actions_total": { "type": "integer", "minimum": 0 }, + "high_total": { "type": "integer", "minimum": 0 }, + "medium_total": { "type": "integer", "minimum": 0 }, 
+ "github_state": { "enum": ["ok", "partial", "unavailable", "skipped"] } + }, + "additionalProperties": false + }, + "checklist": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "severity", "owner", "action", "why", "task_id", "project", "pr_url", "evidence", "read_only_commands"], + "properties": { + "id": { "type": "string" }, + "severity": { "enum": ["high", "medium", "info"] }, + "owner": { "enum": ["captain", "firstmate", "worker", "external"] }, + "action": { "type": "string" }, + "why": { "type": "string" }, + "task_id": { "type": "string" }, + "project": { "type": "string" }, + "pr_url": { "type": "string" }, + "evidence": { "type": "array", "items": { "type": "string" } }, + "read_only_commands": { "type": "array", "items": { "type": "string" } } + }, + "additionalProperties": false + } + }, + "tasks": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "project", "kind", "mode", "yolo", "window", "window_live", "worktree", "branch", "dirty_count", "last_status", "turn_ended", "pr", "classification", "next", "evidence"], + "properties": { + "id": { "type": "string" }, + "project": { "type": "string" }, + "kind": { "enum": ["ship", "scout", "secondmate"] }, + "mode": { "type": "string" }, + "yolo": { "enum": ["on", "off"] }, + "window": { "type": "string" }, + "window_live": { "type": "boolean" }, + "worktree": { "type": "string" }, + "branch": { "type": "string" }, + "dirty_count": { "type": "integer", "minimum": 0 }, + "last_status": { "type": "string" }, + "turn_ended": { "type": "boolean" }, + "pr": { + "type": "object", + "required": ["url", "state", "ci_state", "mergeable_state"], + "properties": { + "url": { "type": "string" }, + "state": { "type": "string" }, + "ci_state": { "type": "string" }, + "mergeable_state": { "type": "string" } + }, + "additionalProperties": false + }, + "classification": { "type": "string" }, + "next": { + "type": "object", + "required": ["owner", "action"], + "properties": 
{ + "owner": { "enum": ["captain", "firstmate", "worker", "external"] }, + "action": { "type": "string" } + }, + "additionalProperties": false + }, + "evidence": { "type": "array", "items": { "type": "string" } } + }, + "additionalProperties": false + } + }, + "worktrees": { + "type": "array", + "items": { + "type": "object", + "required": ["path", "project", "branch", "dirty_count", "has_active_task", "classification", "next", "evidence"], + "properties": { + "path": { "type": "string" }, + "project": { "type": "string" }, + "branch": { "type": "string" }, + "dirty_count": { "type": "integer", "minimum": 0 }, + "has_active_task": { "type": "boolean" }, + "classification": { "type": "string" }, + "next": { + "type": "object", + "required": ["owner", "action"], + "properties": { + "owner": { "enum": ["captain", "firstmate", "worker", "external"] }, + "action": { "type": "string" } + }, + "additionalProperties": false + }, + "evidence": { "type": "array", "items": { "type": "string" } } + }, + "additionalProperties": false + } + }, + "external_reminders": { + "type": "array", + "items": { + "type": "object", + "required": ["url", "state", "ci_state", "mergeable_state", "classification", "next", "evidence"], + "properties": { + "url": { "type": "string" }, + "state": { "type": "string" }, + "ci_state": { "type": "string" }, + "mergeable_state": { "type": "string" }, + "classification": { "type": "string" }, + "next": { + "type": "object", + "required": ["owner", "action"], + "properties": { + "owner": { "enum": ["captain", "firstmate", "worker", "external"] }, + "action": { "type": "string" } + }, + "additionalProperties": false + }, + "evidence": { "type": "array", "items": { "type": "string" } } + }, + "additionalProperties": false + } + } }, "additionalProperties": false } diff --git a/docs/configuration.md b/docs/configuration.md index 8ac64db..c6ac023 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -42,6 +42,16 @@ When `FM_HOME` is unset, it 
also behaves as the old whole-root override. claude, codex, opencode, and pi are all empirically verified; new harnesses get verified through a supervised trial task before joining the set. The verified adapter knowledge - busy signatures, interrupt and exit commands, skill-invocation syntax, and per-harness quirks - lives in [`.agents/skills/harness-adapters/SKILL.md`](../.agents/skills/harness-adapters/SKILL.md). Launch mechanics, including the verified command templates, live in [`bin/fm-spawn.sh`](../bin/fm-spawn.sh). +Claude invokes skills with `/`. +Codex invokes the same skill names with `$`. +Generated briefs include that convention so workers can use the right form without treating a skill as unavailable. + +## Skill roots + +Firstmate's canonical skill root is `.agents/skills/`. +`.claude/skills` is a compatibility symlink to that root. +Plugin, global, OpenClaw, and other runtime skill folders are external unless a separate scout proves they belong to this repo. +See [Skill Roots](skill-roots.md) for the current root map. ## Toolchain diff --git a/docs/radar-supervision-contract.md b/docs/radar-supervision-contract.md new file mode 100644 index 0000000..4c38d18 --- /dev/null +++ b/docs/radar-supervision-contract.md @@ -0,0 +1,44 @@ +# Radar Supervision Contract + +Radar-style displays consume Firstmate supervision state through `bin/fm-supervise.sh --json`. +They should not duplicate Firstmate's decision model. + +## Command + +Run the command from the Firstmate repo whose `bin/` directory you want to use. +Set `FM_HOME` to the operational home being observed: + +```sh +FM_HOME=/path/to/firstmate-home bin/fm-supervise.sh --json --no-default-reminders +``` + +Use `--no-default-reminders` when the display wants only local fleet state. +Use `--external-pr <url>` to add display-specific PR reminders without hard-coding them into Radar. +Use `--schema` to read the `firstmate.supervision.v1` contract without touching runtime state.
+ +## Consumer Rules + +- Treat `schema_version` as the compatibility gate. +- Treat `read_only: true` as part of the contract; a missing or false value is not a Radar-ready model. +- Render `summary.level` as the fleet headline: `ok`, `watch`, or `action`. +- Render `checklist` as the primary action list. +- Use each checklist item's `severity`, `owner`, `action`, `why`, `task_id`, `project`, `pr_url`, and `evidence` directly instead of recomputing the next action. +- Use `tasks`, `worktrees`, and `external_reminders` only for drill-down detail. +- Surface `sources.<name>.ok: false` as incomplete evidence, not as a command failure. + +## Non-Goals + +- Radar does not classify task state. +- Radar does not decide PR readiness. +- Radar does not inspect tmux, treehouse, git, or GitHub directly when the JSON model already includes the evidence. +- Radar does not write `state/`, `data/`, git branches, treehouse leases, tmux panes, services, or GitHub. + +## Migration Sketch + +1. Shell out to `fm-supervise.sh --json --no-default-reminders` with the right `FM_HOME`. +2. Parse the JSON. +3. Check `schema_version == "firstmate.supervision.v1"` and `read_only == true`. +4. Render `summary` plus `checklist`. +5. Keep any existing Radar-only layout code, but delete duplicated state classification rules. + +The exact local Radar patch belongs outside this repository if the active Radar script still lives at a local-only runtime path. diff --git a/docs/scripts.md b/docs/scripts.md index a5d4bd4..4081e2a 100644 --- a/docs/scripts.md +++ b/docs/scripts.md @@ -49,5 +49,7 @@ bin/fm-supervise.sh --no-default-reminders ``` The JSON model always includes `schema_version: "firstmate.supervision.v1"`, `read_only: true`, and top-level `sources`, `summary`, `checklist`, `tasks`, `worktrees`, and `external_reminders`. +Displays should render `summary` and `checklist` directly and use the other arrays only for detail.
+The nested JSON contract is available through `--schema`; the consumer rules are documented in [Radar Supervision Contract](radar-supervision-contract.md). GitHub read failures are data, not command failures: affected PRs become `unknown`, `sources.github.ok` becomes `false`, and the command still exits successfully. By default, the command includes Firstmate PR `https://github.com/kunchenguid/firstmate/pull/68` as an external reminder; use `--no-default-reminders` to omit it or `--external-pr` to add more reminder PRs. diff --git a/docs/skill-roots.md b/docs/skill-roots.md new file mode 100644 index 0000000..c4ddf4f --- /dev/null +++ b/docs/skill-roots.md @@ -0,0 +1,44 @@ +# Skill Roots + +Firstmate keeps its own skill roots small and explicit. +This note covers the roots that belong to this repository. +It does not audit OpenClaw, global Codex, global agent, plugin-cache, or personal runtime skill folders. + +## Canonical + +`.agents/skills/` is the canonical Firstmate skill root. +Skills here are tracked with the repo and are part of the shared Firstmate operating surface. + +Current Firstmate skills: + +- `afk` +- `harness-adapters` +- `secondmate-provisioning` +- `stuck-crewmate-recovery` +- `updatefirstmate` + +## Compatibility + +`.claude/skills` is a symlink to `../.agents/skills`. +It exists for Claude compatibility and must not diverge from the canonical root. + +## Plugin and Global Roots + +Plugin and global skill roots are outside this repository. +Firstmate can use them when the running harness exposes them, but they are not Firstmate's source of truth. +Do not copy plugin or global skills into `.agents/skills/` unless the skill is becoming a maintained Firstmate skill. + +## Legacy or External Roots + +Any OpenClaw, workspace-local, or personal runtime skill folder should be treated as external until a separate read-only scout proves its role. 
+Do not mark an external root unused, delete it, or sync it with Firstmate based only on matching skill names. + +## Invocation Convention + +Skill names are shared, but invocation syntax is harness-specific: + +- Claude uses `/`. +- Codex uses `$`. +- Other verified harnesses use their native skill invocation surface, or the agent reads `SKILL.md` directly when no slash/dollar surface exists. + +Generated Firstmate briefs repeat this convention so workers do not guess. diff --git a/docs/solutions/architecture-patterns/radar-consumes-supervision-model.md b/docs/solutions/architecture-patterns/radar-consumes-supervision-model.md new file mode 100644 index 0000000..43d7d98 --- /dev/null +++ b/docs/solutions/architecture-patterns/radar-consumes-supervision-model.md @@ -0,0 +1,82 @@ +--- +title: Radar Consumes the Firstmate Supervision Model +date: 2026-06-25 +category: architecture-patterns +module: supervision +problem_type: architecture_pattern +component: assistant +severity: medium +applies_when: + - "A display or dashboard needs to show Firstmate fleet state" + - "A follow-up change would otherwise duplicate task classification logic outside Firstmate" +tags: [radar, supervision, json-contract, display] +--- + +# Radar Consumes the Firstmate Supervision Model + +## Context + +Radar is useful as a readable display, but it should not become a second source of truth for Firstmate supervision decisions. +The stable boundary is `bin/fm-supervise.sh --json`: Firstmate owns state collection and classification, while displays render the resulting model. + +This came up while preparing the workflow-structure follow-up after the read-only supervision command landed as a sidecar. +The active Radar script lived outside this repo, so the safe Firstmate-side move was to publish the JSON contract, test it, and leave the external runtime patch as a handoff. + +## Guidance + +Keep classification in Firstmate. 
+When a display needs fleet state, consume `firstmate.supervision.v1` instead of re-reading `state/`, `tmux`, treehouse, git, or GitHub. + +The display should: + +- Run `FM_HOME=/path/to/home bin/fm-supervise.sh --json --no-default-reminders`. +- Check `schema_version == "firstmate.supervision.v1"` and `read_only == true`. +- Render `summary` and `checklist` as the primary view. +- Use `tasks`, `worktrees`, and `external_reminders` only for detail. +- Treat `sources.<name>.ok: false` as incomplete evidence, not as a reason to invent fallback classification. + +When the display itself lives outside this repository, do not patch it from a Firstmate worktree. +Add or tighten the Firstmate-side contract first, then write an exact handoff for the external patch. + +## Why This Matters + +Duplicated decision rules drift. +A display might classify a task as routine while Firstmate sees a captain decision, or vice versa. +That makes supervision harder because the operator has to reconcile two explanations of the same fleet. + +The JSON contract keeps the boundary simple: Firstmate decides, Radar displays. +It also lets Firstmate improve evidence collection later without requiring every display to relearn the decision model. + +## When to Apply + +- A new dashboard, terminal display, or status report needs Firstmate fleet state. +- A local runtime script wants to inspect task readiness, PR state, dirty worktrees, or stale worker windows. +- A follow-up branch would otherwise copy logic from `bin/fm-supervision-model.sh`. + +## Examples + +Preferred display flow: + +```sh +FM_HOME=/path/to/firstmate-home bin/fm-supervise.sh --json --no-default-reminders +``` + +Then render the contract: + +- headline from `summary.level` +- action rows from `checklist[]` +- detail drawers from `tasks[]`, `worktrees[]`, and `external_reminders[]` + +Avoid this pattern: + +```text +Radar reads state/*.meta, calls tmux, checks PRs, and reimplements classification names.
+``` + +That makes Radar a second supervision engine instead of a display. + +## Related + +- `docs/radar-supervision-contract.md` +- `docs/scripts.md` +- `bin/fm-supervision-model.sh` diff --git a/tests/fm-supervision-model.test.sh b/tests/fm-supervision-model.test.sh index 492047b..734da52 100755 --- a/tests/fm-supervision-model.test.sh +++ b/tests/fm-supervision-model.test.sh @@ -106,6 +106,9 @@ pass "model is sourceable" out=$("$CLI" --schema) || fail "schema command failed" assert_contains "$out" 'firstmate.supervision.v1' "schema missing id" +assert_contains "$out" '"required": ["id", "project", "kind", "mode", "yolo", "window", "window_live", "worktree", "branch", "dirty_count", "last_status", "turn_ended", "pr", "classification", "next", "evidence"]' "schema missing task contract" +assert_contains "$out" '"level": { "enum": ["ok", "watch", "action"] }' "schema missing summary level enum" +assert_contains "$out" '"read_only_commands": { "type": "array", "items": { "type": "string" } }' "schema missing checklist read-only command contract" pass "schema prints v1 id" home=$(new_home) diff --git a/tests/fm-tangle-guard.test.sh b/tests/fm-tangle-guard.test.sh index bc70bd7..1c5bb2e 100755 --- a/tests/fm-tangle-guard.test.sh +++ b/tests/fm-tangle-guard.test.sh @@ -126,6 +126,12 @@ test_brief_assertion_precedes_branch() { "brief is missing the isolation blocked-status contract" assert_grep "The path check is authoritative" "$brief" \ "brief must make the path check authoritative" + assert_grep 'Claude: `/`' "$brief" \ + "brief must document Claude skill invocation" + assert_grep 'Codex: `$`' "$brief" \ + "brief must document Codex skill invocation" + assert_grep 'Claude uses `/no-mistakes`; Codex uses `$no-mistakes`' "$brief" \ + "no-mistakes validation contract must be harness-aware" assert_no_grep "A reliable test that you are in a linked worktree" "$brief" \ "brief must not present git-dir/common-dir as decisive" assert_no_grep "they are identical in the 
primary checkout" "$brief" \