Pyronewbic · Pyronewbic · Jun 10, 2026 · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026
diff --git a/.claude/workflows/README.md b/.claude/workflows/README.md
@@ -0,0 +1,17 @@
+# Claude workflows
+
+Multi-agent orchestration scripts for Claude Code (not GitHub Actions — those live in
+`.github/workflows/`). Each `*.js` here fans work out across subagents.
+
+Run one by asking Claude to "run the `review-launcher` workflow" or via `/review-launcher`.
+Watch live progress with `/workflows`. Running a workflow spawns many agents and uses a lot
+of tokens, so Claude only starts one when you explicitly ask.
+
+| Workflow | What it does |
+|----------|--------------|
+| `review-launcher` | Reviews `src/*.sh` across security / bash-3.2 / docker-vs-podman, then adversarially verifies each finding. |
+| `audit-egress` | Cross-checks every `agents/*.config.sh` preset's egress hosts against the allowlist + `THREAT_MODEL.md`, then adversarially verifies each finding against upstream before reporting it. |
+| `triage-tests` | Runs the bats suites, clusters failures, and root-causes each cluster in parallel (proposes fixes, does not apply them). Pass `args.suite` to scope the run to one suite. |
+
+Anatomy: a pure-literal `export const meta = {...}` (name, description, whenToUse, phases), then a
+body using `agent()` / `parallel()` / `pipeline()` / `phase()` / `log()`. Copy any file here as a template.
diff --git a/.claude/workflows/audit-egress.js b/.claude/workflows/audit-egress.js
@@ -0,0 +1,114 @@
+export const meta = {
+  name: 'audit-egress',
+  description: 'Audit every agent preset (agents/*.config.sh) for egress drift, then adversarially verify each finding against the real preset + upstream source before reporting it. Findings are leads until a skeptic confirms them.',
+  whenToUse: 'Periodically or before a release, to catch agent-preset egress drift before a user hits a blocked host. A deeper one-shot companion to the weekly agents-smoke.yml gate.',
+  phases: [
+    { title: 'Map' },
+    { title: 'Audit' },
+    { title: 'Verify' },
+  ],
+} // phase titles mirror the phase names the body passes to phase() / agent({ phase })
+
+const MAP_SCHEMA = { // structured-output schema for the map-presets agent
+  type: 'object',
+  additionalProperties: false,
+  required: ['presets'],
+  properties: {
+    presets: {
+      type: 'array',
+      items: { type: 'string', description: 'preset name without path or .config.sh, e.g. claude' },
+    },
+  },
+}
+
+const AUDIT_SCHEMA = { // structured-output schema for each audit:<preset> agent
+  type: 'object',
+  additionalProperties: false,
+  required: ['preset', 'declaredHosts', 'issues'],
+  properties: {
+    preset: { type: 'string' },
+    declaredHosts: { type: 'array', items: { type: 'string' } },
+    issues: { // an empty array means the preset is clean (see the audit prompt)
+      type: 'array',
+      items: {
+        type: 'object',
+        additionalProperties: false,
+        required: ['kind', 'detail'],
+        properties: {
+          kind: {
+            type: 'string',
+            enum: ['unlisted-host', 'moved-host', 'renamed-pkg', 'removed-binary', 'threat-model-gap', 'other'],
+          },
+          host: { type: 'string' }, // optional: not in required; the verify prompt prints '(none)' when absent
+          detail: { type: 'string' },
+        },
+      },
+    },
+  },
+}
+
+const VERDICT_SCHEMA = { // structured-output schema for each verify:<preset>:<host|kind> agent
+  type: 'object',
+  additionalProperties: false,
+  required: ['status', 'reason'],
+  properties: {
+    // confirmed = real AND specifics check out; refuted = wrong / not on this preset's path;
+    // uncertain = couldn't verify (the honest default - never upgrade a guess to confirmed).
+    status: { type: 'string', enum: ['confirmed', 'refuted', 'uncertain'] },
+    reason: { type: 'string' },
+    // If the direction is right but a specific is wrong (the lesson from hand-verifying these:
+    // audits state plausible-but-wrong flags/hosts/dates), the corrected value goes here.
+    correction: { type: 'string' }, // optional: not listed in required
+  },
+}
+
+// Discover presets at run time (not hardcoded) so the audit fans out over whatever is actually in
+// agents/ - the point of the workflow is to catch drift, including added/removed presets.
+phase('Map')
+const map = await agent(`List every agent preset in this repo. Run: ls agents/*.config.sh
+Return just the preset names - the filename without the agents/ prefix and without the .config.sh suffix.`,
+  { label: 'map-presets', phase: 'Map', schema: MAP_SCHEMA })
+
+const presets = map.presets || [] // a missing list degrades to zero presets instead of throwing on .length below
+log(`Auditing ${presets.length} agent presets, then verifying each finding`)
+
+// Pipeline (no barrier): a preset's findings stream into Verify as soon as its audit lands, so the
+// fast presets' findings get refuted while the slow presets are still being audited.
+const auditPrompt = (p) => `Audit the agent preset agents/${p}.config.sh for egress drift.
+1. Read agents/${p}.config.sh. List every host/domain it relies on - SLUICE_ALLOW_DOMAINS plus any hosts implied by its setup/prefetch/run commands and the agent CLI's own API endpoints.
+2. Read THREAT_MODEL.md and the base-allowlist notes in sluice.config.example.sh (npm/yarn registries + GitHub git/release hosts are allowed by default, so the preset need not re-list those).
+3. Flag drift: a host not covered by base+preset allowlist, a host that looks renamed or moved, a package whose registry changed, a binary the preset installs that no longer exists upstream, or an egress need the threat model doesn't account for.
+Be concrete - name the host and what's wrong. If the preset is clean, return an empty issues array.` // p: bare preset name from the Map phase; returns the Audit-stage prompt text
+
+const verifyPrompt = (preset, issue) => `You are a skeptical verifier. An automated audit flagged this issue on agents/${preset}.config.sh. Your job is to REFUTE it.
+
+Issue kind: ${issue.kind}
+Host: ${issue.host || '(none)'}
+Detail: ${issue.detail}
+
+Check it against BOTH (a) the actual agents/${preset}.config.sh in this repo, and (b) the real upstream source/docs for that tool - curl the source, npm view the package, fetch the vendor's network doc. Audits routinely state plausible specifics that are WRONG (a flag that doesn't exist, a host that moved, a removal date that's invented), so verify every concrete claim: exact host name, exact package name, exact CLI flag, any date.
+- status=confirmed ONLY if the issue is real on this preset's actual run path AND its specifics check out.
+- status=refuted if it's wrong, or the host/flag isn't reachable on the path this preset uses.
+- status=uncertain if you cannot verify it from source/docs - default here rather than guessing.
+If the direction is right but a specific (flag, host, package, date) is wrong, put the corrected value in 'correction'.` // preset: bare preset name; issue: one AUDIT_SCHEMA issue (kind, optional host, detail)
+
+const results = await pipeline(
+  presets,
+  p => agent(auditPrompt(p), { label: `audit:${p}`, phase: 'Audit', schema: AUDIT_SCHEMA }),
+  (audit) => parallel((audit.issues || []).map(issue => () =>
+    agent(verifyPrompt(audit.preset, issue), {
+      label: `verify:${audit.preset}:${issue.host || issue.kind}`,
+      phase: 'Verify',
+      schema: VERDICT_SCHEMA,
+    }).then(v => ({ ...issue, preset: audit.preset, verdict: v })))),
+)
+
+const all = results.flat().filter(Boolean)
+const confirmed = all.filter(i => i.verdict && i.verdict.status === 'confirmed')
+const uncertain = all.filter(i => i.verdict && i.verdict.status === 'uncertain')
+const refuted = all.filter(i => i.verdict && i.verdict.status === 'refuted')
+log(`${confirmed.length} confirmed, ${uncertain.length} uncertain, ${refuted.length} refuted (of ${all.length} raw findings)`)
+
+// confirmed = act on these; uncertain = worth a human look; refuted dropped from the headline but
+// returned so a run is auditable (you can see what the skeptic killed and why).
+return { confirmed, uncertain, refuted }
diff --git a/.claude/workflows/review-launcher.js b/.claude/workflows/review-launcher.js
@@ -0,0 +1,90 @@
+export const meta = {
+  name: 'review-launcher',
+  description: 'Multi-dimension review of the sluice launcher (src/*.sh -> bin/sluice) across security, bash-3.2 correctness, and docker/podman portability; each finding is adversarially verified before it survives.',
+  whenToUse: 'Before merging changes to src/*.sh or bin/sluice, or to audit the launcher for egress-bypass / shell-portability bugs.',
+  phases: [
+    { title: 'Review' },
+    { title: 'Verify' },
+  ],
+} // phase titles mirror the phase names the body passes to phase() / agent({ phase })
+
+// bin/sluice is GENERATED from the ordered src/*.sh slices via `make build`. Reviewers read the
+// slices (the real source), not the assembled launcher.
+
+const FINDINGS_SCHEMA = { // structured-output schema for each review:<dimension> agent
+  type: 'object',
+  additionalProperties: false,
+  required: ['findings'],
+  properties: {
+    findings: {
+      type: 'array',
+      items: {
+        type: 'object',
+        additionalProperties: false,
+        required: ['title', 'file', 'severity', 'detail'],
+        properties: {
+          title: { type: 'string' },
+          file: { type: 'string', description: 'path:line, e.g. src/10-egress-helpers.sh:42' },
+          severity: { type: 'string', enum: ['high', 'medium', 'low'] },
+          detail: { type: 'string' },
+          suggestion: { type: 'string' }, // optional: not listed in required
+        },
+      },
+    },
+  },
+}
+
+const VERDICT_SCHEMA = { // structured-output schema for each verify:<dimension>:<file> agent
+  type: 'object',
+  additionalProperties: false,
+  required: ['isReal', 'reason'],
+  properties: {
+    isReal: { type: 'boolean' }, // the verify prompt asks for false unless the bug is clearly confirmed
+    reason: { type: 'string' },
+  },
+}
+
+const DIMENSIONS = [ // one review agent per entry; key feeds the agent labels and the reported dimension
+  {
+    key: 'security',
+    prompt: `Review the sluice launcher for security holes that would let a sandboxed process escape the egress allowlist or firewall.
+Read the source slices in src/*.sh (bin/sluice is GENERATED from them - review the slices). Focus on src/10-egress-helpers.sh, src/40-runtime.sh, src/70-build-run.sh, src/50-init.sh.
+Look for: allowlist bypass, host/IP laundering, DNS-sink exfil gaps, SLUICE_ALLOW_IPS port-scoping mistakes, SSL-bump misconfig, unquoted expansions that let a hostile config inject flags, anything that weakens the default-DROP egress posture.
+Return concrete findings, each with a src/<slice>:line location.`,
+  },
+  {
+    key: 'bash32',
+    prompt: `Review the sluice launcher for bash 3.2 correctness. bin/sluice must run under macOS's stock bash 3.2 AND modern Linux bash. Read src/*.sh.
+Look for: bash-4+ only constructs (associative arrays, \${var,,}/\${var^^}, |&, mapfile/readarray), a case ')' inside $(...) (mis-parses under 3.2 and 'bash -n' will NOT catch it - the highest-value bug class here), [[ =~ ]] quirks, and 'local x=$(...)' masking the command's exit status.
+Return concrete findings, each with a src/<slice>:line location.`,
+  },
+  {
+    key: 'portability',
+    prompt: `Review the sluice launcher for docker vs rootless-podman divergence. Read src/*.sh, especially src/40-runtime.sh and the firewall/init path.
+Only docker + rootless podman are supported; rootful podman (netavark) is out of scope. Look for: engine-specific flags assumed present on both backends, network/DNS setup that only works on one, sysctl/iptables assumptions, anything that silently no-ops on podman.
+Return concrete findings, each with a src/<slice>:line location.`,
+  },
+]
+
+phase('Review')
+log(`Reviewing the launcher across ${DIMENSIONS.length} dimensions`)
+
+// Pipeline: each dimension's findings verify as soon as that dimension finishes reviewing -
+// no barrier, so the bash32 verifies don't wait on the slower security review.
+const results = await pipeline(
+  DIMENSIONS,
+  d => agent(d.prompt, { label: `review:${d.key}`, phase: 'Review', schema: FINDINGS_SCHEMA }),
+  (review, d) => parallel((review.findings || []).map(f => () => // one verifier thunk per finding; d is the dimension entry that produced it
+    agent(`You are a skeptical reviewer. Try to REFUTE this finding about the sluice launcher. Read the cited file and surrounding code. Default to isReal=false unless you can clearly confirm the bug is real and reachable.
+
+Finding: ${f.title}
+Location: ${f.file}
+Severity: ${f.severity}
+Detail: ${f.detail}`,
+      { label: `verify:${d.key}:${f.file}`, phase: 'Verify', schema: VERDICT_SCHEMA })
+      .then(v => ({ ...f, dimension: d.key, verdict: v })))), // tag each finding with its dimension and verdict
+)
+
+const confirmed = results.flat().filter(Boolean).filter(f => f.verdict && f.verdict.isReal) // refuted and verdict-less findings are dropped, not returned
+log(`${confirmed.length} confirmed findings`)
+return { confirmed }
diff --git a/.claude/workflows/triage-tests.js b/.claude/workflows/triage-tests.js
@@ -0,0 +1,101 @@
+export const meta = {
+  name: 'triage-tests',
+  description: 'Run the sluice bats gate suites, cluster the failures by likely shared cause, and spawn one agent per cluster to root-cause and propose a fix (does not apply it).',
+  whenToUse: 'When the bats suites are red and you want failures grouped and root-caused in parallel rather than read one at a time.',
+  phases: [
+    { title: 'Run' },
+    { title: 'Cluster' },
+    { title: 'Root-cause' },
+  ],
+} // phase titles mirror the phase names the body passes to phase() / agent({ phase })
+
+// args.suite (optional): a specific .bats path or glob to run instead of the full gate suite.
+// e.g. Workflow({ name: 'triage-tests', args: { suite: 'test/verify-security-dns.bats' } })
+
+const FAILURES_SCHEMA = { // structured-output schema for the run-suites agent
+  type: 'object',
+  additionalProperties: false,
+  required: ['passed', 'failures'],
+  properties: {
+    passed: { type: 'boolean' },
+    failures: { // the run prompt asks for [] when everything passes
+      type: 'array',
+      items: {
+        type: 'object',
+        additionalProperties: false,
+        required: ['test', 'file', 'message'],
+        properties: {
+          test: { type: 'string' },
+          file: { type: 'string', description: 'the .bats file' },
+          message: { type: 'string', description: 'the assertion / error excerpt' },
+        },
+      },
+    },
+  },
+}
+
+const CLUSTER_SCHEMA = { // structured-output schema for the cluster agent
+  type: 'object',
+  additionalProperties: false,
+  required: ['clusters'],
+  properties: {
+    clusters: {
+      type: 'array',
+      items: {
+        type: 'object',
+        additionalProperties: false,
+        required: ['label', 'tests', 'hypothesis'],
+        properties: {
+          label: { type: 'string' }, // also used in the rootcause:<label> agent label
+          tests: { type: 'array', items: { type: 'string' } }, // presumably failing-test names; joined verbatim into the root-cause prompt
+          hypothesis: { type: 'string' },
+        },
+      },
+    },
+  },
+}
+
+const ROOTCAUSE_SCHEMA = { // structured-output schema for each rootcause:<cluster> agent
+  type: 'object',
+  additionalProperties: false,
+  required: ['cluster', 'rootCause', 'fix', 'confidence'],
+  properties: {
+    cluster: { type: 'string' },
+    rootCause: { type: 'string' },
+    fix: { type: 'string', description: 'concrete proposed change with file:line' },
+    confidence: { type: 'string', enum: ['high', 'medium', 'low'] },
+  },
+}
+
+phase('Run')
+const suite = args && args.suite ? args.suite : null
+const cmd = suite ? `test/bats/bin/bats --print-output-on-failure ${suite}` : 'make test'
+log(`Running ${suite ? suite : 'the gate suites (make test)'}`)
+
+const run = await agent(`Run the sluice bats suites and report failures.
+Command: ${cmd}
+Note: the acceptance and verify-security suites need a working Docker engine; if Docker is unavailable, report that as the failure rather than guessing. Parse the bats output and return each failing test with its .bats file and the assertion/error excerpt. If everything passes, set passed=true and failures=[].`,
+  { label: 'run-suites', phase: 'Run', schema: FAILURES_SCHEMA })
+
+if (run.passed || !run.failures.length) {
+  log('All suites passed - nothing to triage')
+  return { passed: true, failures: [] }
+}
+
+phase('Cluster')
+log(`${run.failures.length} failing tests - clustering by likely shared cause`)
+const clustered = await agent(`Here are the failing bats tests:
+${JSON.stringify(run.failures, null, 2)}
+Group them into clusters that likely share a single root cause (same suite, same error signature, same subsystem). Give each cluster a short label and a one-line hypothesis. A failure can belong to only one cluster.`,
+  { label: 'cluster', phase: 'Cluster', schema: CLUSTER_SCHEMA })
+
+phase('Root-cause')
+const diagnoses = await parallel((clustered.clusters || []).map(c => () =>
+  agent(`Root-cause this cluster of failing sluice bats tests and propose a concrete fix. Do NOT apply it.
+Cluster: ${c.label}
+Tests: ${c.tests.join(', ')}
+Hypothesis: ${c.hypothesis}
+Read the named .bats files under test/ and the relevant src/*.sh slices (bin/sluice is generated from src/ via 'make build', so fixes go in the slices). Find the actual cause and propose a specific change with a file:line.`,
+    { label: `rootcause:${c.label}`, phase: 'Root-cause', schema: ROOTCAUSE_SCHEMA })))
+
+return { failures: run.failures, diagnoses: diagnoses.filter(Boolean) }
diff --git a/.github/workflows/acceptance.yml b/.github/workflows/acceptance.yml
@@ -78,6 +78,8 @@ jobs:
         with: { submodules: recursive }
       - name: version --json, per-command --help, parent_of, completions
         run: test/bats/bin/bats --print-output-on-failure test/verify-cli.bats
+      - name: doctor project scans (mask, symlinks, overlays) + agent scaffold
+        run: test/bats/bin/bats --print-output-on-failure test/verify-doctor-checks.bats test/verify-agent-scaffold.bats
 
   docker:
     name: Linux, Docker