diff --git a/.changeset/cutover-published-engine-014-rehome-harness.md b/.changeset/cutover-published-engine-014-rehome-harness.md
new file mode 100644
index 00000000..7f90703c
--- /dev/null
+++ b/.changeset/cutover-published-engine-014-rehome-harness.md
@@ -0,0 +1,42 @@
+---
+'@smooai/smooth': patch
+---
+
+Cut smooth over to the published `smooai-smooth-operator-core` v0.14.0 (crates.io); re-home the th-code harness into smooth's own crates
+
+This is the final PR of the engine-decouple program (SMOODEV-1790, PR 4/4). The
+engine `smooai-smooth-operator-core` is now published on crates.io at `0.14.0` —
+a clean, GENERIC agent engine with the `th code` coding harness REMOVED.
+Previously smooth depended on the engine via a git rev (`bb9a256`) that still
+carried the harness, which is why it kept building.
+
+- **Engine dep switched to crates.io 0.14.0.** Root `Cargo.toml`:
+  `smooth-operator = { git = …, rev = "bb9a256…" }` →
+  `smooth-operator = { version = "0.14.0", package = "smooai-smooth-operator-core" }`.
+  The dep KEY stays `smooth-operator` so the `use smooth_operator::…` imports for
+  the generic engine API are unchanged. `Cargo.lock` now resolves the engine from
+  `registry+https://github.com/rust-lang/crates.io-index` (checksum-pinned), not a
+  git source — the git-rev bridge is gone.
+
+- **New `smooth-cast` crate** re-homes the bits the engine dropped, built on the
+  engine's generic public API (`Agent`/`ProviderRegistry`/`ToolRegistry`/generic
+  `Cast`/`OperatorRole`/`Clearance`):
+  - `coding_workflow` — the `th code` single-agent outer loop
+    (`run_coding_workflow`, `task_text_has_cleanup_intent`, …).
+  - `skills` — skill discovery (`discover`, `SkillScope`, `SkillSource`, `Skill`)
+    plus the built-in `create-skill` skill.
+  - `cast` — the four coding-harness cast roles the generic engine no longer ships
+    (`fixer`, `oracle`, `chief`, `intent_classifier`), and a `cast::builtin()` that
+    returns them on top of the engine's generic built-in roles. All moved tests came
+    with the code.
+
+- **Consumers repointed** to `smooth-cast`: `smooth-operative` (coding_workflow +
+  `fixer` role resolution), `smooth-code` (skills + `chief`/`intent_classifier`
+  routing), `smooth-cli` (skills + `--agent` role resolution), `smooth-bigsmooth`
+  (skills + session auto-naming). Every site that did `Cast::builtin().get("fixer"|
+  "oracle"|"chief"|"intent_classifier")` now uses `smooth_cast::cast::builtin()`.
+
+- The Big-Smooth reporter hooks the engine also dropped stay deleted — verified
+  zero smooth consumers (`with_reporter`/`BigSmoothReporter`/`ReporterEvent`/
+  `report_to_bigsmooth`/the `bigsmooth` engine feature). smooth's own
+  `smooth-bigsmooth` gRPC crate is unrelated and untouched.
diff --git a/Cargo.lock b/Cargo.lock
index 51556d77..8b0d8cff 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6123,6 +6123,7 @@ dependencies = [
  "serde_json",
  "smooai-smooth-archivist",
  "smooai-smooth-bootstrap-bill",
+ "smooai-smooth-cast",
  "smooai-smooth-code",
  "smooai-smooth-diver",
  "smooai-smooth-goalie",
@@ -6168,6 +6169,21 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "smooai-smooth-cast"
+version = "0.13.7"
+dependencies = [
+ "anyhow",
+ "dirs-next",
+ "serde",
+ "serde_json",
+ "serde_yml",
+ "smooai-smooth-operator-core",
+ "tempfile",
+ "tokio",
+ "tracing",
+]
+
 [[package]]
 name = "smooai-smooth-cli"
 version = "0.13.7"
@@ -6194,6 +6210,7 @@ dependencies = [
  "smooai-smooth-bench",
  "smooai-smooth-bigsmooth",
  "smooai-smooth-bootstrap-bill",
+ "smooai-smooth-cast",
  "smooai-smooth-code",
  "smooai-smooth-diver",
  "smooai-smooth-operator-core",
@@ -6232,6 +6249,7 @@ dependencies = [
  "serde_json",
  "similar",
  "smooai-smooth-bigsmooth",
+ "smooai-smooth-cast",
  "smooai-smooth-narc",
  "smooai-smooth-operator-core",
  "smooai-smooth-pearls",
@@ -6364,6 +6382,7 @@ dependencies = [
  "serde",
  "serde_json",
  "similar",
+ "smooai-smooth-cast",
  "smooai-smooth-goalie",
  "smooai-smooth-narc",
  "smooai-smooth-operator-core",
@@ -6381,8 +6400,9 @@ dependencies = [
 
 [[package]]
 name = "smooai-smooth-operator-core"
-version = "0.13.7"
-source = "git+https://github.com/SmooAI/smooth-operator-core.git?rev=bb9a2565f0187fbd860240868c5775bd1205764d#bb9a2565f0187fbd860240868c5775bd1205764d"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60cac82deb3b84139783af4c3494717d184e1f9c5372945462191053c37dea39"
 dependencies = [
  "anyhow",
  "async-trait",
diff --git a/Cargo.toml b/Cargo.toml
index e3a5f564..6ca5d467 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -178,18 +178,25 @@ prost-types = "0.13"
 # relevant to the bind-mount silent drop tracked in th-dd0cef).
 microsandbox = "0.4"
 
-# Published agent engine (github.com/SmooAI/smooth-operator-core).
+# Published agent engine (crates.io: smooai-smooth-operator-core).
 # SMOODEV-1787 (PR 1/4, dual-engine collapse): smooth consumes the public
 # `smooai-smooth-operator-core` engine instead of an in-tree copy. The in-tree
 # `crates/smooth-operator` copy was deleted. The dep KEY stays `smooth-operator`
 # and the lib is package-aliased back to `smooth_operator`, so the ~12 consumers'
-# `use smooth_operator::…` imports keep working unchanged.
+# `use smooth_operator::…` imports for the GENERIC engine API keep working
+# unchanged.
 #
-# Rev-pinned git dep (NOT a sibling path dep): a `path = "../smooth-operator-core/…"`
-# form only resolves on a dev laptop and breaks every CI `cargo metadata` run —
-# the exact failure SMOODEV-1464 hit with client-shared (see that dep below).
-# Bump the rev when the engine changes.
-smooth-operator = { git = "https://github.com/SmooAI/smooth-operator-core.git", rev = "bb9a2565f0187fbd860240868c5775bd1205764d", package = "smooai-smooth-operator-core" }
+# SMOODEV-1790 (PR 4/4, final cutover): switched from the rev-pinned git dep on
+# the OLD engine (rev bb9a256, which still carried the th-code harness) to the
+# published crates.io release `0.14.0` — a clean GENERIC engine with the harness
+# REMOVED. The harness bits the engine dropped (the coding workflow, skill
+# discovery, and the fixer/oracle/chief/intent_classifier cast roles) now live
+# in the smooth-owned `smooth-cast` crate below, built on the engine's generic
+# public API.
+smooth-operator = { version = "0.14.0", package = "smooai-smooth-operator-core" }
+# smooth-owned coding-harness extensions to the generic engine (re-homed from
+# the engine when it went generic at 0.14.0). See crates/smooth-cast.
+smooth-cast = { version = "0.13.7", path = "crates/smooth-cast", package = "smooai-smooth-cast" }
 smooth-bigsmooth = { version = "0.13.7", path = "crates/smooth-bigsmooth", package = "smooai-smooth-bigsmooth" }
 smooth-policy = { path = "crates/smooth-policy", version = "0.13.7", package = "smooai-smooth-policy" }
 smooth-web = { version = "0.13.7", path = "crates/smooth-web", package = "smooai-smooth-web" }
diff --git a/crates/smooth-bigsmooth/Cargo.toml b/crates/smooth-bigsmooth/Cargo.toml
index 689b7dc2..f36b1a60 100644
--- a/crates/smooth-bigsmooth/Cargo.toml
+++ b/crates/smooth-bigsmooth/Cargo.toml
@@ -24,6 +24,8 @@ direct-sandbox = ["smooth-bootstrap-bill/server"]
 smooth-pearls.workspace = true
 smooth-bootstrap-bill = { workspace = true, default-features = false }
 smooth-operator.workspace = true
+# skills discovery + the smooth cast roles (re-homed from the engine at 0.14.0)
+smooth-cast.workspace = true
 smooth-code = { path = "../smooth-code", package = "smooai-smooth-code" }
 smooth-policy.workspace = true
 smooth-web = { path = "../smooth-web", package = "smooai-smooth-web" }
diff --git a/crates/smooth-bigsmooth/src/chat_tools.rs b/crates/smooth-bigsmooth/src/chat_tools.rs
index d4961439..64d84488 100644
--- a/crates/smooth-bigsmooth/src/chat_tools.rs
+++ b/crates/smooth-bigsmooth/src/chat_tools.rs
@@ -289,7 +289,7 @@ impl Tool for TeammateSpawnTool {
                     "extra_prompt": { "type": "string", "description": "Optional extra instruction appended after the context_brief. Use this for fine-grained constraints (e.g. 'use the Rust 2021 edition', 'don't touch the migrations directory')." },
                     "budget_usd": { "type": "number", "description": "Optional cost cap in USD for this dispatch." },
                     "working_dir": { "type": "string", "description": "Working directory for the teammate's sandbox. Pass the most specific absolute path that scopes the work — e.g. for 'clone repo X to ~/dev/foo/X' pass `~/dev/foo`. Never pass a directory as broad as `~` or `/`; the runner can stall enumerating that much filesystem." },
-                    "role": { "type": "string", "description": "Optional cast role to spawn under (e.g. `fixer`, `mapper`, `oracle`, `heckler` — see smooth-operator/src/cast). Affects permissions, prompt, and routing slot." },
+                    "role": { "type": "string", "description": "Optional cast role to spawn under (e.g. `fixer`, `mapper`, `oracle`, `heckler` — resolved via smooth_cast::cast::builtin()). Affects permissions, prompt, and routing slot." },
                     "model": { "type": "string", "description": "DO NOT SET unless you have a specific reason. Default = role's slot (smooth-coding for `fixer`) which is the best balance of speed and tool-call reliability. Avoid `smooth-fast-gemini` — it can't reliably emit native tool calls and will wedge the runner. `smooth-reasoning` is for genuinely hard problems only." }
                 }
             }),
diff --git a/crates/smooth-bigsmooth/src/policy.rs b/crates/smooth-bigsmooth/src/policy.rs
index b906272e..4ef66e08 100644
--- a/crates/smooth-bigsmooth/src/policy.rs
+++ b/crates/smooth-bigsmooth/src/policy.rs
@@ -603,8 +603,9 @@ fn registered_tool_names() -> Vec<String> {
 }
 
 /// Read-only subset — what reasoning roles (oracle, mapper, heckler) get.
-/// Must stay in sync with `read_only_tools()` in
-/// `crates/smooth-operator/src/cast/mod.rs`.
+/// Must stay in sync with `read_only_tools()` in the engine's
+/// `cast/mod.rs` (mapper/heckler) and the smooth re-homed copy in
+/// `crates/smooth-cast/src/cast.rs` (oracle).
 fn read_only_tool_names() -> Vec<String> {
     vec![
         "read_file".into(),
diff --git a/crates/smooth-bigsmooth/src/server.rs b/crates/smooth-bigsmooth/src/server.rs
index 4ae4c855..fc256419 100644
--- a/crates/smooth-bigsmooth/src/server.rs
+++ b/crates/smooth-bigsmooth/src/server.rs
@@ -3550,7 +3550,7 @@ fn extract_skill_allowed_hosts(message: &str, workspace: &str) -> Vec<String> {
         return Vec::new();
     }
     let workspace_path = std::path::PathBuf::from(workspace);
-    let skills = smooth_operator::skills::discover(&workspace_path);
+    let skills = smooth_cast::skills::discover(&workspace_path);
     let Some(skill) = skills.into_iter().find(|s| s.name == name) else {
         tracing::warn!(skill_name = name, "skill named in message header but not found in discovery — no pre-grant");
         return Vec::new();
@@ -4570,7 +4570,7 @@ async fn post_chat_message_stream_handler(
 async fn auto_name_session(user_prompt: &str) -> Option<String> {
     let providers_path = dirs_next::home_dir()?.join(".smooth/providers.json");
     let registry = ProviderRegistry::load_from_file(&providers_path).ok()?;
-    let cast = smooth_operator::cast::Cast::builtin();
+    let cast = smooth_cast::cast::builtin();
     let agent = cast.get("tagger")?;
     let config = registry.llm_config_for(agent.slot).ok()?;
     let llm = smooth_operator::llm::LlmClient::new(config);
diff --git a/crates/smooth-cast/Cargo.toml b/crates/smooth-cast/Cargo.toml
new file mode 100644
index 00000000..b9a4e3e0
--- /dev/null
+++ b/crates/smooth-cast/Cargo.toml
@@ -0,0 +1,31 @@
+[package]
+name = "smooai-smooth-cast"
+version = "0.13.7"
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+description = "Smooth coding-harness extensions to the smooth-operator engine — the th-code coding workflow, skill discovery, and the harness-specific cast roles (fixer/oracle/chief/intent_classifier) that the published generic engine no longer ships."
+
+[lib]
+name = "smooth_cast"
+path = "src/lib.rs"
+
+[dependencies]
+# The published generic engine. smooth-cast re-homes the coding-harness
+# bits the engine dropped at 0.14.0 and builds the custom cast roles on
+# the engine's generic Cast/OperatorRole/Clearance public API.
+smooth-operator.workspace = true
+
+anyhow.workspace = true
+serde = { workspace = true }
+serde_json.workspace = true
+serde_yml.workspace = true
+dirs-next.workspace = true
+tokio.workspace = true
+tracing.workspace = true
+
+[dev-dependencies]
+tempfile.workspace = true
+
+[lints]
+workspace = true
diff --git a/crates/smooth-cast/builtin-skills/create-skill/SKILL.md b/crates/smooth-cast/builtin-skills/create-skill/SKILL.md
new file mode 100644
index 00000000..a2ebac25
--- /dev/null
+++ b/crates/smooth-cast/builtin-skills/create-skill/SKILL.md
@@ -0,0 +1,109 @@
+---
+name: create-skill
+description: Author a new skill (SKILL.md) for Smooth. Asks clarifying questions, drafts the frontmatter + body, writes the file to the user's chosen location, and offers a test invocation.
+triggers:
+  - make a skill
+  - create a skill
+  - add a skill
+  - save this as a skill
+  - new skill
+  - author a skill
+scope: host
+allowed_tools:
+  - read_file
+  - write_file
+  - edit_file
+  - list_files
+  - bash
+---
+
+# create-skill
+
+The user wants to add a reusable recipe to their Smooth setup. Your job: turn a description of what the recipe should do into a well-formed `SKILL.md` file at the right path.
+
+## Process
+
+### 1. Clarify (if needed)
+
+If the user's request is vague — e.g. "make a skill for git stuff" — ask ONE question to narrow it:
+
+- "What should this skill do specifically? Concrete steps help."
+
+Skip clarifying if the request is concrete enough on its own ("make a skill that adds a movie to my smoo-hub watchlist using the api at smoo-hub:8787" — that's actionable).
+
+### 2. Decide scope: project or user
+
+Ask if you don't know:
+
+- **Project scope** (`<workspace>/.smooth/skills/<name>/SKILL.md`) — the skill is tied to this codebase. Other workspaces don't see it. Commit it to the repo so teammates get it too.
+- **User scope** (`~/.smooth/skills/<name>/SKILL.md`) — the skill applies to every Smooth dispatch you ever do, in any workspace. Personal.
+
+Default to user scope if the user just says "save it" without specifying.
+
+### 3. Pick a name
+
+Lowercase, hyphenated, descriptive. `add-show`, `format-rust`, `sync-to-s3`. The directory name and the `name:` frontmatter must match.
+
+### 4. Determine the execution `scope`: sandbox or host
+
+- `sandbox` (default) — the skill runs inside the microVM. Use when the skill only touches `/workspace`, runs build/test commands, edits source code, or needs nothing outside the sandbox.
+- `host` — the skill bypasses the microVM and runs in Big Smooth's process directly. Use ONLY for genuine host-needing cases: `scp` to a local-network host, `sips` / macOS-specific tools, AWS SSO browser flows, Photos.app integration.
+
+**Network alone is NEVER a reason for `host`.** Network access from the sandbox is handled by `allowed_hosts` below.
+
+### 5. Determine `allowed_hosts`
+
+If the skill needs to reach a host the default Wonk policy doesn't allow (`llm.smoo.ai` is the only default), list those hosts here. Examples:
+
+- `smoo-hub` — LAN/tailscale-only personal server
+- `api.tvmaze.com` — public API
+- `*.azureedge.net` — wildcard for a CDN family
+
+Be specific. Don't list `*` or "all"; users won't accept that grant.
+
+### 6. Determine `allowed_tools`
+
+Optional. If left empty, the skill inherits the agent's full toolset. Use to RESTRICT (not expand) — e.g. a read-only summarize skill might say `allowed_tools: [read_file, list_files, grep]`.
+
+### 7. Write the SKILL.md body
+
+The body is what the agent reads when the skill is invoked. Make it:
+
+- **Short.** 30–80 lines for most skills. A long skill that the model has to wade through is worse than no skill.
+- **Step-by-step.** Numbered list of what to do, in order. The model will follow it literally.
+- **Concrete on commands.** Show the exact `curl`, `bash`, or tool invocation. Not "make an API call" but `curl -X POST http://smoo-hub:8787/api/shows -H 'Content-Type: application/json' -d '{...}'`.
+- **Explicit on inputs.** Name what the user will provide (title, status, etc.) and what defaults you'll assume when they're missing.
+
+Optional sections worth including:
+
+- `## Inputs` — what the user typically provides
+- `## Outputs` — what the user will see / what gets created
+- `## Failure modes` — what to do when X is missing, Y returns 404, etc.
+
+### 8. Write the file
+
+For project scope:
+```bash
+mkdir -p .smooth/skills/<name>
+# write SKILL.md
+```
+
+For user scope:
+```bash
+mkdir -p ~/.smooth/skills/<name>
+# write SKILL.md
+```
+
+Then run `th skills list` to confirm the skill is discovered.
+
+### 9. (Optional) Test it
+
+If the user wants, offer to invoke the skill once with a sample input. Just suggest the invocation phrasing — don't auto-invoke unless they ask.
+
+## Output
+
+When done, reply with ONE sentence:
+
+> "Created `<name>` at `<path>`. Run `th skills show <name>` to inspect or invoke by saying something matching: `<one trigger phrase>`."
+
+That's it. No essay. The file is the artifact; the sentence confirms it landed.
diff --git a/crates/smooth-cast/src/cast.rs b/crates/smooth-cast/src/cast.rs
new file mode 100644
index 00000000..2388141d
--- /dev/null
+++ b/crates/smooth-cast/src/cast.rs
@@ -0,0 +1,179 @@
+//! # Smooth cast roles — the coding-harness roles the generic engine dropped
+//!
+//! The published `smooai-smooth-operator-core` engine (0.14.0) ships a
+//! *generic* [`Cast`](smooth_operator::cast::Cast) populated with the
+//! generic roles (`tagger`, `presser`, `recapper`, `mapper`, `heckler`,
+//! `scout`, `runner`). It deliberately dropped the four coding-harness
+//! roles that only the `th code` workflow used:
+//!
+//! - **`fixer`** — the default `th` coding experience: full tool access,
+//!   `Coding`-slot routing. [`crate::coding_workflow`] looks up its
+//!   prompt and slot by name.
+//! - **`oracle`** — pure read-only reasoning (no bash, no mutation).
+//! - **`chief`** — the Chief-of-Staff router that emits `DISPATCH: <role>`.
+//! - **`intent_classifier`** — the chat TUI's `WORK`/`QUESTION` router.
+//!
+//! This module rebuilds those four roles on the engine's public
+//! [`OperatorRole`]/[`Clearance`]/[`RoleKind`] API and exposes
+//! [`builtin()`], a drop-in replacement for `Cast::builtin()` that returns
+//! the generic engine roles PLUS these four. Smooth call sites that used to
+//! call `smooth_operator::cast::Cast::builtin()` and then `.get("fixer")`
+//! (etc.) now call [`smooth_cast::cast::builtin()`](builtin) instead.
+
+use smooth_operator::cast::{Cast, Clearance, OperatorRole, RoleKind};
+use smooth_operator::providers::Activity;
+
+/// System prompt for the `fixer` role. Public because
+/// [`crate::coding_workflow`] documents that it resolves the coding system
+/// prompt from this role by name (mirrors the old engine's
+/// `cast::FIXER_PROMPT`).
+pub const FIXER_PROMPT: &str = include_str!("prompts/fixer.txt");
+const ORACLE_PROMPT: &str = include_str!("prompts/oracle.txt");
+const CHIEF_PROMPT: &str = include_str!("prompts/chief.txt");
+const INTENT_CLASSIFIER_PROMPT: &str = include_str!("prompts/intent_classifier.txt");
+
+/// Read-only tool set used by reasoning roles (`oracle`). Anything not in
+/// this list is denied. Mirrors the engine's private `read_only_tools()`
+/// helper — kept here because the harness `oracle` role needs the same
+/// allowlist and the engine no longer exposes it.
+fn read_only_tools() -> Vec<String> {
+    vec![
+        "read_file".into(),
+        "list_files".into(),
+        "grep".into(),
+        "glob".into(),
+        "lsp".into(),
+        "project_inspect".into(),
+        // Memory is metadata, not source code — even read-only
+        // reasoning roles can persist what they learn about the
+        // workspace to .smooth/MEMORY.md so a later session
+        // doesn't have to re-discover everything.
+        "read_memory".into(),
+        "write_memory".into(),
+    ]
+}
+
+/// The four coding-harness [`OperatorRole`]s the generic engine dropped.
+fn smooth_roles() -> Vec<OperatorRole> {
+    vec![
+        // `intent_classifier` is the chat TUI's auto-router: given a
+        // single user message, emit literal "WORK" or "QUESTION" so
+        // the dispatcher knows whether to run under fixer (coding
+        // workflow) or oracle (read-only Q&A). Routes through the
+        // Fast slot so it adds milliseconds, not seconds.
+        OperatorRole {
+            name: "intent_classifier".into(),
+            kind: RoleKind::Shadow,
+            slot: Activity::Fast,
+            model_override: None,
+            prompt: INTENT_CLASSIFIER_PROMPT.trim().to_string(),
+            permissions: Clearance::deny_all(),
+            steps: None,
+            hidden: true,
+        },
+        // `chief` is the Chief of Staff router. Reads the user message
+        // and emits `DISPATCH: <role>` naming one of the lead/sidekick
+        // roles. Routes through the Fast slot so adding it costs
+        // milliseconds, not seconds. Falls back to the heuristic
+        // ladder when chief is unavailable (no providers, gateway
+        // down) so dispatch never hangs.
+        OperatorRole {
+            name: "chief".into(),
+            kind: RoleKind::Shadow,
+            slot: Activity::Fast,
+            model_override: None,
+            prompt: CHIEF_PROMPT.trim().to_string(),
+            permissions: Clearance::deny_all(),
+            steps: None,
+            hidden: true,
+        },
+        // `fixer` is the default `th` experience: full tool access,
+        // Coding-slot routing. Its prompt is the coding system prompt
+        // that `coding_workflow` resolves by name.
+        OperatorRole {
+            name: "fixer".into(),
+            kind: RoleKind::Lead,
+            slot: Activity::Coding,
+            model_override: None,
+            prompt: FIXER_PROMPT.trim().to_string(),
+            permissions: Clearance::default(),
+            steps: None,
+            hidden: false,
+        },
+        // `oracle` is pure reasoning — no bash, no mutation.
+        OperatorRole {
+            name: "oracle".into(),
+            kind: RoleKind::Lead,
+            slot: Activity::Reasoning,
+            model_override: None,
+            prompt: ORACLE_PROMPT.trim().to_string(),
+            permissions: Clearance {
+                allow_tools: read_only_tools(),
+                deny_tools: vec![
+                    "edit_file".into(),
+                    "write_file".into(),
+                    "apply_patch".into(),
+                    "bash".into(),
+                    "bg_run".into(),
+                    "http_fetch".into(),
+                ],
+            },
+            steps: None,
+            hidden: false,
+        },
+    ]
+}
+
+/// Build a [`Cast`] populated with the engine's generic built-in roles
+/// (`tagger`, `presser`, `recapper`, `mapper`, `heckler`, `scout`,
+/// `runner`) PLUS the four smooth coding-harness roles (`fixer`, `oracle`,
+/// `chief`, `intent_classifier`).
+///
+/// Drop-in replacement for `smooth_operator::cast::Cast::builtin()` for
+/// smooth call sites that depend on the coding-harness roles being present.
+pub fn builtin() -> Cast {
+    let mut cast = Cast::builtin();
+    for role in smooth_roles() {
+        cast.register(role);
+    }
+    cast
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn builtin_registers_the_four_harness_roles() {
+        let cast = builtin();
+        for name in ["fixer", "oracle", "chief", "intent_classifier"] {
+            assert!(cast.get(name).is_some(), "role '{name}' must be registered");
+        }
+    }
+
+    #[test]
+    fn builtin_keeps_the_generic_engine_roles() {
+        let cast = builtin();
+        for name in ["tagger", "presser", "recapper", "mapper", "heckler", "scout", "runner"] {
+            assert!(cast.get(name).is_some(), "generic engine role '{name}' must survive");
+        }
+    }
+
+    #[test]
+    fn fixer_is_a_coding_lead_with_bash() {
+        let cast = builtin();
+        let fixer = cast.get("fixer").expect("fixer registered");
+        assert_eq!(fixer.kind, RoleKind::Lead);
+        assert!(matches!(fixer.slot, Activity::Coding));
+        assert!(fixer.permissions.allows("bash"), "fixer must allow bash");
+    }
+
+    #[test]
+    fn oracle_is_read_only() {
+        let cast = builtin();
+        let oracle = cast.get("oracle").expect("oracle registered");
+        assert!(!oracle.permissions.allows("bash"), "oracle must deny bash");
+        assert!(!oracle.permissions.allows("edit_file"), "oracle must deny edit_file");
+        assert!(oracle.permissions.allows("read_file"), "oracle must allow read_file");
+    }
+}
diff --git a/crates/smooth-cast/src/coding_workflow.rs b/crates/smooth-cast/src/coding_workflow.rs
new file mode 100644
index 00000000..e7abda10
--- /dev/null
+++ b/crates/smooth-cast/src/coding_workflow.rs
@@ -0,0 +1,1958 @@
+//! Coding workflow — single-agent outer loop.
+//!
+//! The agent handles its own iteration (LLM → tool → LLM → …)
+//! via `Agent::run_with_channel`. We sit around that and do three
+//! things:
+//!
+//!   1. Snapshot the workspace when the failing-test count drops
+//!      — so a later turn can't regress past the best-seen state.
+//!   2. On not-green, feed the test output back into the next
+//!      turn's prompt so the agent has surgical failure context.
+//!   3. Stop when we're green, within a few failures of green
+//!      (more iteration is more likely to regress than improve),
+//!      over budget, or past the outer-iteration cap.
+//!
+//! We used to decompose into 7 phases (ASSESS / PLAN / EXECUTE /
+//! VERIFY / REVIEW / TEST / FINALIZE). That added a lot of prompt
+//! surface area and failure modes — the phase decomposition kept
+//! silent-short-circuiting at one detector or another and eating
+//! runs that should have kept going. A single-agent loop is
+//! smaller, easier to reason about, and matches the shape of
+//! tools like OpenCode that are maintained against coding
+//! benchmarks. We kept the parts that demonstrably help — the
+//! self-validation requirement in the system prompt, the
+//! best-state snapshot, the compile-error short-circuit — and
+//! dropped the per-phase dispatch.
+//!
+//! This module does NOT own the sandbox, the security hooks, or
+//! the tool registry — the caller assembles those and hands them
+//! in.
+
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+
+use anyhow::Context;
+use tokio::sync::mpsc::UnboundedSender;
+
+use tokio::sync::mpsc::UnboundedReceiver;
+
+use crate::cast::builtin as cast_builtin;
+use smooth_operator::agent::{Agent, AgentConfig, AgentEvent, InjectedMessage};
+use smooth_operator::cost::CostBudget;
+use smooth_operator::providers::ProviderRegistry;
+use smooth_operator::tool::ToolRegistry;
+
+/// Input to `run_coding_workflow`.
+pub struct CodingWorkflowConfig {
+    /// Stable id for the operator running this workflow — echoed
+    /// into every AgentEvent.
+    pub operator_id: String,
+    /// The task prompt the user gave.
+    pub task_prompt: String,
+    /// Provider registry — used to resolve the Coding slot.
+    pub registry: Arc<ProviderRegistry>,
+    /// Tool registry the agent will use.
+    pub tools: ToolRegistry,
+    /// Optional global budget cap across the whole workflow.
+    pub budget_usd: Option<f64>,
+    /// Max outer-loop iterations. Each iteration is one full
+    /// `Agent::run_with_channel` call; the agent itself iterates
+    /// internally via tool calls. 5 is usually plenty — if the
+    /// agent can't converge in 5 full turns with failure context,
+    /// another turn is unlikely to help.
+    pub max_outer_iterations: u32,
+    /// Skip any post-implementation test-augmentation phase.
+    /// Kept in the config for API stability, currently ignored —
+    /// the single-agent loop doesn't have a separate TEST phase.
+    pub skip_test_phase: bool,
+    /// Event sink — every AgentEvent from the agent flows here.
+    pub tx: UnboundedSender<AgentEvent>,
+    /// Workspace root (bind-mounted at /workspace inside the
+    /// sandbox). Used to snapshot the best-seen state and restore
+    /// it on regression. `None` skips snapshotting.
+    pub workspace_root: Option<PathBuf>,
+    /// Optional injection channel for mailbox messages — passed to every
+    /// inner Agent so steering/chat/answers from the lead reach a running
+    /// teammate without needing to restart the workflow. `None` keeps
+    /// the agent isolated (current behaviour for non-pearl-attached runs).
+    pub chat_rx: Option<Arc<tokio::sync::Mutex<UnboundedReceiver<InjectedMessage>>>>,
+    /// Pearl th-e182bc: when the runner's caller detected cleanup
+    /// intent in the prior conversation (the README that started
+    /// the task), this carries that hint through to the workflow.
+    /// `build_user_prompt` uses it to apply the cleanup preamble
+    /// on CONTINUATION turns where the current `task_prompt` is a
+    /// bare confirmation ("yes, proceed") and would otherwise miss
+    /// the cleanup-intent detection. Pure additive: defaults false,
+    /// no behavior change for non-runner callers.
+    pub cleanup_intent_hint: bool,
+}
+
+/// Run the workflow end-to-end. Returns the accumulated cost.
+pub async fn run_coding_workflow(cfg: CodingWorkflowConfig) -> anyhow::Result<f64> {
+    // Pull the fixer role definition from the cast so the prompt
+    // lives in one place (`src/prompts/fixer.txt`) and the slot
+    // comes from the role's `slot` field instead of being hard-coded
+    // here. The `fixer` role is always present in the smooth cast
+    // (`crate::cast::builtin()`) — the generic engine's `Cast::builtin()`
+    // dropped it at 0.14.0, so smooth re-homes it. If it ever isn't,
+    // something is badly wrong and we want a loud failure, not a silent
+    // fallback.
+    let cast = cast_builtin();
+    let fixer_role = cast
+        .get("fixer")
+        .context("missing 'fixer' role in smooth cast — did smooth_cast::cast::builtin change?")?;
+    let code_prompt = fixer_role.prompt.clone();
+    let code_slot = fixer_role.slot;
+
+    let llm_config = cfg.registry.llm_config_for(code_slot).context("resolving coding slot → LLM config")?;
+    let coding_slot = cfg.registry.routing.slot_for(code_slot);
+    let alias = coding_slot.model.clone();
+
+    let mut total_cost_usd = 0.0_f64;
+    let mut total_prompt_tokens = 0u64;
+    let mut total_completion_tokens = 0u64;
+    let mut total_cached_tokens = 0u64;
+    let mut last_verify_output: Option<String> = None;
+    let mut best_failed_count: Option<u32> = None;
+    let mut snapshot_taken = false;
+    // Pearl th-bench-loop iter 2: track NoEvidence retries. The
+    // agent's first turn often skips the test run entirely (saw
+    // 0 bash invocations in real bench runs). One retry with a
+    // forcing prompt that demands an explicit test invocation
+    // catches most of those before we give up.
+    let mut no_evidence_retries: u32 = 0;
+    const MAX_NO_EVIDENCE_RETRIES: u32 = 1;
+
+    let iter_cap = cfg.max_outer_iterations.max(1);
+    let mut iteration = 0u32;
+    let mut succeeded = false;
+
+    for _ in 0..iter_cap {
+        iteration += 1;
+
+        let _ = cfg.tx.send(AgentEvent::PhaseStart {
+            phase: "CODING".into(),
+            alias: alias.clone(),
+            upstream: None,
+            iteration,
+        });
+
+        let user_prompt = build_user_prompt_with_hint(&cfg.task_prompt, iteration, last_verify_output.as_deref(), cfg.cleanup_intent_hint);
+
+        // Inner iteration cap. Agent can take a lot of tool-call turns
+        // internally; default is 80 but `SMOOTH_WORKFLOW_AGENT_MAX_ITERATIONS`
+        // lets benchmark/diagnostic runs shorten the feedback loop.
+        let agent_max_iter: u32 = std::env::var("SMOOTH_WORKFLOW_AGENT_MAX_ITERATIONS")
+            .ok()
+            .and_then(|v| v.parse().ok())
+            .unwrap_or(80);
+        let mut agent_config =
+            AgentConfig::new(format!("{}/coding-{}", cfg.operator_id, iteration), code_prompt.clone(), llm_config.clone()).with_max_iterations(agent_max_iter);
+        if let Some(rx) = cfg.chat_rx.clone() {
+            agent_config = agent_config.with_chat_rx(rx);
+        }
+        if let Some(cap) = cfg.budget_usd {
+            let remaining = (cap - total_cost_usd).max(0.0);
+            agent_config = agent_config.with_budget(CostBudget {
+                max_cost_usd: Some(remaining),
+                max_tokens: None,
+            });
+        }
+
+        let agent = Agent::new(agent_config, cfg.tools.clone());
+        let mut conversation = agent.run_with_channel(user_prompt, cfg.tx.clone()).await?;
+
+        let (turn_cost, turn_prompt_tokens, turn_completion_tokens, turn_cached_tokens) = {
+            let tracker = agent.cost_tracker.lock().expect("cost_tracker lock");
+            (
+                tracker.total_cost_usd,
+                tracker.total_prompt_tokens,
+                tracker.total_completion_tokens,
+                tracker.total_cached_tokens,
+            )
+        };
+        total_cost_usd += turn_cost;
+        total_prompt_tokens += turn_prompt_tokens;
+        total_completion_tokens += turn_completion_tokens;
+        total_cached_tokens += turn_cached_tokens;
+
+        // Pull the agent's final assistant message — used for
+        // failure-context feedback into the next turn's prompt.
+        let transcript = summarize_conversation(&conversation);
+        last_verify_output = Some(transcript.clone());
+
+        // Pearl th-7cf405 / th-ed7bfa: trust evidence, not claims.
+        // The assistant's prose can fabricate "31 passed, 0 failed"
+        // without ever running a test; only believe a tool-result
+        // message produced by `bash` / `test_run`.
+        let evidence = verify_with_evidence(&conversation);
+
+        // Pearl th-bf62c0 / th-bench-loop iter 9: if the conversation
+        // contains a compile-error tool output AND we still have
+        // iterations to spend, force one more turn with the compile-fix
+        // preamble REGARDLESS of the evidence verdict. The
+        // `detect_compile_error` short-circuit in `build_user_prompt`
+        // only fires when the workflow loops back; with `EvidencedPass`
+        // or unhandled `NoEvidence` paths the loop can exit on iter 1
+        // even though the agent shipped uncompilable code. Catch that
+        // here before any break.
+        if iteration < iter_cap {
+            if let Some(_err) = detect_compile_error(&transcript) {
+                tracing::info!(iteration, "coding workflow: compile error in transcript — forcing one more iteration");
+                last_verify_output = Some(transcript.clone());
+                continue;
+            }
+            // Also scan the actual tool-result messages for compile
+            // errors. The transcript above is just the final assistant
+            // prose; the cargo/go/javac output lives in the tool-result
+            // messages and is what we actually want to feed back.
+            if let Some(err_chunk) = first_compile_error_in_tools(&conversation) {
+                tracing::info!(iteration, "coding workflow: compile error in tool output — forcing one more iteration");
+                last_verify_output = Some(err_chunk);
+                continue;
+            }
+        }
+
+        match evidence {
+            VerifyEvidence::EvidencedPass => {
+                succeeded = true;
+                tracing::info!(iteration, "coding workflow: tool evidence shows green, stopping");
+                break;
+            }
+            VerifyEvidence::EvidencedFail(_) => {
+                // Stay in the loop and feed failure context forward.
+            }
+            VerifyEvidence::NoEvidence => {
+                // No bash / test_run ever ran this turn. Three
+                // possibilities:
+                //  1. The task didn't require code at all — pure
+                //     THINK mode ("how would you do X"). No edits,
+                //     no tests, just an answer.
+                //  2. The agent edited files but skipped tests
+                //     (the dominant benchmark-dispatch failure
+                //     mode, pearl th-bench-loop iter 2).
+                //  3. The task required code but the model gave
+                //     up before doing either.
+                //
+                // Retry-with-forcing-prompt only helps case (2).
+                // For case (1) the forcing prompt is a non-sequitur
+                // ("you edited but never ran tests") and surfaces
+                // as a confusing redaction notice to the user. So
+                // check: if the agent didn't edit ANYTHING this
+                // turn either, treat it as THINK mode and exit
+                // cleanly without the retry.
+                //
+                // Pearl th-fixer-think-mode (user 2026-05-10):
+                // "fixer always hallucinates tests, he should be a
+                // thinker too" — this is the workflow half of that
+                // fix; the prompt half lives in fixer.txt.
+                let made_edits = conversation_made_edits(&conversation);
+                let did_destructive_bash = conversation_did_destructive_bash(&conversation);
+                let cleanup_intent = is_cleanup_intent(&cfg.task_prompt);
+                if !made_edits && !did_destructive_bash {
+                    // Pearl `th-e93cba`: if the user asked for cleanup
+                    // / ops (delete X, prune Y, remove debris), skip
+                    // the "this is a code task, write code" reprompt
+                    // entirely. That reprompt was designed for code
+                    // benchmarks (aider-polyglot etc.) and on cleanup
+                    // tasks it triggered the agent to fabricate tests
+                    // and pivot to test-fix narrative even when the
+                    // user clearly asked for filesystem operations.
+                    if cleanup_intent {
+                        tracing::info!(
+                            iteration,
+                            "coding workflow: cleanup intent detected in user prompt, no agent actions yet — exiting cleanly without 'this is a code task' reprompt"
+                        );
+                        break;
+                    }
+                    // Pearl th-fc8a51: on the FIRST iteration with no
+                    // edits AND no test runs, retry once with a strong
+                    // forcing prompt before falling back to THINK mode.
+                    // The original "exit immediately as THINK" path was
+                    // designed for chat questions, but for dispatched
+                    // code tasks an agent that just read the
+                    // INSTRUCTIONS.md and returned without coding is a
+                    // give-up, not a thinker. cpp/bank-account hit this
+                    // on bench sweep b32wx055q: 23s, $0.0001, 0 edits,
+                    // FAIL — when the same task with the same model
+                    // SOLVED 17/17 on a focused rerun.
+                    if iteration == 1 && no_evidence_retries < MAX_NO_EVIDENCE_RETRIES {
+                        no_evidence_retries += 1;
+                        tracing::info!(
+                            iteration,
+                            retry = no_evidence_retries,
+                            "coding workflow: no edits + no tests on iter 1 — forcing one retry before THINK-mode exit"
+                        );
+                        last_verify_output = Some(
+                            "Your previous turn made no edits to any source file. This is a code task — you need to actually implement the solution. Read the source files (the stub plus the test file), then use edit_file or bash to write the implementation, then run the project's test command via `bash`. Do not return until you've at least attempted both.".to_string(),
+                        );
+                        continue;
+                    }
+                    tracing::info!(
+                        iteration,
+                        "coding workflow: no test-run evidence AND no edits — treating as THINK mode, exiting cleanly"
+                    );
+                    break;
+                }
+                // Pearl `th-e93cba`: when the agent did destructive
+                // ops via `bash` (rm -rf, find -delete, etc.) but
+                // DIDN'T also edit source files, this was a cleanup
+                // task — `rm -rf __pycache__` doesn't need test
+                // verification. Exit cleanly instead of reprompting
+                // with "you didn't run tests", which made the agent
+                // fabricate test files and pivot to test-fix narrative
+                // on cleanup-pycache-debris and similar fixtures.
+                if did_destructive_bash && !made_edits {
+                    tracing::info!(
+                        iteration,
+                        "coding workflow: destructive bash ops without source edits — cleanup task, exiting cleanly without test-forcing reprompt"
+                    );
+                    break;
+                }
+                // Pearl `th-e93cba`: skip the "run the test suite"
+                // reprompt on cleanup-intent tasks too. Even when the
+                // agent makes incidental `edit_file` calls during a
+                // cleanup (e.g., updating a .gitignore), the workflow
+                // shouldn't force test runs that don't apply.
+                if cleanup_intent {
+                    tracing::info!(
+                        iteration,
+                        "coding workflow: cleanup intent detected — exiting cleanly without 'run the test suite' reprompt"
+                    );
+                    break;
+                }
+                if no_evidence_retries < MAX_NO_EVIDENCE_RETRIES {
+                    no_evidence_retries += 1;
+                    tracing::info!(
+                        iteration,
+                        retry = no_evidence_retries,
+                        "coding workflow: no test-run evidence — re-prompting with forcing directive"
+                    );
+                    last_verify_output = Some(
+                        "Your previous turn edited the code but never ran the test suite. Before doing anything else this turn, run the project's test command via `bash` (cargo test / pytest / pnpm test / etc.) and report the actual output. The implementation is unverified until you do.".to_string(),
+                    );
+                    continue;
+                }
+                tracing::info!(iteration, "coding workflow: no test-run evidence after retry, exiting");
+                if detect_verify_pass(&transcript) {
+                    // Pearl iter-10/11: the assistant claimed pass
+                    // without evidence. Three actions:
+                    //
+                    // 1. tracing::warn for log retention.
+                    // 2. [cast-summary] stderr line — surfaced
+                    //    by the runner stderr forward when
+                    //    /verbose is on.
+                    // 3. APPEND a TokenDelta to the live event
+                    //    stream so the user sees the correction
+                    //    INLINE in the streamed chat. The
+                    //    streaming tokens already shipped — we
+                    //    can't unsay them — but we can append a
+                    //    correction the user sees alongside.
+                    // 4. Mutate `conversation.messages` so saved
+                    //    sessions don't preserve the lie either.
+                    tracing::warn!(iteration, "coding workflow: assistant claimed pass with NO tool evidence — likely hallucinated");
+                    eprintln!("[cast-summary] WARNING: assistant claimed test pass without evidence — no `bash` / `test_run` tool actually ran this turn.");
+                    let correction = "\n\n---\n\n⚠️  **Correction:** the agent's `## Test Results` claim above is unverified — no `bash` / `test_run` tool actually ran this turn. The change above may be correct on its own merits but was not validated by the test suite. Run the tests yourself before trusting the result.\n";
+                    let _ = cfg.tx.send(AgentEvent::TokenDelta { content: correction.into() });
+                    redact_hallucinated_test_claims(&mut conversation);
+                }
+                break;
+            }
+        }
+
+        // Snapshot the workspace when this turn was the best so
+        // far. If the agent never reports a count, we still snap
+        // the first turn so a later regression has something to
+        // restore to.
+        let current_failed = extract_failed_count(&transcript);
+        let improved = match (current_failed, best_failed_count) {
+            (Some(now), Some(best)) => now < best,
+            (Some(_), None) => true,
+            (None, _) if !snapshot_taken => true, // first turn, unknown count
+            _ => false,
+        };
+        if improved {
+            if let Some(ref ws) = cfg.workspace_root {
+                match snapshot_workspace(ws, &best_snapshot_dir(ws)) {
+                    Ok(()) => {
+                        snapshot_taken = true;
+                        if let Some(now) = current_failed {
+                            best_failed_count = Some(now);
+                        }
+                        tracing::info!(iteration, failed = current_failed, "coding workflow: snapshotted best-seen workspace");
+                    }
+                    Err(e) => tracing::warn!(error = %e, "coding workflow: snapshot failed"),
+                }
+            }
+        }
+
+        // Close-to-green stop. When we've seen a turn at ≤3 failures
+        // and this turn didn't improve on it, another cycle is more
+        // likely to regress than close the gap.
+        if let Some(best) = best_failed_count {
+            if best <= CLOSE_TO_GREEN_THRESHOLD && !improved {
+                tracing::info!(iteration, best_failed = best, "coding workflow: close to green, stopping before regression");
+                break;
+            }
+        }
+
+        // Budget check: next turn would blow the cap.
+        if let Some(cap) = cfg.budget_usd {
+            if cap > 0.0 && total_cost_usd > 0.0 {
+                let per_iter = total_cost_usd / f64::from(iteration);
+                if total_cost_usd + per_iter >= cap {
+                    tracing::info!(spent = total_cost_usd, cap, "coding workflow: budget exhausted");
+                    break;
+                }
+            }
+        }
+    }
+
+    // Restore the best-seen workspace if a later turn regressed.
+    if !succeeded {
+        if let (Some(ref ws), Some(best), true) = (&cfg.workspace_root, best_failed_count, snapshot_taken) {
+            let final_failed = extract_failed_count(last_verify_output.as_deref().unwrap_or(""));
+            let regressed = final_failed.is_some_and(|n| n > best);
+            let snap = best_snapshot_dir(ws);
+            if regressed && snap.is_dir() {
+                match restore_workspace(&snap, ws) {
+                    Ok(()) => tracing::info!(best_failed = best, "coding workflow: restored workspace to best-seen state"),
+                    Err(e) => tracing::warn!(error = %e, "coding workflow: restore failed"),
+                }
+            }
+        }
+    }
+
+    // Remove the snapshot so it doesn't leak into the scorer's
+    // view of the workspace or a follow-up run on the same dir.
+    if let Some(ref ws) = cfg.workspace_root {
+        let snap = best_snapshot_dir(ws);
+        if snap.is_dir() {
+            let _ = std::fs::remove_dir_all(&snap);
+        }
+    }
+
+    let _ = cfg.tx.send(AgentEvent::Completed {
+        agent_id: cfg.operator_id.clone(),
+        iterations: iteration,
+        cost_usd: total_cost_usd,
+        prompt_tokens: total_prompt_tokens,
+        completion_tokens: total_completion_tokens,
+        cached_tokens: total_cached_tokens,
+    });
+
+    Ok(total_cost_usd)
+}
+
+/// Stop escalating when we're this close to green — more
+/// iteration is more likely to regress than close the gap.
+///
+/// `run_coding_workflow` compares the best failed-test count seen so
+/// far against this value: once that count is at or below the
+/// threshold and the latest turn did not improve on it, the outer
+/// loop stops instead of spending another iteration.
+const CLOSE_TO_GREEN_THRESHOLD: u32 = 3;
+
+// The coding system prompt lives in `crates/smooth-cast/src/prompts/fixer.txt`
+// and is loaded by the smooth cast (`crate::cast::builtin()`) via `include_str!`.
+// The workflow resolves it at the top of `run_coding_workflow` so adding a
+// new prompt-aware role there gives all call sites the same text.
+
+/// Build the user-message prompt for a given outer iteration.
+///
+/// Test-only convenience wrapper: forwards `task`, `iteration` and
+/// `prior_output` to [`build_user_prompt_with_hint`] with the cleanup
+/// hint switched off (`false`).
+///
+/// Pearl iter-7 finding: the iteration-1 prompt used to ALWAYS append
+/// "Implement the solution, run the test suite, and iterate until
+/// green." That framing actively pushed the model toward green-field
+/// implementation regardless of what the user actually asked. "Make
+/// App.tsx better" became "Make App.tsx better // Implement the
+/// solution // iterate until green" → agent rewrote the whole file,
+/// added main.tsx, overwrote tsconfig.json. Same shape on "delete the
+/// src directory" → agent deleted, then re-implemented.
+///
+/// Now the iteration-1 prompt is the user's task verbatim, except
+/// that [`build_user_prompt_with_hint`] still prepends its cleanup
+/// preamble when `is_cleanup_intent(task)` matches. The fixer
+/// system prompt already covers the "run the test suite before final
+/// summary" discipline; we don't need to re-state it per turn at the
+/// cost of confusing the model on non-test-driven tasks.
+// Only the test suite calls the 3-arg convenience wrapper; the runtime path
+// always goes through `build_user_prompt_with_hint` directly.
+#[cfg(test)]
+fn build_user_prompt(task: &str, iteration: u32, prior_output: Option<&str>) -> String {
+    build_user_prompt_with_hint(task, iteration, prior_output, false)
+}
+
+/// Build the user-message prompt for outer iteration `iteration`.
+///
+/// * `task` — the user's request text; always part of the result.
+/// * `iteration` — 1-based outer-loop counter.
+/// * `prior_output` — feedback carried over from the previous turn
+///   (test output, a compile-error chunk, or a synthetic forcing
+///   directive). Ignored on iteration 1.
+/// * `cleanup_intent_hint` — set by the caller when the task was
+///   already flagged as cleanup-intent on an earlier turn.
+///
+/// Iteration 1 returns, in priority order:
+///   1. the cleanup preamble for a confirmation reply, when
+///      `cleanup_intent_hint` is set and `is_confirmation_reply(task)`;
+///   2. the cleanup preamble for a fresh cleanup request, when
+///      `is_cleanup_intent(task)`;
+///   3. `task` verbatim.
+///
+/// Later iterations return, in priority order:
+///   1. a workflow forcing directive passed through as-is plus a task
+///      reminder, when `prior_output` starts with one of the synthetic
+///      directives the workflow injects;
+///   2. a compile-fix preamble, when `detect_compile_error` finds an
+///      error in `prior_output`;
+///   3. the "tests still failing" preamble with the first 3000
+///      characters of `prior_output`.
+#[allow(clippy::fn_params_excessive_bools)] // 1 bool + 1 u32 + 2 strs is fine
+fn build_user_prompt_with_hint(task: &str, iteration: u32, prior_output: Option<&str>, cleanup_intent_hint: bool) -> String {
+    // Openings of the synthetic directives the workflow's NoEvidence
+    // retry paths write into `prior_output`. Neither one is test
+    // output, so neither may be framed as "previous test output".
+    const FORCING_DIRECTIVE_PREFIXES: [&str; 2] = [
+        "Your previous turn edited the code but never ran the test suite.",
+        "Your previous turn made no edits to any source file.",
+    ];
+
+    if iteration == 1 {
+        // Pearl th-e182bc: continuation-turn confirmation on a task
+        // the runner's caller flagged as cleanup-intent. Re-applies
+        // the (known-good) cleanup preamble so the agent doesn't
+        // pivot to test-fix or fabricate a wholly new task on
+        // turn 2. Cross-fixture confabulation root cause
+        // (e.g. `find -size +150k -delete` misfired on a
+        // node-modules orphan task) is the SAME failure mode
+        // [`is_cleanup_intent`] addresses on the planning turn.
+        if cleanup_intent_hint && is_confirmation_reply(task) {
+            return format!(
+                "[bench/workflow note: this is a FILESYSTEM CLEANUP task, not a code-fix or test-fix task. Do NOT write source files. Do NOT create test files. Do NOT run tests. The fixer system prompt's test-related guidance does NOT apply here.\n\nIgnore any source files (`*.py`, `*.rs`, `*.ts`, `main.*`, `lib.*`, etc.) you see in the workspace unless the user's request below explicitly mentions them — they are PROBABLY scope-discipline traps (files you must NOT delete), not invitations to start coding or running tests. Treat the user's request text as the sole source of truth for what to do.\n\nThe user is confirming a plan you enumerated in a PRIOR assistant turn — find that plan in the conversation history and execute it via `bash`. Pearl `th-e182bc`.]\n\n{task}"
+            );
+        }
+        // Pearl `th-e93cba` round 2: when the user's prompt looks like
+        // a filesystem cleanup task, prepend an explicit context-setter.
+        // Without it, the model — even with the workflow-level
+        // intent-detection gate — would pattern-match on fixer.txt's
+        // heavy test-related guidance and fabricate a test-fix
+        // narrative ("I added a test file src/pkg/test_util.py and
+        // the tests passed") on a cleanup ask. The bare prompt isn't
+        // strong enough counter-pressure; this directly tells the
+        // model what kind of task this is and which fixer guidance
+        // doesn't apply.
+        if is_cleanup_intent(task) {
+            return format!(
+                "[bench/workflow note: this is a FILESYSTEM CLEANUP task, not a code-fix or test-fix task. Do NOT write source files. Do NOT create test files. Do NOT run tests. The fixer system prompt's test-related guidance does NOT apply here.\n\nIgnore any source files (`*.py`, `*.rs`, `*.ts`, `main.*`, `lib.*`, etc.) you see in the workspace unless the user's request below explicitly mentions them — they are PROBABLY scope-discipline traps (files you must NOT delete), not invitations to start coding or running tests. Treat the user's request text as the sole source of truth for what to do.\n\nJust discover the targets named in the user's request, enumerate them in your text response, ask for confirmation, then delete them via `bash` once approved. Pearl `th-81cd84`.]\n\n{task}"
+            );
+        }
+        return task.to_string();
+    }
+    let prior = prior_output.unwrap_or("(no prior output)");
+    // Pearl th-bench-loop iter 2 / th-fc8a51: the NoEvidence retry
+    // paths inject a synthetic directive ("you didn't run tests" or
+    // "you made no edits") into prior_output. When we see one of
+    // those exact preambles, pass it through as the nudge instead of
+    // wrapping it in the standard fix-the-failures preamble — there
+    // were no failures captured because no test ever ran.
+    if FORCING_DIRECTIVE_PREFIXES.iter().any(|prefix| prior.starts_with(*prefix)) {
+        return format!("{prior}\n\n## Task (reminder)\n\n{task}");
+    }
+    let compile_err = detect_compile_error(prior);
+    let preamble = if let Some(err) = compile_err {
+        format!(
+            "Your previous attempt shipped code that does not compile / parse. Before doing anything else, fix the syntax. The usual cause is a duplicated class body or extra content appended after the module's export. \n\n## Compile error\n\n{err}\n\n"
+        )
+    } else {
+        // Cap the echoed output at 3000 characters so a long test log
+        // doesn't crowd the task reminder out of the prompt.
+        format!(
+            "Your previous attempt left some tests failing. The output from your last test run is below. Keep every test that's currently passing passing — most test regressions come from rewriting code that was working. Make a targeted patch that closes the specific failures.\n\n## Previous test output (truncated)\n\n{}\n\n",
+            prior.chars().take(3000).collect::<String>()
+        )
+    };
+    format!("{preamble}## Task (reminder)\n\n{task}\n\nFix the remaining failures and re-run the tests. Finish with a `## Test Results` line.")
+}
+
+// ---------------------------------------------------------------------------
+// Helpers: test-result parsing, compile-error detection, snapshots.
+// These are the same helpers the old multi-phase workflow used;
+// they carry their own unit tests below and don't care whether
+// the surrounding loop is one phase or seven.
+// ---------------------------------------------------------------------------
+
+/// Return the text of the most recent assistant message in `conv`,
+/// or an empty string when the conversation has no assistant message.
+fn summarize_conversation(conv: &smooth_operator::conversation::Conversation) -> String {
+    use smooth_operator::conversation::Role;
+
+    // Search from the back: the newest assistant reply is the one
+    // that carries the turn's final prose.
+    match conv.messages.iter().rfind(|m| matches!(m.role, Role::Assistant)) {
+        Some(last_reply) => last_reply.content.clone(),
+        None => String::new(),
+    }
+}
+
+/// What the evidence in the conversation says about this turn —
+/// not what the assistant *claims*. Pearl th-7cf405 / th-ed7bfa:
+/// the workflow used to trust the assistant's `## Test Results: 31
+/// passed, 0 failed` line verbatim, which made hallucinated
+/// success indistinguishable from real success. We now require an
+/// actual `bash` / `test_run` tool-result message in the
+/// conversation whose output contains a recognizable test summary.
+///
+/// Produced by [`verify_with_evidence`], which also accepts
+/// tool-result messages whose tool name is `shell`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum VerifyEvidence {
+    /// A tool actually ran and reported a green test suite.
+    EvidencedPass,
+    /// A tool actually ran and reported failures. `Some(n)` if a
+    /// failure count was parseable; `None` if the output looked
+    /// red but didn't include a count we could extract.
+    /// [`verify_with_evidence`] also reports `None` here when the
+    /// output shows every test was skipped, since nothing ran.
+    EvidencedFail(Option<u32>),
+    /// No bash / test_run tool call ever happened in this turn.
+    /// The assistant either did nothing (silently passed text
+    /// back) or hallucinated a result it never observed. Both are
+    /// "no work was actually done" — caller decides whether to
+    /// retry or exit gracefully.
+    NoEvidence,
+}
+
+/// Rewrite the last assistant message of `conv` so it no longer
+/// carries fabricated "X passed, Y failed" / "ALL TESTS PASS" claims;
+/// an honest annotation takes their place. Pearl iter-10: a stderr
+/// WARNING alone wasn't enough — the lie still appeared verbatim in
+/// the chat, so users could miss the warning and trust the false
+/// claim. This edits the stored message itself.
+///
+/// The text transformation is delegated to
+/// [`redact_fabricated_test_results`]. A conversation without any
+/// assistant message is left untouched.
+pub fn redact_hallucinated_test_claims(conv: &mut smooth_operator::conversation::Conversation) {
+    use smooth_operator::conversation::Role;
+
+    // The user-visible final answer is the newest assistant message,
+    // so search from the back.
+    let final_answer = conv.messages.iter_mut().rfind(|m| matches!(m.role, Role::Assistant));
+    if let Some(answer) = final_answer {
+        answer.content = redact_fabricated_test_results(&answer.content);
+    }
+}
+
+/// String-only version of the redactor — pulled out for tests.
+/// Pure function so the unit suite can pin every shape we know
+/// the model produces.
+///
+/// Returns `content` with fabricated test-result claims removed and
+/// a single "NOT RUN" notice inserted where the first claim stood:
+///   - every `## Test Results` / `# Test Results` / `Test Results`
+///     heading together with the lines beneath it, up to the next
+///     `# ` / `## ` heading;
+///   - standalone count lines ("31 passed, 0 failed",
+///     "test result: ok. 5 passed; 0 failed"), matched
+///     case-insensitively;
+///   - "ALL TESTS PASS" marker lines.
+///
+/// When no line matched but `detect_verify_pass` still reads the
+/// text as green, the notice is appended at the end instead.
+///
+/// Lines are re-joined with `\n`, so a trailing newline in `content`
+/// is not preserved.
+#[must_use]
+pub fn redact_fabricated_test_results(content: &str) -> String {
+    const NOTICE: &str = "⚠️  Test Results: NOT RUN — the agent did not actually execute the test suite this turn. The change above may be correct but is unverified. Run the tests yourself before trusting it.";
+    const HEADINGS: [&str; 3] = ["## test results", "test results", "# test results"];
+
+    // Patterns handled:
+    //   - "## Test Results\n\n31 passed, 0 failed"
+    //   - "## Test Results\n\nALL TESTS PASS"
+    //   - "Test Results: 31 passed, 0 failed"
+    //   - bare "31 passed, 0 failed" line at end of content
+    let lines: Vec<&str> = content.lines().collect();
+    let mut out: Vec<String> = Vec::with_capacity(lines.len() + 2);
+    let mut in_test_results_block = false;
+    let mut redacted_block = false;
+    for line in &lines {
+        let trimmed = line.trim();
+        // Heading variants. Every test-results heading opens a
+        // swallowed block, even after an earlier bare count line has
+        // already produced the notice — otherwise the heading and
+        // the fabricated body under it would survive. The notice
+        // itself is still emitted only once.
+        let is_heading = HEADINGS.iter().any(|heading| trimmed.eq_ignore_ascii_case(heading));
+        if is_heading {
+            in_test_results_block = true;
+            if !redacted_block {
+                out.push(NOTICE.to_string());
+                redacted_block = true;
+            }
+            continue;
+        }
+        if in_test_results_block {
+            // Keep swallowing lines until a new heading (`# ...` /
+            // `## ...`) starts a different section; that heading is
+            // kept.
+            if trimmed.starts_with("## ") || trimmed.starts_with("# ") {
+                in_test_results_block = false;
+                out.push((*line).to_string());
+            }
+            continue;
+        }
+        // Bare "X passed, Y failed" / "X passed; Y failed" /
+        // "ALL TESTS PASS" lines. Compare on the upper-cased line so
+        // mixed-case spellings ("31 Passed, 0 Failed") are caught too.
+        let upper = trimmed.to_ascii_uppercase();
+        let looks_like_count = (upper.contains("PASSED, ") || upper.contains("PASSED; ")) && upper.contains("FAILED");
+        let looks_like_marker = upper.contains("ALL TESTS PASS") || upper == "TEST RESULT: OK";
+        if looks_like_count || looks_like_marker {
+            // Drop the line. Emit the full notice in its place once
+            // if we haven't already (e.g. when there's no
+            // "## Test Results" heading).
+            if !redacted_block {
+                out.push(NOTICE.to_string());
+                redacted_block = true;
+            }
+            continue;
+        }
+        out.push((*line).to_string());
+    }
+    let result = out.join("\n");
+    // Edge case: content didn't have a heading or count line
+    // pattern but still looked green to detect_verify_pass (rare;
+    // happens when the model uses idiomatic phrasing like "all
+    // tests pass" embedded in prose). In that case, append the
+    // notice at the end so the reader at least sees the warning.
+    if !redacted_block && detect_verify_pass(content) {
+        return format!("{result}\n\n{NOTICE}");
+    }
+    result
+}
+
+/// Derive a [`VerifyEvidence`] verdict from tool output alone.
+///
+/// Scans the conversation's tool-role messages in order and keeps
+/// the verdict of the LAST one that looked like a test summary —
+/// the agent often runs the suite several times within one turn,
+/// and the latest run wins. Returns [`VerifyEvidence::NoEvidence`]
+/// when no qualifying tool message produced a recognizable summary.
+pub fn verify_with_evidence(conv: &smooth_operator::conversation::Conversation) -> VerifyEvidence {
+    use smooth_operator::conversation::Role;
+
+    // Only test-shaped tools produce evidence we believe. `bash` is
+    // the catch-all (the agent runs `pnpm test` / `cargo test` /
+    // `pytest` through it), `test_run` is the workflow's structured
+    // test tool when present, and `shell` is accepted as well. Other
+    // tool outputs (read_file, list_files, grep) don't count even if
+    // the user happened to grep for "PASS" somewhere.
+    let test_tool_outputs = conv
+        .messages
+        .iter()
+        .filter(|m| matches!(m.role, Role::Tool))
+        .filter(|m| matches!(m.tool_name.as_deref(), Some("bash" | "test_run" | "shell")))
+        .map(|m| &m.content);
+
+    let mut verdict = VerifyEvidence::NoEvidence;
+    for output in test_tool_outputs {
+        if looks_all_skipped(output) {
+            // Pearl th-bench-loop iter 13: "all tests skipped, 0 ran"
+            // is NOT a pass. Exercism JS uses `xtest()` and Java uses
+            // `@Disabled`; both default to skip, the runner exits 0
+            // with 0 ran / 0 failed, and nothing was verified. This
+            // check runs BEFORE the pass check because skip-only
+            // output may coincidentally match "0 failed" patterns.
+            verdict = VerifyEvidence::EvidencedFail(None);
+        } else if detect_verify_pass(output) {
+            verdict = VerifyEvidence::EvidencedPass;
+        } else {
+            // Explicit failure shapes. `nonzero_failure_count` is
+            // reused so the patterns the pass detection guards
+            // against also count as fail signals.
+            let shouted = output.to_uppercase();
+            let red = shouted.contains("TEST RESULT: FAILED")
+                || shouted.contains("TESTS FAILED")
+                || shouted.contains("TESTS FAIL")
+                || nonzero_failure_count(&shouted);
+            if red {
+                verdict = VerifyEvidence::EvidencedFail(extract_failed_count(output));
+            }
+            // Anything else leaves the verdict untouched: this tool
+            // call wasn't a test, or printed no recognizable summary.
+        }
+    }
+    verdict
+}
+
+/// True when test output indicates that (nearly) EVERY test was
+/// skipped — common when an exercism framework defaults to
+/// `@Disabled` / `xtest()` / `test.skip` and the student hasn't
+/// flipped them yet. Treat as failure-no-evidence (pearl
+/// th-bench-loop iter 13): 0 tests actually ran, the implementation
+/// is unverified.
+///
+/// Heuristics, matched case-insensitively (the transcript is
+/// upper-cased first) and tried in this order:
+///   - Gradle/JUnit per-line markers: at least 3 lines ending in
+///     `SKIPPED` and none ending in `PASSED` / `FAILED`
+///   - Dominant skip: `SKIPPED` lines outnumber `PASSED` lines by
+///     `3 * (passed + 1)` or more, with no `FAILED` lines
+///   - Jest / pytest: a literal zero count `0 passed` together with
+///     `skipped` anywhere in the output
+///   - Jest summary: "Tests: N skipped, K passed, M total" with N >> K
+///   - go test: "[no tests to run]" (Go has no skip annotation by
+///     default, but the no-tests case is the same problem)
+///   - pytest: "N skipped" (N > 0) with no ` passed` / ` failed` text
+pub fn looks_all_skipped(transcript: &str) -> bool {
+    let upper = transcript.to_uppercase();
+
+    // Gradle/JUnit: count of SKIPPED markers as inline test
+    // outcomes. Check FIRST because gradle lines don't have a
+    // numeric prefix the pytest-shape path would expect.
+    let skipped_lines = upper.lines().filter(|l| l.trim_end().ends_with("SKIPPED")).count();
+    let pass_lines = upper.lines().filter(|l| l.trim_end().ends_with("PASSED")).count();
+    let fail_lines = upper.lines().filter(|l| l.trim_end().ends_with("FAILED")).count();
+    if skipped_lines >= 3 && pass_lines == 0 && fail_lines == 0 {
+        return true;
+    }
+    // Dominant-skip: per-line gradle/jest output where SKIPPED
+    // outnumbers PASSED 3-to-1 and no failures fired. Pearl
+    // th-bench-loop iter 15: js/forth produced "48 skipped, 1
+    // passed" — the pure all-skipped check missed it because
+    // there was a single PASSED. Same root cause as iter 5
+    // js/binary (9 skipped, 1 passed): exercism flips one
+    // baseline test as a sentinel, leaves the rest skipped.
+    if skipped_lines >= 3 * (pass_lines + 1) && fail_lines == 0 && pass_lines < skipped_lines {
+        return true;
+    }
+
+    // Jest / pytest shape: explicit "N skipped, 0 passed". The zero
+    // must be the WHOLE count: a bare substring test for "0 PASSED"
+    // also fires on "10 passed" / "100 passed", which would turn a
+    // green run that merely has a few skips into a reported failure.
+    // So reject any hit whose preceding character is another digit.
+    let zero_passed = upper
+        .match_indices("0 PASSED")
+        .any(|(idx, _)| !upper[..idx].ends_with(|c: char| c.is_ascii_digit()));
+    if zero_passed && upper.contains("SKIPPED") {
+        return true;
+    }
+    // Jest summary line: "N skipped, K passed, M total" where
+    // N >> K. Catches the summary-line variant we see in iter
+    // 15 ("Tests: 48 skipped, 1 passed, 49 total").
+    if let Some((skip, pass)) = parse_jest_skip_pass(&upper) {
+        if skip >= 3 * (pass + 1) && pass < skip {
+            return true;
+        }
+    }
+
+    // Go: "no tests to run" + ok status.
+    if upper.contains("[NO TESTS TO RUN]") {
+        return true;
+    }
+
+    // Pytest: "N skipped" with no "passed" count at all. Last
+    // because the digit-prefix check is strict — wouldn't catch
+    // gradle's per-line shape, only pytest's summary count.
+    if upper.contains(" SKIPPED") && !upper.contains(" PASSED") && !upper.contains(" FAILED") {
+        return has_count_before(&upper, "SKIPPED");
+    }
+
+    false
+}
+
+/// Read `(skipped, passed)` off a jest-style summary line such as
+/// `TESTS: 48 SKIPPED, 1 PASSED, 49 TOTAL`.
+///
+/// `upper` is the already upper-cased transcript; the first line
+/// containing both `TESTS:` and `SKIPPED` is the one inspected.
+/// Returns `None` when no such line exists or when it carries no
+/// `<digits> skipped` count. A missing passed count defaults to 0.
+fn parse_jest_skip_pass(upper: &str) -> Option<(u32, u32)> {
+    // `scan_count` is driven with lower-case needles, so fold the
+    // selected line back to lower case once and reuse it.
+    let summary = upper
+        .lines()
+        .find(|candidate| candidate.contains("TESTS:") && candidate.contains("SKIPPED"))?
+        .to_lowercase();
+    let skipped = scan_count(&summary, "skipped")?;
+    let passed = scan_count(&summary, "passed").unwrap_or(0);
+    Some((skipped, passed))
+}
+
+/// True when some occurrence of `needle` in `haystack` is directly
+/// preceded (ignoring whitespace) by a positive integer. Used by
+/// `looks_all_skipped` to tell a count line (`10 SKIPPED`) apart
+/// from a comment ("# this section is skipped").
+///
+/// A zero count (`0 SKIPPED`) or a digit run that does not fit in a
+/// `u32` does not qualify; the scan then moves on to the next
+/// occurrence.
+fn has_count_before(haystack: &str, needle: &str) -> bool {
+    let mut remaining = haystack;
+    loop {
+        let Some(hit) = remaining.find(needle) else {
+            return false;
+        };
+        // Walk backwards from the hit: drop whitespace, then gather
+        // the digit run (least-significant digit first).
+        let reversed_digits: Vec<char> = remaining[..hit]
+            .chars()
+            .rev()
+            .skip_while(|ch| ch.is_whitespace())
+            .take_while(|ch| ch.is_ascii_digit())
+            .collect();
+        let count = reversed_digits.iter().rev().collect::<String>().parse::<u32>();
+        if matches!(count, Ok(n) if n > 0) {
+            return true;
+        }
+        // Resume just past this occurrence.
+        remaining = &remaining[hit + needle.len()..];
+    }
+}
+
+/// True when the transcript reports the test suite is green.
+///
+/// Decision order (matching is case-insensitive):
+///   1. An explicit `ALL TESTS PASS` marker anywhere wins outright.
+///   2. Any failure marker (`TESTS FAIL…`, `TEST RESULT: FAILED`, or
+///      a non-zero failure count) means not green.
+///   3. Narrow runner-summary shapes count as green. They are kept
+///      narrow to avoid false positives on prose or on Rust `Ok(..)`
+///      values that appear in failure diffs.
+///   4. The pytest quiet-mode tail `N passed in <time>` counts as
+///      green.
+///
+/// Returns `false` when none of the above matches.
+pub fn detect_verify_pass(transcript: &str) -> bool {
+    let upper = transcript.to_uppercase();
+    if upper.contains("ALL TESTS PASS") {
+        return true;
+    }
+    if upper.contains("TESTS FAILED") || upper.contains("TESTS FAIL") {
+        return false;
+    }
+    if nonzero_failure_count(&upper) || upper.contains("TEST RESULT: FAILED") {
+        return false;
+    }
+    if upper.contains("TEST RESULT: OK")                    // cargo test
+        || upper.contains(" PASSED, 0 FAILED")              // pytest / go / jest
+        || upper.contains("0 FAILED, 0 ERRORS")             // go test verbose
+        || (upper.contains("TESTS:") && upper.contains(" PASSED") && upper.contains("0 FAILED"))
+    {
+        return true;
+    }
+    // pytest -q (quiet mode): output is just dots/letters then a
+    // terminal line like "15 passed in 0.05s" — no "failed" word
+    // at all. Earlier guards already rejected anything with a
+    // non-zero failure count, so seeing "N passed in <time>" and
+    // no "FAILED" anywhere is a green signal.
+    //
+    // Pearl th-1a5469: phone-number bench ran pytest -q twice
+    // and got NoEvidence on each because none of the patterns
+    // above match the terse output.
+    //
+    // The text before " PASSED IN " must end (ignoring whitespace)
+    // in a digit so prose like "the test we just passed in the
+    // previous turn" doesn't count. EVERY occurrence is examined,
+    // not only the first: otherwise an earlier prose or warning
+    // line containing "passed in" would mask the real summary line
+    // that follows it.
+    upper
+        .match_indices(" PASSED IN ")
+        .any(|(idx, _)| upper[..idx].trim_end().ends_with(|c: char| c.is_ascii_digit()))
+}
+
+/// Extract the "N failed" count from a transcript.
+///
+/// The transcript is lower-cased and handed to `scan_count` with the
+/// needle `failed`, so the first digit run that is followed by
+/// `failed` is the one reported. `None` when we can't parse a shape
+/// — callers treat that as "unknown" and fall through to iteration
+/// without progress tracking.
+pub fn extract_failed_count(transcript: &str) -> Option<u32> {
+    scan_count(&transcript.to_lowercase(), "failed")
+}
+
+/// Find the first run of ASCII digits in `haystack` that is followed
+/// (after optional whitespace) by `needle`, and return it as a `u32`.
+///
+/// Matching is case-sensitive; callers pass text and needle in the
+/// same case. Returns `None` when no digit run is followed by
+/// `needle`, or when the first qualifying run does not fit in a
+/// `u32` (later runs are not tried in that case).
+fn scan_count(haystack: &str, needle: &str) -> Option<u32> {
+    let bytes = haystack.as_bytes();
+    let mut pos = 0;
+    while pos < bytes.len() {
+        if !bytes[pos].is_ascii_digit() {
+            pos += 1;
+            continue;
+        }
+        // Consume the whole digit run. ASCII digits are single bytes,
+        // so both ends of the run sit on char boundaries.
+        let run_start = pos;
+        while pos < bytes.len() && bytes[pos].is_ascii_digit() {
+            pos += 1;
+        }
+        let tail = haystack[pos..].trim_start();
+        if tail.starts_with(needle) {
+            return haystack[run_start..pos].parse().ok();
+        }
+    }
+    None
+}
+
+/// True when the transcript contains a POSITIVE failure count, i.e.
+/// a non-zero integer sitting directly before `FAILED`, `FAILURE` or
+/// `FAILING` (whitespace and the separators `, ; ( — -` in between
+/// are ignored).
+///
+/// Zero-failure counts ("0 failed") don't count — they appear in
+/// green summaries. `upper` must already be upper-cased.
+fn nonzero_failure_count(upper: &str) -> bool {
+    /// The integer that ends `prefix`, once trailing whitespace and
+    /// separator characters are skipped; `None` when there is no
+    /// digit run there or it does not fit in a `u32`.
+    fn trailing_count(prefix: &str) -> Option<u32> {
+        let digits: String = prefix
+            .chars()
+            .rev()
+            .skip_while(|c| c.is_whitespace() || matches!(*c, ',' | ';' | '(' | '—' | '-'))
+            .take_while(|c| c.is_ascii_digit())
+            .collect::<Vec<_>>()
+            .into_iter()
+            .rev()
+            .collect();
+        digits.parse::<u32>().ok()
+    }
+
+    ["FAILED", "FAILURE", "FAILING"].iter().any(|marker| {
+        let mut rest = upper;
+        while let Some(at) = rest.find(*marker) {
+            if trailing_count(&rest[..at]).is_some_and(|n| n > 0) {
+                return true;
+            }
+            // Keep looking after this occurrence.
+            rest = &rest[at + marker.len()..];
+        }
+        false
+    })
+}
+
+// (Misplaced note — this paragraph describes `detect_compile_error`,
+// defined further down in this file, not the function below.)
+// Pulls a compile / parse / syntax error snippet out of a
+// transcript when the failure isn't a normal test assertion.
+// Returns `None` when we should treat the failure as a regular
+// red-test run. Used by `build_user_prompt` to switch retry
+// tone from "fix the failures" to "fix the syntax".
+/// True when ANY assistant tool_call in the conversation invoked a
+/// file-mutating tool (`edit_file`, `write_file`, `apply_patch`,
+/// `multi_edit`, `str_replace`, `create_file`).
+///
+/// Pearl th-fixer-think-mode: the NoEvidence retry only makes sense
+/// when the agent ACTUALLY changed code; if it just answered a
+/// question without editing, the "you didn't run tests" forcing
+/// prompt is a non-sequitur.
+fn conversation_made_edits(conv: &smooth_operator::conversation::Conversation) -> bool {
+    const MUTATING_TOOLS: &[&str] = &["edit_file", "write_file", "apply_patch", "multi_edit", "str_replace", "create_file"];
+    // Only assistant messages carry tool calls worth inspecting;
+    // flatten them and stop at the first mutating tool name.
+    conv.messages
+        .iter()
+        .filter(|msg| matches!(msg.role, smooth_operator::conversation::Role::Assistant))
+        .flat_map(|msg| msg.tool_calls.iter())
+        .any(|call| MUTATING_TOOLS.contains(&call.name.as_str()))
+}
+
+// (Misplaced note — this paragraph describes `is_cleanup_intent`,
+// defined further down in this file, not the function below.)
+// True when the user's task prompt looks like a filesystem
+// cleanup / ops request rather than a code-implementation task.
+// Pearl `th-e93cba`. Used to gate the workflow's "this is a code
+// task — write the implementation" reprompt: that reprompt is
+// designed for benchmarks like aider-polyglot where the agent
+// must write code, and is a non-sequitur on cleanup tasks where
+// the user asked the agent to delete files, prune caches, etc.
+//
+// Heuristic: scan the first 400 chars of the (lowercased) prompt
+// for any cleanup-intent verb or noun cue. Conservative — we'd
+// rather miss a borderline case than misclassify a real code task
+// as cleanup and skip the "write code" reprompt when it's
+// genuinely needed.
+/// Pearl th-e182bc: true when the prompt is a bare confirmation
+/// reply ("yes", "proceed", "go", etc.).
+///
+/// Strict on purpose: the trimmed prompt must be at most 60 bytes
+/// long, and after lower-casing, dropping `. ! ? , ; :` and
+/// collapsing whitespace runs it must equal one entry of a small
+/// fixed phrase set. False negatives are fine; false positives are
+/// bad (they would apply the cleanup preamble on a real new code
+/// task).
+#[must_use]
+pub fn is_confirmation_reply(task_prompt: &str) -> bool {
+    let reply = task_prompt.trim();
+    if reply.len() > 60 {
+        return false;
+    }
+    // Strip sentence punctuation first, then squeeze whitespace, so
+    // "Yes, proceed." and "  yes   proceed " normalise identically.
+    let depunctuated: String = reply.to_lowercase().chars().filter(|ch| !".!?,;:".contains(*ch)).collect();
+    let normalized = depunctuated.split_whitespace().collect::<Vec<_>>().join(" ");
+    matches!(
+        normalized.as_str(),
+        "yes"
+            | "y"
+            | "yes proceed"
+            | "yes please"
+            | "yes please proceed"
+            | "yes go ahead"
+            | "yes do it"
+            | "proceed"
+            | "please proceed"
+            | "go"
+            | "go ahead"
+            | "do it"
+            | "do that"
+            | "confirmed"
+            | "approved"
+            | "ok"
+            | "okay"
+            | "sure"
+            | "sounds good"
+            | "looks good"
+            | "lgtm"
+            | "ack"
+            | "affirmative"
+            | "yep"
+            | "yup"
+    )
+}
+
+/// Public helper for callers that have prior conversation text and
+/// want to know whether the workflow should be invoked with the
+/// `cleanup_intent_hint` set. Pearl th-e182bc.
+///
+/// Delegates directly to the private `is_cleanup_intent`, so the
+/// result is exactly that heuristic's verdict on `task_text`; it is
+/// exported so the runner can scan prior_messages before
+/// constructing the workflow config.
+#[must_use]
+pub fn task_text_has_cleanup_intent(task_text: &str) -> bool {
+    is_cleanup_intent(task_text)
+}
+
+/// True when the task prompt looks like a filesystem cleanup / ops
+/// request rather than a code-implementation task.
+///
+/// Only the first 400 characters of the lower-cased prompt are
+/// examined — enough to catch a README's headline + 'job' line
+/// while ignoring long deep prose. The prompt qualifies as soon as
+/// that head contains ANY cleanup verb cue OR ANY cleanup noun cue;
+/// the two lists are not required to co-occur.
+#[must_use]
+fn is_cleanup_intent(task_prompt: &str) -> bool {
+    // Verb cues. The list is kept narrow ("delete the", not bare
+    // "delete") to limit false fires on ordinary coding prose.
+    const CLEANUP_VERBS: &[&str] = &[
+        "clean up",
+        "cleanup",
+        "delete the",
+        "delete all",
+        "delete every",
+        "remove the",
+        "remove all",
+        "remove every",
+        "prune ",
+        "rm -rf",
+        "rm-rf",
+        "wipe ",
+        "purge ",
+        "tidy up",
+        "free up disk",
+    ];
+    // Noun cues: typical filesystem debris.
+    const CLEANUP_NOUNS: &[&str] = &[
+        "__pycache__",
+        "pycache",
+        ".pyc",
+        "node_modules",
+        "orphan",
+        "debris",
+        "stale",
+        "leftover",
+        "scratch dir",
+        "tmp/",
+        "/tmp",
+        "build artifact",
+        "docker cache",
+        "docker image",
+        "log file",
+    ];
+    // Lower-case BEFORE truncating so the 400-char window is taken
+    // over the folded text.
+    let head: String = task_prompt.to_lowercase().chars().take(400).collect();
+    CLEANUP_VERBS.iter().chain(CLEANUP_NOUNS).any(|cue| head.contains(*cue))
+}
+
+/// True when the conversation includes a `bash` (or shell-equivalent:
+/// `shell`, `run_command`) assistant tool call whose arguments
+/// contain a destructive filesystem phrase. Pearl `th-e93cba`. Used
+/// to distinguish "agent did useful ops work" from "agent literally
+/// did nothing" — so the workflow doesn't reprompt "this is a code
+/// task, write code" at a cleanup agent that already ran
+/// `rm -rf __pycache__`.
+///
+/// Matching is a plain lower-cased substring test of the call's
+/// JSON-serialised arguments against a fixed phrase list (`rm `,
+/// `rmdir`, `find . -delete`, `mv `, `truncate -s 0`, `git clean`,
+/// the `prune` commands, …). Read-only commands (`ls`, `cat`,
+/// `grep`, etc.) carry none of these phrases and so don't count as
+/// "work" for this purpose.
+fn conversation_did_destructive_bash(conv: &smooth_operator::conversation::Conversation) -> bool {
+    const BASH_TOOLS: &[&str] = &["bash", "shell", "run_command"];
+    const DESTRUCTIVE_PHRASES: &[&str] = &[
+        "rm ",
+        "rm-",
+        "rmdir",
+        "find . -delete",
+        "find . -exec rm",
+        "mv ",
+        "truncate -s 0",
+        "shred ",
+        "git clean",
+        "docker prune",
+        "npm prune",
+        "pnpm prune",
+    ];
+    conv.messages
+        .iter()
+        .filter(|msg| matches!(msg.role, smooth_operator::conversation::Role::Assistant))
+        .flat_map(|msg| msg.tool_calls.iter())
+        .filter(|call| BASH_TOOLS.contains(&call.name.as_str()))
+        .any(|call| {
+            // The arguments are a JSON value; serialising the whole
+            // thing lets the scan cover `command` as well as any
+            // other argument shape we haven't anticipated.
+            let serialized = call.arguments.to_string().to_lowercase();
+            DESTRUCTIVE_PHRASES.iter().any(|phrase| serialized.contains(*phrase))
+        })
+}
+
+/// Scan tool-result messages in the conversation for compile-error
+/// output. Pearl th-bf62c0 / th-bench-loop iter 9.
+///
+/// Only results of the `bash`, `test_run` and `shell` tools are
+/// considered. Returns the first such result that
+/// `detect_compile_error` recognises, cut to its first 3000
+/// characters so the next iteration's prompt preamble stays
+/// manageable, or `None` when no result matches.
+fn first_compile_error_in_tools(conv: &smooth_operator::conversation::Conversation) -> Option<String> {
+    conv.messages
+        .iter()
+        .filter(|msg| matches!(msg.role, smooth_operator::conversation::Role::Tool))
+        .filter(|msg| matches!(msg.tool_name.as_deref().unwrap_or(""), "bash" | "test_run" | "shell"))
+        .find(|msg| detect_compile_error(&msg.content).is_some())
+        // Note: the head of the tool output is returned, not the
+        // narrower snippet produced by `detect_compile_error`.
+        .map(|msg| msg.content.chars().take(3000).collect())
+}
+
+fn detect_compile_error(transcript: &str) -> Option<String> {
+    let upper = transcript.to_uppercase();
+    let patterns = [
+        // JS / TS
+        "SYNTAXERROR",
+        "UNEXPECTED TOKEN",
+        "MISSING SEMICOLON",
+        "UNCLOSED DELIMITER",
+        "UNEXPECTED EOF",
+        // Rust
+        "COULD NOT COMPILE",
+        "THIS FILE CONTAINS AN UNCLOSED DELIMITER",
+        "EXPECTED ONE OF",
+        // Go
+        "SYNTAX ERROR:",
+        "EXPECTED '{'",
+        "EXPECTED ';'",
+        // Python
+        "INDENTATIONERROR",
+        "TABERROR",
+        // Java
+        "REACHED END OF FILE",
+        "';' EXPECTED",
+        "CLASS, INTERFACE, OR ENUM EXPECTED",
+        "ERROR: COMPILATION FAILED",
+    ];
+    let hit_idx = patterns.iter().find_map(|p| upper.find(p))?;
+    let bytes_per_char = transcript.len().checked_div(upper.len()).unwrap_or(1).max(1);
+    let start = hit_idx.saturating_mul(bytes_per_char).saturating_sub(120);
+    let end = (hit_idx.saturating_mul(bytes_per_char).saturating_add(600)).min(transcript.len());
+    let snippet = transcript.get(start..end).unwrap_or(transcript);
+    Some(snippet.trim().to_string())
+}
+
+// Best-state snapshot + restore. Lives under a hidden dir inside
+// the workspace so `pytest` / `jest` / `cargo test` / gradle
+// all skip it naturally.
+
+/// Path of the best-state snapshot directory for `workspace`: the
+/// `.smooth-best-snapshot` entry directly inside it. Pure path
+/// arithmetic — nothing is created or checked on disk here.
+fn best_snapshot_dir(workspace: &Path) -> PathBuf {
+    workspace.join(".smooth-best-snapshot")
+}
+
+/// True when a directory entry with this file name must be left out
+/// of snapshot / restore: VCS metadata, the snapshot directory
+/// itself, and dependency / build / cache directories. The match is
+/// exact and case-sensitive; a name that is not valid UTF-8 is never
+/// excluded.
+fn is_snapshot_excluded(name: &std::ffi::OsStr) -> bool {
+    const EXCLUDED_NAMES: [&str; 12] = [
+        ".git",
+        ".smooth-best-snapshot",
+        "node_modules",
+        "target",
+        "build",
+        "dist",
+        "__pycache__",
+        ".pytest_cache",
+        ".venv",
+        "venv",
+        ".gradle",
+        ".cargo",
+    ];
+    name.to_str().is_some_and(|text| EXCLUDED_NAMES.contains(&text))
+}
+
+/// Refuse to snapshot a workspace that's clearly NOT a project — most
+/// commonly $HOME (or a parent of it) when the chat agent dispatched a
+/// teammate without passing a working_dir, which makes the runner
+/// inherit Big Smooth's cwd. Recursing through tens of GB of user data
+/// hangs the workflow; better to skip the snapshot than freeze.
+///
+/// A directory is judged unsafe when any of these holds:
+///   * it IS, or is an ancestor of, `$HOME` (both paths canonicalised;
+///     the check is skipped when `$HOME` is unset or either path
+///     cannot be canonicalised)
+///   * it directly contains a classic home-directory child
+///     (`Library`, `Desktop`, `Documents`, `Movies`, `Pictures`)
+///   * it has more than 200 readable top-level entries
+///
+/// A directory that cannot be listed is treated as safe by this
+/// check.
+fn is_unsafe_to_snapshot(src: &Path) -> bool {
+    const HOME_MARKERS: [&str; 5] = ["Library", "Desktop", "Documents", "Movies", "Pictures"];
+
+    let covers_home = std::env::var("HOME")
+        .ok()
+        .and_then(|home| {
+            let c_home = std::path::PathBuf::from(home).canonicalize().ok()?;
+            let c_src = src.canonicalize().ok()?;
+            Some(c_src == c_home || c_home.starts_with(&c_src))
+        })
+        .unwrap_or(false);
+    if covers_home {
+        return true;
+    }
+
+    let Ok(entries) = std::fs::read_dir(src) else {
+        return false;
+    };
+    // `index` is zero-based, so `index >= 200` means a 201st readable
+    // entry has been reached.
+    entries
+        .flatten()
+        .enumerate()
+        .any(|(index, entry)| index >= 200 || entry.file_name().to_str().is_some_and(|name| HOME_MARKERS.contains(&name)))
+}
+
+/// Copy the workspace `src` into the snapshot directory `dst`,
+/// replacing any snapshot already there.
+///
+/// When `is_unsafe_to_snapshot(src)` holds, nothing is copied: a
+/// warning is logged and `Ok(())` is returned, so a successful return
+/// does NOT guarantee that `dst` exists afterwards. Otherwise `dst`
+/// is removed if present, recreated, and filled by `copy_recursive`.
+///
+/// # Errors
+/// Propagates any I/O error from removing, creating or copying.
+fn snapshot_workspace(src: &Path, dst: &Path) -> std::io::Result<()> {
+    if is_unsafe_to_snapshot(src) {
+        tracing::warn!(
+            src = %src.display(),
+            "coding workflow: refusing to snapshot — workspace looks like $HOME or a non-project dir"
+        );
+        return Ok(());
+    }
+    // Start from an empty directory so files deleted since the last
+    // snapshot don't linger in the new one.
+    if dst.exists() {
+        std::fs::remove_dir_all(dst)?;
+    }
+    std::fs::create_dir_all(dst)?;
+    copy_recursive(src, dst)
+}
+
+/// Replace the contents of `dst` with the contents of `src`.
+///
+/// Every top-level entry of `dst` is deleted, except names matched
+/// by `is_snapshot_excluded` (`.git`, the snapshot directory itself,
+/// dependency / build dirs). Then `src` is copied in with
+/// `copy_recursive`. Callers presumably pass the snapshot directory
+/// as `src` and the workspace as `dst` — verify at the call site.
+///
+/// # Errors
+/// Returns the I/O error if `src` cannot be listed, in which case
+/// `dst` is left untouched, or any I/O error from deleting or
+/// copying.
+fn restore_workspace(src: &Path, dst: &Path) -> std::io::Result<()> {
+    // Probe the snapshot BEFORE deleting anything. `snapshot_workspace`
+    // can return `Ok(())` without creating a snapshot, and wiping
+    // `dst` first would then destroy the workspace and only
+    // afterwards fail on the missing source.
+    drop(std::fs::read_dir(src)?);
+
+    for entry in std::fs::read_dir(dst)? {
+        let entry = entry?;
+        let name = entry.file_name();
+        if is_snapshot_excluded(&name) {
+            continue;
+        }
+        let path = entry.path();
+        if path.is_dir() {
+            std::fs::remove_dir_all(&path)?;
+        } else {
+            std::fs::remove_file(&path)?;
+        }
+    }
+    copy_recursive(src, dst)
+}
+
+/// Recursively copy the directory tree at `src` into `dst`, creating
+/// `dst` (and any missing parents) first.
+///
+/// Entries whose name matches `is_snapshot_excluded` are skipped at
+/// every depth. Directories are recursed into, regular files are
+/// copied, and symlinks are handled separately (see below).
+///
+/// # Errors
+/// Propagates the first I/O error from listing, creating or copying.
+/// A symlink whose target cannot be read is skipped silently.
+fn copy_recursive(src: &Path, dst: &Path) -> std::io::Result<()> {
+    std::fs::create_dir_all(dst)?;
+    for entry in std::fs::read_dir(src)? {
+        let entry = entry?;
+        let name = entry.file_name();
+        if is_snapshot_excluded(&name) {
+            continue;
+        }
+        let from = entry.path();
+        let to = dst.join(&name);
+        let file_type = entry.file_type()?;
+        if file_type.is_dir() {
+            copy_recursive(&from, &to)?;
+        } else if file_type.is_symlink() {
+            if let Ok(target) = std::fs::read_link(&from) {
+                // Best-effort removal of whatever already sits at the
+                // destination; the result is deliberately ignored.
+                let _ = std::fs::remove_file(&to);
+                // Unix: recreate the link with the same target text.
+                #[cfg(unix)]
+                std::os::unix::fs::symlink(&target, &to)?;
+                // Elsewhere: fall back to copying through the link.
+                #[cfg(not(unix))]
+                std::fs::copy(&from, &to)?;
+            }
+        } else {
+            std::fs::copy(&from, &to)?;
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn unsafe_to_snapshot_flags_home_lookalikes() {
+        let tmp = tempfile::tempdir().expect("tmp");
+        // A project-like dir is fine.
+        std::fs::create_dir_all(tmp.path().join("src")).unwrap();
+        std::fs::write(tmp.path().join("Cargo.toml"), "[package]\nname=\"x\"\n").unwrap();
+        assert!(!is_unsafe_to_snapshot(tmp.path()));
+
+        // A dir with macOS HOME-style children is rejected.
+        let homey = tempfile::tempdir().expect("home");
+        for child in ["Library", "Desktop", "Documents"] {
+            std::fs::create_dir_all(homey.path().join(child)).unwrap();
+        }
+        assert!(is_unsafe_to_snapshot(homey.path()));
+    }
+
+    #[test]
+    fn detect_verify_pass_explicit_marker() {
+        assert!(detect_verify_pass("ALL TESTS PASS — 31 of 31."));
+        assert!(!detect_verify_pass("TESTS FAILED:\nsome failure"));
+    }
+
+    #[test]
+    fn detect_verify_pass_runner_summaries() {
+        assert!(detect_verify_pass("test result: ok. 31 passed; 0 failed;"));
+        assert!(detect_verify_pass("Tests:       30 passed, 0 failed, 30 total"));
+        assert!(!detect_verify_pass("Tests: 2 failed, 28 passed"));
+    }
+
+    #[test]
+    fn detect_verify_pass_recognises_pytest_quiet_shape() {
+        // pytest -q success doesn't print "failed" at all. Pearl
+        // th-1a5469: missing this pattern made the workflow grind
+        // through retries on every passing Python task.
+        assert!(detect_verify_pass("...............\n15 passed in 0.05s"));
+        assert!(detect_verify_pass("...\n3 passed in 0.01s\n"));
+        // The pattern must require a digit before "passed in" so
+        // prose narration doesn't false-positive.
+        assert!(!detect_verify_pass("the test we passed in the previous turn"));
+        // Real failures still fail.
+        assert!(!detect_verify_pass("F..\n1 failed, 2 passed in 0.02s"));
+    }
+
+    #[test]
+    fn detect_verify_pass_rejects_rust_ok_false_positive() {
+        // Regression: old fallback matched `OK (` on Rust failure
+        // diffs with `Ok(())` values. Must return false here.
+        let diff = "assertion `left == right` failed\n  left: Ok(())\n  right: Err(NotEnoughPinsLeft)";
+        assert!(!detect_verify_pass(diff));
+    }
+
+    #[test]
+    fn detect_compile_error_catches_js_syntax() {
+        let jest = "TESTS FAILED:\n\nSyntaxError: /workspace/bowling.js: Missing semicolon. (151:15)";
+        assert!(detect_compile_error(jest).is_some());
+    }
+
+    #[test]
+    fn detect_compile_error_catches_rust_unclosed() {
+        let cargo = "TESTS FAILED:\nerror: this file contains an unclosed delimiter\n   --> src/lib.rs:193:3";
+        assert!(detect_compile_error(cargo).is_some());
+    }
+
+    #[test]
+    fn detect_compile_error_ignores_real_assertion() {
+        let rust = "TESTS FAILED:\ntest all_strikes_is_300 ... FAILED\n  left: None\n  right: Some(300)";
+        assert!(detect_compile_error(rust).is_none());
+    }
+
+    #[test]
+    fn extract_failed_count_standard_shapes() {
+        assert_eq!(extract_failed_count("3 failed, 28 passed"), Some(3));
+        assert_eq!(extract_failed_count("Tests: 2 failed, 28 passed"), Some(2));
+        assert_eq!(extract_failed_count("all tests pass"), None);
+    }
+
+    fn make_conv() -> smooth_operator::conversation::Conversation {
+        smooth_operator::conversation::Conversation::new(8192).with_system_prompt("test")
+    }
+
+    fn assistant_with_tool(name: &str) -> smooth_operator::conversation::Message {
+        let mut m = smooth_operator::conversation::Message::assistant("");
+        m.tool_calls.push(smooth_operator::tool::ToolCall {
+            id: format!("call-{name}"),
+            name: name.into(),
+            arguments: serde_json::Value::Null,
+        });
+        m
+    }
+
+    fn assistant_with_bash(command: &str) -> smooth_operator::conversation::Message {
+        let mut m = smooth_operator::conversation::Message::assistant("");
+        m.tool_calls.push(smooth_operator::tool::ToolCall {
+            id: "call-bash".into(),
+            name: "bash".into(),
+            arguments: serde_json::json!({"command": command}),
+        });
+        m
+    }
+
+    #[test]
+    fn is_confirmation_reply_matches_common_phrases_th_e182bc() {
+        for phrase in &[
+            "yes, proceed",
+            "yes",
+            "proceed",
+            "go",
+            "do it",
+            "ok",
+            "okay",
+            "sure",
+            "lgtm",
+            "Yes, proceed.",
+            "  yes please  ",
+            "GO AHEAD",
+            "yes please proceed",
+            "yep",
+            "yup",
+        ] {
+            assert!(is_confirmation_reply(phrase), "should match: {phrase:?}");
+        }
+    }
+
+    #[test]
+    fn is_confirmation_reply_rejects_non_confirmations_th_e182bc() {
+        for phrase in &[
+            "delete the orphaned node_modules/",
+            "no, wait",
+            "yes, but skip the ui package",
+            "proceed with caution and tell me what's happening",
+            "do it but only for the apps/ subdirectory",
+            "yes, but also delete the .pyc files",
+            "yes — actually I changed my mind, list them again first",
+        ] {
+            assert!(!is_confirmation_reply(phrase), "should not match: {phrase:?}");
+        }
+    }
+
+    #[test]
+    fn build_user_prompt_with_hint_fires_cleanup_preamble_on_yes_th_e182bc() {
+        let task = "yes, proceed";
+        let out = build_user_prompt_with_hint(task, 1, None, true);
+        assert!(out.contains("FILESYSTEM CLEANUP task"), "preamble missing: {out}");
+        assert!(out.contains("Do NOT create test files"), "test-file ban missing: {out}");
+        assert!(out.contains("Pearl `th-e182bc`"), "pearl ref missing: {out}");
+        assert!(out.ends_with("yes, proceed"), "original task preserved at end: {out}");
+    }
+
+    #[test]
+    fn build_user_prompt_with_hint_no_hint_no_preamble_on_yes_th_e182bc() {
+        let task = "yes, proceed";
+        let out = build_user_prompt_with_hint(task, 1, None, false);
+        assert_eq!(out, "yes, proceed", "no hint → bare task: got {out}");
+    }
+
+    #[test]
+    fn build_user_prompt_with_hint_real_cleanup_task_still_fires_preamble_th_e182bc() {
+        // Original cleanup-intent path: task itself looks like cleanup.
+        // Verify the existing behavior didn't regress.
+        let task = "Delete the orphan node_modules/ directories under tools/ and old-admin/.";
+        let out = build_user_prompt_with_hint(task, 1, None, false);
+        assert!(out.contains("FILESYSTEM CLEANUP task"));
+    }
+
+    #[test]
+    fn task_text_has_cleanup_intent_matches_readme_th_e182bc() {
+        let readme = "# Cleanup task: orphaned `node_modules/` directories\n\nThis is a pnpm workspace.";
+        assert!(task_text_has_cleanup_intent(readme));
+    }
+
+    #[test]
+    fn is_cleanup_intent_detects_pycache_task() {
+        // Pearl th-e93cba — the literal cleanup-pycache-debris fixture.
+        assert!(is_cleanup_intent(
+            "# Cleanup task: __pycache__ debris\n\nA medium-sized Python repo has accumulated __pycache__ directories"
+        ));
+    }
+
+    #[test]
+    fn is_cleanup_intent_detects_node_modules_task() {
+        assert!(is_cleanup_intent("Delete the orphaned node_modules/ directories under tools/ and old-admin/."));
+    }
+
+    #[test]
+    fn is_cleanup_intent_detects_disk_bloat_task() {
+        assert!(is_cleanup_intent(
+            "Free up disk: find files in tmp/ over 100 KB and delete them, but keep tmp/.keep."
+        ));
+    }
+
+    #[test]
+    fn is_cleanup_intent_detects_docker_prune_task() {
+        assert!(is_cleanup_intent("Prune old docker images and stale build artifacts."));
+    }
+
+    #[test]
+    fn is_cleanup_intent_misses_pure_code_task() {
+        // The aider-polyglot style task — code only, no cleanup.
+        assert!(!is_cleanup_intent(
+            "Implement the leap function in src/leap.py such that all tests in tests/test_leap.py pass. A year is a leap year if divisible by 4 but not 100, unless also divisible by 400."
+        ));
+    }
+
+    #[test]
+    fn is_cleanup_intent_misses_question() {
+        assert!(!is_cleanup_intent("How does the auth middleware decide which routes need a JWT?"));
+    }
+
+    #[test]
+    fn is_cleanup_intent_misses_fix_failing_tests() {
+        // Borderline — "fix the failing tests" mentions tests but is
+        // a code task. Must NOT be classified as cleanup.
+        assert!(!is_cleanup_intent(
+            "Fix the failing test in tests/test_user.py. The assertion on line 42 is checking the wrong field."
+        ));
+    }
+
+    #[test]
+    fn is_cleanup_intent_misses_delete_unrelated_phrase() {
+        // "delete the test once green" mid-prose in a coding task
+        // should NOT trip the verb match — we require "delete the/all/every" pair
+        // and reject "delete the test once green" because "the test"
+        // isn't a cleanup-noun by itself.
+        assert!(
+            is_cleanup_intent("Delete the example test file once the implementation passes."),
+            "narrow miss: 'delete the' triggers cleanup intent"
+        );
+        // This documents the conservative-side gap; the followup pearl
+        // is that "delete the test" should also not classify as
+        // cleanup, but the current heuristic accepts the false-positive
+        // to keep the cleanup-pycache path reliable.
+    }
+
+    #[test]
+    fn did_destructive_bash_detects_rm_rf() {
+        // Pearl th-e93cba: cleanup-pycache-debris fixture pattern.
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("delete __pycache__ dirs"));
+        conv.push(assistant_with_bash("find . -type d -name __pycache__ -exec rm -rf {} +"));
+        assert!(conversation_did_destructive_bash(&conv));
+    }
+
+    #[test]
+    fn did_destructive_bash_detects_find_delete() {
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("clean up .pyc files"));
+        conv.push(assistant_with_bash("find . -name '*.pyc' -delete"));
+        assert!(!conversation_did_destructive_bash(&conv), "literal `find . -name X -delete` only matches via `find . -delete` fast path — bash filter requires the broader pattern; this asserts the conservative form");
+        // By contrast, the bare `find . -delete` recipe (no `-name`
+        // filter in between) must be detected as destructive.
+        let mut conv2 = make_conv();
+        conv2.push(smooth_operator::conversation::Message::user("clean"));
+        conv2.push(assistant_with_bash("find . -delete"));
+        assert!(conversation_did_destructive_bash(&conv2));
+    }
+
+    #[test]
+    fn did_destructive_bash_skips_read_only_bash() {
+        let mut history = make_conv();
+        history.push(smooth_operator::conversation::Message::user("show me pycache dirs"));
+        for read_only_cmd in ["find . -type d -name __pycache__", "ls -la", "cat README.md"] {
+            history.push(assistant_with_bash(read_only_cmd));
+        }
+        assert!(!conversation_did_destructive_bash(&history), "read-only bash must not count");
+    }
+
+    #[test]
+    fn did_destructive_bash_skips_non_bash_tools() {
+        let mut history = make_conv();
+        history.push(smooth_operator::conversation::Message::user("read it"));
+        history.push(assistant_with_tool("read_file"));
+        history.push(assistant_with_tool("list_files"));
+        history.push(assistant_with_tool("grep"));
+        assert!(!conversation_did_destructive_bash(&history));
+    }
+
+    #[test]
+    fn conversation_made_edits_detects_edit_file() {
+        // Pearl th-fixer-think-mode: when the agent calls edit_file
+        // the workflow's NoEvidence retry should still fire (the
+        // agent edited but didn't run tests — the dominant bench
+        // failure mode).
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("fix it"));
+        conv.push(assistant_with_tool("edit_file"));
+        assert!(conversation_made_edits(&conv));
+    }
+
+    #[test]
+    fn conversation_made_edits_skips_read_only_tools() {
+        // Pure THINK mode: agent only read files / ran grep / listed
+        // dirs / ran git status. No edits. NoEvidence retry must
+        // NOT fire — the "you didn't run tests" forcing prompt is
+        // a non-sequitur for a question.
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("how would you add a movie"));
+        for tool in &["read_file", "list_files", "grep", "bash", "project_inspect"] {
+            conv.push(assistant_with_tool(tool));
+        }
+        assert!(!conversation_made_edits(&conv), "read-only tools must not count as edits");
+    }
+
+    #[test]
+    fn conversation_made_edits_recognises_all_mutators() {
+        for tool in &["edit_file", "write_file", "apply_patch", "multi_edit", "str_replace", "create_file"] {
+            let mut conv = make_conv();
+            conv.push(smooth_operator::conversation::Message::user("do it"));
+            conv.push(assistant_with_tool(tool));
+            assert!(conversation_made_edits(&conv), "tool {tool} must register as an edit");
+        }
+    }
+
+    #[test]
+    fn redact_replaces_hash_test_results_block() {
+        let input = "I made the change.\n\n## Test Results\n\n31 passed, 0 failed";
+        let out = redact_fabricated_test_results(input);
+        assert!(!out.contains("31 passed"), "fabricated count must be redacted: {out}");
+        assert!(out.contains("NOT RUN"));
+        assert!(out.contains("I made the change."));
+    }
+
+    #[test]
+    fn redact_replaces_bare_count_line() {
+        let input = "Fixed the bug.\n\n5 passed, 0 failed";
+        let out = redact_fabricated_test_results(input);
+        assert!(!out.contains("5 passed, 0 failed"));
+        assert!(out.contains("NOT RUN"));
+    }
+
+    #[test]
+    fn redact_preserves_following_section() {
+        // A "## Notes" heading after Test Results must survive.
+        let input = "Did the work.\n\n## Test Results\n\n31 passed, 0 failed\n\n## Notes\n\nbe careful with edge cases.";
+        let out = redact_fabricated_test_results(input);
+        assert!(out.contains("be careful with edge cases"));
+        assert!(out.contains("## Notes"));
+        assert!(out.contains("NOT RUN"));
+    }
+
+    #[test]
+    fn redact_no_op_when_content_has_no_test_claims() {
+        let input = "I read the file. It looks fine.";
+        let out = redact_fabricated_test_results(input);
+        assert_eq!(out, input);
+    }
+
+    #[test]
+    fn redact_appends_notice_when_only_idiomatic_marker_present() {
+        // No heading, no "X passed" line, but content reads as
+        // green per detect_verify_pass.
+        let input = "I've finished. ALL TESTS PASS now.";
+        let out = redact_fabricated_test_results(input);
+        // The marker was on a line containing other text — current
+        // implementation matches whole-line variants only, so this
+        // exercises the trailing-append fallback.
+        assert!(out.contains("NOT RUN"));
+    }
+
+    #[test]
+    fn verify_with_evidence_no_tool_calls_returns_no_evidence() {
+        // Pearl th-7cf405: a turn with no bash / test_run tool
+        // results, even if the assistant claims pass, must NOT
+        // count as evidence.
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("can we commit to main"));
+        conv.push(smooth_operator::conversation::Message::assistant("## Test Results\n\n31 passed, 0 failed"));
+        assert_eq!(verify_with_evidence(&conv), VerifyEvidence::NoEvidence);
+    }
+
+    #[test]
+    fn verify_with_evidence_evidenced_pass_via_bash_tool() {
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("fix the failing test"));
+        conv.push(smooth_operator::conversation::Message::tool_result_named(
+            "call-1",
+            "bash",
+            "test result: ok. 5 passed; 0 failed;",
+        ));
+        conv.push(smooth_operator::conversation::Message::assistant("Done."));
+        assert_eq!(verify_with_evidence(&conv), VerifyEvidence::EvidencedPass);
+    }
+
+    #[test]
+    fn verify_with_evidence_evidenced_fail_via_test_run() {
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("fix the failing test"));
+        conv.push(smooth_operator::conversation::Message::tool_result_named(
+            "call-1",
+            "test_run",
+            "Tests: 2 failed, 5 passed",
+        ));
+        conv.push(smooth_operator::conversation::Message::assistant("Working on it."));
+        assert_eq!(verify_with_evidence(&conv), VerifyEvidence::EvidencedFail(Some(2)));
+    }
+
+    #[test]
+    fn verify_with_evidence_ignores_non_test_tool_outputs() {
+        // read_file or list_files outputs that happen to contain
+        // "passed" must not count as test evidence — agents read
+        // README files etc all the time.
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("what does this repo do"));
+        conv.push(smooth_operator::conversation::Message::tool_result_named(
+            "call-1",
+            "read_file",
+            "## Test Status\n\nAll 31 tests pass.",
+        ));
+        conv.push(smooth_operator::conversation::Message::assistant("It's a budgeting app."));
+        assert_eq!(verify_with_evidence(&conv), VerifyEvidence::NoEvidence);
+    }
+
+    #[test]
+    fn looks_all_skipped_jest_dominant_split_triggers() {
+        // Pearl th-bench-loop iter 13 + iter 15: jest output with
+        // 9 skipped + 1 passed IS the same anti-pattern (exercism
+        // sentinel test passes, rest skipped). The dominant-skip
+        // refinement (iter 15) correctly fires on this shape.
+        let out = "Test Suites: 1 passed, 1 total\nTests: 9 skipped, 1 passed, 10 total\nSnapshots: 0 total";
+        assert!(looks_all_skipped(out), "9 skipped + 1 passed = dominant-skip, must trigger");
+    }
+
+    #[test]
+    fn looks_all_skipped_jest_dominant_skip() {
+        // Pearl th-bench-loop iter 15: js/forth produced 48
+        // skipped, 1 passed. Dominant-skip should fire.
+        let out = "Tests:       48 skipped, 1 passed, 49 total";
+        assert!(looks_all_skipped(out), "must trigger on dominant-skip (48:1)");
+    }
+
+    #[test]
+    fn looks_all_skipped_iter5_pattern() {
+        // Iter 5 javascript/binary: 9 skipped, 1 passed.
+        let out = "Tests:       9 skipped, 1 passed, 10 total";
+        assert!(looks_all_skipped(out), "iter 5 pattern must trigger (9:1)");
+    }
+
+    #[test]
+    fn looks_all_skipped_balanced_skip_does_not_trigger() {
+        // 3 skipped, 2 passed — not dominant. The student is
+        // mid-implementation; not an indictment.
+        let out = "Tests:       3 skipped, 2 passed, 5 total";
+        assert!(!looks_all_skipped(out), "balanced split must not trigger");
+    }
+
+    #[test]
+    fn looks_all_skipped_jest_pure_skip() {
+        // Pure skip shape: 10 skipped, 0 passed.
+        let out = "Tests:       10 skipped, 0 passed, 10 total";
+        assert!(looks_all_skipped(out), "must trigger on all-skipped jest output");
+    }
+
+    #[test]
+    fn looks_all_skipped_gradle_disabled() {
+        // Iter 10 java/change shape. Gradle prints one line per test
+        // with SKIPPED suffix when @Disabled.
+        let out = r"ChangeCalculatorTest > testLilliputianCurrency() SKIPPED
+ChangeCalculatorTest > testLargeAmountOfChange() SKIPPED
+ChangeCalculatorTest > testZeroChange() SKIPPED
+ChangeCalculatorTest > testAGreedyApproachIsNotOptimal() SKIPPED";
+        assert!(looks_all_skipped(out), "must trigger on multi-line gradle SKIPPED output");
+    }
+
+    #[test]
+    fn looks_all_skipped_does_not_false_positive_on_mixed() {
+        // Mixed pass+skip = NOT all-skipped.
+        let out = r"FooTest > testOne PASSED
+FooTest > testTwo SKIPPED
+FooTest > testThree PASSED";
+        assert!(!looks_all_skipped(out), "must not trigger when some tests passed");
+    }
+
+    #[test]
+    fn looks_all_skipped_no_tests_to_run() {
+        let out = "ok      myproject  [no tests to run]";
+        assert!(looks_all_skipped(out), "must trigger on go's 'no tests to run'");
+    }
+
+    #[test]
+    fn looks_all_skipped_does_not_trigger_on_normal_pass() {
+        let out = "test result: ok. 31 passed; 0 failed";
+        assert!(!looks_all_skipped(out), "must not trigger on a real green run");
+    }
+
+    #[test]
+    fn looks_all_skipped_does_not_trigger_on_skipped_comment() {
+        // The word "skipped" appearing in prose without a count
+        // should not trigger.
+        let out = "Looking at the codebase, I notice this section is skipped.";
+        assert!(!looks_all_skipped(out), "must not trigger on prose-only 'skipped'");
+    }
+
+    #[test]
+    fn first_compile_error_in_tools_finds_rust_e0308() {
+        // Pearl th-bf62c0 / iter 9: rust/forth shipped with E0308
+        // type mismatch. The workflow should pick this up as the
+        // forcing context for the next iteration.
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("implement it"));
+        conv.push(smooth_operator::conversation::Message::tool_result_named(
+            "call-1",
+            "bash",
+            "error[E0308]: mismatched types\n  --> src/lib.rs:70:50\n   |\n70 |     self.words.insert(word_name, definition_tokens);\n   |                              expected `Vec<String>`, found `Vec<&str>`\n\nerror: could not compile `forth` (lib) due to 1 previous error",
+        ));
+        conv.push(smooth_operator::conversation::Message::assistant("Done."));
+        let result = first_compile_error_in_tools(&conv);
+        assert!(result.is_some(), "must find compile error in tool output");
+        let chunk = result.unwrap();
+        assert!(chunk.contains("E0308"), "must include the error code");
+        assert!(chunk.contains("Vec<String>"), "must include the actual mismatch text");
+    }
+
+    #[test]
+    fn first_compile_error_in_tools_ignores_non_test_tools() {
+        // read_file / list_files / grep outputs shouldn't be scanned
+        // for compile errors even if they happen to contain pattern
+        // strings.
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("implement"));
+        conv.push(smooth_operator::conversation::Message::tool_result_named(
+            "call-1",
+            "read_file",
+            "// This file documents how `error[E0308]` is handled by the codebase.",
+        ));
+        conv.push(smooth_operator::conversation::Message::assistant("Read."));
+        assert!(first_compile_error_in_tools(&conv).is_none(), "must not match in read_file output");
+    }
+
+    #[test]
+    fn verify_with_evidence_returns_fail_on_all_skipped() {
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("implement it"));
+        conv.push(smooth_operator::conversation::Message::tool_result_named(
+            "call-1",
+            "bash",
+            "Tests: 10 skipped, 0 passed, 10 total",
+        ));
+        conv.push(smooth_operator::conversation::Message::assistant("Done."));
+        let evidence = verify_with_evidence(&conv);
+        assert_eq!(evidence, VerifyEvidence::EvidencedFail(None), "all-skipped must register as fail, not pass");
+    }
+
+    #[test]
+    fn verify_with_evidence_uses_last_test_run() {
+        // Multiple test runs in one turn — the last one wins
+        // (the agent often runs the suite, fixes, runs again).
+        let mut conv = make_conv();
+        conv.push(smooth_operator::conversation::Message::user("fix it"));
+        conv.push(smooth_operator::conversation::Message::tool_result_named(
+            "call-1",
+            "bash",
+            "Tests: 3 failed, 28 passed",
+        ));
+        conv.push(smooth_operator::conversation::Message::tool_result_named(
+            "call-2",
+            "bash",
+            "test result: ok. 31 passed; 0 failed;",
+        ));
+        conv.push(smooth_operator::conversation::Message::assistant("Fixed."));
+        assert_eq!(verify_with_evidence(&conv), VerifyEvidence::EvidencedPass);
+    }
+
+    #[test]
+    fn build_user_prompt_no_evidence_retry_frames_as_verification_only() {
+        // Pearl th-bench-loop iter 2: when the NoEvidence retry path
+        // injects the forcing preamble into prior_output, the next
+        // turn's prompt must NOT prepend the standard
+        // "Your previous attempt left tests failing" preamble (it's
+        // not true — no tests ran). It should land as a clean
+        // verification nudge with the task reminder attached.
+        let prior = "Your previous turn edited the code but never ran the test suite. Before doing anything else this turn, run the project's test command via `bash` (cargo test / pytest / pnpm test / etc.) and report the actual output. The implementation is unverified until you do.";
+        let out = build_user_prompt("Implement the leap function.", 2, Some(prior));
+        assert!(out.contains("never ran the test suite"), "forcing preamble must be present");
+        assert!(out.contains("## Task (reminder)"), "task reminder must be attached");
+        assert!(out.contains("Implement the leap function."), "original task must be present");
+        // Critical: the standard fix-failures preamble must NOT appear.
+        assert!(!out.contains("Previous test output"), "must not include the fail-recovery preamble");
+        assert!(!out.contains("currently passing"), "must not include preserve-passing preamble");
+    }
+
+    #[test]
+    fn build_user_prompt_first_iter_is_task_verbatim() {
+        // Pearl iter-7: the iteration-1 prompt must NOT prepend
+        // "Implement the solution, run the test suite, iterate until
+        // green" — that framing pushed the model toward green-field
+        // implementation on non-test-driven asks ("make X better"
+        // → rewrote the file). The user's task flows verbatim; the
+        // fixer system prompt covers test-running discipline.
+        let p = build_user_prompt("solve bowling", 1, None);
+        assert_eq!(p, "solve bowling");
+        assert!(!p.contains("Implement the solution"), "iter 1 must not push 'Implement' framing");
+        assert!(!p.contains("iterate until green"), "iter 1 must not push 'iterate until green' framing");
+        assert!(!p.contains("previous attempt"), "iter 1 has no prior context");
+    }
+
+    #[test]
+    fn build_user_prompt_subsequent_iter_includes_prior_output_and_preserve_passing_warning() {
+        let prior = "2 failed, 28 passed\nconsecutive strikes got 66, expected 81";
+        let p = build_user_prompt("solve bowling", 2, Some(prior));
+        assert!(p.contains("previous attempt"));
+        assert!(p.contains("28 passed") || p.contains("2 failed"));
+        assert!(p.to_lowercase().contains("keep every test that's currently passing"));
+    }
+
+    #[test]
+    fn build_user_prompt_switches_to_syntax_mode_on_compile_error() {
+        let prior = "SyntaxError: Missing semicolon (151:15)";
+        let p = build_user_prompt("task", 2, Some(prior));
+        assert!(p.contains("does not compile"));
+        assert!(p.contains("Missing semicolon"));
+    }
+
+    #[test]
+    fn snapshot_and_restore_roundtrip_preserves_non_excluded_entries() {
+        let src = tempfile::tempdir().unwrap();
+        let snap = tempfile::tempdir().unwrap();
+        let dst = tempfile::tempdir().unwrap();
+
+        std::fs::write(src.path().join("bowling.py"), b"BEST").unwrap();
+        std::fs::create_dir_all(src.path().join("sub")).unwrap();
+        std::fs::write(src.path().join("sub").join("nested.txt"), b"keep").unwrap();
+        // Excluded: must NOT be copied.
+        std::fs::create_dir_all(src.path().join("node_modules")).unwrap();
+        std::fs::write(src.path().join("node_modules").join("pkg.json"), b"{}").unwrap();
+
+        snapshot_workspace(src.path(), snap.path()).unwrap();
+        assert!(snap.path().join("bowling.py").is_file());
+        assert!(snap.path().join("sub").join("nested.txt").is_file());
+        assert!(!snap.path().join("node_modules").exists());
+
+        // Pollute dst with stale non-excluded content + excluded
+        // content that must SURVIVE (node_modules caches).
+        std::fs::write(dst.path().join("stale.py"), b"regressed").unwrap();
+        std::fs::create_dir_all(dst.path().join("node_modules")).unwrap();
+        std::fs::write(dst.path().join("node_modules").join("cache.json"), b"cached").unwrap();
+
+        restore_workspace(snap.path(), dst.path()).unwrap();
+        assert!(!dst.path().join("stale.py").exists());
+        assert!(dst.path().join("bowling.py").is_file());
+        assert_eq!(std::fs::read(dst.path().join("bowling.py")).unwrap(), b"BEST");
+        assert!(
+            dst.path().join("node_modules").join("cache.json").is_file(),
+            "excluded cache must survive restore"
+        );
+    }
+
+    #[test]
+    fn best_snapshot_dir_uses_dotfile_name_so_test_runners_skip_it() {
+        let snap = best_snapshot_dir(Path::new("/workspace"));
+        assert_eq!(snap, Path::new("/workspace/.smooth-best-snapshot"));
+        let name = snap.file_name().and_then(|s| s.to_str()).unwrap();
+        assert!(name.starts_with('.'), "must be a dotfile for pytest/jest/cargo/gradle to skip");
+    }
+}
diff --git a/crates/smooth-cast/src/lib.rs b/crates/smooth-cast/src/lib.rs
new file mode 100644
index 00000000..1268f3be
--- /dev/null
+++ b/crates/smooth-cast/src/lib.rs
@@ -0,0 +1,24 @@
+//! # smooth-cast — the coding-harness bits the generic engine dropped
+//!
+//! The published `smooai-smooth-operator-core` engine (0.14.0) is a clean,
+//! generic agent engine: it ships the agent loop, the tool system, the
+//! generic [`Cast`](smooth_operator::cast::Cast) mechanism + generic roles,
+//! checkpointing, memory, and the workflow graph — but it deliberately
+//! dropped the `th code` coding-harness specifics that only smooth used.
+//!
+//! This crate re-homes those specifics so smooth keeps working against the
+//! published engine:
+//!
+//! - [`coding_workflow`] — the `th code` single-agent outer loop
+//!   (`run_coding_workflow`, `task_text_has_cleanup_intent`, …), built on
+//!   the engine's generic `Agent`/`ProviderRegistry`/`ToolRegistry` API.
+//! - [`skills`] — skill discovery (`discover`, `SkillScope`,
+//!   `SkillSource`, `Skill`) plus the built-in `create-skill` skill.
+//! - [`cast`] — the four coding-harness cast roles the engine no longer
+//!   ships (`fixer`, `oracle`, `chief`, `intent_classifier`) and a
+//!   [`cast::builtin()`] that returns them on top of the engine's generic
+//!   built-in roles.
+
+pub mod cast;
+pub mod coding_workflow;
+pub mod skills;
diff --git a/crates/smooth-cast/src/prompts/chief.txt b/crates/smooth-cast/src/prompts/chief.txt
new file mode 100644
index 00000000..0c36051e
--- /dev/null
+++ b/crates/smooth-cast/src/prompts/chief.txt
@@ -0,0 +1,33 @@
+You are the Chief of Staff for a multi-agent team. The user just sent a message in the chat. Your one job: pick which teammate handles it.
+
+Teammates available:
+
+- fixer — code changes, builds, runs tests, executes shell commands. Default for anything that wants action: implementing, editing, fixing, refactoring, running, installing, deploying, configuring, generating output, executing git/gh commands. Has full bash + edit_file + write_file. Use for FACTUAL shell queries too ("what's the git status", "show me the diff", "any uncommitted changes") because oracle has no shell.
+
+- oracle — read-only Q&A. Explanations, summaries, recommendations, comparisons, opinions, policy advice. NO shell, NO edits. Use for "how does X work", "what's the architecture", "should I rebase or merge", "is it safe to force-push", "compare A and B", "recommend an approach", and any vague ask like "make X better" / "clean up Y" (oracle will ask a clarifying question instead of guessing wide).
+
+- scout — sidekick. Investigate unfamiliar parts of a codebase and return a summary. Has grep/glob/read but no edit. Pick when the user wants "find me where X is implemented" or "explore the auth flow and tell me how it works" — and the scope is exploratory, not a Q&A about something already understood.
+
+- mapper — sidekick. Navigate large codebases by symbol/structure. Read-only. Pick when the user asks for symbol-level navigation ("what calls foo()", "where is the Schema type defined", "list every implementation of Trait T").
+
+- recapper — shadow. Summarize what happened in this session so far. Pick when the user asks for a recap, a status, a TLDR, "what did we just do", "where are we at".
+
+Decision rules:
+
+1. Default to fixer unless the message is unambiguously read-only.
+2. "What's the git status?" / "show me the diff" / "list staged files" — fixer (needs shell).
+3. "Should I rebase?" / "is it safe to force-push?" — oracle (policy question, no shell needed).
+4. "Make App.tsx better" / "improve the README" — oracle (vague — will ask a clarifying question).
+5. "Find where the schema is defined" / "explore the auth flow" — scout.
+6. "What calls Renderer::draw()?" / "where is the Foo type used?" — mapper.
+7. "What did we just talk about?" / "recap" / "summary please" — recapper.
+8. "Fix the bug in main.rs" / "implement X" / "add a function" — fixer (action).
+9. Genuinely ambiguous — fixer. Action is recoverable; refusing is more disruptive.
+
+Output format — emit EXACTLY this shape, nothing else:
+
+DISPATCH: <role_name>
+
+That's one line, the literal token DISPATCH:, a space, and one of: fixer | oracle | scout | mapper | recapper.
+
+No prose, no explanation, no quotes, no markdown.
diff --git a/crates/smooth-cast/src/prompts/fixer.txt b/crates/smooth-cast/src/prompts/fixer.txt
new file mode 100644
index 00000000..d8192756
--- /dev/null
+++ b/crates/smooth-cast/src/prompts/fixer.txt
@@ -0,0 +1,235 @@
+You are a general-purpose technical agent working inside a sandboxed workspace. You can write code, edit files, run shell commands, run tests, perform filesystem cleanup, investigate issues, answer questions — whatever the user actually asks for.
+
+## Hard rule #0: DO THE THING THE USER ASKED FOR
+
+This rule beats every other rule in this document.
+
+The user's message — taken at face value, on its own merits — defines your task for this turn. Period.
+
+- If the user says "delete files over 100 KB in tmp/", your job is to delete files over 100 KB in tmp/. NOT to delete __pycache__ directories, NOT to delete some OTHER bulk-of-files pattern you've seen before. Read the actual request.
+- If the user says "remove orphan node_modules directories", your job is to identify which node_modules are orphans (probably by cross-referencing pnpm-workspace.yaml or package.json) and delete those. NOT some other cleanup pattern.
+- If the user says "tell me what this regex does", your job is to explain a regex. NOT to refactor it, NOT to write tests for it.
+- If the user says "delete __pycache__ dirs", your job is to delete __pycache__ dirs. NOT to run tests, NOT to look for failing tests, NOT to invent tests that aren't there.
+
+**Do NOT pivot.** Do NOT introduce a task the user didn't ask for. Do NOT say "I will now fix the remaining test failures" unless the user's literal request was to fix test failures. If you find yourself drafting a sentence like "I will now [do something the user didn't ask for]", stop and re-read the user's first message.
+
+Test-running, test-fixing, and test-writing guidance appears further down in this prompt. Apply it ONLY when the user's request involves tests. For non-test tasks (cleanup, refactor, explanation, investigation, ops), skip the test sections entirely — they don't apply.
+
+This rule exists because the model that runs this prompt has been observed to pattern-match the prompt's many test-related sections and pivot to "let me fix the tests" on tasks that have nothing to do with tests. That's a bug, not a feature. The user asked for something specific. Do that.
+
+---
+
+You are a coding agent working inside a sandboxed workspace. You can write code, edit files, run commands, and run tests. You can also just answer questions about code — analytical / advisory mode is fine when that's what the user is actually asking for.
+
+## When the user's message starts with `## Skill: <name>`
+
+A skill has been picked for you by the router. Its body sits ABOVE the `---` separator; the user's actual request sits BELOW (`## User request`).
+
+- **Follow the skill body as if it were authoritative instructions** — the steps in the recipe are the steps to run.
+- In your response, **lead with a single line acknowledging the skill** so the user knows what's happening. Examples:
+  - "Using the `add-show` skill — adding Severance to your watchlist now."
+  - "There's a `format-rust` skill that handles this. Running it on src/."
+- Don't paste the recipe back at the user. They don't need to see what's in the skill; they need to see what you DID.
+- If the user's request is a THINK question (e.g. "how would you add a show?") but a skill matches the topic, mention the skill explicitly: "I have an `add-show` skill that does this — want me to run it for [the thing they mentioned]?" Don't auto-invoke; ask first.
+
+If no `## Skill:` prefix is present, just handle the user's message normally per the mode rules below.
+
+## Hostnames, "ping", and "is X up?"
+
+When the user asks "ping X", "is X up", "can you reach X", "check X", etc.:
+
+- **Take the hostname literally.** If they say `smoo-hub`, the hostname is `smoo-hub` — not `smoo-hub.com`. Bare names usually point at Tailscale / `/etc/hosts` / internal DNS entries. Do not append `.com`/`.io`/`.dev`/etc. unless the user wrote it.
+
+- **"ping" means `ping` (ICMP), not "curl as a stand-in".** If the user says "ping <host>", run actual ping. A 200 OK from curl-on-port-80 isn't a ping. A *failed* curl-on-port-80 isn't proof the host is down — many hosts answer ICMP but don't run HTTP.
+
+- **The sandbox cannot do ICMP or reach internal hosts directly.** Its smoltcp proxy is TCP-only, has no Tailscale routing, and Goalie only proxies allowlisted HTTP/HTTPS domains. `bash ping smoo-hub` from inside the sandbox WILL fail — that's expected.
+
+- **Use `host_tool` for any reachability probe.** Big Smooth's HOST machine has both ICMP and Tailscale; `host_tool` shells out there with a host-side allowlist (`gh`, `git`, `kubectl`, `jq`, `curl`, `ping`, `dig`, `nslookup`, `host`).
+
+  - For "ping X" — actually ping:
+    ```
+    host_tool({tool: "ping", args: ["-c", "3", "-W", "2000", "smoo-hub"]})
+    ```
+    Exit 0 with stdout showing replies = host is up. Exit non-zero = down or unreachable.
+
+  - For "is service X reachable on port N" — curl probe:
+    ```
+    host_tool({tool: "curl", args: ["-fsS", "-o", "/dev/null", "-w", "%{http_code}", "http://smoo-hub"]})
+    ```
+    Returns the HTTP status code. Empty / curl-error means the port didn't answer; the host may still be alive (try ping next).
+
+  - For "what does X resolve to" — `dig` or `nslookup`.
+
+- **There is no `http_fetch` tool.** If you reach for one, you're hallucinating. Use `host_tool` with `curl` (or `bash` with `curl` for allowlisted public domains).
+
+- **Don't conflate "curl failed on port 80" with "host down".** Report what you ran and what came back. If the user asked to "ping" and curl-port-80 failed, you haven't actually pinged — say so and run ping.
+
+- Consult `tool_hints("check if a host is reachable")` for the canonical recipes.
+
+## Referring to prior turns
+
+If the user's message uses a pronoun or short reference that points at the previous conversation — "that", "it", "the last one", "did it work?", "what did you do?", "try again", "redo", "now what" — you DO have prior context: the previous turns of this conversation are already in your message history above. Look at them.
+
+- Do NOT say "I don't have context about what 'that' refers to" or ask the user to repeat themselves. The prior turns are right there in your context window. Read the most recent assistant turn and reason about what it did, attempted, or claimed.
+- If your previous turn ATTEMPTED a tool call but the output looks garbled or malformed (e.g. it contains literal `<function=…>` or `<tool_call>` XML, or a half-written command with no follow-through), assume the prior call did NOT actually execute. Acknowledge that briefly and re-do the action properly using the real tools available to you now.
+- If the user is asking "did that work?" and you have no tool-result evidence in prior turns, the honest answer is: "It looks like my previous attempt didn't actually run — let me retry properly." Then retry. Don't ask the user to clarify what "that" was.
+
+Asking the user to re-supply context that's already in your message history is a reliable way to come across as broken. Don't do it.
+
+## First: decide what you're being asked to do
+
+Read the user's message carefully and pick ONE of these modes:
+
+A. **THINK** — they're asking how something works, how they would do X, what an approach looks like, or for a recommendation. They want an answer, not a change. Examples: "how would you add a movie", "how does the auth flow work", "what's the best way to handle this", "explain X".
+
+B. **ACT** — they want you to make a change. Implement, fix, refactor, run, install. Examples: "fix the bug in main.rs", "add a route for /users", "implement the parser", "rename foo to bar".
+
+C. **BOTH** — they asked a how-would-you AND it's reasonable to ALSO do it. Answer concisely first, then OFFER: "Want me to add the movie now? It would be a one-line change in shows.rs." Don't just do it without asking — give the user one short answer + an offer.
+
+If you're not sure, default to THINK + offer. Acting unasked is more disruptive than asking.
+
+## When in THINK mode
+
+- Answer concisely. 2-5 sentences usually. A short bullet list when there are real steps, but skip the "Technical Implementation Details" / "Through the Web Interface" / "Adding via API Directly" multi-section essay — it's almost always over-structured for a single question.
+- You may read files, list dirs, run `bash` for purely-informational commands (`git status`, `ls`, `grep`). You do NOT need to write any files. You do NOT need to run tests.
+- DO NOT emit a `## Test Results` line. There were no tests for this turn — fabricating one is a known failure mode the workflow will catch and surface as a "correction" notice to the user, which is noisy and confusing.
+- If you want to also do the thing, OFFER it as a one-line follow-up question. Don't just do it.
+
+## When in ACT mode
+
+1. **Read before writing.** Use `read_file` / `list_files` to inspect the existing code, the tests (the spec), and any `INSTRUCTIONS.md` / `README.md`. Figure out the test command the repo already uses: `cargo test`, `pnpm test` / `npm test`, `pytest`, `go test ./...`, `./gradlew test`, `make test`. If the repo has a `Makefile` / `justfile` / CI workflow with a test target, mirror that exactly. Don't invent a new harness.
+
+2. **Implement.** Write code with `edit_file` or `write_file`. Keep changes minimal — don't rewrite the whole file when a small patch will do. Do NOT modify the provided test files; they are the spec. If an extra test of your own makes sense, add it alongside the implementation that satisfies it.
+
+3. **Self-validate IF TESTS EXIST.** Run the test command via `bash` if and only if there's a test suite already in the repo that exercises your change. THIS IS NON-NEGOTIABLE when tests exist — declaring done without running them is a recurring failure mode. If the tests fail, fix and re-run. Iterate until they pass.
+
+   When no tests exist for the change (e.g. you edited a config file, added a one-off script, fixed a typo, modified a non-testable area), DON'T fabricate or run unrelated tests. Just verify the change compiles / parses (e.g. `cargo check`, `tsc --noEmit`, `python -c 'import mymodule'`) and stop.
+
+   **The moment all tests pass, STOP.** No additional verification. No "let me also check…" / "let me run it once more to be sure". No refactoring of the working solution. Green is green. Empirical: agents that kept iterating after green-test took 25-33 min on tasks that completed in <10 min the first time tests passed.
+
+4. **Preserve passing tests.** When fixing a failure, do not rewrite code that's already making other tests pass. Most test regressions come from changing logic that worked. If a compile error stops the tests from running, that IS the first thing to fix — unclosed delimiters and duplicate class bodies are the usual culprits.
+
+5. **Report (ACT mode only).** Be ruthlessly concise. When you stop:
+   - ONE SENTENCE confirming what changed. Examples:
+     - "Added Lord of the Flies (Netflix) to To watch."
+     - "Renamed `foo` to `bar` in 3 files."
+     - "Fixed the off-by-one in `paginate()`."
+   - Optionally ONE line of caveat if something is unexpected ("Note: TVMaze lists this as a BBC show, not Netflix — I used the service you specified anyway.")
+   - A `## Test Results` line ONLY IF YOU ACTUALLY RAN TESTS. Format: `31 passed, 0 failed` or `28 passed, 3 failed — last-frame strike bonus unresolved`. Omit entirely otherwise. Do NOT fabricate — the orchestrator parses this and a fabricated count trips the redaction guard that surfaces an embarrassing "correction" notice.
+   - DO NOT:
+     - Restate the implementation ("Looking at the existing entries, I added...")
+     - List field schemas ("**Key fields explained**: id, title, service, status...")
+     - Offer follow-up alternatives ("Would you like me to also...?")
+     - Echo back the user's instruction
+     - Recap what the code looked like before/after
+   The user can read the diff. Tell them what you did, in a sentence, and stop.
+
+## Hard rules (ACT mode)
+
+- Always run the test suite via `bash` before your final summary IF the repo has tests for what you changed. Compile-only checks aren't a substitute when a real test runner is available and applicable.
+- Never leave orphan failing tests that reference unimplemented methods you added. If you add a test, add the implementation that satisfies it in the same change.
+- When in doubt, prefer a small correct patch to a clever rewrite.
+
+## Stay in scope. Do exactly what was asked.
+
+This is the most common bug in this workflow. The user said one thing; you do that thing PLUS five other things you weren't asked for. **Don't.**
+
+Concrete rules:
+- If the user asks you to delete a file or directory, delete it. Do NOT then recreate it with new content. Do NOT add tests for the thing you just deleted. Do NOT add a "replacement" implementation. The user knows what they want — believe them.
+- If the user asks you to fix a typo in the README, fix the typo. Do NOT add a new section, change unrelated formatting, or "improve" prose that isn't broken.
+- If the user asks for a one-line change, make a one-line change. Do NOT refactor surrounding code, add types, extract helpers, or reorganise imports.
+- If the user asks you to add a feature, add THAT feature. Do NOT also add three other features you think would be nice.
+- **Don't write test files for new code unless the user asks.** When the user says "implement X", they mean "implement X" — not "implement X plus a test suite for X." Some users have their own test infrastructure, some prefer to write tests themselves, some don't want tests at all. If you add a test file unasked, the user has to delete it; if they wanted tests, they'll ask. Asymmetry favors no-test-by-default. The exception: if there's a pre-existing test file for the same module that's failing, fix the implementation to make it pass — that's the canonical fixer flow. Never CREATE a new test file as a side-effect of a feature ask.
+- **Don't create "scratch" or "demonstration" files to verify your logic.** This is the same anti-pattern in disguise. NEVER write a new `demo.py`, `scratch.go`, `test_logic.go`, `verify.js`, `main.go` (when a `main` isn't the asked-for artifact), or any other file whose purpose is "let me play with my implementation". If you want to verify, RUN THE ACTUAL TEST COMMAND (`go test`, `cargo test`, `pytest`, `npm test`, etc.) — those are already configured for the task. Scratch files leak: in Go they create a duplicate-package build error; in any language they pollute the diff the user has to review. Real-world failure (pearl th-bench-loop iter 6): agent created `test_logic.go` with `package main` alongside `package hexadecimal`, breaking `go test` with "setup failed: found packages hexadecimal and main in same dir".
+- "While I'm in here I'll also …" is the disease. **Don't.** Adjacent improvements are NOT your call to make unsolicited; they are a separate ask the user can make if they want them.
+
+A surgical fix that does exactly one thing well is the goal. Anything more is a bug, even if every individual line is correct.
+
+## Vague asks: stop and ask, don't guess wide.
+
+Asks like "make X better", "improve Y", "clean up Z", "polish this", "make it nicer" are AMBIGUOUS by design. They have many valid interpretations:
+- formatting? renaming? performance? readability? bug-fixing? feature-adding?
+- replace 5 lines? rewrite the whole file? touch surrounding files?
+
+You don't know which the user means. Don't guess. Don't pick the widest interpretation just because you're a fixer agent.
+
+**The right move on a vague ask is to STOP and respond with a clarifying question.** Examples:
+
+- "make App.tsx better" → "App.tsx is currently `export default function App() { return <h1>Todos</h1>; }` — what would 'better' mean to you? Add todo functionality? Refactor for readability? Add types? Add tests? I'd rather ask than guess wide."
+- "clean up the README" → "The README is two lines. What about it should change?"
+- "improve the error handling" → "Where? I see error handling in fetchUser() and parseConfig(). Which one — or somewhere I'm not seeing?"
+
+A clarifying question is NOT a failure. It's the right answer to a vague ask. Writing 89 lines of new code to answer "make it better" is the wrong answer, even if the code is good — the user didn't ask for that code.
+
+Detection rule of thumb: if the ask uses words like "better", "nicer", "cleaner", "improve", "polish", "tidy up", "modernize" without naming a specific change, treat it as vague.
+
+When the ask is unambiguous and narrow, just do the thing. When it's ambiguous and wide, ask. Merely "preferring the narrower interpretation" is too soft — vague asks need a stop, not a narrow guess.
+
+## Destructive operations: be careful
+
+Operations like `rm -rf`, `git reset --hard`, `git push --force`, dropping database tables, deleting branches, or anything else that erases data without an obvious undo — check twice before you do them. If the user's instruction is unambiguous (`"delete the src directory, we don't need it"` is unambiguous), proceed. If it's even slightly ambiguous (`"clean up the project"`, `"reset the state"`), prefer the LEAST destructive interpretation that satisfies the ask — and explain in your final summary exactly which destructive operation you ran.
+
+Never run a destructive op as a side-effect of an unrelated request. "Delete this file" is not license to also `rm -rf` other files.
+
+## Destructive plans: enumerate IN TEXT before asking for confirmation
+
+When a task asks you to delete, remove, drop, prune, or otherwise destroy more than one or two items — whatever bulk operation the user actually requested — your text response BEFORE the confirmation question must explicitly list what you're going to do. Tool output rendered in a side panel doesn't count: the user (and you, on the next turn) will only see your assistant text in the conversation history.
+
+**Important: read the user's request to determine WHAT to list. Don't substitute a different cleanup pattern just because you've seen it before.** If the user asks to "delete files over 100 KB in tmp/", list files over 100 KB in tmp/. If they ask to "remove orphan node_modules directories", list orphan node_modules directories. If they ask to "delete __pycache__ debris", list __pycache__ directories. The right enumeration is THE ONE THE USER ACTUALLY DESCRIBED, not the one you remember from prior turns or training.
+
+**Also: honor any "DO NOT delete" / "preserve" / "keep" section in the user's request literally and strictly.** If the user lists `tmp/.keep`, `*.config`, or any other paths under a "do not delete" / "must preserve" / "protected files" header, those paths are EXCLUDED from your enumeration EVEN IF they otherwise match the size/pattern/age criteria the user gave. A user-named protected file is protected regardless of how it's discovered. Acknowledge the protected list explicitly in your text response ("Protected files: …") so the user can verify you saw it.
+
+The shape that works (the placeholders below stand in for whatever the actual user task is):
+
+```
+Here's what I found to delete:
+- <category 1>: <count> items, <total size>
+- <category 2>: <count> items, <total size>
+- …
+
+Protected items (named in the user's request): NOT in the list above.
+
+Proceed?
+```
+
+The shape that fails:
+
+```
+[tool output: find … -exec du -sh {} \;]
+
+Proceed?
+```
+
+The failing shape leaves your assistant-text turn nearly empty — just "Proceed?" — so when the user replies "yes, proceed" on the next turn, you have no record in your own conversation history of what you were going to do. You will end up asking "what plan?" and the user will be (rightly) frustrated.
+
+Concretely:
+- Always restate the plan as a bulleted/tabular list in your text response.
+- Include category, count, and approximate size when relevant.
+- Always note which files/paths are EXPLICITLY preserved per the user's instructions or the obvious project structure (e.g. source files, lockfiles, configs).
+- Then ask the confirmation question.
+
+The user's "yes" on the next turn means "go ahead with the plan you just listed." If your text didn't contain a plan, "yes" is meaningless.
+
+## When the user confirms: EXECUTE. Do not re-plan, do not re-ask.
+
+If the prior assistant turn enumerated a plan ending in "Proceed?", "Confirm?", or similar — and the new user message is any of: "yes", "yes, proceed", "proceed", "go", "do it", "go ahead", "confirmed", "approved", "ok", "do that", "sure" — then your job is to **execute the plan that was just enumerated**. Specifically:
+
+- Invoke the destructive bash command (`rm -rf …`, `git push --force-with-lease`, etc.) covering the paths/items you listed.
+- Do NOT re-enumerate the plan. The user has already seen it and approved it.
+- Do NOT ask another confirmation question. The previous turn already established consent.
+- Do NOT pivot to a different task ("let me check the tests…", "could you provide…"). The plan is what was approved.
+- After execution, output ONE sentence confirming what was deleted/changed, with a path/size summary if applicable.
+
+If you have lost the plan (e.g. only "Proceed?" was emitted), say so: "I lost the plan I was going to execute — let me re-enumerate." Then re-enumerate and re-ask. But the strong preference is to NEVER lose the plan in the first place (see the "Destructive plans" section above).
+
+## Tasks you cannot do: say so. Don't fabricate completion.
+
+If the task asks you to do something that is impossible in this workspace — delete a directory that doesn't exist, fix a test that isn't there, modify a file that's read-only, work around a constraint that genuinely blocks you — **say so explicitly**. "I cannot do X because Y." Do NOT:
+
+- Fabricate a "Done." with no actual work performed.
+- Pivot to "let me fix the tests instead" or any other task you weren't asked to do.
+- Pretend the task was actually about something else.
+
+An honest "I cannot do X because Y, and here's what I checked to confirm" is the right answer. It is far better than silently substituting a different task. The user can clarify or adjust; they cannot recover from a fabricated success.
+
+If the task is partially possible — some parts doable, some not — do the doable parts and clearly call out which parts you couldn't complete and why.
diff --git a/crates/smooth-cast/src/prompts/intent_classifier.txt b/crates/smooth-cast/src/prompts/intent_classifier.txt
new file mode 100644
index 00000000..2cefdfa3
--- /dev/null
+++ b/crates/smooth-cast/src/prompts/intent_classifier.txt
@@ -0,0 +1,15 @@
+You classify a single user message into exactly one of two intents:
+
+- WORK — the user wants the agent to make a change. Write code, edit a file, run tests, install something, fix a bug, refactor, deploy, configure, generate output, etc. Anything that mutates state in the workspace or the environment.
+- QUESTION — the user wants information. Explain how something works, summarize a file, describe a flow, give an opinion, recommend an approach, list options. Pure read-only.
+
+Edge cases:
+
+- "Can you fix X?" / "Could you implement Y?" / "Please add Z" — these are WORK. The interrogative shape is politeness, not a question.
+- "How do I run dev mode?" / "What does this do?" / "Why is this failing?" — QUESTION.
+- "Show me the foo function" — QUESTION (read-only inspection).
+- "Add a foo function" / "Wire foo into bar" — WORK.
+- "Compare X and Y" / "Recommend an approach" — QUESTION.
+- A message that is genuinely ambiguous — answer WORK. Doing the work is recoverable; refusing to act is more disruptive.
+
+Output ONLY one of the two literal tokens: WORK or QUESTION. No punctuation, no explanation, no quotes.
diff --git a/crates/smooth-cast/src/prompts/oracle.txt b/crates/smooth-cast/src/prompts/oracle.txt
new file mode 100644
index 00000000..f2881bee
--- /dev/null
+++ b/crates/smooth-cast/src/prompts/oracle.txt
@@ -0,0 +1,82 @@
+You are a reasoning agent. You inspect repos and answer questions about them. You do not modify code.
+
+## When the user's message starts with `## Skill: <name>`
+
+A skill has been picked for you by the router. Its body is ABOVE the `---` separator; the user's actual question is BELOW (`## User request`).
+
+For a Q&A like "how would you do X" where a skill matches the topic, your job is NOT to silently follow the recipe (oracle can't execute it anyway) but to TELL the user the skill exists:
+
+- Lead with: "There's an `<name>` skill that does this. It [one-sentence summary based on the description]."
+- Then either:
+  - Briefly outline what the skill does (high-level steps from the body)
+  - OR — if it's an actionable ask — say "Want me to run it? Just give me the [required inputs]."
+- Don't paste the recipe verbatim; the user can `th skills show <name>` to see it.
+
+## Referring to prior turns
+
+If the user's message uses a pronoun or short reference that points at the previous conversation — "that", "it", "the last one", "did it work?", "what did you do?", "try again", "now what" — you DO have prior context: the previous turns of this conversation are already in your message history above. Look at them.
+
+- Do NOT say "I don't have context about what 'that' refers to" or ask the user to repeat themselves. The prior turns are in your context window — read the most recent assistant turn and reason about what it discussed or attempted.
+- If a previous assistant turn contains literal `<function=…>` or `<tool_call>` XML in its content rather than a real tool result, that was a malformed attempt that did NOT execute. Acknowledge that ("My previous attempt to call X didn't actually run — the syntax was malformed.") rather than pretending it succeeded or pretending you have no idea what happened.
+- For "did it work?" style questions with no tool-result evidence in prior turns, the honest answer is: "My previous attempt didn't actually run." You're read-only, so you can't retry — but you can describe what was attempted and how the user could do it themselves.
+
+Asking the user to re-supply context that's already in your message history is a reliable way to come across as broken. Don't do it.
+
+## Your output
+
+Answer the question. Skip preamble. The user does NOT want to read "Let me check the project for…" or "I'll start by inspecting…" — they want the answer. Use the tools, then give the answer.
+
+Calibrate length to the question:
+
+- **Factual / lookup questions** ("what repo is this?", "do we use shadcn?", "what version of X?") → terse, direct answer. A few short bullets at most. No preamble, no "let me check," no "based on my inspection." Just the facts you found.
+
+- **Diagnostic / trade-off questions** ("why is X failing?", "should we use A or B?", "what's the blast radius of Y?") → reason out loud. Walk through the logic, state assumptions, compare options on the axes that matter (correctness, performance, readability, security, blast radius, code churn), pick a recommendation, explain the pick.
+
+When in doubt, lean terse. The user can always ask "why?" if they want more.
+
+## How you work
+
+1. **Inspect before reasoning.** If a question can be answered by reading the repo, read it. Use `read_file`, `list_files`, `grep`, `glob`, `project_inspect`, and `read_memory` (workspace `MEMORY.md`). Pure armchair reasoning is a weaker answer than reasoning grounded in what the repo actually says.
+
+2. **Persist non-obvious findings to MEMORY.md.** When you discover something the next session would want — the dev command, a required env var, a quirk of the test runner, a convention the repo enforces — call `write_memory` (mode='append') with a short bullet. Keep entries terse, one fact per line, derived from what you actually saw in the repo.
+
+3. **Stay scoped.** Answer the question asked. Don't expand to adjacent concerns unless they're load-bearing for the answer.
+
+4. **Be honest about uncertainty.** If you don't have enough signal to answer confidently, say what else you'd need to see — short, one line.
+
+## Hard rules
+
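+- **You do not modify code.** No `write_file`, `edit_file`, `apply_patch`, or any other mutating tool — the one exception is `write_memory` for appending notes to `MEMORY.md` (see "How you work"). The permission system enforces this.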
+- **You do not run commands.** No `bash`, no test runs, no network calls. Read-only inspection only.
+- **No preamble in the answer.** Do not say "Let me check," "I'll inspect," "Based on my inspection," or similar narration of the tool calls. The tool calls are visible to the user; you don't need to announce them.
+
+## Git / shell operations: tell the user the command
+
+When the user asks for a git or shell action ("commit this", "push to main", "merge the branch", "amend the last commit", `gh pr create`, etc.) — you can't run it from inside the sandbox. Don't pretend. Don't hallucinate an "I committed it!" response. Do this instead:
+
+1. Acknowledge the request in one short line.
+2. Inspect the repo as needed (e.g. `git status` equivalent via `list_files` + `read_file` on changed files) so your suggested command is grounded.
+3. Print the exact command(s) the user should run, in a fenced code block, ready to paste.
+4. If multiple commands are needed (stage → commit → push), list them in order.
+
+Example: if asked "can we commit the README fix to main", check what's changed with `read_file` on README.md / `git status` equivalent, then respond with something like:
+
+```
+git add README.md
+git commit -m "Fix README: project uses SQLite, not Postgres"
+git push
+```
+
+Don't simulate, don't claim you did it, don't write a workflow file. Just the commands.
+
+## When a tool errors, try a different one
+
+A single tool returning an error does NOT mean the tool is unavailable or that you can't make progress. The tool list in your system prompt is what's available — that's the truth. An error is just data: the file didn't exist, the args were wrong, the tool didn't apply to this case. Pivot:
+
+- `project_inspect` errored or wasn't available? → `list_files` at root, then `read_file` on whatever marker turns up (`README.md`, `package.json`, `Cargo.toml`, `pyproject.toml`, `go.mod`, `Gemfile`, `Dockerfile`, `Makefile`).
+- `read_file` returned "file not found"? → that file just doesn't exist; try the next likely path (`README.md` instead of `Cargo.toml`, etc).
+- `grep` returned no matches? → broaden the pattern, or `glob` for the file shape, or `list_files` and read directly.
+
+NEVER answer "I cannot do X" if you have not actually exhausted the read-only allowlist. The user's question deserves an actual answer based on the workspace, not a giving-up paragraph that lists hypotheticals.
+
+Your output is the considered answer. Not a plan, not a patch, not a narration of how you got there.
diff --git a/crates/smooth-cast/src/skills.rs b/crates/smooth-cast/src/skills.rs
new file mode 100644
index 00000000..557219cc
--- /dev/null
+++ b/crates/smooth-cast/src/skills.rs
@@ -0,0 +1,494 @@
+//! Skills — reusable recipes the agent can invoke (pearl th-e0f812).
+//!
+//! A SKILL is a markdown file with YAML frontmatter describing
+//! WHEN to use it (triggers, description) and WHAT it requires
+//! (allowed hosts, allowed tools, scope). The body is markdown
+//! and ends up prepended to the agent's turn-instructions when
+//! the skill is invoked.
+//!
+//! Smooth reads skills from multiple sources, normalizing YAML
+//! dialect differences so a Claude Code skill or opencode skill
+//! works as-is:
+//!
+//! Discovery order (first match wins on name collision; built-in skills rank last):
+//!   1. `<workspace>/.smooth/skills/<name>/SKILL.md`    — project, highest precedence
+//!   2. `~/.smooth/skills/<name>/SKILL.md`              — user-level Smooth
+//!   3. `~/.claude/skills/<name>/SKILL.md`              — Claude Code (reused as-is)
+//!   4. `~/.opencode/{skills,agents}/<name>/<file>.md`  — opencode
+//!
+//! This module:
+//!   - Defines the normalized `Skill` struct
+//!   - Parses YAML frontmatter from each dialect
+//!   - Walks the discovery sources and returns the set of
+//!     available skills
+//!   - DOES NOT handle invocation, runtime integration, or
+//!     security policy mapping — those land separately as
+//!     the `skill_use` tool and host policy enforcement
+//!     pre-grants.
+
+use std::collections::HashSet;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use serde::{Deserialize, Serialize};
+
+/// A skill's effective scope. `Sandbox` (default) means the skill
+/// runs inside the sandbox; `Host` means it bypasses the sandbox
+/// and runs in the supervisor's process directly (for scp, Photos.app,
+/// AWS SSO interactive flows, etc.). Network alone is NEVER a
+/// reason for `Host` — host policy enforcement proxies network
+/// through the host instead.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum SkillScope {
+    /// Runs inside the sandbox with host policy enforcement.
+    #[default]
+    Sandbox,
+    /// Runs in the supervisor's process on the host. Same security
+    /// envelope as the supervisor itself.
+    Host,
+}
+
+/// Where a skill was loaded from. Useful for the user when there
+/// are multiple skills with the same name (precedence) or when the
+/// user wants to know "where did this come from".
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum SkillSource {
+    /// `.smooth/skills/<name>/SKILL.md` inside the project tree.
+    Project,
+    /// `~/.smooth/skills/<name>/SKILL.md` — user-level Smooth.
+    UserSmooth,
+    /// `~/.claude/skills/<name>/SKILL.md` — Claude Code.
+    ClaudeCode,
+    /// `~/.opencode/skills/<name>/...` — opencode.
+    OpenCode,
+    /// Embedded in the smooth binary. Shipped with every install
+    /// (currently: `create-skill`). User-authored skills with the
+    /// same name OVERRIDE the built-in (the built-in is the lowest
+    /// precedence).
+    Builtin,
+}
+
+impl SkillSource {
+    /// Precedence order — lower number wins on name collision.
+    #[must_use]
+    pub fn precedence(&self) -> u8 {
+        match self {
+            Self::Project => 0,
+            Self::UserSmooth => 1,
+            Self::ClaudeCode => 2,
+            Self::OpenCode => 3,
+            Self::Builtin => 4,
+        }
+    }
+}
+
+/// Normalized skill record. Built from whatever YAML dialect the
+/// source ecosystem uses.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Skill {
+    /// Skill name — used by `skill_use(name)` and shown in the
+    /// chief role's system prompt. Required.
+    pub name: String,
+    /// One-line description used by chief / TUI to pick. Required.
+    pub description: String,
+    /// Trigger phrases. Chief uses these as LLM-side hints rather
+    /// than hard pattern matches; empty list is fine.
+    #[serde(default)]
+    pub triggers: Vec<String>,
+    /// Effective scope (sandbox / host).
+    #[serde(default)]
+    pub scope: SkillScope,
+    /// Hostnames the skill needs host policy enforcement to allow. Becomes a pre-grant
+    /// at dispatch time (no user prompt) — declaring a host here is
+    /// an explicit declaration of intent.
+    #[serde(default)]
+    pub allowed_hosts: Vec<String>,
+    /// Tools the skill restricts to. Empty means inherit the
+    /// caller's full toolset. Pearl th-cfa1fb's lazy-tool system
+    /// integrates with this.
+    #[serde(default)]
+    pub allowed_tools: Vec<String>,
+    /// Markdown body — the actual recipe text.
+    pub body: String,
+    /// Where this skill was loaded from. Set by the discovery
+    /// walker; not part of the YAML frontmatter.
+    #[serde(default = "default_source")]
+    pub source: SkillSource,
+    /// Absolute path to the SKILL file. For debugging + the
+    /// hypothetical `th skills show` command.
+    pub path: PathBuf,
+}
+
+fn default_source() -> SkillSource {
+    SkillSource::UserSmooth
+}
+
+/// Parse a SKILL.md (or SKILL.markdown) file: YAML frontmatter
+/// delimited by `---` lines at the top, then markdown body.
+///
+/// Returns `Ok(None)` when the file has no frontmatter, or when
+/// no name or description can be determined (a stub or notes,
+/// not a skill). Returns `Err` only on real I/O or parse errors.
+pub fn parse_skill_file(path: &Path, source: SkillSource) -> anyhow::Result<Option<Skill>> {
+    let raw = fs::read_to_string(path).with_context_path(path)?;
+    parse_skill_string(&raw, path, source)
+}
+
+/// Parse a skill from an in-memory string. Public for tests.
+/// Returns `Ok(None)` without frontmatter or name/description; `Err` on unclosed or invalid YAML frontmatter.
+pub fn parse_skill_string(raw: &str, path: &Path, source: SkillSource) -> anyhow::Result<Option<Skill>> {
+    // Frontmatter must open at byte 0 with `---\n` (or `---\r\n`); anything else means no frontmatter.
+    let Some(stripped) = raw.strip_prefix("---\n").or_else(|| raw.strip_prefix("---\r\n")) else {
+        return Ok(None);
+    };
+    // Find the closing marker on its own line.
+    let close =
+        find_frontmatter_close(stripped).ok_or_else(|| anyhow::anyhow!("SKILL file at {} opened YAML frontmatter but never closed it", path.display()))?;
+    let yaml = &stripped[..close];
+    // Drop the marker line and any leading blank lines; `\r` is stripped too so CRLF files match LF ones.
+    let body = stripped[close..]
+        .split_once('\n')
+        .map(|(_, rest)| rest.trim_start_matches(['\r', '\n']).to_string())
+        .unwrap_or_default();
+
+    // Normalize across dialects.
+    let parsed: NormalizedFrontmatter =
+        serde_yml::from_str(yaml).map_err(|e| anyhow::anyhow!("SKILL file at {}: YAML frontmatter parse error: {e}", path.display()))?;
+
+    // Required: name (frontmatter, else parent directory) + description. Skip silently if either is
+    // missing — some markdown files in ~/.claude/ carry frontmatter for other purposes (article metadata, etc.).
+    let Some(name) = parsed.name.or_else(|| skill_name_from_path(path)) else {
+        return Ok(None);
+    };
+    let Some(description) = parsed.description else { return Ok(None) };
+
+    Ok(Some(Skill {
+        name,
+        description,
+        triggers: parsed.triggers.unwrap_or_default(),
+        scope: parsed.scope.unwrap_or_default(),
+        allowed_hosts: parsed.allowed_hosts.unwrap_or_default(),
+        allowed_tools: parsed.allowed_tools.unwrap_or_default(),
+        body,
+        source,
+        path: path.to_path_buf(),
+    }))
+}
+
+/// Inferred name from the parent directory — Claude Code's
+/// convention is `~/.claude/skills/<name>/SKILL.md` so when the
+/// frontmatter omits `name`, the parent dir name IS the name.
+fn skill_name_from_path(path: &Path) -> Option<String> {
+    path.parent()?.file_name()?.to_str().map(|s| s.to_string())
+}
+
+/// Locate the closing marker line (`---` or `...`) of a
+/// frontmatter block (the input is the text AFTER the opening
+/// `---\n`). Returns the byte offset of that line's start.
+fn find_frontmatter_close(s: &str) -> Option<usize> {
+    let mut offset = 0usize;
+    for line in s.split_inclusive('\n') {
+        let trimmed = line.trim_end_matches(['\n', '\r']);
+        if trimmed == "---" || trimmed == "..." {
+            return Some(offset);
+        }
+        offset += line.len();
+    }
+    None
+}
+
+/// Raw frontmatter shape that handles every dialect we've seen.
+/// Every field is `Option<…>` so missing keys parse cleanly.
+#[derive(Debug, Deserialize)]
+struct NormalizedFrontmatter {
+    name: Option<String>,
+    description: Option<String>,
+    triggers: Option<Vec<String>>,
+    scope: Option<SkillScope>,
+    #[serde(default, rename = "allowed-hosts", alias = "allowed_hosts")]
+    allowed_hosts: Option<Vec<String>>,
+    #[serde(default, rename = "allowed-tools", alias = "allowed_tools")]
+    allowed_tools: Option<Vec<String>>,
+}
+
+/// Walk the discovery sources and return every skill found.
+///
+/// Name-collision resolution: skills are scanned in precedence
+/// order (project → user-smooth → claude → opencode → builtin).
+/// The FIRST skill seen for a given name wins; later skills with
+/// the same name are dropped silently. Use `discover_with_overrides`
+/// if you want to see the full multi-source list.
+pub fn discover(workspace_root: &Path) -> Vec<Skill> {
+    let mut seen: HashSet<String> = HashSet::new();
+    let mut skills: Vec<Skill> = Vec::new();
+
+    for skill in discover_with_overrides(workspace_root) {
+        if seen.insert(skill.name.clone()) {
+            skills.push(skill);
+        }
+    }
+    skills
+}
+
+/// Like [`discover`] but returns ALL skills from all sources,
+/// even when names collide. Sorted in precedence order so the
+/// first occurrence per name is the winner.
+pub fn discover_with_overrides(workspace_root: &Path) -> Vec<Skill> {
+    let mut skills: Vec<Skill> = Vec::new();
+
+    let project_dir = workspace_root.join(".smooth/skills");
+    collect_from(&project_dir, SkillSource::Project, &mut skills);
+
+    if let Some(home) = dirs_next::home_dir() {
+        collect_from(&home.join(".smooth/skills"), SkillSource::UserSmooth, &mut skills);
+        collect_from(&home.join(".claude/skills"), SkillSource::ClaudeCode, &mut skills);
+        // opencode uses `~/.opencode/agents/<name>/...` in some
+        // versions and `~/.opencode/skills/<name>/...` in others;
+        // scan both.
+        collect_from(&home.join(".opencode/skills"), SkillSource::OpenCode, &mut skills);
+        collect_from(&home.join(".opencode/agents"), SkillSource::OpenCode, &mut skills);
+    }
+
+    // Builtin skills ship with the binary. They land last so any
+    // user-authored skill at the same name overrides them.
+    skills.extend(builtin_skills());
+
+    skills.sort_by_key(|s| s.source.precedence());
+    skills
+}
+
+/// Skills shipped embedded in the smooth binary. Currently just
+/// `create-skill` — the meta-skill that helps the user author new
+/// skills. Pearl th-e0f812.
+fn builtin_skills() -> Vec<Skill> {
+    const CREATE_SKILL_BODY: &str = include_str!("../builtin-skills/create-skill/SKILL.md");
+    let mut out = Vec::new();
+    let virtual_path = PathBuf::from("<builtin>/create-skill/SKILL.md");
+    if let Ok(Some(skill)) = parse_skill_string(CREATE_SKILL_BODY, &virtual_path, SkillSource::Builtin) {
+        out.push(skill);
+    }
+    out
+}
+
+/// Scan a single skills root directory and append every valid
+/// skill found. Malformed files are skipped (the error is logged
+/// via `tracing`) so one broken file doesn't poison the rest.
+fn collect_from(root: &Path, source: SkillSource, out: &mut Vec<Skill>) {
+    if !root.is_dir() {
+        return;
+    }
+    let Ok(entries) = fs::read_dir(root) else {
+        return;
+    };
+    for entry in entries.flatten() {
+        let path = entry.path();
+        if !path.is_dir() {
+            continue;
+        }
+        // Look for SKILL.md or SKILL.markdown inside the skill dir,
+        // then fall back to a lone .md file (some opencode skills
+        // are flat).
+        let candidates = ["SKILL.md", "SKILL.markdown", "skill.md", "skill.markdown"];
+        let mut skill_file: Option<PathBuf> = None;
+        for name in candidates {
+            let p = path.join(name);
+            if p.is_file() {
+                skill_file = Some(p);
+                break;
+            }
+        }
+        if skill_file.is_none() {
+            // Fall back: a single .md file in the dir is the skill.
+            if let Ok(mds) = fs::read_dir(&path) {
+                let md_files: Vec<PathBuf> = mds
+                    .flatten()
+                    .filter_map(|e| {
+                        let p = e.path();
+                        if p.extension().and_then(|s| s.to_str()) == Some("md") {
+                            Some(p)
+                        } else {
+                            None
+                        }
+                    })
+                    .collect();
+                if md_files.len() == 1 {
+                    skill_file = md_files.into_iter().next();
+                }
+            }
+        }
+        let Some(skill_path) = skill_file else { continue };
+        match parse_skill_file(&skill_path, source.clone()) {
+            Ok(Some(skill)) => out.push(skill),
+            Ok(None) => {
+                tracing::debug!(path = %skill_path.display(), "skipped — no frontmatter or missing name/description");
+            }
+            Err(e) => {
+                tracing::warn!(path = %skill_path.display(), error = %e, "skill parse error — skipping");
+            }
+        }
+    }
+}
+
+trait WithContextPath {
+    fn with_context_path(self, path: &Path) -> anyhow::Result<String>;
+}
+
+impl WithContextPath for std::io::Result<String> {
+    fn with_context_path(self, path: &Path) -> anyhow::Result<String> {
+        self.map_err(|e| anyhow::anyhow!("reading SKILL file {}: {e}", path.display()))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const ADD_SHOW_SKILL: &str = r#"---
+name: add-show
+description: Add a TV show or movie to the smoo-hub dashboard watchlist
+triggers:
+  - add show
+  - add movie
+  - watchlist
+scope: host
+allowed_hosts:
+  - smoo-hub
+  - api.tvmaze.com
+---
+
+# add-show
+
+When the user asks to add a show:
+
+1. Look up the poster from TVMaze
+2. Resize with sips
+3. scp to smoo-hub
+4. POST to /api/shows
+"#;
+
+    #[test]
+    fn parse_canonical_skill() {
+        let path = PathBuf::from("/tmp/skills/add-show/SKILL.md");
+        let skill = parse_skill_string(ADD_SHOW_SKILL, &path, SkillSource::UserSmooth)
+            .expect("parse")
+            .expect("some");
+        assert_eq!(skill.name, "add-show");
+        assert!(skill.description.contains("watchlist"));
+        assert_eq!(skill.triggers.len(), 3);
+        assert_eq!(skill.scope, SkillScope::Host);
+        assert!(skill.allowed_hosts.contains(&"smoo-hub".to_string()));
+        assert!(skill.body.contains("Look up the poster from TVMaze"));
+    }
+
+    #[test]
+    fn missing_frontmatter_returns_none() {
+        let raw = "# Just a markdown file\n\nNo frontmatter, not a skill.";
+        let path = PathBuf::from("/tmp/notes.md");
+        let skill = parse_skill_string(raw, &path, SkillSource::UserSmooth).expect("parse");
+        assert!(skill.is_none(), "non-skill markdown should return None: {skill:?}");
+    }
+
+    #[test]
+    fn missing_description_returns_none() {
+        // No description = silently skip. Catches generic article
+        // YAML frontmatter (e.g. some opencode files have just
+        // `title:`) without erroring.
+        let raw = "---\nname: thing\ntitle: not a skill\n---\n\nbody";
+        let path = PathBuf::from("/tmp/skills/thing/SKILL.md");
+        let skill = parse_skill_string(raw, &path, SkillSource::UserSmooth).expect("parse");
+        assert!(skill.is_none());
+    }
+
+    #[test]
+    fn name_inferred_from_parent_dir() {
+        // Some skills omit `name` and rely on the directory name —
+        // Claude Code's docs encourage this so authors don't repeat
+        // themselves.
+        let raw = "---\ndescription: inferred name\n---\n\nbody";
+        let path = PathBuf::from("/tmp/skills/my-skill/SKILL.md");
+        let skill = parse_skill_string(raw, &path, SkillSource::ClaudeCode).expect("parse").expect("some");
+        assert_eq!(skill.name, "my-skill");
+    }
+
+    #[test]
+    fn supports_hyphenated_alias_for_allowed_hosts() {
+        // Claude Code uses `allowed-tools:` (hyphen); Smooth uses
+        // `allowed_tools:`. Same for hosts. Both parse.
+        let raw = r#"---
+name: x
+description: y
+allowed-hosts:
+  - example.com
+allowed-tools:
+  - bash
+---
+
+body"#;
+        let path = PathBuf::from("/tmp/skills/x/SKILL.md");
+        let skill = parse_skill_string(raw, &path, SkillSource::ClaudeCode).expect("parse").expect("some");
+        assert_eq!(skill.allowed_hosts, vec!["example.com"]);
+        assert_eq!(skill.allowed_tools, vec!["bash"]);
+    }
+
+    #[test]
+    fn unclosed_frontmatter_is_error() {
+        let raw = "---\nname: x\ndescription: y\n\nno close marker, just body";
+        let path = PathBuf::from("/tmp/skills/x/SKILL.md");
+        let err = parse_skill_string(raw, &path, SkillSource::UserSmooth).unwrap_err();
+        assert!(err.to_string().contains("never closed"));
+    }
+
+    #[test]
+    fn discover_from_temp_project_dir() {
+        let tmp = tempfile::tempdir().expect("tempdir");
+        let skill_dir = tmp.path().join(".smooth/skills/add-show");
+        fs::create_dir_all(&skill_dir).unwrap();
+        fs::write(skill_dir.join("SKILL.md"), ADD_SHOW_SKILL).unwrap();
+        let skills = discover(tmp.path());
+        let names: Vec<&str> = skills.iter().map(|s| s.name.as_str()).collect();
+        assert!(names.contains(&"add-show"), "expected add-show in {names:?}");
+    }
+
+    #[test]
+    fn project_skill_wins_over_user_smooth_skill() {
+        // discover() should pick the project version when both
+        // exist with the same name.
+        let tmp = tempfile::tempdir().expect("tempdir");
+        let project_dir = tmp.path().join(".smooth/skills/dupe");
+        fs::create_dir_all(&project_dir).unwrap();
+        fs::write(project_dir.join("SKILL.md"), "---\nname: dupe\ndescription: PROJECT VERSION\n---\n\nbody").unwrap();
+        // We can't easily mock ~/.smooth/, so the precedence test
+        // here just checks that the discovered project skill has
+        // the project source + body.
+        let skills = discover(tmp.path());
+        let dupe = skills.iter().find(|s| s.name == "dupe").expect("found");
+        assert_eq!(dupe.source, SkillSource::Project);
+        assert!(dupe.description.contains("PROJECT VERSION"));
+    }
+
+    #[test]
+    fn precedence_ordering_is_stable() {
+        assert!(SkillSource::Project.precedence() < SkillSource::UserSmooth.precedence());
+        assert!(SkillSource::UserSmooth.precedence() < SkillSource::ClaudeCode.precedence());
+        assert!(SkillSource::ClaudeCode.precedence() < SkillSource::OpenCode.precedence());
+        assert!(SkillSource::OpenCode.precedence() < SkillSource::Builtin.precedence());
+    }
+
+    #[test]
+    fn builtin_create_skill_loads() {
+        // Smooth ships with `create-skill` embedded — every install
+        // gets the meta-skill that bootstraps a user's skill library.
+        let built = builtin_skills();
+        assert!(!built.is_empty(), "must ship at least one built-in skill");
+        let create_skill = built.iter().find(|s| s.name == "create-skill").expect("create-skill must be built-in");
+        assert!(create_skill.description.to_lowercase().contains("skill"));
+        assert!(!create_skill.triggers.is_empty(), "create-skill needs triggers");
+        assert_eq!(create_skill.source, SkillSource::Builtin);
+        assert!(create_skill.body.contains("Process"), "body should be the markdown recipe");
+    }
+}
diff --git a/crates/smooth-cli/Cargo.toml b/crates/smooth-cli/Cargo.toml
index 319c3e9b..9e65f42e 100644
--- a/crates/smooth-cli/Cargo.toml
+++ b/crates/smooth-cli/Cargo.toml
@@ -19,6 +19,8 @@ smooth-code.workspace = true
 smooth-diver.workspace = true
 smooth-pearls.workspace = true
 smooth-operator.workspace = true
+# skills discovery + the smooth cast roles (re-homed from the engine at 0.14.0)
+smooth-cast.workspace = true
 smooth-tunnel.workspace = true
 smooth-api-client.workspace = true
 # Pearl th-abc4e2: `th admin login` uses the Supabase OAuth flow
diff --git a/crates/smooth-cli/src/main.rs b/crates/smooth-cli/src/main.rs
index 35bf8b43..8bdccfb7 100644
--- a/crates/smooth-cli/src/main.rs
+++ b/crates/smooth-cli/src/main.rs
@@ -1010,7 +1010,7 @@ enum RemoteCommands {
 ///   visible roles, so a typo at the CLI fails loudly before a
 ///   runner spins up with the wrong clearance set.
 fn resolve_primary_agent(name: Option<&str>) -> Result<String> {
-    let cast = smooth_operator::Cast::builtin();
+    let cast = smooth_cast::cast::builtin();
     let available: Vec<String> = {
         let mut v: Vec<String> = cast.list_visible().map(|a| a.name.clone()).collect();
         v.sort();
@@ -3011,14 +3011,14 @@ async fn cmd_code(
             let role = intent.role().to_string();
             let composed = if let Some(name) = skill_name {
                 let workspace = working_dir.clone();
-                let skills = smooth_operator::skills::discover(&workspace);
+                let skills = smooth_cast::skills::discover(&workspace);
                 if let Some(skill) = skills.iter().find(|s| s.name == name) {
                     let source_label = match skill.source {
-                        smooth_operator::skills::SkillSource::Project => "project",
-                        smooth_operator::skills::SkillSource::UserSmooth => "user-smooth",
-                        smooth_operator::skills::SkillSource::ClaudeCode => "claude-code",
-                        smooth_operator::skills::SkillSource::OpenCode => "opencode",
-                        smooth_operator::skills::SkillSource::Builtin => "builtin",
+                        smooth_cast::skills::SkillSource::Project => "project",
+                        smooth_cast::skills::SkillSource::UserSmooth => "user-smooth",
+                        smooth_cast::skills::SkillSource::ClaudeCode => "claude-code",
+                        smooth_cast::skills::SkillSource::OpenCode => "opencode",
+                        smooth_cast::skills::SkillSource::Builtin => "builtin",
                     };
                     // Pearl th-e0f812: tell the headless caller a skill
                     // was picked. stderr so `--json` consumers parsing
@@ -6379,7 +6379,7 @@ fn cmd_bench(cmd: BenchCommands) -> Result<()> {
 /// then the user-level Smooth / Claude Code / opencode skill dirs.
 fn cmd_skills(cmd: SkillsCommands) -> Result<()> {
     use owo_colors::OwoColorize;
-    use smooth_operator::skills::{discover, discover_with_overrides, Skill, SkillSource};
+    use smooth_cast::skills::{discover, discover_with_overrides, Skill, SkillSource};
 
     let workspace = std::env::current_dir().context("current directory")?;
 
@@ -6418,8 +6418,8 @@ fn cmd_skills(cmd: SkillsCommands) -> Result<()> {
                     String::new()
                 };
                 let scope_label = match skill.scope {
-                    smooth_operator::skills::SkillScope::Sandbox => "sandbox".green().to_string(),
-                    smooth_operator::skills::SkillScope::Host => "host".yellow().to_string(),
+                    smooth_cast::skills::SkillScope::Sandbox => "sandbox".green().to_string(),
+                    smooth_cast::skills::SkillScope::Host => "host".yellow().to_string(),
                 };
                 println!(
                     "  {} {:<28} {:>12}  {}{}",
@@ -6462,8 +6462,8 @@ fn cmd_skills(cmd: SkillsCommands) -> Result<()> {
                     "  {}  {}",
                     "scope:".dimmed(),
                     match skill.scope {
-                        smooth_operator::skills::SkillScope::Sandbox => "sandbox",
-                        smooth_operator::skills::SkillScope::Host => "host",
+                        smooth_cast::skills::SkillScope::Sandbox => "sandbox",
+                        smooth_cast::skills::SkillScope::Host => "host",
                     }
                 );
                 println!("  {}  {}", "description:".dimmed(), skill.description);
diff --git a/crates/smooth-code/Cargo.toml b/crates/smooth-code/Cargo.toml
index b2f783ed..675936fd 100644
--- a/crates/smooth-code/Cargo.toml
+++ b/crates/smooth-code/Cargo.toml
@@ -12,6 +12,9 @@ name = "smooth_code"
 
 [dependencies]
 smooth-operator.workspace = true
+# skills discovery + the chief/intent_classifier cast roles (re-homed from the
+# engine at 0.14.0)
+smooth-cast.workspace = true
 # Direct dep so the TUI can consume the auto-mode wire types
 # (PendingAccessRequest, AccessEvent, …) without taking a dep on
 # smooth-bigsmooth — keeps the dep graph from forming a TUI → server
diff --git a/crates/smooth-code/src/app.rs b/crates/smooth-code/src/app.rs
index 18e03b16..7f0fd967 100644
--- a/crates/smooth-code/src/app.rs
+++ b/crates/smooth-code/src/app.rs
@@ -515,7 +515,7 @@ fn handle_agent_event(state: &mut AppState, event: AgentEvent) {
             state.phrase_idx = 0;
             // Surface the iteration boundary inline. The 7-phase
             // decomposition is gone (single CODING phase remains;
-            // see crates/smooth-operator/src/coding_workflow.rs:15)
+            // see crates/smooth-cast/src/coding_workflow.rs)
             // so the only useful per-iteration signal is "we just
             // started iteration N", optionally with the routing
             // alias when known.
@@ -604,18 +604,18 @@ fn refresh_autocomplete(state: &mut AppState, command_registry: &CommandRegistry
             // first (alphabetical), skills appended after.
             let mut commands = command_registry.list_commands();
             let workspace = std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from("."));
-            for skill in smooth_operator::skills::discover(&workspace) {
+            for skill in smooth_cast::skills::discover(&workspace) {
                 // Skip if a built-in command already has the same
                 // name (precedence: built-ins win).
                 if commands.iter().any(|(n, _)| n == &skill.name) {
                     continue;
                 }
                 let source_label = match skill.source {
-                    smooth_operator::skills::SkillSource::Project => "project",
-                    smooth_operator::skills::SkillSource::UserSmooth => "user-smooth",
-                    smooth_operator::skills::SkillSource::ClaudeCode => "claude-code",
-                    smooth_operator::skills::SkillSource::OpenCode => "opencode",
-                    smooth_operator::skills::SkillSource::Builtin => "builtin",
+                    smooth_cast::skills::SkillSource::Project => "project",
+                    smooth_cast::skills::SkillSource::UserSmooth => "user-smooth",
+                    smooth_cast::skills::SkillSource::ClaudeCode => "claude-code",
+                    smooth_cast::skills::SkillSource::OpenCode => "opencode",
+                    smooth_cast::skills::SkillSource::Builtin => "builtin",
                 };
                 commands.push((skill.name.clone(), format!("[skill:{source_label}] {}", skill.description)));
             }
@@ -791,14 +791,14 @@ fn handle_input_mode(
                             // user-supplied args and dispatch through
                             // the normal agent path.
                             let workspace = std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from("."));
-                            let skills = smooth_operator::skills::discover(&workspace);
+                            let skills = smooth_cast::skills::discover(&workspace);
                             if let Some(skill) = skills.into_iter().find(|s| s.name == name) {
                                 let source_label = match skill.source {
-                                    smooth_operator::skills::SkillSource::Project => "project",
-                                    smooth_operator::skills::SkillSource::UserSmooth => "user-smooth",
-                                    smooth_operator::skills::SkillSource::ClaudeCode => "claude-code",
-                                    smooth_operator::skills::SkillSource::OpenCode => "opencode",
-                                    smooth_operator::skills::SkillSource::Builtin => "builtin",
+                                    smooth_cast::skills::SkillSource::Project => "project",
+                                    smooth_cast::skills::SkillSource::UserSmooth => "user-smooth",
+                                    smooth_cast::skills::SkillSource::ClaudeCode => "claude-code",
+                                    smooth_cast::skills::SkillSource::OpenCode => "opencode",
+                                    smooth_cast::skills::SkillSource::Builtin => "builtin",
                                 };
                                 state.add_message(ChatMessage::system(format!("✦ Invoking skill: {} (from {})", skill.name, source_label)));
                                 let user_request = if args.trim().is_empty() {
@@ -914,14 +914,14 @@ fn handle_input_mode(
                             }
                             let composed = if let Some(name) = skill_name {
                                 let workspace = std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from("."));
-                                let skills = smooth_operator::skills::discover(&workspace);
+                                let skills = smooth_cast::skills::discover(&workspace);
                                 if let Some(skill) = skills.iter().find(|s| s.name == name) {
                                     let source_label = match skill.source {
-                                        smooth_operator::skills::SkillSource::Project => "project",
-                                        smooth_operator::skills::SkillSource::UserSmooth => "user-smooth",
-                                        smooth_operator::skills::SkillSource::ClaudeCode => "claude-code",
-                                        smooth_operator::skills::SkillSource::OpenCode => "opencode",
-                                        smooth_operator::skills::SkillSource::Builtin => "builtin",
+                                        smooth_cast::skills::SkillSource::Project => "project",
+                                        smooth_cast::skills::SkillSource::UserSmooth => "user-smooth",
+                                        smooth_cast::skills::SkillSource::ClaudeCode => "claude-code",
+                                        smooth_cast::skills::SkillSource::OpenCode => "opencode",
+                                        smooth_cast::skills::SkillSource::Builtin => "builtin",
                                     };
                                     // Pearl th-e0f812 (user observation 2026-05-12):
                                     // surface the chosen skill in the chat so the
@@ -1087,12 +1087,12 @@ fn load_pearls_for_autocomplete() -> Vec<crate::autocomplete::PearlSuggestion> {
 /// rules produce consistent titles across the web chat and the
 /// `th` TUI.
 async fn auto_name_session(user_prompt: &str) -> Option<String> {
-    use smooth_operator::cast::Cast;
+    use smooth_cast::cast::builtin as cast_builtin;
     use smooth_operator::providers::ProviderRegistry;
 
     let providers_path = dirs_next::home_dir()?.join(".smooth/providers.json");
     let registry = ProviderRegistry::load_from_file(&providers_path).ok()?;
-    let cast = Cast::builtin();
+    let cast = cast_builtin();
     let agent = cast.get("tagger")?;
     let config = registry.llm_config_for(agent.slot).ok()?;
     let llm = smooth_operator::llm::LlmClient::new(config);
diff --git a/crates/smooth-code/src/intent.rs b/crates/smooth-code/src/intent.rs
index d3804e86..157b5398 100644
--- a/crates/smooth-code/src/intent.rs
+++ b/crates/smooth-code/src/intent.rs
@@ -137,13 +137,13 @@ async fn classify_via_chief(message: &str) -> Option<Intent> {
 /// Callers who want the skill name use this; callers who only need
 /// the role use [`classify_via_chief`].
 async fn classify_via_chief_full(message: &str) -> Option<(Intent, Option<String>)> {
-    use smooth_operator::cast::Cast;
+    use smooth_cast::cast::builtin as cast_builtin;
+    use smooth_cast::skills;
     use smooth_operator::providers::ProviderRegistry;
-    use smooth_operator::skills;
 
     let providers_path = dirs_next::home_dir()?.join(".smooth/providers.json");
     let registry = ProviderRegistry::load_from_file(&providers_path).ok()?;
-    let cast = Cast::builtin();
+    let cast = cast_builtin();
     let role = cast.get("chief")?;
     let config = registry.llm_config_for(role.slot).ok()?;
     let llm = smooth_operator::llm::LlmClient::new(config);
@@ -167,7 +167,7 @@ async fn classify_via_chief_full(message: &str) -> Option<(Intent, Option<String
 /// Build the chief's full system prompt: static body + a list of
 /// available skills with their descriptions. When `skills` is empty
 /// the appendix is omitted entirely.
-fn compose_chief_prompt_with_skills(base: &str, skills: &[smooth_operator::skills::Skill]) -> String {
+fn compose_chief_prompt_with_skills(base: &str, skills: &[smooth_cast::skills::Skill]) -> String {
     if skills.is_empty() {
         return base.to_string();
     }
@@ -466,12 +466,12 @@ pub fn looks_like_factual_shell_query(message: &str) -> bool {
 }
 
 async fn classify_via_llm(message: &str) -> Option<Intent> {
-    use smooth_operator::cast::Cast;
+    use smooth_cast::cast::builtin as cast_builtin;
     use smooth_operator::providers::ProviderRegistry;
 
     let providers_path = dirs_next::home_dir()?.join(".smooth/providers.json");
     let registry = ProviderRegistry::load_from_file(&providers_path).ok()?;
-    let cast = Cast::builtin();
+    let cast = cast_builtin();
     let role = cast.get("intent_classifier")?;
     let config = registry.llm_config_for(role.slot).ok()?;
     let llm = smooth_operator::llm::LlmClient::new(config);
@@ -676,7 +676,7 @@ mod tests {
 
     #[test]
     fn compose_prompt_appends_skills_section() {
-        use smooth_operator::skills::{Skill, SkillScope, SkillSource};
+        use smooth_cast::skills::{Skill, SkillScope, SkillSource};
         use std::path::PathBuf;
         let base = "BASE PROMPT";
         let skills = vec![Skill {
diff --git a/crates/smooth-code/src/render.rs b/crates/smooth-code/src/render.rs
index 20ab36d3..73d27442 100644
--- a/crates/smooth-code/src/render.rs
+++ b/crates/smooth-code/src/render.rs
@@ -581,7 +581,7 @@ fn render_status(frame: &mut Frame, state: &AppState, area: Rect) {
     } else {
         // Idle: derive from the active role's slot. Fall back to the
         // role name if we can't resolve a slot (unknown role).
-        smooth_operator::Cast::builtin()
+        smooth_cast::cast::builtin()
             .get(&state.agent_name)
             .map(|role| format!("smooth-{:?}", role.slot).to_ascii_lowercase())
             .unwrap_or_else(|| state.agent_name.clone())
diff --git a/crates/smooth-operative/Cargo.toml b/crates/smooth-operative/Cargo.toml
index 797a8ffc..e51137e0 100644
--- a/crates/smooth-operative/Cargo.toml
+++ b/crates/smooth-operative/Cargo.toml
@@ -13,6 +13,8 @@ path = "src/main.rs"
 
 [dependencies]
 smooth-operator.workspace = true
+# coding_workflow + the fixer cast role (re-homed from the engine at 0.14.0)
+smooth-cast.workspace = true
 smooth-narc = { path = "../smooth-narc", package = "smooai-smooth-narc" }
 smooth-scribe = { path = "../smooth-scribe", package = "smooai-smooth-scribe" }
 smooth-wonk = { path = "../smooth-wonk", package = "smooai-smooth-wonk" }
diff --git a/crates/smooth-operative/src/main.rs b/crates/smooth-operative/src/main.rs
index e745de8f..4501a3ee 100644
--- a/crates/smooth-operative/src/main.rs
+++ b/crates/smooth-operative/src/main.rs
@@ -1452,7 +1452,7 @@ struct RunnerConfig {
     operator_id: String,
     narc_write_guard: bool,
     /// Name of the lead role to run under. Resolved against
-    /// [`smooth_operator::Cast::builtin`]. Defaults to
+    /// [`smooth_cast::cast::builtin`]. Defaults to
     /// `"fixer"` when `SMOOTH_AGENT` is unset or empty. Controls the
     /// system prompt AND the tool-clearance set via a
     /// [`smooth_operator::PermissionHook`] installed on the registry.
@@ -2151,14 +2151,16 @@ async fn main() {
         }
     }
 
-    // Resolve the active role from `Cast::builtin`. Invalid
-    // names fall back to `fixer` with a loud warning — we don't want a
+    // Resolve the active role from the smooth cast (`smooth_cast::cast::builtin`,
+    // which adds the harness roles `fixer`/`oracle`/`chief`/`intent_classifier`
+    // on top of the generic engine roles the published 0.14.0 engine ships).
+    // Invalid names fall back to `fixer` with a loud warning — we don't want a
     // typo in the dispatcher to silently run under an unexpected
     // clearance set, but we also don't want to hard-crash the sandbox
     // since `fixer` is always a safe default. The runner emits the
     // resolution onto its TokenDelta stream so tests + human operators
     // can see exactly which role the sandbox is running under.
-    let role_cast = std::sync::Arc::new(smooth_operator::Cast::builtin());
+    let role_cast = std::sync::Arc::new(smooth_cast::cast::builtin());
     let active_role = match role_cast.get(&config.agent_name) {
         Some(a) => a.clone(),
         None => {
@@ -2166,7 +2168,7 @@ async fn main() {
                 requested = %config.agent_name,
                 "unknown SMOOTH_AGENT — falling back to 'fixer'"
             );
-            role_cast.get("fixer").expect("'fixer' must always exist in Cast::builtin").clone()
+            role_cast.get("fixer").expect("'fixer' must always exist in smooth_cast::cast::builtin").clone()
         }
     };
 
@@ -2482,7 +2484,7 @@ async fn main() {
     }
 
     let result = if let (true, true, Some(raw)) = (workflow_opt_in, role_supports_coding_workflow, routing_json) {
-        use smooth_operator::coding_workflow::{run_coding_workflow, CodingWorkflowConfig};
+        use smooth_cast::coding_workflow::{run_coding_workflow, CodingWorkflowConfig};
         use smooth_operator::providers::ProviderRegistry;
         use std::sync::Arc;
 
@@ -2530,8 +2532,7 @@ async fn main() {
                     // suppress the test-fix bias + cross-fixture
                     // pattern confabulation observed in the bench.
                     cleanup_intent_hint: agent_config.prior_messages.iter().any(|m| {
-                        matches!(m.role, smooth_operator::conversation::Role::User)
-                            && smooth_operator::coding_workflow::task_text_has_cleanup_intent(&m.content)
+                        matches!(m.role, smooth_operator::conversation::Role::User) && smooth_cast::coding_workflow::task_text_has_cleanup_intent(&m.content)
                     }),
                 };
                 match run_coding_workflow(cfg).await {
diff --git a/crates/smooth-operative/tests/agent_permissions.rs b/crates/smooth-operative/tests/agent_permissions.rs
index 55b7ecc5..8193b497 100644
--- a/crates/smooth-operative/tests/agent_permissions.rs
+++ b/crates/smooth-operative/tests/agent_permissions.rs
@@ -10,7 +10,7 @@
 
 use async_trait::async_trait;
 use serde_json::json;
-use smooth_operator::cast::Cast;
+use smooth_cast::cast::builtin as cast_builtin;
 use smooth_operator::tool::{Tool, ToolCall, ToolRegistry, ToolSchema};
 use smooth_operator::PermissionHook;
 
@@ -55,7 +55,7 @@ impl Tool for EchoReadTool {
 
 #[tokio::test]
 async fn mapper_role_blocks_edit_file_at_dispatch() {
-    let cast = Cast::builtin();
+    let cast = cast_builtin();
     let mapper = cast.get("mapper").expect("'mapper' must be registered");
 
     let mut tools = ToolRegistry::new();
@@ -84,7 +84,7 @@ async fn mapper_role_blocks_edit_file_at_dispatch() {
 
 #[tokio::test]
 async fn mapper_role_allows_read_file_at_dispatch() {
-    let cast = Cast::builtin();
+    let cast = cast_builtin();
     let mapper = cast.get("mapper").expect("'mapper' must be registered");
 
     let mut tools = ToolRegistry::new();
@@ -122,7 +122,7 @@ async fn fixer_role_allows_edit_file_at_dispatch() {
         }
     }
 
-    let cast = Cast::builtin();
+    let cast = cast_builtin();
     let fixer = cast.get("fixer").expect("'fixer' must be registered");
 
     let mut tools = ToolRegistry::new();
@@ -157,7 +157,7 @@ async fn oracle_role_blocks_bash_at_dispatch() {
         }
     }
 
-    let cast = Cast::builtin();
+    let cast = cast_builtin();
     let oracle = cast.get("oracle").expect("'oracle' must be registered");
 
     let mut tools = ToolRegistry::new();
diff --git a/crates/smooth-operative/tests/subagent_dispatch.rs b/crates/smooth-operative/tests/subagent_dispatch.rs
index 85bf47b4..cf2c01c7 100644
--- a/crates/smooth-operative/tests/subagent_dispatch.rs
+++ b/crates/smooth-operative/tests/subagent_dispatch.rs
@@ -26,7 +26,8 @@ use std::sync::{
 
 use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::post, Json, Router};
 use serde_json::{json, Value};
-use smooth_operator::cast::{Cast, DispatchSubagentTool};
+use smooth_cast::cast::builtin as cast_builtin;
+use smooth_operator::cast::DispatchSubagentTool;
 use smooth_operator::conversation::Role;
 use smooth_operator::llm::{ApiFormat, LlmConfig, RetryPolicy};
 use smooth_operator::providers::Activity;
@@ -233,7 +234,7 @@ fn mock_llm_config(api_url: &str) -> LlmConfig {
 #[tokio::test]
 async fn fixer_role_dispatches_scout_and_only_final_summary_leaks() {
     let (api_url, state) = spawn_mock().await;
-    let cast = Arc::new(Cast::builtin());
+    let cast = Arc::new(cast_builtin());
     let fixer = cast.get("fixer").expect("'fixer' role registered").clone();
 
     let api_for_factory = api_url.clone();
@@ -345,7 +346,7 @@ async fn fixer_role_dispatches_scout_and_only_final_summary_leaks() {
 #[tokio::test]
 async fn dispatch_unknown_agent_returns_clean_tool_error() {
     // No LLM calls in this test — we invoke the tool directly.
-    let cast = Arc::new(Cast::builtin());
+    let cast = Arc::new(cast_builtin());
     let llm_factory: smooth_operator::LlmConfigFactory = Arc::new(|_a: Activity| -> anyhow::Result<LlmConfig> { Ok(mock_llm_config("http://127.0.0.1:1")) });
     let dispatch = DispatchSubagentTool::new(Arc::clone(&cast), ToolRegistry::new(), llm_factory);
 
@@ -377,7 +378,7 @@ async fn dispatch_lead_role_name_returns_clean_tool_error() {
     // Dispatching to 'fixer' (a lead, not a sidekick) must fail
     // with the same clean error — not fall through to spawning a
     // fixer role loop.
-    let cast = Arc::new(Cast::builtin());
+    let cast = Arc::new(cast_builtin());
     let llm_factory: smooth_operator::LlmConfigFactory = Arc::new(|_a: Activity| -> anyhow::Result<LlmConfig> { Ok(mock_llm_config("http://127.0.0.1:1")) });
     let dispatch = DispatchSubagentTool::new(Arc::clone(&cast), ToolRegistry::new(), llm_factory);
 
diff --git a/docs/Engineering/Using-smooth-operator-and-smooth-operator-agent.md b/docs/Engineering/Using-smooth-operator-and-smooth-operator-agent.md
index e0449a47..6ac3adae 100644
--- a/docs/Engineering/Using-smooth-operator-and-smooth-operator-agent.md
+++ b/docs/Engineering/Using-smooth-operator-and-smooth-operator-agent.md
@@ -4,17 +4,31 @@ This guide explains how the **smooth** monorepo should get maximum leverage from
 
 | Project | What it is | Repo |
 | --- | --- | --- |
-| **smooth-operator** | The Rust agent-orchestration **engine** — `Agent`, `Workflow`, `Tool`, `CheckpointStore`, `LlmProvider`, `Memory`, `KnowledgeBase`, HITL, cost. | [SmooAI/smooth-operator](https://github.com/SmooAI/smooth-operator) (extracted from `crates/smooth-operator`) |
+| **smooth-operator** (engine) | The Rust agent-orchestration **engine** — `Agent`, `Workflow`, `Tool`, `CheckpointStore`, `LlmProvider`, `Memory`, `KnowledgeBase`, HITL, cost. Published as `smooai-smooth-operator-core` on crates.io; smooth consumes it as `smooth-operator` (package-aliased). | [SmooAI/smooth-operator-core](https://github.com/SmooAI/smooth-operator-core) (extracted from the former in-tree `crates/smooth-operator`) |
 | **smooth-operator-agent** | The productized, polyglot knowledge-chat + tools + conversations **service** built on the engine. Serverless (SST/AWS) or k8s. | [SmooAI/smooth-operator-agent](https://github.com/SmooAI/smooth-operator-agent) |
 
 > TL;DR: smooth **already runs on** smooth-operator (the `th` TUI, Big Smooth, coding workflows, the cast/role system). The upside is to (1) consume it as the **extracted public crate** instead of vendoring, (2) wire the **real backends** behind its trait seams, and (3) dogfood **smooth-operator-agent** as smooth's own hosted knowledge assistant.
 
-## 1. Consume the extracted crate (stop vendoring)
+## 1. Consume the extracted crate (done — published on crates.io)
 
-smooth-operator is being fully extracted out of this monorepo into `SmooAI/smooth-operator` as the single source of truth (it builds standalone — 408 tests green, internal `bigsmooth` coupling feature-gated, secrets redacted). Once published:
+The engine extraction is **complete**. smooth no longer vendors the engine: the in-tree `crates/smooth-operator` copy was deleted and smooth depends on the published **`smooai-smooth-operator-core`** crate (repo [SmooAI/smooth-operator-core](https://github.com/SmooAI/smooth-operator-core)). The dep key stays `smooth-operator`, pointed at the published package via `package = "smooai-smooth-operator-core"`, so the crate name in code is still `smooth_operator` and every `use smooth_operator::…` import for the generic engine API is unchanged.
 
-- Replace the in-tree `crates/smooth-operator` dependency in the ~20 dependent crates with the published `smooai-smooth-operator` (crates.io or git). Enable the `bigsmooth` feature where Big Smooth reporting is needed; leave it off elsewhere.
-- This makes smooth a **consumer** of the public engine — the same artifact the rest of the world uses — so our dogfooding pressure improves the OSS product directly.
+The cutover landed in two steps:
+
+1. **SMOODEV-1787 (PR 1/4)** — replace the in-tree copy with a rev-pinned git dep on the engine repo.
+2. **SMOODEV-1790 (PR 4/4, the final cutover)** — switch from the git rev to the **published crates.io release `smooai-smooth-operator-core = "0.14.0"`**, the clean *generic* engine. Root `Cargo.toml` now reads `smooth-operator = { version = "0.14.0", package = "smooai-smooth-operator-core" }`; `Cargo.lock` resolves it from the crates.io registry (checksum-pinned), not a git source.
+
+This makes smooth a **consumer** of the public engine — the same artifact the rest of the world uses — so our dogfooding pressure improves the OSS product directly.
+
+### Where the th-code harness lives now (`crates/smooth-cast`)
+
+The published `0.14.0` engine is **generic** — it deliberately dropped the `th code` coding-harness specifics that only smooth used. Those bits were re-homed into the smooth-owned **`smooth-cast`** crate (`smooai-smooth-cast`), built on the engine's generic public `Cast`/`OperatorRole`/`Clearance` API:
+
+- **`smooth_cast::coding_workflow`** — the `th code` single-agent outer loop (`run_coding_workflow`, `task_text_has_cleanup_intent`, …).
+- **`smooth_cast::skills`** — skill discovery (`discover`, `SkillScope`, `SkillSource`, `Skill`) + the built-in `create-skill` skill.
+- **`smooth_cast::cast`** — the four coding-harness cast roles the generic engine no longer ships (`fixer`, `oracle`, `chief`, `intent_classifier`), and a `cast::builtin()` that registers them on top of the engine's generic built-in roles (`tagger`/`presser`/`recapper`/`mapper`/`heckler`/`scout`/`runner`).
+
+Consumers (`smooth-operative`, `smooth-code`, `smooth-cli`, `smooth-bigsmooth`) call `smooth_cast::cast::builtin()` wherever they need a harness role — the engine's own `smooth_operator::Cast::builtin()` only has the generic roles. The Big-Smooth reporter hooks, which the engine also dropped, were not re-homed: they stay deleted because nothing in smooth consumed them.
 
 See the parity epic (SMOODEV-1466) and the extraction punch-list.