diff --git a/CLAUDE.md b/CLAUDE.md
index d7fdfc2..b89a2d9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -103,10 +103,12 @@ Users configure persistent scan preferences via `/pixelslop settings` (interacti
 | Key | Type | Default | What it does |
 |-----|------|---------|-------------|
 | `headed` | boolean | `false` | Open visible browser window during scans |
-| `deep` | boolean | `false` | Extended collection — doubled budgets, more elements tested |
-| `thorough` | boolean | `false` | Show lower-confidence findings (50% vs 65% threshold) |
+| `deep` | boolean | `true` | Extended collection — doubled budgets, more elements tested |
+| `thorough` | boolean | `true` | Show lower-confidence findings, tagged with confidence |
 | `personas` | string | `all` | Persona IDs to evaluate (comma-separated, `all`, or `none`) |
 
+**Exhaustive by default.** `deep` and `thorough` default to `true` because Pixelslop is usually driven by an AI agent that won't remember to pass the flags — the default has to be the thorough one. `--fast` is the opt-out (sets `deep: false`, `thorough: false` for a quick, high-confidence-only pass). The cost of `deep: true` is a slower scan; `--fast` is there when speed matters.
+
 **Merge priority:** CLI args > saved settings > defaults. A user who runs `/pixelslop --thorough` gets thorough mode regardless of what's in `.pixelslop.md`.
 
 **Commands:**
@@ -133,6 +135,7 @@ Agents use `pixelslop-tools` (bin/pixelslop-tools.cjs) for all state operations.
 - **`browser analyze-page`** classifies page type (landing-page, e-commerce, content, form-heavy, app-like, general) and suggests relevant personas. Fast (< 2s), no screenshots.
 - **`config read-tokens` / `config write-tokens`** read and write the project's normative design tokens — a `## Design Tokens` section in `.pixelslop.md` holding flat `key: value` lines (`color-primary: #b8422e`, `font-body: Inter`, `type-scale: 1.25`, `space-unit: 4px`). The setup agent captures them from the codebase; the fixer reads them so a fix moves *toward* the project's real palette/type/spacing instead of a generic default. `write-tokens` merges (unspecified keys preserved) and only touches the Design Tokens section — `config write` stays the initializer, tokens layer on top like settings do.
 - **`scan trend`** reports the score progression across runs. `scan save-results` now appends each run's /20 total (plus per-pillar scores) to `.pixelslop/scan-history.json`; `scan trend [--target <url>] [--last <n>]` reads it back (`11 -> 13 -> 14 (+3)`). History is best-effort — a corrupt history file self-heals and never blocks the actual save. The orchestrator surfaces the trend in its scan summary.
+- **`personas write` / `personas list`** manage project-specific personas. `write --json '<persona>'` validates (required fields, slug-only id, no built-in collision, no path traversal) and saves to `.pixelslop/personas/<id>.json`; `list` returns the 8 built-ins plus any custom ones. The orchestrator generates 1-2 personas from the project's audience/brand and evaluates them alongside the built-ins, so persona findings fit the real users instead of only the generic profiles.
 
 ## Voice & Persona
 
diff --git a/bin/pixelslop-tools.cjs b/bin/pixelslop-tools.cjs
index 26d64ea..de42af4 100644
--- a/bin/pixelslop-tools.cjs
+++ b/bin/pixelslop-tools.cjs
@@ -1100,6 +1100,72 @@ function configWriteTokens(args = {}) {
     : `Design tokens written (${Object.keys(merged).length}): ${configPath}`);
 }
 
+// The 8 shipped persona ids. Custom personas live in .pixelslop/personas/ and must
+// not collide with these. Frozen: personasList hands this array back to callers.
+const BUILTIN_PERSONA_IDS = Object.freeze([
+  'screen-reader-user', 'low-vision-user', 'keyboard-user', 'rushed-mobile-user',
+  'slow-connection-user', 'non-native-english', 'design-critic', 'first-time-visitor'
+]);
+
+/**
+ * Validate a project-specific persona (generated by the orchestrator) and save it to .pixelslop/personas/<id>.json.
+ * @param {object} [args] - `json`: the persona as a JSON string (required); `root`: forwarded to resolveProjectRoot.
+ * @returns {{ok: true, id: string, path: string}|{ok: false, error: string}} Failures are returned, not thrown.
+ */
+function personasWrite(args = {}) {
+  try {
+    if (!args.json) return { ok: false, error: '--json is required' };
+    let persona;
+    try { persona = JSON.parse(args.json); } catch (e) { return { ok: false, error: `Invalid --json: ${e.message}` }; }
+    if (!persona || typeof persona !== 'object' || Array.isArray(persona)) {
+      return { ok: false, error: '--json must be a persona object' };
+    }
+
+    const required = ['id', 'name', 'category', 'description', 'designPriorities', 'frustrationTriggers', 'positiveSignals'];
+    const missing = required.filter((k) => persona[k] == null);
+    if (missing.length) return { ok: false, error: `Missing persona fields: ${missing.join(', ')}` };
+    if (!Array.isArray(persona.frustrationTriggers) || !Array.isArray(persona.positiveSignals)) {
+      return { ok: false, error: 'frustrationTriggers and positiveSignals must be arrays' };
+    }
+
+    const id = persona.id;
+    // id doubles as the filename and is stored verbatim, so it must already be a string slug — coercing would let 42 or ["ab"] through.
+    if (typeof id !== 'string' || !/^[a-z0-9][a-z0-9-]{1,40}$/.test(id)) {
+      return { ok: false, error: `Persona id must be a lowercase slug [a-z0-9-], 2-41 chars: got "${id}"` };
+    }
+    if (BUILTIN_PERSONA_IDS.includes(id)) {
+      return { ok: false, error: `"${id}" collides with a built-in persona; use a project-specific id` };
+    }
+
+    const dir = path.join(resolveProjectRoot(args.root), '.pixelslop', 'personas');
+    const outPath = path.join(dir, `${id}.json`);
+    // Defence in depth: the written file must stay inside the personas dir.
+    if (path.dirname(path.resolve(outPath)) !== path.resolve(dir)) {
+      return { ok: false, error: 'unsafe persona path' };
+    }
+    fs.mkdirSync(dir, { recursive: true });
+    fs.writeFileSync(outPath, JSON.stringify(persona, null, 2), 'utf-8');
+    return { ok: true, id, path: outPath };
+  } catch (err) {
+    return { ok: false, error: err.message };
+  }
+}
+
+/**
+ * List persona ids: the built-ins plus custom ones found as *.json files in
+ * .pixelslop/personas/. Failures come back as { ok: false, error }, matching personasWrite.
+ */
+function personasList(args = {}) {
+  try {
+    const dir = path.join(resolveProjectRoot(args.root), '.pixelslop', 'personas');
+    const names = fs.existsSync(dir) ? fs.readdirSync(dir) : [];
+    const custom = names.filter((f) => f.endsWith('.json') && !f.startsWith('._')).map((f) => f.replace(/\.json$/, ''));
+    return { ok: true, builtin: BUILTIN_PERSONA_IDS, custom, dir };
+  } catch (err) {
+    return { ok: false, error: err.message };
+  }
+}
+
 /**
  * Check if .pixelslop.md exists.
  */
@@ -1113,10 +1179,14 @@ function configExists(args = {}) {
 // ─────────────────────────────────────────────
 
 /** Valid setting keys and their value types/defaults */
+// Defaults are exhaustive on purpose. Pixelslop is usually driven by an AI agent
+// that won't remember to pass --thorough or --deep, so the default has to be the
+// thorough one. `--fast` (handled in SKILL.md) is the per-run opt-out that sets
+// deep and thorough to false; CLI flags and saved settings still override these.
 const SETTING_DEFS = {
   headed:   { type: 'boolean', default: false,  description: 'Open visible browser window' },
-  deep:     { type: 'boolean', default: false,  description: 'Extended collection with doubled budgets' },
-  thorough: { type: 'boolean', default: false,  description: 'Show lower-confidence findings' },
+  deep:     { type: 'boolean', default: true,   description: 'Extended collection with doubled budgets (off with --fast)' },
+  thorough: { type: 'boolean', default: true,   description: 'Show lower-confidence findings, tagged (off with --fast)' },
   personas: { type: 'string',  default: 'all',  description: 'Persona IDs (comma-separated, "all", or "none")' },
 };
 
@@ -3077,32 +3147,53 @@ function reportGenerate(flags) {
   </div>`;
     }
 
-    // ── Findings table ──
-    let findingsHtml;
-    if (hasFixData && findings.length > 0) {
-      // Full table with category + status columns
-      const rows = findings.map(f => {
-        const text = typeof f === 'string' ? f : (f.description || '');
-        const priority = typeof f === 'object' ? (f.priority || 'P2') : 'P2';
+    // ── Findings table (split into measured evidence vs design judgment) ──
+    // kind defaults to 'measured' so existing scans render exactly as before;
+    // the design-director pass is the only producer of 'judgment' findings.
+    const kindOf = (f) => (typeof f === 'object' && f.kind === 'judgment') ? 'judgment' : 'measured';
+    const measuredFindings = findings.filter(f => kindOf(f) === 'measured');
+    const judgmentFindings = findings.filter(f => kindOf(f) === 'judgment');
+
+    const renderRows = (list) => list.map(f => {
+      const text = typeof f === 'string' ? f : (f.description || '');
+      const priority = typeof f === 'object' ? (f.priority || 'P2') : 'P2';
+      // Judgment findings carry a confidence the report surfaces inline.
+      const conf = (typeof f === 'object' && f.confidence != null)
+        ? ` <span style="color:var(--ink-ghost);font-size:10px">(${escapeHtml(String(f.confidence))})</span>` : '';
+      if (hasFixData) {
         const category = typeof f === 'object' ? (f.category || '') : '';
-        // Try to match finding to plan issue for status
         let fixStatus = 'OPEN';
         if (typeof f === 'object' && f.id && issueMap.has(f.id)) {
           fixStatus = (issueMap.get(f.id).status || 'pending').toUpperCase();
         }
-        return `<tr><td class="col-priority"><span class="priority-tag priority-${escapeHtml(priority)}">${escapeHtml(priority)}</span></td><td class="col-category">${escapeHtml(category)}</td><td class="col-finding">${escapeHtml(text)}</td><td class="col-status"><span class="fix-status fix-${escapeHtml(fixStatus)}">${escapeHtml(fixStatus)}</span></td></tr>`;
-      }).join('\n      ');
-      findingsHtml = `<table class="data-table findings-table"><thead><tr><th>Priority</th><th>Category</th><th>Finding</th><th>Status</th></tr></thead><tbody>\n      ${rows}\n    </tbody></table>`;
-    } else if (findings.length > 0) {
-      // Simple table without category/status
-      const rows = findings.map(f => {
-        const text = typeof f === 'string' ? f : (f.description || '');
-        const priority = typeof f === 'object' ? (f.priority || 'P2') : 'P2';
-        return `<tr><td class="col-priority"><span class="priority-tag priority-${escapeHtml(priority)}">${escapeHtml(priority)}</span></td><td class="col-finding">${escapeHtml(text)}</td></tr>`;
-      }).join('\n      ');
-      findingsHtml = `<table class="data-table findings-table"><thead><tr><th>Priority</th><th>Finding</th></tr></thead><tbody>\n      ${rows}\n    </tbody></table>`;
-    } else {
+        return `<tr><td class="col-priority"><span class="priority-tag priority-${escapeHtml(priority)}">${escapeHtml(priority)}</span></td><td class="col-category">${escapeHtml(category)}</td><td class="col-finding">${escapeHtml(text)}${conf}</td><td class="col-status"><span class="fix-status fix-${escapeHtml(fixStatus)}">${escapeHtml(fixStatus)}</span></td></tr>`;
+      }
+      return `<tr><td class="col-priority"><span class="priority-tag priority-${escapeHtml(priority)}">${escapeHtml(priority)}</span></td><td class="col-finding">${escapeHtml(text)}${conf}</td></tr>`;
+    }).join('\n      ');
+
+    const tableFor = (list) => {
+      const head = hasFixData
+        ? '<thead><tr><th>Priority</th><th>Category</th><th>Finding</th><th>Status</th></tr></thead>'
+        : '<thead><tr><th>Priority</th><th>Finding</th></tr></thead>';
+      return `<table class="data-table findings-table">${head}<tbody>\n      ${renderRows(list)}\n    </tbody></table>`;
+    };
+    const layerHeading = (title, note) =>
+      `<h3 style="font-size:12px;text-transform:uppercase;letter-spacing:0.08em;color:var(--ink-tertiary);margin:18px 0 8px">${escapeHtml(title)} <span style="font-weight:400;text-transform:none;letter-spacing:0;color:var(--ink-ghost)">— ${escapeHtml(note)}</span></h3>`;
+
+    let findingsHtml;
+    if (findings.length === 0) {
       findingsHtml = '<p style="color:var(--ink-ghost);font-size:11px;text-transform:uppercase;letter-spacing:0.08em">No findings</p>';
+    } else if (judgmentFindings.length === 0) {
+      // Only measured findings — render the single table, no layer headings (unchanged look).
+      findingsHtml = tableFor(measuredFindings.length ? measuredFindings : findings);
+    } else {
+      // Both layers present — label and separate them so judgment never reads as measured fact.
+      const sections = [];
+      if (measuredFindings.length > 0) {
+        sections.push(layerHeading('Measured', 'evidence-backed') + tableFor(measuredFindings));
+      }
+      sections.push(layerHeading('Design judgment', "a design director's read, not measured") + tableFor(judgmentFindings));
+      findingsHtml = sections.join('\n    ');
     }
 
     // ── Fix section (entire tab-section div, or empty) ──
@@ -3398,6 +3489,15 @@ async function main() {
       break;
     }
 
+    case 'personas': { // project-specific persona management: write / list
+      switch (command) {
+        case 'write': return output(personasWrite(flags), true);
+        case 'list': return output(personasList(flags), true);
+        default: fail(`Unknown personas command: ${command}. Valid: write, list`);
+      }
+      break;
+    }
+
     default:
-      fail(`Unknown group: ${group}. Valid: plan, checkpoint, gate, config, log, discover, serve, init, verify, browser, scan, report`);
+      fail(`Unknown group: ${group}. Valid: plan, checkpoint, gate, config, log, discover, serve, init, verify, browser, scan, report, personas`);
   }
diff --git a/dist/agents/internal/pixelslop-eval-design-director.md b/dist/agents/internal/pixelslop-eval-design-director.md
new file mode 100644
index 0000000..f80a56b
--- /dev/null
+++ b/dist/agents/internal/pixelslop-eval-design-director.md
@@ -0,0 +1,95 @@
+---
+name: pixelslop-eval-design-director
+description: >
+  The subjective design-judgment pass. Looks at the screenshots and reads the
+  page like a design director — composition, distinctiveness, emotional fit,
+  missed opportunities — then argues against its own findings before returning
+  them. Produces judgment findings only. Does NOT touch the /20 score.
+model: sonnet
+tools:
+  - Read
+---
+
+You're the design director. The other six evaluators measure things — contrast ratios, type scales, overflow. You do the thing a measurement can't: you look at the page and say whether it's actually *good*, and where a real designer would push back.
+
+This is the subjective pass on purpose. You are allowed to have taste and opinions. But you are also the one evaluator most at risk of producing noise — vague, unfalsifiable, "make it pop" feedback that wastes everyone's time. So you do two passes: first you say what you see, then you argue against yourself and throw out everything you can't defend. What survives is what you return.
+
+**You never touch the /20 score.** The score stays measured. Your findings are a separate layer, labeled as judgment. Your job is coverage and taste, not grading.
+
+## Setup: Load Your Knowledge
+
+```
+Read dist/skill/resources/scoring.md            # The whole rubric — know what's already measured so you don't repeat it
+Read dist/skill/resources/ai-slop-patterns.md   # The visual fingerprints of AI-generated design
+Read dist/skill/resources/heuristics.md          # Nielsen's 10, adapted — the UX lens
+Read dist/skill/resources/cognitive-load.md      # When a page asks too much of the user
+```
+
+## Input
+
+- **evidence_path** (required) — absolute path to the evidence bundle JSON
+- **thorough** (optional, default: false) — when true, keep medium- and low-confidence findings (each tagged with its confidence); when false, only high-confidence
+
+## Protocol
+
+1. **Read your resource files.** All four. You need to know what's already measured so you don't just restate it in prose.
+
+2. **Read the evidence bundle** at `evidence_path`. Note the pillar evidence, the slop patterns already detected, the persona checks.
+
+3. **Look at the screenshots.** This is the part the measured evaluators can't do. The bundle has `viewports.desktop.screenshot`, `viewports.tablet.screenshot`, `viewports.mobile.screenshot` (and scroll-fold screenshots if present). `Read` each PNG path. A screenshot you didn't open doesn't count — don't opine on a layout you haven't seen.
+
+4. **First pass — say what you see.** Look like a design director reviewing a junior's work. Draft findings across these lenses:
+   - **Does this look AI-generated?** Be honest. Generic hero, icon-heading-paragraph-button rows, no point of view, every section the same rhythm. The `ai-slop-patterns.md` fingerprints, but as a gestalt, not a checklist.
+   - **Composition & distinctiveness** — does the page have a point of view, or is it a template? Is there a focal point, a reason the eye goes where it goes? Would anyone remember this page?
+   - **Emotional fit** — does the feeling match the job? A funeral home that feels like a fintech startup is wrong even if every contrast ratio passes.
+   - **Missed opportunities** — the strongest design-director move. Not "this is broken" but "this is fine and forgettable, and here's the version that isn't."
+   - **UX heuristics & cognitive load** — where the page makes the user think too hard, in ways the measured pillars don't already flag.
+
+5. **Second pass — argue against yourself.** For every finding from pass 1, ask:
+   - *Is this falsifiable, or is it "make it pop"?* If you can't point at the screenshot and say what specifically and why, cut it.
+   - *Is a measured evaluator already saying this?* If contrast/typography/hierarchy already flagged it, drop yours — it's their finding, measured beats judgment.
+   - *Am I imposing one taste, or is this a real problem?* A bold, deliberate choice you personally wouldn't make is not a finding. Respect intent. Pixelslop does not punish distinctive design for being distinctive.
+   - *Would a second design director agree?* If you're only ~60% sure, tag it `low`. If you'd bet on it, `high`. Anything in between is `medium`.
+
+   Kill everything that fails. Be ruthless — a short list of sharp, defensible reads beats a long list of vibes. Returning two real findings is a success. Inventing eight to look thorough is the failure mode this pass exists to prevent.
+
+6. **Return JSON.** Only the findings that survived the second pass, each tagged `kind: "judgment"` and carrying a `confidence`.
+
+## Output Format
+
+Return exactly this. Nothing else.
+
+```json
+{
+  "kind": "design-director",
+  "verdict": "One honest sentence: does this look designed, or generated?",
+  "findings": [
+    {
+      "criterion": "distinctiveness",
+      "kind": "judgment",
+      "confidence": "high",
+      "detail": "Every section is icon / heading / paragraph / button at the same rhythm — the page reads as a template with the content swapped in, not as a designed page.",
+      "evidence": "desktop screenshot: features, testimonials, and pricing sections share identical structure and spacing",
+      "opportunity": "Break the rhythm — let one section be full-bleed, vary the grid, give the hero a real focal object instead of centered text over a gradient."
+    }
+  ]
+}
+```
+
+Each finding needs:
+- `criterion` — the lens (`ai-slop`, `distinctiveness`, `composition`, `emotional-fit`, `missed-opportunity`, `cognitive-load`, `ux-heuristic`)
+- `kind` — always `"judgment"`
+- `confidence` — `"high"`, `"medium"`, or `"low"`; return `"medium"` and `"low"` findings only in thorough mode, otherwise `"high"` only
+- `detail` — what you see, specific enough to point at in the screenshot
+- `evidence` — which screenshot/viewport, and what in it
+- `opportunity` — optional but encouraged; the better version, concretely
+
+## Rules
+
+1. **Judgment only — never a score.** You do not return a `score` or `pillar`. The /20 is measured. If you find yourself wanting to grade, stop.
+2. **You looked, or you don't speak.** Every finding cites a specific screenshot. No opining on layouts you didn't open.
+3. **Don't restate measured findings.** If a pillar evaluator measured it, it's theirs. You cover what measurement can't.
+4. **Respect intent.** Distinctive ≠ wrong. Bold ≠ broken. A choice you wouldn't make is not a defect.
+5. **The second pass is mandatory.** Returning pass-1 findings without arguing against them is the one thing you must never do. Noise is worse than silence here.
+6. **Confidence is honest.** `high` means you'd defend it in a studio review. Don't inflate.
+7. **Return JSON only.** No markdown, no preamble.
diff --git a/dist/agents/pixelslop.md b/dist/agents/pixelslop.md
index dd01a4f..c7cc5a5 100644
--- a/dist/agents/pixelslop.md
+++ b/dist/agents/pixelslop.md
@@ -159,6 +159,24 @@ node bin/pixelslop-tools.cjs config write \
 
 If the user wants to skip setup, proceed without it — config is optional.
 
+### Step 5b: Generate Project-Specific Personas
+
+If you have a real audience and brand for this project (from design context above or an existing `.pixelslop.md`), generate 1-2 personas tuned to *this* project's actual users — not just the 8 generic built-ins. A wedding-planner site should be tested by "the stressed bride three weeks out," not only "first-time visitor."
+
+First check whether project personas already exist (don't regenerate every run):
+
+```bash
+node bin/pixelslop-tools.cjs personas list --root "$ROOT" --raw
+```
+
+If `custom` is empty and you have audience/brand, synthesize 1-2 personas following `dist/skill/resources/personas/schema.md` (a real `humanName`, the project's actual user in `description`, `frustrationTriggers` and `positiveSignals` specific to this audience), and write each via:
+
+```bash
+node bin/pixelslop-tools.cjs personas write --root "$ROOT" --raw --json '<persona JSON>'
+```
+
+Use a project-specific `id` slug (e.g. `stressed-bride`, not a built-in id). Only generate what the audience genuinely supports — one sharp project persona beats two generic ones. Skip this step entirely when there's no real audience to work from.
+
 ### Step 6: Collect Evidence
 
 **Log before collection:**
@@ -185,22 +203,24 @@ node bin/pixelslop-tools.cjs log write --agent orchestrator --level info --messa
 
 ### Step 6b: Spawn Specialist Evaluators
 
-Spawn all 6 specialist evaluators from `dist/agents/internal/`. Each receives the evidence file path and reads its own domain resource files.
+Spawn the 6 measured specialists plus the design-director from `dist/agents/internal/`. Each receives the evidence file path and reads its own domain resource files.
 
 ```
 Spawn agents (parallel where runtime supports it):
-  - pixelslop-eval-hierarchy    (evidence_path, thorough flag)
-  - pixelslop-eval-typography   (evidence_path, thorough flag)
-  - pixelslop-eval-color        (evidence_path, thorough flag)
-  - pixelslop-eval-responsiveness (evidence_path, thorough flag)
-  - pixelslop-eval-accessibility (evidence_path, thorough flag)
-  - pixelslop-eval-slop         (evidence_path, thorough flag)
+  - pixelslop-eval-hierarchy       (evidence_path, thorough flag)
+  - pixelslop-eval-typography      (evidence_path, thorough flag)
+  - pixelslop-eval-color           (evidence_path, thorough flag)
+  - pixelslop-eval-responsiveness  (evidence_path, thorough flag)
+  - pixelslop-eval-accessibility   (evidence_path, thorough flag)
+  - pixelslop-eval-slop            (evidence_path, thorough flag)
+  - pixelslop-eval-design-director (evidence_path, thorough flag)
 ```
 
 Each pillar specialist returns JSON: `{ "pillar": "...", "score": N, "evidence": "...", "findings": [...] }`
 The slop classifier returns JSON: `{ "band": "...", "patternCount": N, "patterns": [...] }`
+The design-director returns JSON: `{ "kind": "design-director", "verdict": "...", "findings": [...] }` where every finding is `kind: "judgment"` with a `confidence`. It returns **no score** — it never affects the /20.
 
-Collect all 6 results.
+Collect all 7 results. The 6 measured specialists feed the scores and measured findings; the design-director feeds only the judgment layer.
 
 ### Step 6c: Aggregate Report
 
@@ -227,14 +247,21 @@ Patterns detected: [patternCount]
 [patterns list from eval-slop]
 
 ### Findings
-[merge all specialist findings, sort by priority]
+
+**Measured** [evidence-backed]
+[merge the 6 measured specialists' findings, sort by priority — each carries kind: "measured"]
+
+**Design judgment** [the design director's read, not measured]
+[the design-director's verdict line, then its findings — each carries kind: "judgment" and a confidence. Omit this whole sub-section if the director returned no findings. These never change the /20.]
 
 ### Persona Insights
-[For each evaluated persona: read the persona JSON's humanName, name, narrationStyle.voice, and sampleReactions.
+[Evaluate the selected built-in personas (per the `personas` setting) AND every project-specific persona. Discover the project ones with `personas list` — read each `custom` id's JSON from `.pixelslop/personas/<id>.json`. Built-in JSONs live in `dist/skill/resources/personas/`. Custom personas use the exact same schema, so evaluate them identically.
+
+For each evaluated persona: read the persona JSON's humanName, name, narrationStyle.voice, and sampleReactions.
 Match frustrationTriggers and positiveSignals against specialist findings and personaChecks data from the evidence bundle.
 Write a 1-3 paragraph narrative in the persona's voice — see scoring.md Persona Report Format for contract and examples.
 End each persona section with the **Issues:** and **Worked well:** machine-parseable anchors.
-Skip personas with zero issues and no notable positives.]
+Skip personas with zero issues and no notable positives. A project-specific persona that surfaces a real audience issue is the most valuable one in the report — lead with it.]
 
 ### Screenshots
 [reference from evidence bundle]
diff --git a/dist/skill/SKILL.md b/dist/skill/SKILL.md
index 5fb872b..0768f1f 100644
--- a/dist/skill/SKILL.md
+++ b/dist/skill/SKILL.md
@@ -1,9 +1,12 @@
 ---
 name: pixelslop
 description: >
-  Browser-first design quality review and fix. Scans pages with Playwright,
-  scores 5 design pillars, detects AI slop patterns, fixes issues with
-  checkpoint-based rollback.
+  Browser-first design quality review and fix. Scans real pages with Playwright,
+  scores 5 measured pillars, detects AI slop patterns, and runs a design-director
+  pass for subjective judgment findings. Evaluates against 8 built-in personas
+  plus project-specific ones generated from your audience, tracks score trends
+  across runs, and fixes issues toward your design tokens with checkpoint-based
+  rollback. Exhaustive by default (--fast for a quick pass).
 user-invokable: true
 args:
   - name: url
@@ -22,7 +25,13 @@ args:
     description: Persona IDs to evaluate (comma-separated, "all", or "none"). Default all
     required: false
   - name: thorough
-    description: Show lower-confidence findings (threshold 50% instead of 65%)
+    description: Show lower-confidence findings, tagged with confidence. Default true (exhaustive)
+    required: false
+  - name: deep
+    description: Extended collection with doubled budgets and more elements tested. Default true (exhaustive)
+    required: false
+  - name: fast
+    description: Quick pass — turns deep and thorough off for a faster, high-confidence-only scan
     required: false
   - name: debug
     description: Enable session logging to .pixelslop-session.log for troubleshooting
@@ -35,6 +44,16 @@ args:
     required: false
 ---
 
+## Asking the user (works in any harness)
+
+Pixelslop runs under different harnesses (Claude Code, Codex CLI, and others), and they ask the user questions differently. Wherever this skill says to ask the user — including every `AskUserQuestion(...)` block below — present the **same question and the same options** using whatever your harness supports:
+
+- **Claude Code:** use the `AskUserQuestion` tool with the listed options (structured, selectable).
+- **Codex CLI, or any harness with no choice-prompt tool:** print the question and its options as a short numbered list, then **stop and wait** for the user to reply with a number or text. Codex has no `AskUserQuestion`-style popup (it's an open request upstream), so a plain numbered menu is the equivalent. Don't silently pick a default and continue — the point is to let the user choose.
+- **Non-interactive runs** (`codex exec`, CI, or `--quick`): don't ask at all. Use the saved setting or the documented default and proceed.
+
+The `AskUserQuestion(...)` snippets in this file are the question **content** — the exact wording and options to surface. *How* you render them is your harness's call; *what* you ask is not. If you're not on Claude Code, read each block as "ask this question, offer these options" and present it your way.
+
 ## Settings Mode
 
 When `--settings` is passed (e.g., `/pixelslop settings`), run the interactive settings configurator and stop — don't scan anything.
@@ -132,6 +151,60 @@ Tell them: "These settings apply to all future `/pixelslop` runs in this project
 
 ---
 
+## Capabilities & Options (the full menu)
+
+Everything Pixelslop can do, in one place. Read this so you can tell the user what's available — most people (and most agents) don't know half of it. When a scan finishes, mention the one or two options that fit their situation.
+
+**What a scan produces:**
+- **5 measured pillars** (hierarchy, typography, color, responsiveness, accessibility), scored /20 from real browser evidence.
+- **AI slop detection** — 25 visual patterns + source patterns.
+- **Design-director judgment** — a subjective pass that looks at the screenshots and flags what measurement can't (generic composition, AI-generated feel, missed opportunities). Shown in a separate "Design judgment" layer; never affects the /20.
+- **Persona evaluation** — 8 built-in personas, plus 1-2 project-specific personas generated from your audience/brand.
+- **Score trends** — each run's score is recorded; repeat scans show movement (`scan trend`).
+- **Self-contained HTML report** with screenshots and the measured/judgment split.
+
+**Run options (flags):**
+- `--fast` — quick pass; turns off deep + thorough (Pixelslop is exhaustive by default).
+- `--thorough` / `--deep` — both default **on**; `--fast` is the opt-out.
+- `--personas all|none|<ids>` — which personas to evaluate (default all).
+- `--code-check` — source-only analysis, no browser.
+- `--quick` — skip the per-run config prompt, use saved settings/defaults.
+- `--headed` — visible browser window.
+- `--settings` — open the interactive settings configurator.
+- `--debug` — session logging for troubleshooting.
+
+**Beyond scanning:**
+- **Fix loop** — locates the source, fixes *toward your design tokens*, checkpoints before editing, rolls back if the build breaks.
+- **Design tokens** — `config read-tokens` / `write-tokens` hold your real palette/type/spacing so fixes match the project.
+- **Custom personas** — `personas write` adds your own; the orchestrator also generates project-specific ones automatically.
+- **Settings** — `/pixelslop settings` saves preferences per project so you don't pass flags every run.
+
+If a scan was slow, mention `--fast`. If the user has a clear audience, project personas are already working for them. If they've scanned before, point at the trend. Surface what's relevant; don't dump the whole list every time.
+
+## Advise, don't interrogate (read this before asking the user anything)
+
+You are an advisor, not a config form. Before you scan, work out what the user is actually trying to do and **lead with a recommendation**, then offer the alternative. Don't open with a wall of settings questions, and don't silently run defaults on a request that implies something else.
+
+Infer intent from how they asked, then match it:
+
+| What they said / the situation | Recommend | Why |
+|--------------------------------|-----------|-----|
+| "quick look", "does this look ok", a glance | **`--fast`** | high-confidence findings only, ~10s — respects "quick" |
+| "review", "before launch", "audit", or unspecified | **the default** (exhaustive: 5 pillars + design-director + personas) | catches the soft stuff, not just what's measurable |
+| First scan of this project (no `.pixelslop.md`) | **setup first**, then scan | gathering audience/brand unlocks project personas + token-aware fixes |
+| Clear audience/brand mentioned | default + **let it generate a project persona** | tests against their real users, not just generic profiles |
+| No URL, local project | help resolve a dev-server URL, or **`--code-check`** | code-check needs no browser |
+| "in CI", "automate", "for every PR" | **`--fast --quick --personas none`** | fast and deterministic, no prompts |
+| "is it getting better?", iterating | scan, then **`scan trend`** | shows the /20 climbing across runs |
+| Wants fixes, not just a report | scan → **fix loop** → re-scan | fixes move toward their tokens; the trend confirms it |
+
+How to actually advise:
+1. **State your recommendation and the one tradeoff**, in a sentence. "You're pre-launch, so I'll run the full exhaustive scan with a persona tuned to your audience — it's thorough so ~30-40s. Want a fast gut-check instead?"
+2. **Only ask when there's a real fork.** If the intent is clear, recommend and proceed. If it's genuinely ambiguous (quick vs thorough, fix vs report-only), present 2-3 concrete options with their tradeoff and let them pick — don't ask about individual flags.
+3. **Never** present the raw settings questions (personas? deep? thorough?) as the opening move. Those are for `/pixelslop settings`, not for advising a scan. Translate intent into the flags yourself.
+
+The point: the user shouldn't need to know the flags exist. You know them. Recommend the right run, explain it in one line, and let them redirect.
+
 ## How This Works
 
 You (the main session) handle all user-facing decisions **before** spawning the orchestrator. The orchestrator runs to completion — no mid-execution pauses, no SendMessage relay. This keeps things reliable.
@@ -349,7 +422,9 @@ A lightweight pre-scan step that lets the user tweak settings for this specific
 | 1 | CLI flags | This run only — e.g., `--personas none --thorough` |
 | 2 | Per-run answers | This run only — user picks in Phase 2b |
 | 3 | Saved settings | All runs — from `.pixelslop.md` |
-| 4 | Defaults | Fallback — `personas: all`, `thorough: false`, etc. |
+| 4 | Defaults | Fallback — exhaustive by default: `personas: all`, `thorough: true`, `deep: true` |
+
+**Exhaustive by default.** Pixelslop is usually driven by an AI agent that won't remember to pass `--thorough` or `--deep`, so those default to **on**. `thorough: true` shows lower-confidence findings tagged with their confidence rather than hiding them; `deep: true` doubles collection budgets for more evidence (at the cost of a slower scan). The opt-out is **`--fast`**: when the user passes `--fast`, set `thorough: false` and `deep: false` for that run (a quick, high-confidence-only pass). `--fast` is a CLI flag, so it wins over saved settings for this run, same as any other flag.
 
 ### Skip conditions
 
diff --git a/dist/skill/resources/scoring.md b/dist/skill/resources/scoring.md
index 1fd53f4..42fff29 100644
--- a/dist/skill/resources/scoring.md
+++ b/dist/skill/resources/scoring.md
@@ -310,6 +310,11 @@ The Evidence column in the scores table is not optional. A score without evidenc
 
 Findings should be ordered by impact -- the thing that hurts the site the most goes first. Each finding should reference which pillar it affects and include the specific browser observation that surfaced it.
 
+**Finding kinds — measured vs judgment.** Every finding carries a `kind`:
+
+- `kind: "measured"` (the default) — backed by a specific browser measurement. The six measured evaluators (the five pillars plus the slop classifier) and the persona checks only ever produce these. If `kind` is absent, it is measured.
+- `kind: "judgment"` — a subjective read from the design-director pass (composition, distinctiveness, emotional fit, missed opportunities). These do **not** affect the /20 score; the score stays measured-only. They carry a `confidence` field (`low`/`medium`/`high`) and render in a separate "Design judgment" layer of the report, clearly labeled as opinion, not measured fact. This is how Pixelslop stays exhaustive without letting judgment masquerade as measurement.
+
 Screenshots are references to captured images, not inline data. If a screenshot was not captured for a given viewport, note it as `[not captured]` and that gap should be reflected in the confidence score.
 
 ---
diff --git a/tests/design-director.test.js b/tests/design-director.test.js
new file mode 100644
index 0000000..ef254a4
--- /dev/null
+++ b/tests/design-director.test.js
@@ -0,0 +1,65 @@
+/**
+ * Design Director Contract Tests
+ *
+ * The design-director is the subjective judgment pass — the one evaluator that
+ * looks at screenshots and opines. Its whole value depends on a few invariants
+ * that are easy to erode in editing, so they're pinned here:
+ *   - it produces judgment findings only and never a /20 score
+ *   - it actually looks at the screenshots
+ *   - it runs the adversarial second pass (the anti-noise guard)
+ *   - the orchestrator spawns it and routes its output to the judgment layer
+ *
+ * Run: node --test tests/design-director.test.js
+ */
+
+import { describe, it } from 'node:test';
+import { strict as assert } from 'node:assert';
+import { readFileSync, existsSync } from 'node:fs';
+import { fileURLToPath } from 'node:url';
+import { dirname, join } from 'node:path';
+
+const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');
+const DIRECTOR = join(ROOT, 'dist', 'agents', 'internal', 'pixelslop-eval-design-director.md');
+const ORCH = join(ROOT, 'dist', 'agents', 'pixelslop.md');
+
+describe('design-director spec', () => { // the spec is checked for existence and read once here in the suite body, not inside a test
+  assert.ok(existsSync(DIRECTOR), 'design-director spec must exist');
+  const spec = readFileSync(DIRECTOR, 'utf-8');
+
+  it('has read-only frontmatter (no Write/Edit)', () => {
+    const fm = spec.slice(0, spec.indexOf('---', 3)); // text before the first '---' at index >= 3; if none is found indexOf gives -1 and this is the spec minus its last character
+    assert.ok(/name:\s*pixelslop-eval-design-director/.test(spec), 'name set'); // NOTE(review): searches the whole spec, not just `fm`
+    assert.ok(/tools:[\s\S]*-\s*Read/.test(spec), 'has Read tool'); // NOTE(review): whole spec too — any "- Read" after the first "tools:" satisfies it
+    assert.ok(!/-\s*Write/.test(fm) && !/-\s*Edit/.test(fm), 'must not have Write or Edit');
+  });
+
+  it('produces judgment only and never a score', () => {
+    assert.ok(/never.{0,20}(score|\/20)|no score|stays measured/i.test(spec), 'states it never scores');
+    assert.ok(spec.includes('"kind": "judgment"') || /kind.{0,4}judgment/.test(spec), 'findings are kind judgment');
+    assert.ok(!/"score"\s*:/.test(spec) || /do not return a `?score/i.test(spec), 'no score field in output, or explicitly forbidden');
+  });
+
+  it('actually looks at the screenshots', () => {
+    assert.ok(/screenshot/i.test(spec), 'references screenshots');
+    assert.ok(/Read.{0,40}(PNG|screenshot)|screenshot you didn/i.test(spec), 'instructed to open the screenshot');
+  });
+
+  it('runs the adversarial second pass (anti-noise guard)', () => {
+    assert.ok(/argue against (yourself|your own)|second pass/i.test(spec), 'has the self-argument pass');
+    assert.ok(/confidence/i.test(spec), 'tags findings with confidence');
+    assert.ok(/respect intent|distinctive.{0,4}(!=|≠|is not).{0,10}wrong/i.test(spec), 'respects intentional bold design');
+  });
+});
+
+describe('orchestrator wiring', () => {
+  const orch = readFileSync(ORCH, 'utf-8'); // read in the suite body; unlike the director spec there is no existsSync guard, so a missing file throws from readFileSync
+
+  it('spawns the design-director', () => {
+    assert.ok(orch.includes('pixelslop-eval-design-director'), 'orchestrator spawns the director');
+  });
+
+  it('routes its findings to a separate judgment layer, not the score', () => {
+    assert.ok(/no score|never affects the \/20|never change the \/20/i.test(orch), 'director does not affect the /20');
+    assert.ok(/Design judgment/i.test(orch), 'findings go to a Design judgment section');
+  });
+});
diff --git a/tests/evaluator.test.js b/tests/evaluator.test.js
index dd92397..382f7d1 100644
--- a/tests/evaluator.test.js
+++ b/tests/evaluator.test.js
@@ -90,10 +90,11 @@ describe('Internal evaluator agents directory', () => {
     assert.ok(existsSync(INTERNAL), 'Missing: dist/agents/internal/');
   });
 
-  it('contains exactly 6 evaluator specs', () => {
+  it('contains the 6 measured evaluators plus the design-director (7 total)', () => {
     const files = readdirSync(INTERNAL).filter(f => f.endsWith('.md') && !f.startsWith('._'));
-    assert.equal(files.length, 6,
-      `Expected 6 internal evaluator specs, found ${files.length}: ${files.join(', ')}`);
+    assert.equal(files.length, 7,
+      `Expected 7 internal evaluator specs (6 measured + design-director), found ${files.length}: ${files.join(', ')}`);
+    assert.ok(files.includes('pixelslop-eval-design-director.md'), 'design-director spec must be present');
   });
 });
 
@@ -189,9 +190,11 @@ describe('Pillar coverage', () => {
     assert.ok(existsSync(INTERNAL), `INTERNAL dir missing: ${INTERNAL}`);
     const raw = readdirSync(INTERNAL);
     const mdFiles = raw.filter(f => f.endsWith('.md') && !f.startsWith('._'));
-    const pillarFiles = mdFiles.filter(f => !f.includes('eval-slop'));
+    // Pillar evaluators exclude the slop classifier and the design-director —
+    // the director is the subjective judgment pass, not a scored pillar.
+    const pillarFiles = mdFiles.filter(f => !f.includes('eval-slop') && !f.includes('eval-design-director'));
     assert.equal(pillarFiles.length, 5,
-      `Expected 5 pillar evaluators (excluding slop), found ${pillarFiles.length}. Raw dir: ${raw.join(', ')}. MD files: ${mdFiles.join(', ')}. Pillar files: ${pillarFiles.join(', ')}`);
+      `Expected 5 pillar evaluators (excluding slop + design-director), found ${pillarFiles.length}. Raw dir: ${raw.join(', ')}. MD files: ${mdFiles.join(', ')}. Pillar files: ${pillarFiles.join(', ')}`);
   });
 });
 
diff --git a/tests/personas-tool.test.js b/tests/personas-tool.test.js
new file mode 100644
index 0000000..23d0953
--- /dev/null
+++ b/tests/personas-tool.test.js
@@ -0,0 +1,104 @@
+/**
+ * Personas Tool Tests
+ *
+ * `personas write` validates and saves a project-specific persona to
+ * .pixelslop/personas/, and `personas list` reports built-ins + custom ones so
+ * the orchestrator can discover generated personas. The id doubles as the
+ * filename, so validation (slug-only, no built-in collisions, no traversal) is
+ * a safety boundary, not a nicety.
+ *
+ * Run: node --test tests/personas-tool.test.js
+ */
+
+import { describe, it, beforeEach } from 'node:test';
+import { strict as assert } from 'node:assert';
+import { execFileSync } from 'node:child_process';
+import { existsSync, mkdtempSync, rmSync, readFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { fileURLToPath } from 'node:url';
+import { dirname, join } from 'node:path';
+
+const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');
+const TOOLS = join(ROOT, 'bin', 'pixelslop-tools.cjs');
+
+function run(args) { // Runs pixelslop-tools with `args`; returns stdout parsed as JSON, or { _raw: stdout } when stdout is not JSON.
+  const stdout = execFileSync(process.execPath, [TOOLS, ...args], { encoding: 'utf-8' }); // process.execPath: the interpreter running this test, not whichever `node` is first on PATH
+  try { return JSON.parse(stdout); } catch { return { _raw: stdout }; }
+}
+const persona = (over = {}) => JSON.stringify({ // JSON string for a complete sample persona; entries in `over` replace the defaults below
+  id: 'stressed-bride', name: 'Stressed Bride', category: 'context',
+  description: 'A bride three weeks from her wedding, evaluating a planner',
+  designPriorities: { hierarchy: 4 },
+  frustrationTriggers: ['buried pricing'], positiveSignals: ['clear timeline'],
+  ...over
+});
+
+describe('personas write / list', () => {
+  let dir;
+  beforeEach(() => { dir = mkdtempSync(join(tmpdir(), 'pxs-personas-')); }); // fresh temp project root for every test
+  const cleanup = () => { try { rmSync(dir, { recursive: true, force: true }); } catch {} }; // NOTE(review): called as the last statement of each test, so a failing assertion skips it and leaves the temp dir behind
+
+  it('lists the 8 built-ins and no custom on a fresh project', () => {
+    const r = run(['personas', 'list', '--root', dir, '--raw']);
+    assert.equal(r.builtin.length, 8);
+    assert.deepEqual(r.custom, []);
+    cleanup();
+  });
+
+  it('writes a valid persona and lists it', () => {
+    const w = run(['personas', 'write', '--root', dir, '--raw', '--json', persona()]);
+    assert.equal(w.ok, true);
+    assert.equal(w.id, 'stressed-bride');
+    assert.ok(existsSync(join(dir, '.pixelslop', 'personas', 'stressed-bride.json')));
+    const l = run(['personas', 'list', '--root', dir, '--raw']);
+    assert.deepEqual(l.custom, ['stressed-bride']);
+    cleanup();
+  });
+
+  it('rejects an id that collides with a built-in', () => {
+    const w = run(['personas', 'write', '--root', dir, '--raw', '--json', persona({ id: 'design-critic' })]);
+    assert.equal(w.ok, false);
+    assert.match(w.error, /collides/i);
+    cleanup();
+  });
+
+  it('rejects a non-slug / path-traversal id', () => {
+    for (const bad of ['../evil', 'Has Spaces', 'a/b', 'UPPER']) {
+      const w = run(['personas', 'write', '--root', dir, '--raw', '--json', persona({ id: bad })]);
+      assert.equal(w.ok, false, `id "${bad}" must be rejected`);
+    }
+    // and nothing escaped the personas dir
+    assert.ok(!existsSync(join(dir, 'evil.json')));
+    cleanup();
+  });
+
+  it('rejects a persona missing required fields', () => {
+    const w = run(['personas', 'write', '--root', dir, '--raw', '--json', '{"id":"x"}']);
+    assert.equal(w.ok, false);
+    assert.match(w.error, /Missing persona fields/i);
+    cleanup();
+  });
+
+  it('rejects non-array frustrationTriggers', () => {
+    const w = run(['personas', 'write', '--root', dir, '--raw', '--json', persona({ frustrationTriggers: 'nope' })]);
+    assert.equal(w.ok, false);
+    cleanup();
+  });
+
+  it('rejects invalid JSON cleanly', () => {
+    const w = run(['personas', 'write', '--root', dir, '--raw', '--json', '{not json']);
+    assert.equal(w.ok, false);
+    assert.match(w.error, /Invalid --json/i);
+    cleanup();
+  });
+});
+
+describe('persona generation is wired into the orchestrator', () => {
+  const orch = readFileSync(join(ROOT, 'dist', 'agents', 'pixelslop.md'), 'utf-8'); // orchestrator spec text, read once in the suite body
+
+  it('the orchestrator generates project personas and discovers them', () => {
+    assert.ok(orch.includes('personas write'), 'orchestrator writes generated personas');
+    assert.ok(orch.includes('personas list'), 'orchestrator discovers custom personas');
+    assert.ok(/project-specific persona|project's actual users|tuned to/i.test(orch), 'frames them as project-specific');
+  });
+});
diff --git a/tests/report-layers.test.js b/tests/report-layers.test.js
new file mode 100644
index 0000000..74e6f6a
--- /dev/null
+++ b/tests/report-layers.test.js
@@ -0,0 +1,80 @@
+/**
+ * Report Layer Tests
+ *
+ * Findings now carry a `kind`: "measured" (evidence-backed, the default) or
+ * "judgment" (the design-director's subjective read). The HTML report keeps the
+ * two visually separate so judgment never reads as measured fact, and a scan with
+ * only measured findings looks exactly as it did before (no extra headings).
+ *
+ * Run: node --test tests/report-layers.test.js
+ */
+
+import { describe, it, beforeEach } from 'node:test';
+import { strict as assert } from 'node:assert';
+import { execFileSync } from 'node:child_process';
+import { readFileSync, writeFileSync, mkdtempSync, mkdirSync, rmSync, readdirSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { fileURLToPath } from 'node:url';
+import { dirname, join } from 'node:path';
+
+const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');
+const TOOLS = join(ROOT, 'bin', 'pixelslop-tools.cjs');
+
+function generate(dir, findings) { // Writes a scan fixture with `findings` to dir/.pixelslop/scan-results.json, runs `report generate`, and returns the text of the first .html report found.
+  mkdirSync(join(dir, '.pixelslop'), { recursive: true });
+  const scan = {
+    title: 'T', url: 'http://x', timestamp: '2026-06-10T00:00:00Z',
+    scores: { hierarchy: { score: 3 }, typography: { score: 2 }, color: { score: 2 }, responsiveness: { score: 3 }, accessibility: { score: 2 } },
+    findings
+  };
+  const scanPath = join(dir, '.pixelslop', 'scan-results.json');
+  writeFileSync(scanPath, JSON.stringify(scan), 'utf-8');
+  execFileSync(process.execPath, [TOOLS, 'report', 'generate', '--scan-results', scanPath, '--root', dir, '--raw'], { encoding: 'utf-8' }); // process.execPath: the interpreter running this test, not whichever `node` is first on PATH
+  const reportsDir = join(dir, '.pixelslop', 'reports');
+  const file = readdirSync(reportsDir).find((f) => f.endsWith('.html'));
+  return readFileSync(join(reportsDir, file), 'utf-8');
+}
+
+describe('report layers (measured vs judgment)', () => {
+  let dir;
+  beforeEach(() => { dir = mkdtempSync(join(tmpdir(), 'pxs-layers-')); }); // fresh temp project root for every test
+  const cleanup = () => { try { rmSync(dir, { recursive: true, force: true }); } catch {} }; // NOTE(review): called as the last statement of each test, so a failing assertion skips it and leaves the temp dir behind
+
+  it('renders no layer headings when every finding is measured (unchanged look)', () => {
+    const html = generate(dir, [
+      { priority: 'P1', description: 'Contrast weak', kind: 'measured' },
+      { priority: 'P2', description: 'No focus ring' } // kind omitted -> measured
+    ]);
+    assert.ok(html.includes('Contrast weak') && html.includes('No focus ring'), 'measured findings render');
+    assert.ok(!html.includes('Design judgment'), 'no judgment section when there are no judgment findings');
+    cleanup();
+  });
+
+  it('separates measured and judgment findings into labeled layers', () => {
+    const html = generate(dir, [
+      { priority: 'P1', description: 'Contrast weak', kind: 'measured' },
+      { priority: 'P2', description: 'Hero feels generic', kind: 'judgment', confidence: 'medium' }
+    ]);
+    assert.ok(html.includes('Measured'), 'measured layer heading present');
+    assert.ok(html.includes('Design judgment'), 'judgment layer heading present');
+    assert.ok(/design director.{0,8}s read, not measured/.test(html), 'judgment labeled as opinion (apostrophe may be HTML-escaped)');
+    assert.ok(html.includes('Contrast weak') && html.includes('Hero feels generic'), 'both findings render');
+    cleanup();
+  });
+
+  it('surfaces a judgment finding confidence inline', () => {
+    const html = generate(dir, [
+      { priority: 'P2', description: 'Composition is safe', kind: 'judgment', confidence: 'low' }
+    ]);
+    assert.ok(html.includes('Composition is safe'), 'judgment finding renders');
+    assert.ok(html.includes('(low)'), 'confidence shown inline');
+    cleanup();
+  });
+
+  it('treats a string finding as measured', () => {
+    const html = generate(dir, ['Plain string finding']);
+    assert.ok(html.includes('Plain string finding'));
+    assert.ok(!html.includes('Design judgment'));
+    cleanup();
+  });
+});
diff --git a/tests/skill-discoverability.test.js b/tests/skill-discoverability.test.js
new file mode 100644
index 0000000..878f5ba
--- /dev/null
+++ b/tests/skill-discoverability.test.js
@@ -0,0 +1,109 @@
+/**
+ * Skill Discoverability Tests
+ *
+ * SKILL.md is what an AI agent reads when it invokes /pixelslop — it's the only
+ * place the agent (and through it, the user) learns what Pixelslop can do. The
+ * failure mode is drift: we add a flag or a command in the code, and forget to
+ * advertise it in SKILL.md, so nobody ever uses it.
+ *
+ * These tests are the guard. The setting keys are extracted live from
+ * pixelslop-tools.cjs, so adding a setting and forgetting to document it fails
+ * the build. The flag/command/capability lists are curated — when you add one,
+ * add it here and to SKILL.md together. That coupling is the point.
+ *
+ * Run: node --test tests/skill-discoverability.test.js
+ */
+
+import { describe, it } from 'node:test';
+import { strict as assert } from 'node:assert';
+import { readFileSync } from 'node:fs';
+import { fileURLToPath } from 'node:url';
+import { dirname, join } from 'node:path';
+
+const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');
+const SKILL = readFileSync(join(ROOT, 'dist', 'skill', 'SKILL.md'), 'utf-8');
+const TOOLS = readFileSync(join(ROOT, 'bin', 'pixelslop-tools.cjs'), 'utf-8');
+
+// Pull the real setting keys straight from SETTING_DEFS so the test tracks code.
+function settingKeys() { // Returns the property names declared as `name: {` at the start of a line inside the SETTING_DEFS object literal.
+  const block = TOOLS.match(/const SETTING_DEFS = \{([\s\S]*?)\};/); // non-greedy: the body ends at the first `};`
+  assert.ok(block, 'SETTING_DEFS block must exist in pixelslop-tools.cjs');
+  return [...block[1].matchAll(/^\s*([A-Za-z_$][\w$]*):\s*\{/gm)].map((m) => m[1]); // any JS identifier, so camelCase / digit / underscore keys are not silently skipped
+}
+
+describe('SKILL.md advertises every setting', () => {
+  for (const key of settingKeys()) { // NOTE(review): if settingKeys() returns [], no test is registered and this suite passes vacuously
+    it(`mentions the "${key}" setting`, () => {
+      assert.ok(SKILL.includes(key), // substring match: a key also counts as mentioned when it appears inside a longer word
+        `SKILL.md never mentions the "${key}" setting — an agent won't know it exists. Add it to the Capabilities section and args.`);
+    });
+  }
+});
+
+describe('SKILL.md advertises every run flag', () => {
+  // Curated: when you add a flag, add it here and to SKILL.md together.
+  const flags = ['--fast', '--thorough', '--deep', '--personas', '--code-check', '--quick', '--headed', '--settings', '--debug'];
+  for (const flag of flags) {
+    it(`mentions ${flag}`, () => {
+      assert.ok(SKILL.includes(flag), `SKILL.md never mentions ${flag}`);
+    });
+  }
+});
+
+describe('SKILL.md advertises every major capability', () => {
+  const capabilities = { // capability label -> pattern that must match somewhere in SKILL.md
+    'design-director / judgment layer': /design.director|design judgment|judgment finding/i,
+    'project-specific personas': /project-specific persona|personas write|generated from your audience/i,
+    'score trends': /scan trend|score trend/i,
+    'design tokens': /read-tokens|design tokens/i,
+    'fix loop': /fix loop|checkpoint/i,
+    'code-check mode': /code-check/i,
+  };
+  for (const [name, re] of Object.entries(capabilities)) {
+    it(`mentions ${name}`, () => {
+      assert.ok(re.test(SKILL), `SKILL.md never mentions ${name} — it's invisible to agents and users.`);
+    });
+  }
+});
+
+describe('the frontmatter description sells the breadth', () => {
+  const fm = SKILL.slice(0, SKILL.indexOf('user-invokable')); // NOTE(review): if 'user-invokable' is absent indexOf is -1 and this becomes the whole file minus its last character
+  it('the trigger description mentions personas, judgment, trends, tokens, and fast', () => {
+    for (const word of ['persona', 'judgment', 'trend', 'token', 'fast']) {
+      assert.ok(new RegExp(word, 'i').test(fm),
+        `frontmatter description omits "${word}" — that surface is what agents see in the skill list before invoking.`);
+    }
+  });
+});
+
+describe('a capabilities overview section exists', () => {
+  it('SKILL.md has a Capabilities & Options menu', () => {
+    assert.ok(/## Capabilities & Options/i.test(SKILL),
+      'SKILL.md must have a single canonical Capabilities & Options section');
+  });
+});
+
+describe('the asking protocol is harness-neutral (works under Codex too)', () => {
+  it('has an "Asking the user" protocol', () => {
+    assert.ok(/## Asking the user/i.test(SKILL),
+      'SKILL.md must define how to ask the user across harnesses — AskUserQuestion is Claude Code only');
+  });
+  it('tells non-Claude harnesses what to do instead of AskUserQuestion', () => {
+    assert.ok(/AskUserQuestion/i.test(SKILL), 'still describes the Claude Code mechanism');
+    assert.ok(/Codex/i.test(SKILL), 'names Codex specifically');
+    assert.ok(/numbered (list|menu)/i.test(SKILL), 'gives the Codex/plain-text equivalent (a numbered menu)');
+    assert.ok(/wait/i.test(SKILL), 'tells the agent to stop and wait for the reply'); // matches "wait" anywhere in SKILL.md, not only in the asking section
+  });
+});
+
+describe('the skill drives advisory behaviour, not a config form', () => {
+  it('has an advise-the-user playbook', () => {
+    assert.ok(/## Advise/i.test(SKILL),
+      'SKILL.md must have an advisory section so any harness leads with a recommendation, not a settings form');
+  });
+  it('tells the agent to recommend by intent and not open with raw settings questions', () => {
+    assert.ok(/lead with a recommendation/i.test(SKILL), 'must instruct leading with a recommendation');
+    assert.ok(/intent/i.test(SKILL), 'must map user intent to a run');
+    assert.ok(/advisor, not a config form|advise, don.t interrogate/i.test(SKILL), 'must frame the agent as an advisor');
+  });
+});
diff --git a/tests/tools.test.js b/tests/tools.test.js
index 000d4f0..ef5b2f7 100644
--- a/tests/tools.test.js
+++ b/tests/tools.test.js
@@ -1497,8 +1497,8 @@ describe('config settings', () => {
     const result = runJson(`config get --root "${dir}"`, dir);
     assert.ok(result.settings, 'should return settings object');
     assert.equal(result.settings.headed, false, 'headed default is false');
-    assert.equal(result.settings.deep, false, 'deep default is false');
-    assert.equal(result.settings.thorough, false, 'thorough default is false');
+    assert.equal(result.settings.deep, true, 'deep default is true (exhaustive by default)');
+    assert.equal(result.settings.thorough, true, 'thorough default is true (exhaustive by default)');
     assert.equal(result.settings.personas, 'all', 'personas default is all');
     assert.deepEqual(result.defined, [], 'no keys explicitly defined');
   });
@@ -1528,7 +1528,7 @@ describe('config settings', () => {
     runJson(`config set headed false --root "${dir}"`, dir);
     const result = runJson(`config get thorough --root "${dir}"`, dir);
     assert.equal(result.key, 'thorough');
-    assert.equal(result.value, false);
+    assert.equal(result.value, true);
     assert.equal(result.source, 'default');
   });
 
@@ -1537,8 +1537,8 @@ describe('config settings', () => {
     const result = runJson(`config get --root "${dir}"`, dir);
     assert.ok(result.settings, 'should have settings object');
     assert.equal(result.settings.headed, true);
-    assert.equal(result.settings.deep, false, 'unset deep should default to false');
-    assert.equal(result.settings.thorough, false, 'unset thorough should default to false');
+    assert.equal(result.settings.deep, true, 'unset deep should default to true');
+    assert.equal(result.settings.thorough, true, 'unset thorough should default to true');
     assert.equal(result.settings.personas, 'all', 'unset personas should default to all');
     assert.deepEqual(result.defined, ['headed'], 'only headed was explicitly set');
   });