gabelul · gabelul · Jun 10, 2026 · Jun 9, 2026 · Jun 9, 2026 · Jun 10, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -103,10 +103,12 @@ Users configure persistent scan preferences via `/pixelslop settings` (interacti
 | Key | Type | Default | What it does |
 |-----|------|---------|-------------|
 | `headed` | boolean | `false` | Open visible browser window during scans |
-| `deep` | boolean | `false` | Extended collection — doubled budgets, more elements tested |
-| `thorough` | boolean | `false` | Show lower-confidence findings (50% vs 65% threshold) |
+| `deep` | boolean | `true` | Extended collection — doubled budgets, more elements tested |
+| `thorough` | boolean | `true` | Show lower-confidence findings, tagged with confidence |
 | `personas` | string | `all` | Persona IDs to evaluate (comma-separated, `all`, or `none`) |
 
+**Exhaustive by default.** `deep` and `thorough` default to `true` because Pixelslop is usually driven by an AI agent that won't remember to pass the flags — the default has to be the thorough one. `--fast` is the opt-out (sets `deep: false`, `thorough: false` for a quick, high-confidence-only pass). The cost of `deep: true` is a slower scan; `--fast` is there when speed matters.
+
 **Merge priority:** CLI args > saved settings > defaults. A user who runs `/pixelslop --thorough` gets thorough mode regardless of what's in `.pixelslop.md`.
 
 **Commands:**
@@ -133,6 +135,7 @@ Agents use `pixelslop-tools` (bin/pixelslop-tools.cjs) for all state operations.
 - **`browser analyze-page`** classifies page type (landing-page, e-commerce, content, form-heavy, app-like, general) and suggests relevant personas. Fast (< 2s), no screenshots.
 - **`config read-tokens` / `config write-tokens`** read and write the project's normative design tokens — a `## Design Tokens` section in `.pixelslop.md` holding flat `key: value` lines (`color-primary: #b8422e`, `font-body: Inter`, `type-scale: 1.25`, `space-unit: 4px`). The setup agent captures them from the codebase; the fixer reads them so a fix moves *toward* the project's real palette/type/spacing instead of a generic default. `write-tokens` merges (unspecified keys preserved) and only touches the Design Tokens section — `config write` stays the initializer, tokens layer on top like settings do.
 - **`scan trend`** reports the score progression across runs. `scan save-results` now appends each run's /20 total (plus per-pillar scores) to `.pixelslop/scan-history.json`; `scan trend [--target <url>] [--last <n>]` reads it back (`11 -> 13 -> 14 (+3)`). History is best-effort — a corrupt history file self-heals and never blocks the actual save. The orchestrator surfaces the trend in its scan summary.
+- **`personas write` / `personas list`** manage project-specific personas. `write --json '<persona>'` validates (required fields, slug-only id, no built-in collision, no path traversal) and saves to `.pixelslop/personas/<id>.json`; `list` returns the 8 built-ins plus any custom ones. The orchestrator generates 1-2 personas from the project's audience/brand and evaluates them alongside the built-ins, so persona findings fit the real users instead of only the generic profiles.
 
 ## Voice & Persona
 

diff --git a/bin/pixelslop-tools.cjs b/bin/pixelslop-tools.cjs
@@ -1100,6 +1100,72 @@ function configWriteTokens(args = {}) {
     : `Design tokens written (${Object.keys(merged).length}): ${configPath}`);
 }
 
+// The 8 shipped persona ids; custom personas in .pixelslop/personas/ must not collide with them.
+// Frozen: personasList returns this very array, so a caller must not be able to alter it.
+const BUILTIN_PERSONA_IDS = Object.freeze([
+  'screen-reader-user', 'low-vision-user', 'keyboard-user', 'rushed-mobile-user',
+  'slow-connection-user', 'non-native-english', 'design-critic', 'first-time-visitor'
+]);
+
+/**
+ * Validate a project-specific persona and save it to .pixelslop/personas/<id>.json.
+ * args.json is the persona as JSON text; args.root is passed to resolveProjectRoot.
+ * Returns { ok: true, id, path } or { ok: false, error }; errors are returned, never thrown.
+ */
+function personasWrite(args = {}) {
+  try {
+    if (!args.json) return { ok: false, error: '--json is required' };
+    let persona;
+    try { persona = JSON.parse(args.json); } catch (e) { return { ok: false, error: `Invalid --json: ${e.message}` }; }
+    if (!persona || typeof persona !== 'object' || Array.isArray(persona)) {
+      return { ok: false, error: '--json must be a persona object' };
+    }
+
+    const required = ['id', 'name', 'category', 'description', 'designPriorities', 'frustrationTriggers', 'positiveSignals'];
+    const missing = required.filter((k) => persona[k] == null);
+    if (missing.length) return { ok: false, error: `Missing persona fields: ${missing.join(', ')}` };
+    if (!Array.isArray(persona.frustrationTriggers) || !Array.isArray(persona.positiveSignals)) {
+      return { ok: false, error: 'frustrationTriggers and positiveSignals must be arrays' };
+    }
+
+    // id doubles as the filename, so it must be a safe slug — no traversal, no surprises.
+    const { id } = persona; // not coerced: a non-string id would be saved under a name that differs from the stored id
+    if (typeof id !== 'string' || !/^[a-z0-9][a-z0-9-]{1,40}$/.test(id)) {
+      return { ok: false, error: `Persona id must be a lowercase slug [a-z0-9-], 2-41 chars: got ${JSON.stringify(id)}` };
+    }
+    if (BUILTIN_PERSONA_IDS.includes(id)) {
+      return { ok: false, error: `"${id}" collides with a built-in persona; use a project-specific id` };
+    }
+
+    const dir = path.join(resolveProjectRoot(args.root), '.pixelslop', 'personas');
+    const outPath = path.join(dir, `${id}.json`);
+    // Defence in depth: the written file must stay inside the personas dir.
+    if (path.dirname(path.resolve(outPath)) !== path.resolve(dir)) {
+      return { ok: false, error: 'unsafe persona path' };
+    }
+    fs.mkdirSync(dir, { recursive: true });
+    fs.writeFileSync(outPath, JSON.stringify(persona, null, 2), 'utf-8');
+    return { ok: true, id, path: outPath };
+  } catch (err) {
+    return { ok: false, error: err.message };
+  }
+}
+
+/**
+ * List persona ids: the built-ins plus every *.json file in .pixelslop/personas/ (names starting with "._" are skipped).
+ * args.root is passed to resolveProjectRoot. Returns { ok: true, builtin, custom, dir }, or { ok: false, error } on failure.
+ */
+function personasList(args = {}) {
+  try {
+    const dir = path.join(resolveProjectRoot(args.root), '.pixelslop', 'personas');
+    const files = fs.existsSync(dir) ? fs.readdirSync(dir) : []; // no directory yet means no custom personas
+    const custom = files.filter((f) => f.endsWith('.json') && !f.startsWith('._')).map((f) => f.replace(/\.json$/, ''));
+    return { ok: true, builtin: BUILTIN_PERSONA_IDS, custom, dir };
+  } catch (err) { // same contract as personasWrite: report the failure instead of throwing
+    return { ok: false, error: err.message };
+  }
+}
+
 /**
  * Check if .pixelslop.md exists.
  */
@@ -1113,10 +1179,14 @@ function configExists(args = {}) {
 // ─────────────────────────────────────────────
 
 /** Valid setting keys and their value types/defaults */
+// Defaults are exhaustive on purpose. Pixelslop is usually driven by an AI agent
+// that won't remember to pass --thorough or --deep, so the default has to be the
+// thorough one. `--fast` (handled in SKILL.md) is the opt-out that turns deep and
+// thorough back off for a quick pass.
 const SETTING_DEFS = {
   headed:   { type: 'boolean', default: false,  description: 'Open visible browser window' },
-  deep:     { type: 'boolean', default: false,  description: 'Extended collection with doubled budgets' },
-  thorough: { type: 'boolean', default: false,  description: 'Show lower-confidence findings' },
+  deep:     { type: 'boolean', default: true,   description: 'Extended collection with doubled budgets (off with --fast)' },
+  thorough: { type: 'boolean', default: true,   description: 'Show lower-confidence findings, tagged (off with --fast)' },
   personas: { type: 'string',  default: 'all',  description: 'Persona IDs (comma-separated, "all", or "none")' },
 };
 
@@ -3077,32 +3147,53 @@ function reportGenerate(flags) {
   </div>`;
     }
 
-    // ── Findings table ──
-    let findingsHtml;
-    if (hasFixData && findings.length > 0) {
-      // Full table with category + status columns
-      const rows = findings.map(f => {
-        const text = typeof f === 'string' ? f : (f.description || '');
-        const priority = typeof f === 'object' ? (f.priority || 'P2') : 'P2';
+    // ── Findings table (split into measured evidence vs design judgment) ──
+    // kind defaults to 'measured' so existing scans render exactly as before;
+    // the design-director pass is the only producer of 'judgment' findings.
+    const kindOf = (f) => (typeof f === 'object' && f.kind === 'judgment') ? 'judgment' : 'measured';
+    const measuredFindings = findings.filter(f => kindOf(f) === 'measured');
+    const judgmentFindings = findings.filter(f => kindOf(f) === 'judgment');
+
+    const renderRows = (list) => list.map(f => {
+      const text = typeof f === 'string' ? f : (f.description || '');
+      const priority = typeof f === 'object' ? (f.priority || 'P2') : 'P2';
+      // Judgment findings carry a confidence the report surfaces inline.
+      const conf = (typeof f === 'object' && f.confidence != null)
+        ? ` <span style="color:var(--ink-ghost);font-size:10px">(${escapeHtml(String(f.confidence))})</span>` : '';
+      if (hasFixData) {
         const category = typeof f === 'object' ? (f.category || '') : '';
-        // Try to match finding to plan issue for status
         let fixStatus = 'OPEN';
         if (typeof f === 'object' && f.id && issueMap.has(f.id)) {
           fixStatus = (issueMap.get(f.id).status || 'pending').toUpperCase();
         }
-        return `<tr><td class="col-priority"><span class="priority-tag priority-${escapeHtml(priority)}">${escapeHtml(priority)}</span></td><td class="col-category">${escapeHtml(category)}</td><td class="col-finding">${escapeHtml(text)}</td><td class="col-status"><span class="fix-status fix-${escapeHtml(fixStatus)}">${escapeHtml(fixStatus)}</span></td></tr>`;
-      }).join('\n      ');
-      findingsHtml = `<table class="data-table findings-table"><thead><tr><th>Priority</th><th>Category</th><th>Finding</th><th>Status</th></tr></thead><tbody>\n      ${rows}\n    </tbody></table>`;
-    } else if (findings.length > 0) {
-      // Simple table without category/status
-      const rows = findings.map(f => {
-        const text = typeof f === 'string' ? f : (f.description || '');
-        const priority = typeof f === 'object' ? (f.priority || 'P2') : 'P2';
-        return `<tr><td class="col-priority"><span class="priority-tag priority-${escapeHtml(priority)}">${escapeHtml(priority)}</span></td><td class="col-finding">${escapeHtml(text)}</td></tr>`;
-      }).join('\n      ');
-      findingsHtml = `<table class="data-table findings-table"><thead><tr><th>Priority</th><th>Finding</th></tr></thead><tbody>\n      ${rows}\n    </tbody></table>`;
-    } else {
+        return `<tr><td class="col-priority"><span class="priority-tag priority-${escapeHtml(priority)}">${escapeHtml(priority)}</span></td><td class="col-category">${escapeHtml(category)}</td><td class="col-finding">${escapeHtml(text)}${conf}</td><td class="col-status"><span class="fix-status fix-${escapeHtml(fixStatus)}">${escapeHtml(fixStatus)}</span></td></tr>`;
+      }
+      return `<tr><td class="col-priority"><span class="priority-tag priority-${escapeHtml(priority)}">${escapeHtml(priority)}</span></td><td class="col-finding">${escapeHtml(text)}${conf}</td></tr>`;
+    }).join('\n      ');
+
+    const tableFor = (list) => {
+      const head = hasFixData
+        ? '<thead><tr><th>Priority</th><th>Category</th><th>Finding</th><th>Status</th></tr></thead>'
+        : '<thead><tr><th>Priority</th><th>Finding</th></tr></thead>';
+      return `<table class="data-table findings-table">${head}<tbody>\n      ${renderRows(list)}\n    </tbody></table>`;
+    };
+    const layerHeading = (title, note) =>
+      `<h3 style="font-size:12px;text-transform:uppercase;letter-spacing:0.08em;color:var(--ink-tertiary);margin:18px 0 8px">${escapeHtml(title)} <span style="font-weight:400;text-transform:none;letter-spacing:0;color:var(--ink-ghost)">— ${escapeHtml(note)}</span></h3>`;
+
+    let findingsHtml;
+    if (findings.length === 0) {
       findingsHtml = '<p style="color:var(--ink-ghost);font-size:11px;text-transform:uppercase;letter-spacing:0.08em">No findings</p>';
+    } else if (judgmentFindings.length === 0) {
+      // Only measured findings — render the single table, no layer headings (unchanged look).
+      findingsHtml = tableFor(measuredFindings.length ? measuredFindings : findings);
+    } else {
+      // Both layers present — label and separate them so judgment never reads as measured fact.
+      const sections = [];
+      if (measuredFindings.length > 0) {
+        sections.push(layerHeading('Measured', 'evidence-backed') + tableFor(measuredFindings));
+      }
+      sections.push(layerHeading('Design judgment', "a design director's read, not measured") + tableFor(judgmentFindings));
+      findingsHtml = sections.join('\n    ');
     }
 
     // ── Fix section (entire tab-section div, or empty) ──
@@ -3398,6 +3489,15 @@ async function main() {
       break;
     }
 
+    case 'personas': {
+      switch (command) {
+        case 'write': return output(personasWrite(flags), true);
+        case 'list': return output(personasList(flags), true);
+        default: fail(`Unknown personas command: ${command}. Valid: write, list`);
+      }
+      break;
+    }
+
     default:
-      fail(`Unknown group: ${group}. Valid: plan, checkpoint, gate, config, log, discover, serve, init, verify, browser, scan, report`);
+      fail(`Unknown group: ${group}. Valid: plan, checkpoint, gate, config, log, discover, serve, init, verify, browser, scan, report, personas`);
   }

diff --git a/dist/agents/internal/pixelslop-eval-design-director.md b/dist/agents/internal/pixelslop-eval-design-director.md
@@ -0,0 +1,95 @@
+---
+name: pixelslop-eval-design-director
+description: >
+  The subjective design-judgment pass. Looks at the screenshots and reads the
+  page like a design director — composition, distinctiveness, emotional fit,
+  missed opportunities — then argues against its own findings before returning
+  them. Produces judgment findings only. Does NOT touch the /20 score.
+model: sonnet
+tools:
+  - Read
+---
+
+You're the design director. The other six evaluators measure things — contrast ratios, type scales, overflow. You do the thing a measurement can't: you look at the page and say whether it's actually *good*, and where a real designer would push back.
+
+This is the subjective pass on purpose. You are allowed to have taste and opinions. But you are also the one evaluator most at risk of producing noise — vague, unfalsifiable, "make it pop" feedback that wastes everyone's time. So you do two passes: first you say what you see, then you argue against yourself and throw out everything you can't defend. What survives is what you return.
+
+**You never touch the /20 score.** The score stays measured. Your findings are a separate layer, labeled as judgment. Your job is coverage and taste, not grading.
+
+## Setup: Load Your Knowledge
+
+```
+Read dist/skill/resources/scoring.md            # The whole rubric — know what's already measured so you don't repeat it
+Read dist/skill/resources/ai-slop-patterns.md   # The visual fingerprints of AI-generated design
+Read dist/skill/resources/heuristics.md          # Nielsen's 10, adapted — the UX lens
+Read dist/skill/resources/cognitive-load.md      # When a page asks too much of the user
+```
+
+## Input
+
+- **evidence_path** (required) — absolute path to the evidence bundle JSON
+- **thorough** (optional, default: false) — when true, also keep low-confidence findings; when false, only high- and medium-confidence
+
+## Protocol
+
+1. **Read your resource files.** All four. You need to know what's already measured so you don't just restate it in prose.
+
+2. **Read the evidence bundle** at `evidence_path`. Note the pillar evidence, the slop patterns already detected, the persona checks.
+
+3. **Look at the screenshots.** This is the part the measured evaluators can't do. The bundle has `viewports.desktop.screenshot`, `viewports.tablet.screenshot`, `viewports.mobile.screenshot` (and scroll-fold screenshots if present). `Read` each PNG path. A screenshot you didn't open doesn't count — don't opine on a layout you haven't seen.
+
+4. **First pass — say what you see.** Look like a design director reviewing a junior's work. Draft findings across these lenses:
+   - **Does this look AI-generated?** Be honest. Generic hero, icon-heading-paragraph-button rows, no point of view, every section the same rhythm. The `ai-slop-patterns.md` fingerprints, but as a gestalt, not a checklist.
+   - **Composition & distinctiveness** — does the page have a point of view, or is it a template? Is there a focal point, a reason the eye goes where it goes? Would anyone remember this page?
+   - **Emotional fit** — does the feeling match the job? A funeral home that feels like a fintech startup is wrong even if every contrast ratio passes.
+   - **Missed opportunities** — the strongest design-director move. Not "this is broken" but "this is fine and forgettable, and here's the version that isn't."
+   - **UX heuristics & cognitive load** — where the page makes the user think too hard, in ways the measured pillars don't already flag.
+
+5. **Second pass — argue against yourself.** For every finding from pass 1, ask:
+   - *Is this falsifiable, or is it "make it pop"?* If you can't point at the screenshot and say what specifically and why, cut it.
+   - *Is a measured evaluator already saying this?* If contrast/typography/hierarchy already flagged it, drop yours — it's their finding, measured beats judgment.
+   - *Am I imposing one taste, or is this a real problem?* A bold, deliberate choice you personally wouldn't make is not a finding. Respect intent. Pixelslop does not punish distinctive design for being distinctive.
+   - *Would a second design director agree?* If you're only ~60% sure, tag it `low`. If you'd bet on it, `high`. Anything in between is `medium`.
+
+   Kill everything that fails. Be ruthless — a short list of sharp, defensible reads beats a long list of vibes. Returning two real findings is a success. Inventing eight to look thorough is the failure mode this pass exists to prevent.
+
+6. **Return JSON.** Only the findings that survived, each tagged `kind: "judgment"` and carrying a `confidence`.
+
+## Output Format
+
+Return exactly this. Nothing else.
+
+```json
+{
+  "kind": "design-director",
+  "verdict": "One honest sentence: does this look designed, or generated?",
+  "findings": [
+    {
+      "criterion": "distinctiveness",
+      "kind": "judgment",
+      "confidence": "high",
+      "detail": "Every section is icon / heading / paragraph / button at the same rhythm — the page reads as a template with the content swapped in, not as a designed page.",
+      "evidence": "desktop screenshot: features, testimonials, and pricing sections share identical structure and spacing",
+      "opportunity": "Break the rhythm — let one section be full-bleed, vary the grid, give the hero a real focal object instead of centered text over a gradient."
+    }
+  ]
+}
+```
+
+Each finding needs:
+- `criterion` — the lens (`ai-slop`, `distinctiveness`, `composition`, `emotional-fit`, `missed-opportunity`, `cognitive-load`, `ux-heuristic`)
+- `kind` — always `"judgment"`
+- `confidence` — `"high"` or `"medium"` (or `"low"` only in thorough mode)
+- `detail` — what you see, specific enough to point at in the screenshot
+- `evidence` — which screenshot/viewport, and what in it
+- `opportunity` — optional but encouraged; the better version, concretely
+
+## Rules
+
+1. **Judgment only — never a score.** You do not return a `score` or `pillar`. The /20 is measured. If you find yourself wanting to grade, stop.
+2. **You looked, or you don't speak.** Every finding cites a specific screenshot. No opining on layouts you didn't open.
+3. **Don't restate measured findings.** If a pillar evaluator measured it, it's theirs. You cover what measurement can't.
+4. **Respect intent.** Distinctive ≠ wrong. Bold ≠ broken. A choice you wouldn't make is not a defect.
+5. **The second pass is mandatory.** Returning pass-1 findings without arguing against them is the one thing you must never do. Noise is worse than silence here.
+6. **Confidence is honest.** `high` means you'd defend it in a studio review. Don't inflate.
+7. **Return JSON only.** No markdown, no preamble.