From c7dd90e7529c1631dcfd0ec441e62437a4d94586 Mon Sep 17 00:00:00 2001
From: Gabi <hola@booplex.com>
Date: Wed, 10 Jun 2026 11:24:25 +0200
Subject: [PATCH 1/2] fix: run agents inline when the harness can't spawn them
 (Codex)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The installer copies our agent specs into .codex/agents/ as Markdown, but Codex
custom agents must be TOML in its own schema — so Codex can't spawn any of them,
and a scan's "Spawn agent: X" instructions pointed at agents that don't exist
there. The orchestrator only knew how to spawn, not how to run an evaluator
itself, so a Codex scan would fail outright.

Adds a harness-neutral spawn protocol: spawn a named agent as a subagent if the
harness supports it, otherwise read its spec and run it inline, in sequence. The
orchestrator now falls back to running all 7 evaluators inline against the same
evidence bundle — same scores and findings, just sequential. A test pins the
protocol wording so it can't drift.

This is the robust baseline that makes the skill portable, not just Codex-aware.
Native Codex TOML agents for real parallelism can come later if its subagent
schema settles.
---
 dist/agents/pixelslop.md            |  4 ++++
 dist/skill/SKILL.md                 | 10 ++++++++++
 tests/skill-discoverability.test.js | 17 +++++++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/dist/agents/pixelslop.md b/dist/agents/pixelslop.md
index c7cc5a5..4c7497c 100644
--- a/dist/agents/pixelslop.md
+++ b/dist/agents/pixelslop.md
@@ -14,6 +14,8 @@ tools:
 
 You are the Pixelslop orchestrator. You coordinate the full design review and fix workflow — from initial scan to final report. You spawn subagents for setup, fixing, checking, and code-check mode, and you use `pixelslop-tools` for all state manipulation. You never edit files directly.
 
+**Spawning vs inline (harness-aware).** Every "Spawn agent: X" below means *spawn X as a subagent if your harness supports named subagents (Claude Code does; Codex does when the agent is in its TOML format); otherwise read X's spec file and run its workflow inline yourself, in sequence.* The output contract is identical — you only lose parallelism. Never skip an agent because you can't spawn it. A scan with the evaluators run inline is still a complete scan.
+
 **The parent session (SKILL.md) handles all user-facing decisions before spawning you.** By the time you run, the URL is resolved, the server is running (if needed), and any setup context has been collected. You receive everything you need in your invocation prompt — just execute and return results.
 
 You run in one of two modes:
@@ -222,6 +224,8 @@ The design-director returns JSON: `{ "kind": "design-director", "verdict": "..."
 
 Collect all 7 results. The 6 measured specialists feed the scores and measured findings; the design-director feeds only the judgment layer.
 
+If your harness can't spawn these as subagents (see "Spawning vs inline" above), run all 7 inline instead: read each spec in `dist/agents/internal/` (or the installed path), follow it against the same evidence bundle, and collect the same JSON. Sequential, but the scores and findings are identical — never drop an evaluator because you couldn't spawn it.
+
 ### Step 6c: Aggregate Report
 
 Assemble the standard report from specialist outputs. The format is defined in `scoring.md` — the same contract as before.
diff --git a/dist/skill/SKILL.md b/dist/skill/SKILL.md
index 0768f1f..916ee39 100644
--- a/dist/skill/SKILL.md
+++ b/dist/skill/SKILL.md
@@ -54,6 +54,16 @@ Pixelslop runs under different harnesses (Claude Code, Codex CLI, and others), a
 
 The `AskUserQuestion(...)` snippets in this file are the question **content** — the exact wording and options to surface. *How* you render them is your harness's call; *what* you ask is not. If you're not on Claude Code, read each block as "ask this question, offer these options" and present it your way.
 
+## Spawning agents (works in any harness)
+
+Pixelslop's work is split across named agents — an orchestrator, a setup agent, an evidence collector, six measured evaluators, a design-director, a fixer, a checker. How you run a named agent depends on your harness:
+
+- **Claude Code:** spawn it as a subagent by name (the Task/Agent tool), parallel where the runtime supports it.
+- **Codex CLI:** Codex supports named subagents, but only when the agent is installed in its TOML format. If a Pixelslop agent isn't spawnable, use the inline fallback below.
+- **Any harness where the named agent isn't spawnable (or that has no subagents at all):** run the agent **inline** — read its spec file (e.g. `dist/agents/internal/pixelslop-eval-hierarchy.md`, or the installed path) and follow its instructions yourself, in sequence.
+
+So **"Spawn agent: X" everywhere in this skill means: spawn X as a subagent if you can, otherwise read X's spec and execute it inline.** The output contract is identical either way — you only lose parallelism. Never skip an agent because you can't spawn it; run it inline instead. A scan with the evaluators run inline is still a complete scan, just sequential.
+
 ## Settings Mode
 
 When `--settings` is passed (e.g., `/pixelslop settings`), run the interactive settings configurator and stop — don't scan anything.
diff --git a/tests/skill-discoverability.test.js b/tests/skill-discoverability.test.js
index 878f5ba..1b7664a 100644
--- a/tests/skill-discoverability.test.js
+++ b/tests/skill-discoverability.test.js
@@ -96,6 +96,23 @@ describe('the asking protocol is harness-neutral (works under Codex too)', () =>
   });
 });
 
+describe('the spawn protocol is harness-neutral with an inline fallback', () => { // drift guard: pins the spawn-or-inline wording by regex
+  it('has a "Spawning agents" protocol', () => {
+    assert.ok(/## Spawning agents/i.test(SKILL), // level-2 heading, matched case-insensitively
+      'SKILL.md must define how to run named agents across harnesses — the Task tool is Claude Code only');
+  });
+  it('gives an inline fallback for harnesses that can\'t spawn', () => { // NOTE(review): each pattern below is tested against all of SKILL, not only the "Spawning agents" section
+    assert.ok(/inline/i.test(SKILL), 'must describe running an agent inline');
+    assert.ok(/Codex/i.test(SKILL), 'names Codex');
+    assert.ok(/read its spec|read X.s spec|read the agent.s spec/i.test(SKILL), 'tells the agent to read the spec and run it'); // `.` matches whichever apostrophe character the text uses
+    assert.ok(/never skip an agent|never drop an evaluator/i.test(SKILL), 'forbids skipping an agent it cannot spawn'); // either phrasing satisfies the guard
+  });
+  it('the orchestrator also carries the inline fallback', () => {
+    const orch = readFileSync(join(ROOT, 'dist', 'agents', 'pixelslop.md'), 'utf-8'); // read from the source tree (dist/), not from an installed copy
+    assert.ok(/inline/i.test(orch) && /spawn/i.test(orch), 'orchestrator must know to run evaluators inline when it cannot spawn'); // both words must occur somewhere in the file; they need not be adjacent
+  });
+});
+
 describe('the skill drives advisory behaviour, not a config form', () => {
   it('has an advise-the-user playbook', () => {
     assert.ok(/## Advise/i.test(SKILL),

From 92cfb7e8e2c64ff8ef57ba0f386e1fb20f47ccfb Mon Sep 17 00:00:00 2001
From: Gabi <hola@booplex.com>
Date: Wed, 10 Jun 2026 11:50:00 +0200
Subject: [PATCH 2/2] feat: generate native Codex TOML agents at install time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The inline fallback makes Codex work, but it leans on Codex interpreting our
Markdown specs correctly every run. This adds the native path: at install,
generate a Codex custom-agent TOML (.codex/agents/<name>.toml) from each spec so
Codex spawns them directly. The .md stays put for the inline fallback, so it's
belt-and-suspenders — native spawn when it works, inline when it doesn't.

agentMdToCodexToml emits the three required fields (name, description,
developer_instructions). model and sandbox_mode are omitted so a spawned agent
inherits the parent's settings — mapping Claude's "sonnet" onto a Codex model
would be wrong. The body goes in a TOML literal block so backslashes and regex in
code examples survive verbatim. Uninstall removes the generated pixelslop*.toml.

Verified end to end: a real install writes 13 valid TOMLs (6 agents + 7
evaluators). Can't live-test spawning on Codex from here, which is exactly why
the inline fallback is the safety net.
---
 bin/pixelslop.mjs        |  81 +++++++++++++++++++++++++++++--
 tests/codex-toml.test.js | 101 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 179 insertions(+), 3 deletions(-)
 create mode 100644 tests/codex-toml.test.js

diff --git a/bin/pixelslop.mjs b/bin/pixelslop.mjs
index 4d925ab..2a2b06e 100644
--- a/bin/pixelslop.mjs
+++ b/bin/pixelslop.mjs
@@ -202,6 +202,56 @@ export function rewriteAgentPaths(content, installRoot) {
     .replaceAll('dist/skill/resources/', resourcesPath);
 }
 
+/**
+ * Convert a Pixelslop agent spec (Markdown + YAML frontmatter) into a Codex
+ * custom-agent TOML. Codex spawns agents from `.codex/agents/<name>.toml` and
+ * can't read our Markdown specs; the skill's inline fallback is the safety net.
+ *
+ * Emits name, description, developer_instructions. `model` and `sandbox_mode`
+ * are omitted on purpose so a spawned agent inherits the parent session's
+ * settings — mapping Claude's "sonnet" onto a Codex model would be wrong.
+ *
+ * @param {string} md - The agent spec markdown (already path-rewritten)
+ * @returns {string|null} TOML text, or null if the spec has no usable frontmatter
+ */
+export function agentMdToCodexToml(md) {
+  // Normalise CRLF first: with Windows line endings the `---\n` fences would
+  // never match and the spec would silently produce no TOML.
+  const fm = md.replace(/\r\n/g, '\n').match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
+  if (!fm) return null;
+  const [, frontmatter, rawBody] = fm;
+  // Drop one pair of matching YAML quotes so they don't leak into the TOML value.
+  const unquote = (s) => s.trim().replace(/^(["'])(.*)\1$/, '$2');
+
+  // `[ \t]*`, not `\s*`: `\s` spans newlines, so an empty `name:`/`description:`
+  // would otherwise swallow the following key as its value.
+  const nameMatch = frontmatter.match(/^name:[ \t]*(\S.*)$/m);
+  const name = nameMatch ? unquote(nameMatch[1]) : '';
+  if (!name) return null;
+
+  // description is inline (`description: text`) or a block scalar (`>`, `>-`,
+  // `|`, … or bare) whose indented lines are joined onto one line.
+  const inline = frontmatter.match(/^description:[ \t]*(?![>|])(\S.*)$/m);
+  const block = frontmatter.match(/^description:[ \t]*(?:[>|][+-]?)?[ \t]*\n((?:[ \t]+\S.*\n?)+)/m);
+  const joined = block ? block[1].split('\n').map((l) => l.trim()).filter(Boolean).join(' ') : '';
+  const description = inline ? unquote(inline[1]) : joined;
+
+  const escBasic = (s) => s.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
+  // Multiline LITERAL string ('''...''') so backslashes and regex inside code
+  // blocks survive verbatim. Split every run of 3+ quotes — replacing `'''` in
+  // one pass leaves a triple behind in runs of 5+, closing the block early.
+  const safeBody = rawBody.trim().replace(/''(?=')/g, "'' ");
+
+  return [
+    `name = "${escBasic(name)}"`,
+    `description = "${escBasic(description)}"`,
+    "developer_instructions = '''",
+    safeBody,
+    "'''",
+    ''
+  ].join('\n');
+}
+
 // ─────────────────────────────────────────────
 // Browser Runtime Detection
 // ─────────────────────────────────────────────
@@ -1155,17 +1205,30 @@ function install(options = {}) {
   for (const client of selectedClients) {
     header(`Configuring ${client.name}`);
 
-    // Copy agent files with path rewriting
+    // Copy agent files with path rewriting. Codex also gets a native TOML agent
+    // per spec (it spawns from .codex/agents/<name>.toml; the .md is the inline
+    // fallback). Always keep the .md so the fallback has something to read.
     mkdirSync(client.agentDir, { recursive: true });
+    let codexTomlCount = 0;
+    const writeCodexToml = (rewritten, mdFilename) => {
+      if (client.id !== 'codex') return;
+      const toml = agentMdToCodexToml(rewritten);
+      if (!toml) return;
+      writeFileSync(join(client.agentDir, mdFilename.replace(/\.md$/, '.toml')), toml);
+      codexTomlCount += 1;
+    };
     for (const agentFile of AGENT_FILES) {
       const srcPath = join(PACKAGE_ROOT, 'dist', 'agents', agentFile);
       const raw = readFileSync(srcPath, 'utf8');
       const rewritten = rewriteAgentPaths(raw, INSTALL_ROOT);
       writeFileSync(join(client.agentDir, agentFile), rewritten);
+      writeCodexToml(rewritten, agentFile);
     }
     log('✓', `${AGENT_FILES.length} agent specs ${dim('→')} ${dim(client.agentDir)}`);
 
-    // Copy internal evaluator agents (not in AGENT_FILES — orchestrator-only)
+    // Copy internal evaluator agents (not in AGENT_FILES — orchestrator-only).
+    // Their .md stays under internal/; the Codex TOML goes flat in agentDir,
+    // because Codex looks for spawnable agents directly in .codex/agents/.
     const internalSrc = join(PACKAGE_ROOT, 'dist', 'agents', 'internal');
     if (existsSync(internalSrc)) {
       const internalDest = join(client.agentDir, 'internal');
@@ -1173,10 +1236,15 @@ function install(options = {}) {
       const internalFiles = readdirSync(internalSrc).filter(f => f.endsWith('.md') && !f.startsWith('._'));
       for (const file of internalFiles) {
         const raw = readFileSync(join(internalSrc, file), 'utf8');
-        writeFileSync(join(internalDest, file), rewriteAgentPaths(raw, INSTALL_ROOT));
+        const rewritten = rewriteAgentPaths(raw, INSTALL_ROOT);
+        writeFileSync(join(internalDest, file), rewritten);
+        writeCodexToml(rewritten, file);
       }
       log('✓', `${internalFiles.length} internal evaluators ${dim('→')} ${dim(internalDest)}`);
     }
+    if (codexTomlCount > 0) {
+      log('✓', `${codexTomlCount} Codex agents ${dim('(.toml)')} ${dim('→')} ${dim(client.agentDir)}`);
+    }
 
     // Install skill via linkOrCopy — method is tracked
     const preferredMethod = options.installMethods?.[client.name]?.skill;
@@ -1375,6 +1443,13 @@ function uninstall() {
       }
     }
 
+    // Remove generated Codex agent TOMLs (only ours — pixelslop*.toml).
+    if (existsSync(client.agentDir)) {
+      for (const file of readdirSync(client.agentDir).filter(f => f.startsWith('pixelslop') && f.endsWith('.toml'))) {
+        rmSync(join(client.agentDir, file));
+      }
+    }
+
     // Remove internal evaluator agents (only pixelslop-eval-* files, not the whole directory)
     const internalDir = join(client.agentDir, 'internal');
     if (existsSync(internalDir)) {
diff --git a/tests/codex-toml.test.js b/tests/codex-toml.test.js
new file mode 100644
index 0000000..3daac75
--- /dev/null
+++ b/tests/codex-toml.test.js
@@ -0,0 +1,101 @@
+/**
+ * Codex Agent TOML Tests
+ *
+ * agentMdToCodexToml converts a Pixelslop agent spec (Markdown + YAML
+ * frontmatter) into a Codex custom-agent TOML so Codex can spawn our agents
+ * natively instead of only running them inline.
+ *
+ * The fragile bits, pinned here:
+ *   - the three required Codex fields (name, description, developer_instructions)
+ *   - model/sandbox_mode are NOT emitted (they'd wrongly pin a Claude model)
+ *   - the body goes in a literal ''' block so backslashes and regex in code
+ *     examples survive verbatim (a basic """ string would mangle them)
+ *   - real shipped specs all convert; bad input returns null, not garbage
+ *
+ * Run: node --test tests/codex-toml.test.js
+ */
+
+import { describe, it } from 'node:test';
+import { strict as assert } from 'node:assert';
+import { readFileSync, readdirSync } from 'node:fs';
+import { fileURLToPath } from 'node:url';
+import { dirname, join } from 'node:path';
+import { agentMdToCodexToml } from '../bin/pixelslop.mjs';
+
+const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..'); // parent of the directory that holds this test file
+
+const SAMPLE = `---
+name: pixelslop-eval-sample
+description: >
+  A folded multi-line description that should
+  collapse onto a single TOML line.
+model: sonnet
+tools:
+  - Read
+---
+
+You are the sample evaluator.
+
+Detect with a regex like \\d+px and a Windows path C:\\Users\\x.
+`; // the doubled backslashes are template-literal escapes: the spec text itself holds \d+px and C:\Users\x
+
+describe('agentMdToCodexToml — required fields', () => {
+  const toml = agentMdToCodexToml(SAMPLE); // runs in the describe callback; every case below inspects this one result
+
+  it('emits name, description, developer_instructions', () => {
+    assert.match(toml, /^name = "pixelslop-eval-sample"$/m);
+    assert.match(toml, /^description = ".+"$/m);
+    assert.match(toml, /developer_instructions = '''/);
+    assert.ok(toml.trimEnd().endsWith("'''"), 'closes the literal block');
+  });
+
+  it('collapses a folded description onto one line', () => {
+    const line = toml.match(/^description = "(.+)"$/m)[1]; // NOTE(review): a missing description line surfaces here as a TypeError, not an assertion failure
+    assert.ok(!line.includes('\n'), 'single line'); // cannot fail on its own: `.+` above never spans a newline, so the match is the real check
+    assert.ok(line.includes('collapse onto a single TOML line'), 'content preserved');
+  });
+
+  it('does NOT emit model or sandbox_mode (they must inherit)', () => {
+    assert.ok(!/^model\s*=/m.test(toml), 'no model field — would pin a Claude model'); // anchored per line (/m): only a line that starts with `model =` counts
+    assert.ok(!/^sandbox_mode\s*=/m.test(toml), 'no sandbox_mode field');
+  });
+
+  it('preserves backslashes and regex verbatim (literal block, not basic)', () => {
+    assert.ok(toml.includes('\\d+px'), 'regex backslash survives'); // the searched text is \d+px — a single backslash
+    assert.ok(toml.includes('C:\\Users\\x'), 'windows path backslashes survive');
+  });
+});
+
+describe('agentMdToCodexToml — edge cases', () => { // `assert` is node:assert's `strict` export, so equal() below compares strictly
+  it('returns null with no frontmatter', () => {
+    assert.equal(agentMdToCodexToml('# just markdown'), null); // input has no `---` fences at all
+  });
+  it('returns null when name is missing', () => {
+    assert.equal(agentMdToCodexToml('---\ndescription: x\n---\nbody'), null); // fenced frontmatter, but no `name:` key
+  });
+  it('handles an inline (non-folded) description', () => {
+    const toml = agentMdToCodexToml('---\nname: a\ndescription: one line desc\n---\nbody'); // value sits on the same line as its key
+    assert.match(toml, /^description = "one line desc"$/m);
+  });
+});
+
+describe('every shipped agent spec converts cleanly', () => {
+  // Same filter for both listings: `._*` entries are not agent specs.
+  const isSpec = (f) => f.endsWith('.md') && !f.startsWith('._');
+  const specs = [
+    ...readdirSync(join(ROOT, 'dist', 'agents')).filter(isSpec),
+    ...readdirSync(join(ROOT, 'dist', 'agents', 'internal')).filter(isSpec).map((f) => join('internal', f)),
+  ];
+  for (const rel of specs) {
+    it(`converts ${rel}`, () => {
+      const md = readFileSync(join(ROOT, 'dist', 'agents', rel), 'utf-8');
+      const toml = agentMdToCodexToml(md);
+      assert.ok(toml, `${rel} must convert`);
+      assert.match(toml, /^name = ".+"$/m, `${rel} has a name`);
+      assert.match(toml, /^description = ".+"$/m, `${rel} has a description`);
+      assert.ok(toml.includes("developer_instructions = '''"), `${rel} has instructions`);
+      // the literal block must actually close — no stray ''' inside the body
+      assert.equal((toml.match(/'''/g) || []).length, 2, `${rel} literal block opens and closes exactly once`);
+    });
+  }
+});