From c7dd90e7529c1631dcfd0ec441e62437a4d94586 Mon Sep 17 00:00:00 2001 From: Gabi Date: Wed, 10 Jun 2026 11:24:25 +0200 Subject: [PATCH 1/2] fix: run agents inline when the harness can't spawn them (Codex) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The installer copies our agent specs into .codex/agents/ as Markdown, but Codex custom agents must be TOML in its own schema — so Codex can't spawn any of them, and a scan's "Spawn agent: X" instructions pointed at agents that don't exist there. The orchestrator only knew how to spawn, not how to run an evaluator itself, so a Codex scan would fail outright. Adds a harness-neutral spawn protocol: spawn a named agent as a subagent if the harness supports it, otherwise read its spec and run it inline, in sequence. The orchestrator now falls back to running all 7 evaluators inline against the same evidence bundle — same scores and findings, just sequential. Drift-guarded. This is the robust baseline that makes the skill portable, not just Codex-aware. Native Codex TOML agents for real parallelism can come later if its subagent schema settles. --- dist/agents/pixelslop.md | 4 ++++ dist/skill/SKILL.md | 10 ++++++++++ tests/skill-discoverability.test.js | 17 +++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/dist/agents/pixelslop.md b/dist/agents/pixelslop.md index c7cc5a5..4c7497c 100644 --- a/dist/agents/pixelslop.md +++ b/dist/agents/pixelslop.md @@ -14,6 +14,8 @@ tools: You are the Pixelslop orchestrator. You coordinate the full design review and fix workflow — from initial scan to final report. You spawn subagents for setup, fixing, checking, and code-check mode, and you use `pixelslop-tools` for all state manipulation. You never edit files directly. 
+**Spawning vs inline (harness-aware).** Every "Spawn agent: X" below means *spawn X as a subagent if your harness supports named subagents (Claude Code does; Codex does when the agent is in its TOML format); otherwise read X's spec file and run its workflow inline yourself, in sequence.* The output contract is identical — you only lose parallelism. Never skip an agent because you can't spawn it. A scan with the evaluators run inline is still a complete scan. + **The parent session (SKILL.md) handles all user-facing decisions before spawning you.** By the time you run, the URL is resolved, the server is running (if needed), and any setup context has been collected. You receive everything you need in your invocation prompt — just execute and return results. You run in one of two modes: @@ -222,6 +224,8 @@ The design-director returns JSON: `{ "kind": "design-director", "verdict": "..." Collect all 7 results. The 6 measured specialists feed the scores and measured findings; the design-director feeds only the judgment layer. +If your harness can't spawn these as subagents (see "Spawning vs inline" above), run all 7 inline instead: read each spec in `dist/agents/internal/` (or the installed path), follow it against the same evidence bundle, and collect the same JSON. Sequential, but the scores and findings are identical — never drop an evaluator because you couldn't spawn it. + ### Step 6c: Aggregate Report Assemble the standard report from specialist outputs. The format is defined in `scoring.md` — the same contract as before. diff --git a/dist/skill/SKILL.md b/dist/skill/SKILL.md index 0768f1f..916ee39 100644 --- a/dist/skill/SKILL.md +++ b/dist/skill/SKILL.md @@ -54,6 +54,16 @@ Pixelslop runs under different harnesses (Claude Code, Codex CLI, and others), a The `AskUserQuestion(...)` snippets in this file are the question **content** — the exact wording and options to surface. *How* you render them is your harness's call; *what* you ask is not. 
If you're not on Claude Code, read each block as "ask this question, offer these options" and present it your way. +## Spawning agents (works in any harness) + +Pixelslop's work is split across named agents — an orchestrator, a setup agent, an evidence collector, six measured evaluators, a design-director, a fixer, a checker. How you run a named agent depends on your harness: + +- **Claude Code:** spawn it as a subagent by name (the Task/Agent tool), parallel where the runtime supports it. +- **Codex CLI:** Codex supports named subagents, but only when the agent is installed in its TOML format. If a Pixelslop agent isn't spawnable, use the inline fallback below. +- **Any harness where the named agent isn't spawnable (or that has no subagents at all):** run the agent **inline** — read its spec file (e.g. `dist/agents/internal/pixelslop-eval-hierarchy.md`, or the installed path) and follow its instructions yourself, in sequence. + +So **"Spawn agent: X" everywhere in this skill means: spawn X as a subagent if you can, otherwise read X's spec and execute it inline.** The output contract is identical either way — you only lose parallelism. Never skip an agent because you can't spawn it; run it inline instead. A scan with the evaluators run inline is still a complete scan, just sequential. + ## Settings Mode When `--settings` is passed (e.g., `/pixelslop settings`), run the interactive settings configurator and stop — don't scan anything. 
diff --git a/tests/skill-discoverability.test.js b/tests/skill-discoverability.test.js index 878f5ba..1b7664a 100644 --- a/tests/skill-discoverability.test.js +++ b/tests/skill-discoverability.test.js @@ -96,6 +96,23 @@ describe('the asking protocol is harness-neutral (works under Codex too)', () => }); }); +describe('the spawn protocol is harness-neutral with an inline fallback', () => { + it('has a "Spawning agents" protocol', () => { + assert.ok(/## Spawning agents/i.test(SKILL), + 'SKILL.md must define how to run named agents across harnesses — the Task tool is Claude Code only'); + }); + it('gives an inline fallback for harnesses that can\'t spawn', () => { + assert.ok(/inline/i.test(SKILL), 'must describe running an agent inline'); + assert.ok(/Codex/i.test(SKILL), 'names Codex'); + assert.ok(/read its spec|read X.s spec|read the agent.s spec/i.test(SKILL), 'tells the agent to read the spec and run it'); + assert.ok(/never skip an agent|never drop an evaluator/i.test(SKILL), 'forbids skipping an agent it cannot spawn'); + }); + it('the orchestrator also carries the inline fallback', () => { + const orch = readFileSync(join(ROOT, 'dist', 'agents', 'pixelslop.md'), 'utf-8'); + assert.ok(/inline/i.test(orch) && /spawn/i.test(orch), 'orchestrator must know to run evaluators inline when it cannot spawn'); + }); +}); + describe('the skill drives advisory behaviour, not a config form', () => { it('has an advise-the-user playbook', () => { assert.ok(/## Advise/i.test(SKILL), From 92cfb7e8e2c64ff8ef57ba0f386e1fb20f47ccfb Mon Sep 17 00:00:00 2001 From: Gabi Date: Wed, 10 Jun 2026 11:50:00 +0200 Subject: [PATCH 2/2] feat: generate native Codex TOML agents at install time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The inline fallback makes Codex work, but it leans on Codex interpreting our Markdown specs correctly every run. 
This adds the native path: at install, generate a Codex custom-agent TOML (.codex/agents/<name>.toml) from each spec so Codex spawns them directly. The .md stays put for the inline fallback, so it's belt-and-suspenders — native spawn when it works, inline when it doesn't. agentMdToCodexToml emits the three required fields (name, description, developer_instructions). model and sandbox_mode are omitted so a spawned agent inherits the parent's settings — mapping Claude's "sonnet" onto a Codex model would be wrong. The body goes in a TOML literal block so backslashes and regex in code examples survive verbatim. Uninstall removes the generated pixelslop*.toml. Verified end to end: a real install writes 13 valid TOMLs (6 agents + 7 evaluators). Can't live-test spawning on Codex from here, which is exactly why the inline fallback is the safety net. --- bin/pixelslop.mjs | 81 +++++++++++++++++++++++++++++-- tests/codex-toml.test.js | 101 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+), 3 deletions(-) create mode 100644 tests/codex-toml.test.js diff --git a/bin/pixelslop.mjs b/bin/pixelslop.mjs index 4d925ab..2a2b06e 100644 --- a/bin/pixelslop.mjs +++ b/bin/pixelslop.mjs @@ -202,6 +202,56 @@ export function rewriteAgentPaths(content, installRoot) { .replaceAll('dist/skill/resources/', resourcesPath); } +/** + * Convert a Pixelslop agent spec (Markdown + YAML frontmatter) into a Codex + * custom-agent TOML. Codex spawns agents from `.codex/agents/<name>.toml` and + * can't read our Markdown specs, so without this it can only fall back to running + * them inline. This gives it native, spawnable agents as the primary path; the + * inline fallback in the skill is the safety net. + * + * Required TOML fields: name, description, developer_instructions. `model` and + * `sandbox_mode` are omitted on purpose so a spawned agent inherits the parent + * session's settings — mapping Claude's "sonnet" onto a Codex model would be wrong. 
/**
 * Convert a Pixelslop agent spec (Markdown + YAML frontmatter) into a Codex
 * custom-agent TOML document.
 *
 * Exactly three keys are emitted: `name`, `description` and
 * `developer_instructions`. `model` and `sandbox_mode` are omitted on purpose
 * so a spawned agent inherits the parent session's settings — mapping Claude's
 * "sonnet" onto a Codex model would be wrong.
 *
 * The frontmatter is read with a small line-based reader, not a YAML parser.
 * It understands:
 *   - `name: value` and `description: value` (plain or quoted scalars)
 *   - `description:` followed by indented lines, with or without a block
 *     scalar header (`>`, `>-`, `>+`, `|`, `|-`, `|+`); the lines are joined
 *     with single spaces because the TOML description is a one-line string.
 *
 * @param {string} md - The agent spec markdown (already path-rewritten)
 * @returns {string|null} TOML text, or null if the input is not a string, has
 *   no `---` delimited frontmatter, or the frontmatter has no non-empty `name`
 */
export function agentMdToCodexToml(md) {
  if (typeof md !== 'string') return null;

  // Specs checked out with CRLF endings (or saved with a BOM) must convert the
  // same way as LF ones; otherwise the frontmatter regex silently misses and
  // no TOML is generated for that agent.
  const text = md.replace(/^\uFEFF/, '').replace(/\r\n?/g, '\n');

  const parts = text.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
  if (!parts) return null;
  const [, frontmatter, rawBody] = parts;
  const body = rawBody.trim();

  // Strip the quotes of a quoted YAML scalar so they don't leak into the TOML
  // value. Only the escapes needed for the quote character itself are undone.
  const unquoteYamlScalar = (value) => {
    if (value.length >= 2) {
      const first = value[0];
      const last = value[value.length - 1];
      if (first === '"' && last === '"') {
        return value.slice(1, -1).replace(/\\(["\\])/g, '$1');
      }
      if (first === "'" && last === "'") {
        return value.slice(1, -1).replace(/''/g, "'");
      }
    }
    return value;
  };

  // TOML basic string: backslash and double quote must be escaped, and control
  // characters other than tab are not allowed raw.
  const escBasic = (s) =>
    s
      .replace(/\\/g, '\\\\')
      .replace(/"/g, '\\"')
      .replace(
        /[\u0000-\u0008\u000A-\u001F\u007F]/g,
        (ch) => `\\u${ch.charCodeAt(0).toString(16).toUpperCase().padStart(4, '0')}`,
      );

  // `[ \t]*` (not `\s*`) keeps the match on the key's own line, so an empty
  // `name:` can't swallow the following key as its value.
  const nameLine = frontmatter.match(/^name:[ \t]*(.*)$/m);
  const name = nameLine ? unquoteYamlScalar(nameLine[1].trim()) : '';
  if (!name) return null;

  let description = '';
  const descLine = frontmatter.match(/^description:[ \t]*(.*)$/m);
  if (descLine) {
    const inlineValue = descLine[1].trim();
    const isBlockHeader = inlineValue === '' || /^[>|][+-]?$/.test(inlineValue);
    if (isBlockHeader) {
      // Collect the run of indented, non-blank lines directly after the key.
      const following = frontmatter.slice(descLine.index + descLine[0].length + 1);
      const collected = [];
      for (const line of following.split('\n')) {
        if (!/^[ \t]+\S/.test(line)) break;
        collected.push(line.trim());
      }
      description = collected.join(' ');
    } else {
      description = unquoteYamlScalar(inlineValue);
    }
  }

  // Multiline LITERAL string ('''...''') so backslashes and regex inside code
  // blocks survive verbatim. A literal block cannot contain ''' itself, so put
  // a space after every pair of quotes that is followed by another quote. This
  // breaks up runs of any length; a plain replace of ''' leaves a fresh '''
  // behind for runs of five or more.
  const safeBody = body.replace(/''(?=')/g, "'' ");

  return [
    `name = "${escBasic(name)}"`,
    `description = "${escBasic(description)}"`,
    "developer_instructions = '''",
    safeBody,
    "'''",
    '',
  ].join('\n');
}
mkdirSync(client.agentDir, { recursive: true }); + let codexTomlCount = 0; + const writeCodexToml = (rewritten, mdFilename) => { + if (client.id !== 'codex') return; + const toml = agentMdToCodexToml(rewritten); + if (!toml) return; + writeFileSync(join(client.agentDir, mdFilename.replace(/\.md$/, '.toml')), toml); + codexTomlCount += 1; + }; for (const agentFile of AGENT_FILES) { const srcPath = join(PACKAGE_ROOT, 'dist', 'agents', agentFile); const raw = readFileSync(srcPath, 'utf8'); const rewritten = rewriteAgentPaths(raw, INSTALL_ROOT); writeFileSync(join(client.agentDir, agentFile), rewritten); + writeCodexToml(rewritten, agentFile); } log('✓', `${AGENT_FILES.length} agent specs ${dim('→')} ${dim(client.agentDir)}`); - // Copy internal evaluator agents (not in AGENT_FILES — orchestrator-only) + // Copy internal evaluator agents (not in AGENT_FILES — orchestrator-only). + // Their .md stays under internal/; the Codex TOML goes flat in agentDir, + // because Codex looks for spawnable agents directly in .codex/agents/. 
const internalSrc = join(PACKAGE_ROOT, 'dist', 'agents', 'internal'); if (existsSync(internalSrc)) { const internalDest = join(client.agentDir, 'internal'); @@ -1173,10 +1236,15 @@ function install(options = {}) { const internalFiles = readdirSync(internalSrc).filter(f => f.endsWith('.md') && !f.startsWith('._')); for (const file of internalFiles) { const raw = readFileSync(join(internalSrc, file), 'utf8'); - writeFileSync(join(internalDest, file), rewriteAgentPaths(raw, INSTALL_ROOT)); + const rewritten = rewriteAgentPaths(raw, INSTALL_ROOT); + writeFileSync(join(internalDest, file), rewritten); + writeCodexToml(rewritten, file); } log('✓', `${internalFiles.length} internal evaluators ${dim('→')} ${dim(internalDest)}`); } + if (codexTomlCount > 0) { + log('✓', `${codexTomlCount} Codex agents ${dim('(.toml)')} ${dim('→')} ${dim(client.agentDir)}`); + } // Install skill via linkOrCopy — method is tracked const preferredMethod = options.installMethods?.[client.name]?.skill; @@ -1375,6 +1443,13 @@ function uninstall() { } } + // Remove generated Codex agent TOMLs (only ours — pixelslop*.toml). + if (existsSync(client.agentDir)) { + for (const file of readdirSync(client.agentDir).filter(f => f.startsWith('pixelslop') && f.endsWith('.toml'))) { + rmSync(join(client.agentDir, file)); + } + } + // Remove internal evaluator agents (only pixelslop-eval-* files, not the whole directory) const internalDir = join(client.agentDir, 'internal'); if (existsSync(internalDir)) { diff --git a/tests/codex-toml.test.js b/tests/codex-toml.test.js new file mode 100644 index 0000000..3daac75 --- /dev/null +++ b/tests/codex-toml.test.js @@ -0,0 +1,101 @@ +/** + * Codex Agent TOML Tests + * + * agentMdToCodexToml converts a Pixelslop agent spec (Markdown + YAML + * frontmatter) into a Codex custom-agent TOML so Codex can spawn our agents + * natively instead of only running them inline. 
+ * + * The fragile bits, pinned here: + * - the three required Codex fields (name, description, developer_instructions) + * - model/sandbox_mode are NOT emitted (they'd wrongly pin a Claude model) + * - the body goes in a literal ''' block so backslashes and regex in code + * examples survive verbatim (a basic """ string would mangle them) + * - real shipped specs all convert; bad input returns null, not garbage + * + * Run: node --test tests/codex-toml.test.js + */ + +import { describe, it } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { readFileSync, readdirSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { dirname, join } from 'node:path'; +import { agentMdToCodexToml } from '../bin/pixelslop.mjs'; + +const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..'); + +const SAMPLE = `--- +name: pixelslop-eval-sample +description: > + A folded multi-line description that should + collapse onto a single TOML line. +model: sonnet +tools: + - Read +--- + +You are the sample evaluator. + +Detect with a regex like \\d+px and a Windows path C:\\Users\\x. 
+`; + +describe('agentMdToCodexToml — required fields', () => { + const toml = agentMdToCodexToml(SAMPLE); + + it('emits name, description, developer_instructions', () => { + assert.match(toml, /^name = "pixelslop-eval-sample"$/m); + assert.match(toml, /^description = ".+"$/m); + assert.match(toml, /developer_instructions = '''/); + assert.ok(toml.trimEnd().endsWith("'''"), 'closes the literal block'); + }); + + it('collapses a folded description onto one line', () => { + const line = toml.match(/^description = "(.+)"$/m)[1]; + assert.ok(!line.includes('\n'), 'single line'); + assert.ok(line.includes('collapse onto a single TOML line'), 'content preserved'); + }); + + it('does NOT emit model or sandbox_mode (they must inherit)', () => { + assert.ok(!/^model\s*=/m.test(toml), 'no model field — would pin a Claude model'); + assert.ok(!/^sandbox_mode\s*=/m.test(toml), 'no sandbox_mode field'); + }); + + it('preserves backslashes and regex verbatim (literal block, not basic)', () => { + assert.ok(toml.includes('\\d+px'), 'regex backslash survives'); + assert.ok(toml.includes('C:\\Users\\x'), 'windows path backslashes survive'); + }); +}); + +describe('agentMdToCodexToml — edge cases', () => { + it('returns null with no frontmatter', () => { + assert.equal(agentMdToCodexToml('# just markdown'), null); + }); + it('returns null when name is missing', () => { + assert.equal(agentMdToCodexToml('---\ndescription: x\n---\nbody'), null); + }); + it('handles an inline (non-folded) description', () => { + const toml = agentMdToCodexToml('---\nname: a\ndescription: one line desc\n---\nbody'); + assert.match(toml, /^description = "one line desc"$/m); + }); +}); + +describe('every shipped agent spec converts cleanly', () => { + const specs = [ + ...readdirSync(join(ROOT, 'dist', 'agents')).filter(f => f.endsWith('.md')), + ...readdirSync(join(ROOT, 'dist', 'agents', 'internal')).filter(f => f.endsWith('.md') && !f.startsWith('._')).map(f => join('internal', f)), + ]; + + for (const 
rel of specs) { + it(`converts ${rel}`, () => { + const md = readFileSync(join(ROOT, 'dist', 'agents', rel), 'utf-8'); + const toml = agentMdToCodexToml(md); + assert.ok(toml, `${rel} must convert`); + assert.match(toml, /^name = ".+"$/m, `${rel} has a name`); + assert.match(toml, /^description = ".+"$/m, `${rel} has a description`); + assert.ok(toml.includes("developer_instructions = '''"), `${rel} has instructions`); + // the literal block must actually close — no stray ''' inside the body + const opens = (toml.match(/'''/g) || []).length; + assert.equal(opens, 2, `${rel} literal block opens and closes exactly once`); + }); + } +});