Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 78 additions & 3 deletions bin/pixelslop.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,56 @@ export function rewriteAgentPaths(content, installRoot) {
.replaceAll('dist/skill/resources/', resourcesPath);
}

/**
 * Convert a Pixelslop agent spec (Markdown + YAML frontmatter) into a Codex
 * custom-agent TOML. Codex spawns agents from `.codex/agents/<name>.toml` and
 * can't read our Markdown specs, so without this it can only fall back to running
 * them inline. This gives it native, spawnable agents as the primary path; the
 * inline fallback in the skill is the safety net.
 *
 * Required TOML fields: name, description, developer_instructions. `model` and
 * `sandbox_mode` are omitted on purpose so a spawned agent inherits the parent
 * session's settings — mapping Claude's "sonnet" onto a Codex model would be wrong.
 *
 * The frontmatter is read with targeted regexes, not a YAML parser. Only the
 * `name` and `description` keys are consulted. `description` may be an inline
 * scalar (optionally quoted) or an indented block (`>`, `|`, with or without a
 * chomping indicator, or a bare `description:` followed by indented lines);
 * block content is collapsed onto one line. CRLF line endings and a leading
 * BOM are tolerated; the emitted TOML always uses LF.
 *
 * @param {string} md - The agent spec markdown (already path-rewritten)
 * @returns {string|null} TOML text, or null if the spec has no usable frontmatter
 *   (no `---` fenced header, or no non-empty `name`)
 */
export function agentMdToCodexToml(md) {
  if (typeof md !== 'string') return null;

  // Normalize before matching: the frontmatter regex is anchored on "\n", so a
  // BOM or CRLF checkout would otherwise make every spec look frontmatter-less.
  const text = md.replace(/^\uFEFF/, '').replace(/\r\n/g, '\n');
  const fm = text.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
  if (!fm) return null;
  const frontmatter = fm[1];
  const body = fm[2].trim();

  // Strip one layer of YAML quoting so `name: "x"` yields x, not "x".
  const unquoteYaml = (s) => {
    if (s.length >= 2 && s.startsWith('"') && s.endsWith('"')) {
      return s.slice(1, -1).replace(/\\(["\\])/g, '$1');
    }
    if (s.length >= 2 && s.startsWith("'") && s.endsWith("'")) {
      return s.slice(1, -1).replace(/''/g, "'");
    }
    return s;
  };

  // [ \t]* rather than \s*: \s would cross the newline and capture the NEXT
  // key's line as the value when this key is empty.
  const nameMatch = frontmatter.match(/^name:[ \t]*(.+)$/m);
  const name = nameMatch ? unquoteYaml(nameMatch[1].trim()) : '';
  if (!name) return null;

  // description is inline (`description: text`) or an indented block introduced
  // by `>` / `|` (optionally with chomping/indent indicators) or by nothing.
  let description = '';
  const inline = frontmatter.match(/^description:[ \t]*(?![>|])(\S.*)$/m);
  if (inline) {
    description = unquoteYaml(inline[1].trim());
  } else {
    const block = frontmatter.match(
      /^description:[ \t]*(?:[>|][-+0-9]*)?[ \t]*\n((?:[ \t]+\S.*\n?)+)/m
    );
    if (block) {
      description = block[1]
        .split('\n')
        .map((l) => l.trim())
        .filter(Boolean)
        .join(' ');
    }
  }

  // TOML basic strings must escape backslash, double quote, and every control
  // character except tab.
  const escBasic = (s) =>
    s
      .replace(/\\/g, '\\\\')
      .replace(/"/g, '\\"')
      .replace(
        /[\u0000-\u0008\u000A-\u001F\u007F]/g,
        (c) => `\\u${c.charCodeAt(0).toString(16).toUpperCase().padStart(4, '0')}`
      );

  // Multiline LITERAL string ('''...''') so backslashes and regex inside code
  // blocks survive verbatim. Specs never contain ''' — guard just in case.
  // Breaking every quote pair that is followed by another quote guarantees no
  // run of three survives, even for five or more consecutive quotes.
  const safeBody = body.replace(/''(?=')/g, "'' ");

  return [
    `name = "${escBasic(name)}"`,
    `description = "${escBasic(description)}"`,
    "developer_instructions = '''",
    safeBody,
    "'''",
    ''
  ].join('\n');
}

// ─────────────────────────────────────────────
// Browser Runtime Detection
// ─────────────────────────────────────────────
Expand Down Expand Up @@ -1155,28 +1205,46 @@ function install(options = {}) {
for (const client of selectedClients) {
header(`Configuring ${client.name}`);

// Copy agent files with path rewriting
// Copy agent files with path rewriting. Codex also gets a native TOML agent
// per spec (it spawns from .codex/agents/<name>.toml; the .md is the inline
// fallback). Always keep the .md so the fallback has something to read.
mkdirSync(client.agentDir, { recursive: true });
let codexTomlCount = 0;
// Codex-only side channel: emit a spawnable TOML twin of a rewritten spec,
// flat in client.agentDir, and count it. Other clients, and specs the
// converter rejects (null result), write nothing.
const writeCodexToml = (rewritten, mdFilename) => {
  if (client.id === 'codex') {
    const tomlText = agentMdToCodexToml(rewritten);
    if (tomlText) {
      const tomlFilename = mdFilename.replace(/\.md$/, '.toml');
      const tomlPath = join(client.agentDir, tomlFilename);
      writeFileSync(tomlPath, tomlText);
      codexTomlCount += 1;
    }
  }
};
for (const agentFile of AGENT_FILES) {
const srcPath = join(PACKAGE_ROOT, 'dist', 'agents', agentFile);
const raw = readFileSync(srcPath, 'utf8');
const rewritten = rewriteAgentPaths(raw, INSTALL_ROOT);
writeFileSync(join(client.agentDir, agentFile), rewritten);
writeCodexToml(rewritten, agentFile);
}
log('✓', `${AGENT_FILES.length} agent specs ${dim('→')} ${dim(client.agentDir)}`);

// Copy internal evaluator agents (not in AGENT_FILES — orchestrator-only)
// Copy internal evaluator agents (not in AGENT_FILES — orchestrator-only).
// Their .md stays under internal/; the Codex TOML goes flat in agentDir,
// because Codex looks for spawnable agents directly in .codex/agents/.
const internalSrc = join(PACKAGE_ROOT, 'dist', 'agents', 'internal');
if (existsSync(internalSrc)) {
const internalDest = join(client.agentDir, 'internal');
mkdirSync(internalDest, { recursive: true });
const internalFiles = readdirSync(internalSrc).filter(f => f.endsWith('.md') && !f.startsWith('._'));
for (const file of internalFiles) {
const raw = readFileSync(join(internalSrc, file), 'utf8');
writeFileSync(join(internalDest, file), rewriteAgentPaths(raw, INSTALL_ROOT));
const rewritten = rewriteAgentPaths(raw, INSTALL_ROOT);
writeFileSync(join(internalDest, file), rewritten);
writeCodexToml(rewritten, file);
}
log('✓', `${internalFiles.length} internal evaluators ${dim('→')} ${dim(internalDest)}`);
}
if (codexTomlCount > 0) {
log('✓', `${codexTomlCount} Codex agents ${dim('(.toml)')} ${dim('→')} ${dim(client.agentDir)}`);
}

// Install skill via linkOrCopy — method is tracked
const preferredMethod = options.installMethods?.[client.name]?.skill;
Expand Down Expand Up @@ -1375,6 +1443,13 @@ function uninstall() {
}
}

// Remove generated Codex agent TOMLs (only ours — pixelslop*.toml).
if (existsSync(client.agentDir)) {
for (const file of readdirSync(client.agentDir).filter(f => f.startsWith('pixelslop') && f.endsWith('.toml'))) {
rmSync(join(client.agentDir, file));
}
}

// Remove internal evaluator agents (only pixelslop-eval-* files, not the whole directory)
const internalDir = join(client.agentDir, 'internal');
if (existsSync(internalDir)) {
Expand Down
4 changes: 4 additions & 0 deletions dist/agents/pixelslop.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ tools:

You are the Pixelslop orchestrator. You coordinate the full design review and fix workflow — from initial scan to final report. You spawn subagents for setup, fixing, checking, and code-check mode, and you use `pixelslop-tools` for all state manipulation. You never edit files directly.

**Spawning vs inline (harness-aware).** Every "Spawn agent: X" below means *spawn X as a subagent if your harness supports named subagents (Claude Code does; Codex does when the agent is in its TOML format); otherwise read X's spec file and run its workflow inline yourself, in sequence.* The output contract is identical — you only lose parallelism. Never skip an agent because you can't spawn it. A scan with the evaluators run inline is still a complete scan.

**The parent session (SKILL.md) handles all user-facing decisions before spawning you.** By the time you run, the URL is resolved, the server is running (if needed), and any setup context has been collected. You receive everything you need in your invocation prompt — just execute and return results.

You run in one of two modes:
Expand Down Expand Up @@ -222,6 +224,8 @@ The design-director returns JSON: `{ "kind": "design-director", "verdict": "..."

Collect all 7 results. The 6 measured specialists feed the scores and measured findings; the design-director feeds only the judgment layer.

If your harness can't spawn these as subagents (see "Spawning vs inline" above), run all 7 inline instead: read each spec in `dist/agents/internal/` (or the installed path), follow it against the same evidence bundle, and collect the same JSON. Sequential, but the scores and findings are identical — never drop an evaluator because you couldn't spawn it.

### Step 6c: Aggregate Report

Assemble the standard report from specialist outputs. The format is defined in `scoring.md` — the same contract as before.
Expand Down
10 changes: 10 additions & 0 deletions dist/skill/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ Pixelslop runs under different harnesses (Claude Code, Codex CLI, and others), a

The `AskUserQuestion(...)` snippets in this file are the question **content** — the exact wording and options to surface. *How* you render them is your harness's call; *what* you ask is not. If you're not on Claude Code, read each block as "ask this question, offer these options" and present it your way.

## Spawning agents (works in any harness)

Pixelslop's work is split across named agents — an orchestrator, a setup agent, an evidence collector, six measured evaluators, a design-director, a fixer, a checker. How you run a named agent depends on your harness:

- **Claude Code:** spawn it as a subagent by name (the Task/Agent tool), parallel where the runtime supports it.
- **Codex CLI:** Codex supports named subagents, but only for agents installed in its own TOML format (`.codex/agents/<name>.toml`). If a Pixelslop agent isn't spawnable that way, use the inline fallback below.
- **Any harness where the named agent isn't spawnable (or that has no subagents at all):** run the agent **inline** — read its spec file (e.g. `dist/agents/internal/pixelslop-eval-hierarchy.md`, or the installed path) and follow its instructions yourself, in sequence.

So **"Spawn agent: X" everywhere in this skill means: spawn X as a subagent if you can, otherwise read X's spec and execute it inline.** The output contract is identical either way — you only lose parallelism. Never skip an agent because you can't spawn it; run it inline instead. A scan with the evaluators run inline is still a complete scan, just sequential.

## Settings Mode

When `--settings` is passed (e.g., `/pixelslop settings`), run the interactive settings configurator and stop — don't scan anything.
Expand Down
101 changes: 101 additions & 0 deletions tests/codex-toml.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/**
* Codex Agent TOML Tests
*
* agentMdToCodexToml converts a Pixelslop agent spec (Markdown + YAML
* frontmatter) into a Codex custom-agent TOML so Codex can spawn our agents
* natively instead of only running them inline.
*
* The fragile bits, pinned here:
* - the three required Codex fields (name, description, developer_instructions)
* - model/sandbox_mode are NOT emitted (they'd wrongly pin a Claude model)
* - the body goes in a literal ''' block so backslashes and regex in code
* examples survive verbatim (a basic """ string would mangle them)
* - real shipped specs all convert; bad input returns null, not garbage
*
* Run: node --test tests/codex-toml.test.js
*/

import { describe, it } from 'node:test';
import { strict as assert } from 'node:assert';
import { readFileSync, readdirSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';
import { agentMdToCodexToml } from '../bin/pixelslop.mjs';

const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');

const SAMPLE = `---
name: pixelslop-eval-sample
description: >
A folded multi-line description that should
collapse onto a single TOML line.
model: sonnet
tools:
- Read
---

You are the sample evaluator.

Detect with a regex like \\d+px and a Windows path C:\\Users\\x.
`;

// Pins the shape of the TOML produced from SAMPLE: the three emitted keys, the
// single-line description, the absence of model/sandbox_mode, and backslashes
// carried through unchanged.
describe('agentMdToCodexToml — required fields', () => {
// Converted once when the suite is defined; every case below inspects this same string.
const toml = agentMdToCodexToml(SAMPLE);

it('emits name, description, developer_instructions', () => {
assert.match(toml, /^name = "pixelslop-eval-sample"$/m);
assert.match(toml, /^description = ".+"$/m);
assert.match(toml, /developer_instructions = '''/);
assert.ok(toml.trimEnd().endsWith("'''"), 'closes the literal block');
});

it('collapses a folded description onto one line', () => {
// Capture group 1 is the text between the quotes on the description line.
const line = toml.match(/^description = "(.+)"$/m)[1];
assert.ok(!line.includes('\n'), 'single line');
assert.ok(line.includes('collapse onto a single TOML line'), 'content preserved');
});

it('does NOT emit model or sandbox_mode (they must inherit)', () => {
assert.ok(!/^model\s*=/m.test(toml), 'no model field — would pin a Claude model');
assert.ok(!/^sandbox_mode\s*=/m.test(toml), 'no sandbox_mode field');
});

it('preserves backslashes and regex verbatim (literal block, not basic)', () => {
// The JS literals below denote single backslashes: \d+px and C:\Users\x.
assert.ok(toml.includes('\\d+px'), 'regex backslash survives');
assert.ok(toml.includes('C:\\Users\\x'), 'windows path backslashes survive');
});
});

// Inputs the converter must reject with null (no frontmatter fence, no name key),
// plus the inline `description: text` form as the counterpart to SAMPLE's folded one.
describe('agentMdToCodexToml — edge cases', () => {
it('returns null with no frontmatter', () => {
assert.equal(agentMdToCodexToml('# just markdown'), null);
});
it('returns null when name is missing', () => {
// Frontmatter is present but carries only a description key.
assert.equal(agentMdToCodexToml('---\ndescription: x\n---\nbody'), null);
});
it('handles an inline (non-folded) description', () => {
const toml = agentMdToCodexToml('---\nname: a\ndescription: one line desc\n---\nbody');
assert.match(toml, /^description = "one line desc"$/m);
});
});

describe('every shipped agent spec converts cleanly', () => {
const specs = [
...readdirSync(join(ROOT, 'dist', 'agents')).filter(f => f.endsWith('.md')),
...readdirSync(join(ROOT, 'dist', 'agents', 'internal')).filter(f => f.endsWith('.md') && !f.startsWith('._')).map(f => join('internal', f)),
];

for (const rel of specs) {
it(`converts ${rel}`, () => {
const md = readFileSync(join(ROOT, 'dist', 'agents', rel), 'utf-8');
const toml = agentMdToCodexToml(md);
assert.ok(toml, `${rel} must convert`);
assert.match(toml, /^name = ".+"$/m, `${rel} has a name`);
assert.match(toml, /^description = ".+"$/m, `${rel} has a description`);
assert.ok(toml.includes("developer_instructions = '''"), `${rel} has instructions`);
// the literal block must actually close — no stray ''' inside the body
const opens = (toml.match(/'''/g) || []).length;
assert.equal(opens, 2, `${rel} literal block opens and closes exactly once`);
});
}
});
17 changes: 17 additions & 0 deletions tests/skill-discoverability.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,23 @@ describe('the asking protocol is harness-neutral (works under Codex too)', () =>
});
});

// Text-level checks that the skill and orchestrator docs describe how to run
// named agents when they cannot be spawned. These are regex probes on wording,
// not behavioural tests.
// NOTE(review): SKILL and ROOT are defined outside this hunk — presumably SKILL
// holds the contents of dist/skill/SKILL.md; confirm against the top of the file.
describe('the spawn protocol is harness-neutral with an inline fallback', () => {
it('has a "Spawning agents" protocol', () => {
assert.ok(/## Spawning agents/i.test(SKILL),
'SKILL.md must define how to run named agents across harnesses — the Task tool is Claude Code only');
});
it('gives an inline fallback for harnesses that can\'t spawn', () => {
assert.ok(/inline/i.test(SKILL), 'must describe running an agent inline');
assert.ok(/Codex/i.test(SKILL), 'names Codex');
// The `.` in "X.s" / "agent.s" accepts any apostrophe character.
assert.ok(/read its spec|read X.s spec|read the agent.s spec/i.test(SKILL), 'tells the agent to read the spec and run it');
assert.ok(/never skip an agent|never drop an evaluator/i.test(SKILL), 'forbids skipping an agent it cannot spawn');
});
it('the orchestrator also carries the inline fallback', () => {
const orch = readFileSync(join(ROOT, 'dist', 'agents', 'pixelslop.md'), 'utf-8');
assert.ok(/inline/i.test(orch) && /spawn/i.test(orch), 'orchestrator must know to run evaluators inline when it cannot spawn');
});
});

describe('the skill drives advisory behaviour, not a config form', () => {
it('has an advise-the-user playbook', () => {
assert.ok(/## Advise/i.test(SKILL),
Expand Down
Loading