From 14ba8a4d8906bfd8a135cb0cef330187faf18cca Mon Sep 17 00:00:00 2001 From: Ziyang Liu Date: Sun, 22 Mar 2026 12:31:04 +0800 Subject: [PATCH] Fix Codex skill description length --- .agents/skills/gstack/SKILL.md | 75 +++++++++++++++++----------------- SKILL.md | 75 +++++++++++++++++----------------- SKILL.md.tmpl | 75 +++++++++++++++++----------------- scripts/gen-skill-docs.ts | 6 +++ test/skill-validation.test.ts | 39 ++++++++++++++++++ 5 files changed, 159 insertions(+), 111 deletions(-) diff --git a/.agents/skills/gstack/SKILL.md b/.agents/skills/gstack/SKILL.md index 931288661..3b1485a40 100644 --- a/.agents/skills/gstack/SKILL.md +++ b/.agents/skills/gstack/SKILL.md @@ -1,43 +1,10 @@ --- name: gstack description: | - Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with - elements, verify page state, diff before/after actions, take annotated screenshots, check - responsive layouts, test forms and uploads, handle dialogs, and assert element states. - ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a - user flow, or file a bug with evidence. - - gstack also includes development workflow skills. When you notice the user is at - these stages, suggest the appropriate skill: - - Brainstorming a new idea → suggest /office-hours - - Reviewing a plan (strategy) → suggest /plan-ceo-review - - Reviewing a plan (architecture) → suggest /plan-eng-review - - Reviewing a plan (design) → suggest /plan-design-review - - Creating a design system → suggest /design-consultation - - Debugging errors → suggest /investigate - - Testing the app → suggest /qa - - Code review before merge → suggest /review - - Visual design audit → suggest /design-review - - Ready to deploy / create PR → suggest /ship - - Post-ship doc updates → suggest /document-release - - Weekly retrospective → suggest /retro - - Wanting a second opinion or adversarial code review → suggest /codex - - Working with production or live systems → suggest /careful - - Want to scope edits to one module/directory → suggest /freeze - - Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard - - Removing edit restrictions → suggest /unfreeze - - Upgrading gstack to latest version → suggest /gstack-upgrade - - If the user pushes back on skill suggestions ("stop suggesting things", - "I don't need suggestions", "too aggressive"): - 1. Stop suggesting for the rest of this session - 2. Run: gstack-config set proactive false - 3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive - again if you change your mind." - - If the user says "be proactive again" or "turn on suggestions": - 1. Run: gstack-config set proactive true - 2. Say: "Proactive suggestions are back on." + Entry point for gstack's headless browser and workflow skills. Use it for QA testing, + site dogfooding, deployment checks, screenshots, and routed suggestions such as + /office-hours, /investigate, /qa, /review, /ship, /design-review, and related helpers. + Full routing and proactive suggestion policy live in the body below. --- @@ -266,10 +233,44 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". This runs in the background and never blocks the user. +## Skill Routing + +gstack also includes development workflow skills. When you notice the user is at +these stages, suggest the appropriate skill: + +- Brainstorming a new idea → suggest /office-hours +- Reviewing a plan (strategy) → suggest /plan-ceo-review +- Reviewing a plan (architecture) → suggest /plan-eng-review +- Reviewing a plan (design) → suggest /plan-design-review +- Creating a design system → suggest /design-consultation +- Debugging errors → suggest /investigate +- Testing the app → suggest /qa +- Code review before merge → suggest /review +- Visual design audit → suggest /design-review +- Ready to deploy / create PR → suggest /ship +- Post-ship doc updates → suggest /document-release +- Weekly retrospective → suggest /retro +- Wanting a second opinion or adversarial code review → suggest /codex +- Working with production or live systems → suggest /careful +- Want to scope edits to one module/directory → suggest /freeze +- Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard +- Removing edit restrictions → suggest /unfreeze +- Upgrading gstack to latest version → suggest /gstack-upgrade + If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during this session. Only run skills the user explicitly invokes. This preference persists across sessions via `gstack-config`. +If the user pushes back on skill suggestions ("stop suggesting things", +"I don't need suggestions", "too aggressive"): +1. Stop suggesting for the rest of this session. +2. Run: `gstack-config set proactive false` +3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive again if you change your mind." + +If the user says "be proactive again" or "turn on suggestions": +1. Run: `gstack-config set proactive true` +2. Say: "Proactive suggestions are back on." + # gstack browse: QA Testing & Dogfooding Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command. diff --git a/SKILL.md b/SKILL.md index d8e51bd19..62e2cc1cf 100644 --- a/SKILL.md +++ b/SKILL.md @@ -2,43 +2,10 @@ name: gstack version: 1.1.0 description: | - Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with - elements, verify page state, diff before/after actions, take annotated screenshots, check - responsive layouts, test forms and uploads, handle dialogs, and assert element states. - ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a - user flow, or file a bug with evidence. - - gstack also includes development workflow skills. When you notice the user is at - these stages, suggest the appropriate skill: - - Brainstorming a new idea → suggest /office-hours - - Reviewing a plan (strategy) → suggest /plan-ceo-review - - Reviewing a plan (architecture) → suggest /plan-eng-review - - Reviewing a plan (design) → suggest /plan-design-review - - Creating a design system → suggest /design-consultation - - Debugging errors → suggest /investigate - - Testing the app → suggest /qa - - Code review before merge → suggest /review - - Visual design audit → suggest /design-review - - Ready to deploy / create PR → suggest /ship - - Post-ship doc updates → suggest /document-release - - Weekly retrospective → suggest /retro - - Wanting a second opinion or adversarial code review → suggest /codex - - Working with production or live systems → suggest /careful - - Want to scope edits to one module/directory → suggest /freeze - - Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard - - Removing edit restrictions → suggest /unfreeze - - Upgrading gstack to latest version → suggest /gstack-upgrade - - If the user pushes back on skill suggestions ("stop suggesting things", - "I don't need suggestions", "too aggressive"): - 1. Stop suggesting for the rest of this session - 2. Run: gstack-config set proactive false - 3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive - again if you change your mind." - - If the user says "be proactive again" or "turn on suggestions": - 1. Run: gstack-config set proactive true - 2. Say: "Proactive suggestions are back on." + Entry point for gstack's headless browser and workflow skills. Use it for QA testing, + site dogfooding, deployment checks, screenshots, and routed suggestions such as + /office-hours, /investigate, /qa, /review, /ship, /design-review, and related helpers. + Full routing and proactive suggestion policy live in the body below. allowed-tools: - Bash - Read @@ -272,10 +239,44 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was If you cannot determine the outcome, use "unknown". This runs in the background and never blocks the user. +## Skill Routing + +gstack also includes development workflow skills. When you notice the user is at +these stages, suggest the appropriate skill: + +- Brainstorming a new idea → suggest /office-hours +- Reviewing a plan (strategy) → suggest /plan-ceo-review +- Reviewing a plan (architecture) → suggest /plan-eng-review +- Reviewing a plan (design) → suggest /plan-design-review +- Creating a design system → suggest /design-consultation +- Debugging errors → suggest /investigate +- Testing the app → suggest /qa +- Code review before merge → suggest /review +- Visual design audit → suggest /design-review +- Ready to deploy / create PR → suggest /ship +- Post-ship doc updates → suggest /document-release +- Weekly retrospective → suggest /retro +- Wanting a second opinion or adversarial code review → suggest /codex +- Working with production or live systems → suggest /careful +- Want to scope edits to one module/directory → suggest /freeze +- Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard +- Removing edit restrictions → suggest /unfreeze +- Upgrading gstack to latest version → suggest /gstack-upgrade + If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during this session. Only run skills the user explicitly invokes. This preference persists across sessions via `gstack-config`. +If the user pushes back on skill suggestions ("stop suggesting things", +"I don't need suggestions", "too aggressive"): +1. Stop suggesting for the rest of this session. +2. Run: `gstack-config set proactive false` +3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive again if you change your mind." + +If the user says "be proactive again" or "turn on suggestions": +1. Run: `gstack-config set proactive true` +2. Say: "Proactive suggestions are back on." + # gstack browse: QA Testing & Dogfooding Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command. diff --git a/SKILL.md.tmpl b/SKILL.md.tmpl index 0c9859655..01a506bbf 100644 --- a/SKILL.md.tmpl +++ b/SKILL.md.tmpl @@ -2,43 +2,10 @@ name: gstack version: 1.1.0 description: | - Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with - elements, verify page state, diff before/after actions, take annotated screenshots, check - responsive layouts, test forms and uploads, handle dialogs, and assert element states. - ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a - user flow, or file a bug with evidence. - - gstack also includes development workflow skills. When you notice the user is at - these stages, suggest the appropriate skill: - - Brainstorming a new idea → suggest /office-hours - - Reviewing a plan (strategy) → suggest /plan-ceo-review - - Reviewing a plan (architecture) → suggest /plan-eng-review - - Reviewing a plan (design) → suggest /plan-design-review - - Creating a design system → suggest /design-consultation - - Debugging errors → suggest /investigate - - Testing the app → suggest /qa - - Code review before merge → suggest /review - - Visual design audit → suggest /design-review - - Ready to deploy / create PR → suggest /ship - - Post-ship doc updates → suggest /document-release - - Weekly retrospective → suggest /retro - - Wanting a second opinion or adversarial code review → suggest /codex - - Working with production or live systems → suggest /careful - - Want to scope edits to one module/directory → suggest /freeze - - Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard - - Removing edit restrictions → suggest /unfreeze - - Upgrading gstack to latest version → suggest /gstack-upgrade - - If the user pushes back on skill suggestions ("stop suggesting things", - "I don't need suggestions", "too aggressive"): - 1. Stop suggesting for the rest of this session - 2. Run: gstack-config set proactive false - 3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive - again if you change your mind." - - If the user says "be proactive again" or "turn on suggestions": - 1. Run: gstack-config set proactive true - 2. Say: "Proactive suggestions are back on." + Entry point for gstack's headless browser and workflow skills. Use it for QA testing, + site dogfooding, deployment checks, screenshots, and routed suggestions such as + /office-hours, /investigate, /qa, /review, /ship, /design-review, and related helpers. + Full routing and proactive suggestion policy live in the body below. allowed-tools: - Bash - Read @@ -48,10 +15,44 @@ allowed-tools: {{PREAMBLE}} +## Skill Routing + +gstack also includes development workflow skills. When you notice the user is at +these stages, suggest the appropriate skill: + +- Brainstorming a new idea → suggest /office-hours +- Reviewing a plan (strategy) → suggest /plan-ceo-review +- Reviewing a plan (architecture) → suggest /plan-eng-review +- Reviewing a plan (design) → suggest /plan-design-review +- Creating a design system → suggest /design-consultation +- Debugging errors → suggest /investigate +- Testing the app → suggest /qa +- Code review before merge → suggest /review +- Visual design audit → suggest /design-review +- Ready to deploy / create PR → suggest /ship +- Post-ship doc updates → suggest /document-release +- Weekly retrospective → suggest /retro +- Wanting a second opinion or adversarial code review → suggest /codex +- Working with production or live systems → suggest /careful +- Want to scope edits to one module/directory → suggest /freeze +- Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard +- Removing edit restrictions → suggest /unfreeze +- Upgrading gstack to latest version → suggest /gstack-upgrade + If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during this session. Only run skills the user explicitly invokes. This preference persists across sessions via `gstack-config`. +If the user pushes back on skill suggestions ("stop suggesting things", +"I don't need suggestions", "too aggressive"): +1. Stop suggesting for the rest of this session. +2. Run: `gstack-config set proactive false` +3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive again if you change your mind." + +If the user says "be proactive again" or "turn on suggestions": +1. Run: `gstack-config set proactive true` +2. Say: "Proactive suggestions are back on." + # gstack browse: QA Testing & Dogfooding Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command. diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index 27718933a..e75e6f99a 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -16,6 +16,7 @@ import * as path from 'path'; const ROOT = path.resolve(import.meta.dir, '..'); const DRY_RUN = process.argv.includes('--dry-run'); +const MAX_CODEX_DESCRIPTION_LEN = 1024; // ─── Template Context ─────────────────────────────────────── @@ -1763,6 +1764,11 @@ function transformFrontmatter(content: string, host: Host): string { if (descLines.length > 0) { description = descLines.join('\n').trim(); } + if (description.length > MAX_CODEX_DESCRIPTION_LEN) { + throw new Error( + `Codex frontmatter description exceeds ${MAX_CODEX_DESCRIPTION_LEN} characters (${description.length})`, + ); + } // Re-emit Codex frontmatter (name + description only) const indentedDesc = description.split('\n').map(l => ` ${l}`).join('\n'); diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts index 03640ccba..14023114f 100644 --- a/test/skill-validation.test.ts +++ b/test/skill-validation.test.ts @@ -6,6 +6,32 @@ import * as fs from 'fs'; import * as path from 'path'; const ROOT = path.resolve(import.meta.dir, '..'); +const MAX_CODEX_DESCRIPTION_LEN = 1024; + +function extractDescriptionFromFrontmatter(content: string): string { + const frontmatterEnd = content.indexOf('\n---', 4); + if (frontmatterEnd === -1) return ''; + + const frontmatter = content.slice(4, frontmatterEnd); + const lines = frontmatter.split('\n'); + const descLines: string[] = []; + let inDescription = false; + + for (const line of lines) { + if (line.match(/^description:\s*\|?\s*$/)) { + inDescription = true; + continue; + } + if (!inDescription) continue; + if (line === '' || line.match(/^\s/)) { + descLines.push(line.replace(/^ /, '')); + continue; + } + break; + } + + return descLines.join('\n').trim(); +} describe('SKILL.md command validation', () => { test('all $B commands in SKILL.md are valid browse commands', () => { @@ -1435,4 +1461,17 @@ describe('Codex skill validation', () => { expect(result.invalid).toHaveLength(0); } }); + + test('Codex SKILL.md descriptions stay within the loader limit', () => { + const codexDirs = fs.readdirSync(AGENTS_DIR); + for (const dir of codexDirs) { + const skillMd = path.join(AGENTS_DIR, dir, 'SKILL.md'); + if (!fs.existsSync(skillMd)) continue; + + const content = fs.readFileSync(skillMd, 'utf-8'); + const description = extractDescriptionFromFrontmatter(content); + expect(description.length).toBeGreaterThan(0); + expect(description.length).toBeLessThanOrEqual(MAX_CODEX_DESCRIPTION_LEN); + } + }); });