garrytan · ziyangliu-666 · Mar 22, 2026
diff --git a/.agents/skills/gstack/SKILL.md b/.agents/skills/gstack/SKILL.md
@@ -1,43 +1,10 @@
 ---
 name: gstack
 description: |
-  Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
-  elements, verify page state, diff before/after actions, take annotated screenshots, check
-  responsive layouts, test forms and uploads, handle dialogs, and assert element states.
-  ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
-  user flow, or file a bug with evidence.
-
-  gstack also includes development workflow skills. When you notice the user is at
-  these stages, suggest the appropriate skill:
-  - Brainstorming a new idea → suggest /office-hours
-  - Reviewing a plan (strategy) → suggest /plan-ceo-review
-  - Reviewing a plan (architecture) → suggest /plan-eng-review
-  - Reviewing a plan (design) → suggest /plan-design-review
-  - Creating a design system → suggest /design-consultation
-  - Debugging errors → suggest /investigate
-  - Testing the app → suggest /qa
-  - Code review before merge → suggest /review
-  - Visual design audit → suggest /design-review
-  - Ready to deploy / create PR → suggest /ship
-  - Post-ship doc updates → suggest /document-release
-  - Weekly retrospective → suggest /retro
-  - Wanting a second opinion or adversarial code review → suggest /codex
-  - Working with production or live systems → suggest /careful
-  - Want to scope edits to one module/directory → suggest /freeze
-  - Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard
-  - Removing edit restrictions → suggest /unfreeze
-  - Upgrading gstack to latest version → suggest /gstack-upgrade
-
-  If the user pushes back on skill suggestions ("stop suggesting things",
-  "I don't need suggestions", "too aggressive"):
-  1. Stop suggesting for the rest of this session
-  2. Run: gstack-config set proactive false
-  3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive
-     again if you change your mind."
-
-  If the user says "be proactive again" or "turn on suggestions":
-  1. Run: gstack-config set proactive true
-  2. Say: "Proactive suggestions are back on."
+  Entry point for gstack's headless browser and workflow skills. Use it for QA testing,
+  site dogfooding, deployment checks, screenshots, and routed suggestions such as
+  /office-hours, /investigate, /qa, /review, /ship, /design-review, and related helpers.
+  Full routing and proactive suggestion policy live in the body below.
 ---
 <!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
 <!-- Regenerate: bun run gen:skill-docs -->
@@ -266,10 +233,44 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was
 If you cannot determine the outcome, use "unknown". This runs in the background and
 never blocks the user.
 
+## Skill Routing
+
+gstack also includes development workflow skills. When you notice the user is at
+these stages, suggest the appropriate skill:
+
+- Brainstorming a new idea → suggest /office-hours
+- Reviewing a plan (strategy) → suggest /plan-ceo-review
+- Reviewing a plan (architecture) → suggest /plan-eng-review
+- Reviewing a plan (design) → suggest /plan-design-review
+- Creating a design system → suggest /design-consultation
+- Debugging errors → suggest /investigate
+- Testing the app → suggest /qa
+- Code review before merge → suggest /review
+- Visual design audit → suggest /design-review
+- Ready to deploy / create PR → suggest /ship
+- Post-ship doc updates → suggest /document-release
+- Weekly retrospective → suggest /retro
+- Wanting a second opinion or adversarial code review → suggest /codex
+- Working with production or live systems → suggest /careful
+- Want to scope edits to one module/directory → suggest /freeze
+- Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard
+- Removing edit restrictions → suggest /unfreeze
+- Upgrading gstack to latest version → suggest /gstack-upgrade
+
 If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during this session.
 Only run skills the user explicitly invokes. This preference persists across sessions via
 `gstack-config`.
 
+If the user pushes back on skill suggestions ("stop suggesting things",
+"I don't need suggestions", "too aggressive"):
+1. Stop suggesting for the rest of this session.
+2. Run: `gstack-config set proactive false`
+3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive again if you change your mind."
+
+If the user says "be proactive again" or "turn on suggestions":
+1. Run: `gstack-config set proactive true`
+2. Say: "Proactive suggestions are back on."
+
 # gstack browse: QA Testing & Dogfooding
 
 Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command.

diff --git a/SKILL.md b/SKILL.md
@@ -2,43 +2,10 @@
 name: gstack
 version: 1.1.0
 description: |
-  Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
-  elements, verify page state, diff before/after actions, take annotated screenshots, check
-  responsive layouts, test forms and uploads, handle dialogs, and assert element states.
-  ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
-  user flow, or file a bug with evidence.
-
-  gstack also includes development workflow skills. When you notice the user is at
-  these stages, suggest the appropriate skill:
-  - Brainstorming a new idea → suggest /office-hours
-  - Reviewing a plan (strategy) → suggest /plan-ceo-review
-  - Reviewing a plan (architecture) → suggest /plan-eng-review
-  - Reviewing a plan (design) → suggest /plan-design-review
-  - Creating a design system → suggest /design-consultation
-  - Debugging errors → suggest /investigate
-  - Testing the app → suggest /qa
-  - Code review before merge → suggest /review
-  - Visual design audit → suggest /design-review
-  - Ready to deploy / create PR → suggest /ship
-  - Post-ship doc updates → suggest /document-release
-  - Weekly retrospective → suggest /retro
-  - Wanting a second opinion or adversarial code review → suggest /codex
-  - Working with production or live systems → suggest /careful
-  - Want to scope edits to one module/directory → suggest /freeze
-  - Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard
-  - Removing edit restrictions → suggest /unfreeze
-  - Upgrading gstack to latest version → suggest /gstack-upgrade
-
-  If the user pushes back on skill suggestions ("stop suggesting things",
-  "I don't need suggestions", "too aggressive"):
-  1. Stop suggesting for the rest of this session
-  2. Run: gstack-config set proactive false
-  3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive
-     again if you change your mind."
-
-  If the user says "be proactive again" or "turn on suggestions":
-  1. Run: gstack-config set proactive true
-  2. Say: "Proactive suggestions are back on."
+  Entry point for gstack's headless browser and workflow skills. Use it for QA testing,
+  site dogfooding, deployment checks, screenshots, and routed suggestions such as
+  /office-hours, /investigate, /qa, /review, /ship, /design-review, and related helpers.
+  Full routing and proactive suggestion policy live in the body below.
 allowed-tools:
   - Bash
   - Read
@@ -272,10 +239,44 @@ success/error/abort, and `USED_BROWSE` with true/false based on whether `$B` was
 If you cannot determine the outcome, use "unknown". This runs in the background and
 never blocks the user.
 
+## Skill Routing
+
+gstack also includes development workflow skills. When you notice the user is at
+these stages, suggest the appropriate skill:
+
+- Brainstorming a new idea → suggest /office-hours
+- Reviewing a plan (strategy) → suggest /plan-ceo-review
+- Reviewing a plan (architecture) → suggest /plan-eng-review
+- Reviewing a plan (design) → suggest /plan-design-review
+- Creating a design system → suggest /design-consultation
+- Debugging errors → suggest /investigate
+- Testing the app → suggest /qa
+- Code review before merge → suggest /review
+- Visual design audit → suggest /design-review
+- Ready to deploy / create PR → suggest /ship
+- Post-ship doc updates → suggest /document-release
+- Weekly retrospective → suggest /retro
+- Wanting a second opinion or adversarial code review → suggest /codex
+- Working with production or live systems → suggest /careful
+- Want to scope edits to one module/directory → suggest /freeze
+- Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard
+- Removing edit restrictions → suggest /unfreeze
+- Upgrading gstack to latest version → suggest /gstack-upgrade
+
 If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during this session.
 Only run skills the user explicitly invokes. This preference persists across sessions via
 `gstack-config`.
 
+If the user pushes back on skill suggestions ("stop suggesting things",
+"I don't need suggestions", "too aggressive"):
+1. Stop suggesting for the rest of this session.
+2. Run: `gstack-config set proactive false`
+3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive again if you change your mind."
+
+If the user says "be proactive again" or "turn on suggestions":
+1. Run: `gstack-config set proactive true`
+2. Say: "Proactive suggestions are back on."
+
 # gstack browse: QA Testing & Dogfooding
 
 Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command.

diff --git a/SKILL.md.tmpl b/SKILL.md.tmpl
@@ -2,43 +2,10 @@
 name: gstack
 version: 1.1.0
 description: |
-  Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
-  elements, verify page state, diff before/after actions, take annotated screenshots, check
-  responsive layouts, test forms and uploads, handle dialogs, and assert element states.
-  ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
-  user flow, or file a bug with evidence.
-
-  gstack also includes development workflow skills. When you notice the user is at
-  these stages, suggest the appropriate skill:
-  - Brainstorming a new idea → suggest /office-hours
-  - Reviewing a plan (strategy) → suggest /plan-ceo-review
-  - Reviewing a plan (architecture) → suggest /plan-eng-review
-  - Reviewing a plan (design) → suggest /plan-design-review
-  - Creating a design system → suggest /design-consultation
-  - Debugging errors → suggest /investigate
-  - Testing the app → suggest /qa
-  - Code review before merge → suggest /review
-  - Visual design audit → suggest /design-review
-  - Ready to deploy / create PR → suggest /ship
-  - Post-ship doc updates → suggest /document-release
-  - Weekly retrospective → suggest /retro
-  - Wanting a second opinion or adversarial code review → suggest /codex
-  - Working with production or live systems → suggest /careful
-  - Want to scope edits to one module/directory → suggest /freeze
-  - Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard
-  - Removing edit restrictions → suggest /unfreeze
-  - Upgrading gstack to latest version → suggest /gstack-upgrade
-
-  If the user pushes back on skill suggestions ("stop suggesting things",
-  "I don't need suggestions", "too aggressive"):
-  1. Stop suggesting for the rest of this session
-  2. Run: gstack-config set proactive false
-  3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive
-     again if you change your mind."
-
-  If the user says "be proactive again" or "turn on suggestions":
-  1. Run: gstack-config set proactive true
-  2. Say: "Proactive suggestions are back on."
+  Entry point for gstack's headless browser and workflow skills. Use it for QA testing,
+  site dogfooding, deployment checks, screenshots, and routed suggestions such as
+  /office-hours, /investigate, /qa, /review, /ship, /design-review, and related helpers.
+  Full routing and proactive suggestion policy live in the body below.
 allowed-tools:
   - Bash
   - Read
@@ -48,10 +15,44 @@ allowed-tools:
 
 {{PREAMBLE}}
 
+## Skill Routing
+
+gstack also includes development workflow skills. When you notice the user is at
+these stages, suggest the appropriate skill:
+
+- Brainstorming a new idea → suggest /office-hours
+- Reviewing a plan (strategy) → suggest /plan-ceo-review
+- Reviewing a plan (architecture) → suggest /plan-eng-review
+- Reviewing a plan (design) → suggest /plan-design-review
+- Creating a design system → suggest /design-consultation
+- Debugging errors → suggest /investigate
+- Testing the app → suggest /qa
+- Code review before merge → suggest /review
+- Visual design audit → suggest /design-review
+- Ready to deploy / create PR → suggest /ship
+- Post-ship doc updates → suggest /document-release
+- Weekly retrospective → suggest /retro
+- Wanting a second opinion or adversarial code review → suggest /codex
+- Working with production or live systems → suggest /careful
+- Want to scope edits to one module/directory → suggest /freeze
+- Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard
+- Removing edit restrictions → suggest /unfreeze
+- Upgrading gstack to latest version → suggest /gstack-upgrade
+
 If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during this session.
 Only run skills the user explicitly invokes. This preference persists across sessions via
 `gstack-config`.
 
+If the user pushes back on skill suggestions ("stop suggesting things",
+"I don't need suggestions", "too aggressive"):
+1. Stop suggesting for the rest of this session.
+2. Run: `gstack-config set proactive false`
+3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive again if you change your mind."
+
+If the user says "be proactive again" or "turn on suggestions":
+1. Run: `gstack-config set proactive true`
+2. Say: "Proactive suggestions are back on."
+
 # gstack browse: QA Testing & Dogfooding
 
 Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command.

diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts
@@ -16,6 +16,7 @@ import * as path from 'path';
 
 const ROOT = path.resolve(import.meta.dir, '..');
 const DRY_RUN = process.argv.includes('--dry-run');
+const MAX_CODEX_DESCRIPTION_LEN = 1024; // Upper bound on a Codex frontmatter description's `.length`; transformFrontmatter throws when it is exceeded.
 
 // ─── Template Context ───────────────────────────────────────
 
@@ -1763,6 +1764,11 @@ function transformFrontmatter(content: string, host: Host): string {
   if (descLines.length > 0) {
     description = descLines.join('\n').trim();
   }
+  if (description.length > MAX_CODEX_DESCRIPTION_LEN) {
+    throw new Error(
+      `Codex frontmatter description exceeds ${MAX_CODEX_DESCRIPTION_LEN} characters (${description.length})`,
+    );
+  }
 
   // Re-emit Codex frontmatter (name + description only)
   const indentedDesc = description.split('\n').map(l => `  ${l}`).join('\n');

diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts
@@ -6,6 +6,32 @@ import * as fs from 'fs';
 import * as path from 'path';
 
 const ROOT = path.resolve(import.meta.dir, '..');
+const MAX_CODEX_DESCRIPTION_LEN = 1024; // Duplicates the constant of the same name in scripts/gen-skill-docs.ts — keep the two values in sync.
+
+function extractDescriptionFromFrontmatter(content: string): string { // Returns the block-scalar `description` text from SKILL.md frontmatter, or '' if none is found.
+  const frontmatterEnd = content.indexOf('\n---', 4); // search starts at index 4 — presumably just past an opening `---\n` fence (not verified here; TODO confirm)
+  if (frontmatterEnd === -1) return ''; // no closing fence: report "no description"
+
+  const frontmatter = content.slice(4, frontmatterEnd);
+  const lines = frontmatter.split('\n');
+  const descLines: string[] = [];
+  let inDescription = false;
+
+  for (const line of lines) {
+    if (line.match(/^description:\s*\|?\s*$/)) { // header must be bare or `|`; an inline `description: text` does not match and is skipped like any other key
+      inDescription = true;
+      continue;
+    }
+    if (!inDescription) continue; // ignore every line that precedes the description header
+    if (line === '' || line.match(/^\s/)) { // blank or whitespace-led lines still belong to the description
+      descLines.push(line.replace(/^  /, '')); // drop only the first two spaces of indentation, when present
+      continue;
+    }
+    break; // first non-empty line without leading whitespace ends the description
+  }
+
+  return descLines.join('\n').trim(); // empty string when nothing was collected
+}
 
 describe('SKILL.md command validation', () => {
   test('all $B commands in SKILL.md are valid browse commands', () => {
@@ -1435,4 +1461,17 @@ describe('Codex skill validation', () => {
       expect(result.invalid).toHaveLength(0);
     }
   });
+
+  test('Codex SKILL.md descriptions stay within the loader limit', () => { // Checks each SKILL.md found one level below AGENTS_DIR for a non-empty, bounded description.
+    const codexDirs = fs.readdirSync(AGENTS_DIR); // AGENTS_DIR is defined elsewhere in this file — presumably the .agents/skills directory; verify
+    for (const dir of codexDirs) {
+      const skillMd = path.join(AGENTS_DIR, dir, 'SKILL.md');
+      if (!fs.existsSync(skillMd)) continue; // entries without a SKILL.md are skipped, not failed
+
+      const content = fs.readFileSync(skillMd, 'utf-8');
+      const description = extractDescriptionFromFrontmatter(content);
+      expect(description.length).toBeGreaterThan(0); // the helper returns '' when it cannot find a description, so this also catches unparsable frontmatter
+      expect(description.length).toBeLessThanOrEqual(MAX_CODEX_DESCRIPTION_LEN); // NOTE(review): .length counts UTF-16 code units — confirm the loader measures its limit the same way; a failure here does not name the offending dir
+    }
+  });
 });