diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..b0efc7ce --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +node_modules +dist +.worktrees +*.tgz diff --git a/.github/workflows/deprecate.yml b/.github/workflows/deprecate.yml new file mode 100644 index 00000000..f0482188 --- /dev/null +++ b/.github/workflows/deprecate.yml @@ -0,0 +1,23 @@ +name: Deprecate old versions + +on: + workflow_dispatch: + +jobs: + deprecate: + permissions: + contents: read + runs-on: ubuntu-latest + steps: + - uses: actions/setup-node@v4 + with: + node-version: '20' + registry-url: 'https://registry.npmjs.org' + + - name: Deprecate broken versions + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + npm deprecate squads-cli@0.7.0 "Broken release — crashes on 'squads run'. Use 0.2.1+" || true + npm deprecate squads-cli@0.7.1 "Broken release. Use 0.2.1+" || true + echo "Done" diff --git a/package-lock.json b/package-lock.json index 0830da63..5fd1db94 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "squads-cli", - "version": "0.7.1", + "version": "0.2.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "squads-cli", - "version": "0.7.1", + "version": "0.2.2", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": "^0.71.2", diff --git a/package.json b/package.json index dddb5992..71bfdc66 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "squads-cli", - "version": "0.2.1", + "version": "0.2.2", "description": "Your AI workforce. 
Every user gets an AI manager that runs their team — finance, marketing, engineering, operations — for the cost of API calls.", "type": "module", "bin": { diff --git a/src/cli.ts b/src/cli.ts index 5f7a3108..713327e5 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -58,6 +58,11 @@ import { registerApprovalCommand } from './commands/approval.js'; import { registerDeployCommand } from './commands/deploy.js'; import { registerEvalCommand } from './commands/eval.js'; import { registerCognitionCommand } from './commands/cognition.js'; +import { registerCatalogCommands } from './commands/catalog.js'; +import { registerReleaseCommands } from './commands/release-check.js'; +import { registerObservabilityCommands } from './commands/observability.js'; +import { registerTierCommand } from './commands/tier.js'; +import { registerServicesCommands } from './commands/services.js'; // All other command handlers are lazy-loaded via dynamic import() inside // action handlers. Only the invoked command's dependencies are loaded, @@ -303,6 +308,7 @@ program .option('--once', 'Autopilot: run one cycle then exit') .option('--phased', 'Autopilot: use dependency-based phase ordering (from SQUAD.md depends_on)') .option('--no-eval', 'Skip post-run COO evaluation') + .option('--org', 'Run all squads as a coordinated org cycle (scan → plan → execute → report)') .addHelpText('after', ` Examples: $ squads run engineering Run squad conversation (lead → scan → work → review) @@ -325,8 +331,11 @@ Examples: return runCommand(target || null, { ...options, timeout: parseInt(options.timeout, 10) }); }); -// List command (removed — use status instead) -program.command('list', { hidden: true }).description('[removed]').action(removedCommand('list', 'Use: squads status')); +// List command — alias for status +program.command('list').description('List squads (alias for: squads status)').action(async () => { + const { statusCommand } = await import('./commands/status.js'); + return statusCommand(); +}); // 
Orchestrate command - lead-coordinated squad execution registerOrchestrateCommand(program); @@ -1043,6 +1052,13 @@ registerDeployCommand(program); // Cognition command group - business cognition engine registerCognitionCommand(program); +// IDP — service catalog, scorecards, release checks +registerCatalogCommands(program); +registerReleaseCommands(program); +registerObservabilityCommands(program); +registerTierCommand(program); +registerServicesCommands(program); + // Providers command - show LLM CLI availability for multi-LLM support program .command('providers') diff --git a/src/commands/catalog.ts b/src/commands/catalog.ts new file mode 100644 index 00000000..bc28d49e --- /dev/null +++ b/src/commands/catalog.ts @@ -0,0 +1,222 @@ +/** + * squads catalog — service catalog commands. + * + * squads catalog list Show all services + * squads catalog show Service details + * squads catalog check Validate against scorecard + */ + +import { Command } from 'commander'; +import { loadCatalog, loadService, loadScorecard } from '../lib/idp/catalog-loader.js'; +import { evaluateService } from '../lib/idp/scorecard-engine.js'; +import { findIdpDir } from '../lib/idp/resolver.js'; +import type { CatalogEntry } from '../lib/idp/types.js'; +import { colors, bold, RESET, writeLine } from '../lib/terminal.js'; + +function noIdp(): boolean { + if (!findIdpDir()) { + writeLine(` ${colors.red}IDP not found${RESET}`); + writeLine(` ${colors.dim}Set SQUADS_IDP_PATH or clone the idp repo as a sibling directory.${RESET}`); + return true; + } + return false; +} + +export function registerCatalogCommands(program: Command): void { + const catalog = program + .command('catalog') + .description('Service catalog — browse, inspect, and validate services'); + + // ── catalog list ── + catalog + .command('list') + .description('List all services in the catalog') + .option('--type ', 'Filter by type (product, domain)') + .option('--json', 'Output as JSON') + .action((opts) => { + if (noIdp()) 
return; + + const entries = loadCatalog(); + if (entries.length === 0) { + writeLine(' No catalog entries found.'); + return; + } + + const filtered = opts.type + ? entries.filter(e => e.spec.type === opts.type) + : entries; + + if (opts.json) { + console.log(JSON.stringify(filtered.map(e => ({ + name: e.metadata.name, + type: e.spec.type, + stack: e.spec.stack, + owner: e.metadata.owner, + repo: e.metadata.repo, + })), null, 2)); + return; + } + + writeLine(); + writeLine(` ${bold}Service Catalog${RESET} (${filtered.length} services)`); + writeLine(); + + // Group by type + const products = filtered.filter(e => e.spec.type === 'product'); + const domains = filtered.filter(e => e.spec.type === 'domain'); + + if (products.length > 0) { + writeLine(` ${colors.cyan}Product Services${RESET}`); + writeLine(); + for (const e of products) { + const ci = e.spec.ci.template ? `ci:${e.spec.ci.template}` : 'no-ci'; + const deploy = e.spec.deploy?.target || 'manual'; + writeLine(` ${bold}${e.metadata.name}${RESET} ${colors.dim}${e.spec.stack} | ${ci} | deploy:${deploy} | owner:${e.metadata.owner}${RESET}`); + writeLine(` ${colors.dim}${e.metadata.description}${RESET}`); + } + writeLine(); + } + + if (domains.length > 0) { + writeLine(` ${colors.cyan}Domain Repos${RESET}`); + writeLine(); + for (const e of domains) { + writeLine(` ${e.metadata.name} ${colors.dim}owner:${e.metadata.owner} | ${e.metadata.repo}${RESET}`); + } + writeLine(); + } + }); + + // ── catalog show ── + catalog + .command('show ') + .description('Show detailed info for a service') + .option('--json', 'Output as JSON') + .action((serviceName: string, opts) => { + if (noIdp()) return; + + const entry = loadService(serviceName); + if (!entry) { + writeLine(` ${colors.red}Service not found: ${serviceName}${RESET}`); + writeLine(` ${colors.dim}Run 'squads catalog list' to see available services.${RESET}`); + return; + } + + if (opts.json) { + console.log(JSON.stringify(entry, null, 2)); + return; + } + + 
writeLine(); + writeLine(` ${bold}${entry.metadata.name}${RESET} ${colors.dim}${entry.spec.type}${RESET}`); + writeLine(` ${entry.metadata.description}`); + writeLine(); + + writeLine(` ${colors.cyan}General${RESET}`); + writeLine(` Owner: ${entry.metadata.owner}`); + writeLine(` Repo: ${entry.metadata.repo}`); + writeLine(` Stack: ${entry.spec.stack}${entry.spec.framework ? ` (${entry.spec.framework})` : ''}`); + writeLine(` Scorecard: ${entry.spec.scorecard}`); + writeLine(` Tags: ${entry.metadata.tags?.join(', ') || 'none'}`); + writeLine(); + + writeLine(` ${colors.cyan}Branches${RESET}`); + writeLine(` Default: ${entry.spec.branches.default}`); + writeLine(` Workflow: ${entry.spec.branches.workflow}`); + if (entry.spec.branches.development) { + writeLine(` Dev branch: ${entry.spec.branches.development}`); + } + writeLine(); + + if (entry.spec.ci.template) { + writeLine(` ${colors.cyan}CI/CD${RESET}`); + writeLine(` Template: ${entry.spec.ci.template}`); + writeLine(` Checks: ${entry.spec.ci.required_checks.join(', ') || 'none'}`); + if (entry.spec.ci.build_command) writeLine(` Build: ${entry.spec.ci.build_command}`); + if (entry.spec.ci.test_command) writeLine(` Test: ${entry.spec.ci.test_command}`); + writeLine(); + } + + if (entry.spec.deploy) { + writeLine(` ${colors.cyan}Deploy${RESET}`); + writeLine(` Target: ${entry.spec.deploy.target}`); + writeLine(` Trigger: ${entry.spec.deploy.trigger}`); + if (entry.spec.deploy.environments) { + for (const env of entry.spec.deploy.environments) { + writeLine(` ${env.name}: ${env.url}`); + } + } + writeLine(); + } + + if (entry.spec.dependencies.runtime.length > 0) { + writeLine(` ${colors.cyan}Dependencies${RESET}`); + for (const dep of entry.spec.dependencies.runtime) { + const req = dep.required === false ? 
'(optional)' : '(required)'; + writeLine(` → ${dep.service} ${dep.version || ''} ${req}`); + writeLine(` ${colors.dim}${dep.description}${RESET}`); + } + writeLine(); + } + + if (entry.spec.health.length > 0) { + writeLine(` ${colors.cyan}Health Endpoints${RESET}`); + for (const h of entry.spec.health) { + writeLine(` ${h.name}: ${h.url}`); + } + writeLine(); + } + }); + + // ── catalog check ── + catalog + .command('check [service]') + .description('Run scorecard checks for a service (or all)') + .option('--json', 'Output as JSON') + .action((serviceName: string | undefined, opts) => { + if (noIdp()) return; + + const entries = serviceName + ? [loadService(serviceName)].filter(Boolean) as CatalogEntry[] + : loadCatalog(); + + if (entries.length === 0) { + writeLine(` ${colors.red}No services found${RESET}`); + return; + } + + const results = []; + + for (const entry of entries) { + const scorecard = loadScorecard(entry.spec.scorecard); + if (!scorecard) { + writeLine(` ${colors.dim}No scorecard '${entry.spec.scorecard}' for ${entry.metadata.name}${RESET}`); + continue; + } + + const result = evaluateService(entry, scorecard); + results.push(result); + + if (!opts.json) { + const gradeColor = result.grade === 'A' ? colors.green + : result.grade === 'B' ? colors.cyan + : result.grade === 'C' ? colors.yellow + : colors.red; + + writeLine(); + writeLine(` ${bold}${result.service}${RESET} ${gradeColor}${result.grade}${RESET} (${result.score}/100)`); + + for (const check of result.checks) { + const icon = check.passed ? 
`${colors.green}pass${RESET}` : `${colors.red}fail${RESET}`; + writeLine(` ${icon} ${check.name} ${colors.dim}(${check.detail})${RESET}`); + } + } + } + + if (opts.json) { + console.log(JSON.stringify(results, null, 2)); + } else { + writeLine(); + } + }); +} diff --git a/src/commands/init.ts b/src/commands/init.ts index 88a06d15..a046a682 100644 --- a/src/commands/init.ts +++ b/src/commands/init.ts @@ -19,6 +19,7 @@ import { execSync } from 'child_process'; import { createInterface } from 'readline'; import { checkGitStatus, getRepoName } from '../lib/git.js'; import { track, Events } from '../lib/telemetry.js'; +import { existsSync, readFileSync } from 'fs'; import { loadTemplate, type TemplateVariables, @@ -194,6 +195,80 @@ function getOperationsSquad(): SquadConfig { }; } +interface ProjectInfo { + name: string; + type: 'product' | 'domain'; + stack: string; + repoName: string; + buildCommand: string | null; + testCommand: string | null; +} + +/** + * Auto-detect project metadata from the filesystem + */ +function detectProjectInfo(cwd: string, gitStatus: { remoteUrl?: string }): ProjectInfo { + const dirName = path.basename(cwd); + + // Name: from git remote (last segment) or directory name + let name = dirName; + let repoName = dirName; + if (gitStatus.remoteUrl) { + const full = getRepoName(gitStatus.remoteUrl); + if (full) { + repoName = full; + name = full.includes('/') ? 
full.split('/')[1] : full; + } + } + + // Stack: detect from project files + let stack = 'unknown'; + let type: 'product' | 'domain' = 'domain'; + let buildCommand: string | null = null; + let testCommand: string | null = null; + + if (existsSync(path.join(cwd, 'package.json'))) { + stack = 'node'; + type = 'product'; + buildCommand = 'npm run build'; + testCommand = 'npm test'; + // Check for specific frameworks + try { + const pkg = JSON.parse(readFileSync(path.join(cwd, 'package.json'), 'utf-8')); + const deps = { ...pkg.dependencies, ...pkg.devDependencies }; + if (deps['next']) stack = 'next'; + else if (deps['nuxt']) stack = 'nuxt'; + else if (deps['astro']) stack = 'astro'; + else if (deps['react']) stack = 'react'; + else if (deps['vue']) stack = 'vue'; + } catch { /* ignore */ } + } else if (existsSync(path.join(cwd, 'go.mod'))) { + stack = 'go'; + type = 'product'; + buildCommand = 'go build ./...'; + testCommand = 'go test ./...'; + } else if ( + existsSync(path.join(cwd, 'requirements.txt')) || + existsSync(path.join(cwd, 'pyproject.toml')) || + existsSync(path.join(cwd, 'setup.py')) + ) { + stack = 'python'; + type = 'product'; + testCommand = 'pytest'; + } else if (existsSync(path.join(cwd, 'Gemfile'))) { + stack = 'ruby'; + type = 'product'; + testCommand = 'bundle exec rspec'; + } else if (existsSync(path.join(cwd, 'Cargo.toml'))) { + stack = 'rust'; + type = 'product'; + buildCommand = 'cargo build'; + testCommand = 'cargo test'; + } + + return { name, type, stack, repoName, buildCommand, testCommand }; +} + function isInteractive(): boolean { return process.stdin.isTTY === true && process.stdout.isTTY === true; } @@ -345,7 +420,10 @@ export async function initCommand(options: InitOptions): Promise { writeLine(); - // 4. Ask about the business + // 4. 
Detect project info (used for IDP catalog) + const projectInfo = detectProjectInfo(cwd, gitStatus); + + // Ask about the business let businessName: string; let businessDescription: string; let businessFocus: string; @@ -487,6 +565,7 @@ export async function initCommand(options: InitOptions): Promise { : '', PROVIDER: selectedProvider, PROVIDER_NAME: provider?.name || 'Unknown', + CURRENT_DATE: new Date().toISOString().split('T')[0], }; // Core directories (always created) @@ -585,10 +664,45 @@ export async function initCommand(options: InitOptions): Promise { await writeIfNew(path.join(cwd, dest), loadSeedTemplate(template, variables)); } + // Squad-level priorities and goals (all squads including use-case squads) + const reviewDate = new Date(); + reviewDate.setDate(reviewDate.getDate() + 14); + const allSquads = [ + { name: 'company', label: 'Company', lead: 'manager' }, + { name: 'research', label: 'Research', lead: 'lead' }, + { name: 'intelligence', label: 'Intelligence', lead: 'intel-lead' }, + { name: 'product', label: 'Product', lead: 'lead' }, + ...useCaseConfig.squads.map(s => ({ + name: s.name, + label: s.name.charAt(0).toUpperCase() + s.name.slice(1), + lead: s.agentSummary.split(',')[0].trim(), + })), + ]; + for (const squad of allSquads) { + const squadVars: TemplateVariables = { + ...variables, + SQUAD_NAME: squad.name, + SQUAD_LABEL: squad.label, + SQUAD_LEAD: squad.lead, + REVIEW_DATE: reviewDate.toISOString().split('T')[0], + }; + await writeIfNew( + path.join(cwd, `.agents/memory/${squad.name}/priorities.md`), + loadSeedTemplate('memory/_squad/priorities.md', squadVars), + ); + await writeIfNew( + path.join(cwd, `.agents/memory/${squad.name}/goals.md`), + loadSeedTemplate('memory/_squad/goals.md', squadVars), + ); + } + // Skills const skillContent = loadSeedTemplate('skills/squads-cli/SKILL.md', variables); await writeFile(path.join(cwd, '.agents/skills/squads-cli/SKILL.md'), skillContent); + const skillRefContent = 
loadSeedTemplate('skills/squads-cli/references/commands.md', variables); + await writeFile(path.join(cwd, '.agents/skills/squads-cli/references/commands.md'), skillRefContent); + const ghSkillContent = loadSeedTemplate('skills/gh/SKILL.md', variables); await writeFile(path.join(cwd, '.agents/skills/gh/SKILL.md'), ghSkillContent); @@ -600,6 +714,33 @@ export async function initCommand(options: InitOptions): Promise { const systemMd = loadSeedTemplate('config/SYSTEM.md', variables); await writeFile(path.join(cwd, '.agents/config/SYSTEM.md'), systemMd); + // IDP catalog entry (only if .agents/idp/ doesn't already exist) + const idpCatalogDir = path.join(cwd, '.agents', 'idp', 'catalog'); + if (!existsSync(idpCatalogDir)) { + const ownerSquad = useCaseConfig.squads[0]?.name || 'engineering'; + const isProduct = projectInfo.type === 'product'; + const idpVariables: TemplateVariables = { + ...variables, + SERVICE_NAME: projectInfo.name, + SERVICE_TYPE: projectInfo.type, + SERVICE_STACK: projectInfo.stack, + SERVICE_SCORECARD: isProduct ? 'product' : 'domain', + REPO_NAME: projectInfo.repoName, + OWNER_SQUAD: ownerSquad, + BRANCHES_WORKFLOW: isProduct ? 'pr-to-develop' : 'direct-to-main', + BRANCHES_DEVELOPMENT: isProduct ? 'develop' : '', + CI_TEMPLATE: isProduct ? projectInfo.stack : 'null', + BUILD_COMMAND: projectInfo.buildCommand ?? 'null', + TEST_COMMAND: projectInfo.testCommand ?? 
'null', + }; + const catalogContent = loadSeedTemplate('idp/catalog/service.yaml.template', idpVariables); + await writeFile(path.join(idpCatalogDir, `${projectInfo.name}.yaml`), catalogContent); + } + + // Company context (Layer 1 of context cascade) + const companyMd = loadSeedTemplate('memory/company/company.md', variables); + await writeIfNew(path.join(cwd, '.agents/memory/company/company.md'), companyMd); + // Directives (Layer 3 of context cascade) const directivesMd = loadSeedTemplate('memory/company/directives.md', variables); await writeIfNew(path.join(cwd, '.agents/memory/company/directives.md'), directivesMd); diff --git a/src/commands/observability.ts b/src/commands/observability.ts new file mode 100644 index 00000000..ca70a1c2 --- /dev/null +++ b/src/commands/observability.ts @@ -0,0 +1,174 @@ +/** + * squads obs — observability commands + * + * squads obs history Execution history with tokens/cost + * squads obs cost Spend summary by squad and model + */ + +import { Command } from 'commander'; +import { queryExecutions, calculateCostSummary } from '../lib/observability.js'; +import { colors, bold, RESET, writeLine } from '../lib/terminal.js'; + +export function registerObservabilityCommands(program: Command): void { + const obs = program + .command('obs') + .description('Observability — execution history, token costs, and trends'); + + obs + .command('history') + .description('Show execution history with tokens and cost') + .option('-s, --squad ', 'Filter by squad') + .option('-a, --agent ', 'Filter by agent') + .option('-n, --limit ', 'Number of records', '20') + .option('--since ', 'Since date (ISO or relative: 1d, 7d, 30d)') + .option('--json', 'Output as JSON') + .action((opts) => { + let since = opts.since; + if (since && /^\d+d$/.test(since)) { + const days = parseInt(since, 10); + since = new Date(Date.now() - days * 24 * 60 * 60 * 1000).toISOString(); + } + + const records = queryExecutions({ + squad: opts.squad, agent: opts.agent, since, 
limit: parseInt(opts.limit, 10), + }); + + if (records.length === 0) { + writeLine(`\n ${colors.dim}No executions found. Run \`squads run \` to generate data.${RESET}\n`); + return; + } + + if (opts.json) { console.log(JSON.stringify(records, null, 2)); return; } + + writeLine(`\n ${bold}Execution History${RESET} (${records.length} records)\n`); + + for (const r of records) { + const icon = r.status === 'completed' ? `${colors.green}pass${RESET}` + : r.status === 'failed' ? `${colors.red}fail${RESET}` : `${colors.yellow}timeout${RESET}`; + const dur = r.duration_ms > 60000 ? `${Math.round(r.duration_ms / 60000)}m` : `${Math.round(r.duration_ms / 1000)}s`; + const cost = r.cost_usd > 0 ? `$${r.cost_usd.toFixed(3)}` : '$—'; + const tok = (r.input_tokens + r.output_tokens) > 0 ? `${(r.input_tokens + r.output_tokens).toLocaleString()} tok` : '— tok'; + const date = r.ts.slice(0, 16).replace('T', ' '); + + const grade = r.grade ? ` ${r.grade}` : ''; + writeLine(` ${icon} ${bold}${r.squad}/${r.agent}${RESET} ${colors.dim}${date} ${dur} ${tok} ${cost} ${r.model}${grade}${RESET}`); + if (r.error) writeLine(` ${colors.red}${r.error.slice(0, 80)}${RESET}`); + if (r.goals_changed && r.goals_changed.length > 0) { + for (const g of r.goals_changed) { + writeLine(` ${colors.green}goal: ${g.name} ${g.before} → ${g.after}${RESET}`); + } + } + } + writeLine(); + }); + + obs + .command('cost') + .description('Show token spend summary') + .option('-p, --period ', 'Time period: today, 7d, 30d, all', '7d') + .option('--json', 'Output as JSON') + .action((opts) => { + const summary = calculateCostSummary(opts.period); + + if (summary.total_runs === 0) { + writeLine(`\n ${colors.dim}No executions in the last ${opts.period}.${RESET}\n`); + return; + } + + if (opts.json) { console.log(JSON.stringify(summary, null, 2)); return; } + + writeLine(`\n ${bold}Cost Summary${RESET} (${summary.period})`); + writeLine(`\n Total: ${bold}$${summary.total_cost.toFixed(2)}${RESET} across 
${summary.total_runs} runs`); + writeLine(` Tokens: ${summary.total_input_tokens.toLocaleString()} in / ${summary.total_output_tokens.toLocaleString()} out\n`); + + const squads = Object.entries(summary.by_squad).sort((a, b) => b[1].cost - a[1].cost); + if (squads.length > 0) { + writeLine(` ${colors.cyan}By Squad${RESET}`); + for (const [name, data] of squads) { + const bar = '█'.repeat(Math.max(1, Math.round(data.cost / (summary.total_cost || 1) * 20))); + writeLine(` ${name.padEnd(20)} ${colors.dim}${bar}${RESET} $${data.cost.toFixed(2)} (${data.runs} runs, avg $${data.avg_cost.toFixed(3)})`); + } + writeLine(); + } + + const models = Object.entries(summary.by_model).sort((a, b) => b[1].cost - a[1].cost); + if (models.length > 0) { + writeLine(` ${colors.cyan}By Model${RESET}`); + for (const [name, data] of models) { + writeLine(` ${name.padEnd(30)} $${data.cost.toFixed(2)} (${data.runs} runs)`); + } + writeLine(); + } + }); + + // ── obs sync ── + obs + .command('sync') + .description('Backfill JSONL execution data to Postgres (Tier 2)') + .option('--dry-run', 'Show what would be synced without sending') + .action(async (opts) => { + const { detectTier } = await import('../lib/tier-detect.js'); + const { queryExecutions } = await import('../lib/observability.js'); + const info = await detectTier(); + + if (info.tier < 2 || !info.urls.api) { + writeLine(`\n ${colors.dim}Tier 2 not available. 
Run 'squads services up' first.${RESET}\n`); + return; + } + + const records = queryExecutions({ limit: 10000 }); + if (records.length === 0) { + writeLine(`\n ${colors.dim}No JSONL records to sync.${RESET}\n`); + return; + } + + writeLine(`\n ${bold}Syncing ${records.length} records to Postgres...${RESET}\n`); + + let synced = 0; + let skipped = 0; + let errors = 0; + + for (const record of records) { + if (opts.dryRun) { + writeLine(` ${colors.dim}[dry-run] ${record.ts} ${record.squad}/${record.agent} $${record.cost_usd.toFixed(3)}${RESET}`); + synced++; + continue; + } + + try { + const res = await fetch(`${info.urls.api}/agent-executions`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + execution_id: record.id, + squad: record.squad, + agent: record.agent, + model: record.model, + status: record.status, + input_tokens: record.input_tokens, + output_tokens: record.output_tokens, + cache_read_tokens: record.cache_read_tokens, + cache_write_tokens: record.cache_write_tokens, + cost_usd: record.cost_usd, + duration_seconds: Math.round(record.duration_ms / 1000), + error_message: record.error || null, + metadata: { trigger: record.trigger, provider: record.provider }, + }), + signal: AbortSignal.timeout(5000), + }); + + if (res.ok) { + synced++; + } else if (res.status === 409) { + skipped++; // Already exists (dedup) + } else { + errors++; + } + } catch { + errors++; + } + } + + writeLine(` ${colors.green}Synced: ${synced}${RESET} ${colors.dim}Skipped: ${skipped} Errors: ${errors}${RESET}\n`); + }); +} diff --git a/src/commands/release-check.ts b/src/commands/release-check.ts new file mode 100644 index 00000000..5ecc8019 --- /dev/null +++ b/src/commands/release-check.ts @@ -0,0 +1,140 @@ +/** + * squads release — release pre-check and status. 
+ * + * squads release pre-check Validate dependencies before deploy + */ + +import { Command } from 'commander'; +import { loadService, loadDependencyGraph } from '../lib/idp/catalog-loader.js'; +import { findIdpDir } from '../lib/idp/resolver.js'; +import { colors, bold, RESET, writeLine } from '../lib/terminal.js'; + +async function checkHealth(url: string, expect: number): Promise<{ ok: boolean; status: number | string }> { + try { + const response = await fetch(url, { signal: AbortSignal.timeout(10000) }); + return { ok: response.status === expect, status: response.status }; + } catch (e) { + return { ok: false, status: e instanceof Error ? e.message : 'unreachable' }; + } +} + +export function registerReleaseCommands(program: Command): void { + const release = program + .command('release') + .description('Release management — pre-deploy checks and status'); + + release + .command('pre-check ') + .description('Validate dependencies and health before deploying a service') + .option('--skip-health', 'Skip health endpoint checks') + .action(async (serviceName: string, opts) => { + const idpDir = findIdpDir(); + if (!idpDir) { + writeLine(` ${colors.red}IDP not found${RESET}`); + return; + } + + const service = loadService(serviceName); + if (!service) { + writeLine(` ${colors.red}Service not found: ${serviceName}${RESET}`); + return; + } + + const graph = loadDependencyGraph(); + const deps = service.spec.dependencies.runtime; + + writeLine(); + writeLine(` ${bold}Release Pre-Check: ${serviceName}${RESET}`); + writeLine(); + + let allGreen = true; + + // Check dependencies + if (deps.length === 0) { + writeLine(` ${colors.green}pass${RESET} No runtime dependencies`); + } else { + writeLine(` ${colors.cyan}Dependencies${RESET}`); + for (const dep of deps) { + const depService = loadService(dep.service); + const req = dep.required !== false; + + if (!depService) { + if (dep.type === 'infrastructure') { + writeLine(` ${colors.dim}skip${RESET} ${dep.service} 
(infrastructure — not in catalog)`); + continue; + } + if (req) { + writeLine(` ${colors.red}fail${RESET} ${dep.service} — not found in catalog`); + allGreen = false; + } else { + writeLine(` ${colors.yellow}warn${RESET} ${dep.service} — not in catalog (optional)`); + } + continue; + } + + // Check health of dependency + if (!opts.skipHealth && depService.spec.health.length > 0) { + for (const h of depService.spec.health) { + const result = await checkHealth(h.url, h.expect); + if (result.ok) { + writeLine(` ${colors.green}pass${RESET} ${dep.service}/${h.name} — ${result.status}`); + } else if (req) { + writeLine(` ${colors.red}fail${RESET} ${dep.service}/${h.name} — ${result.status}`); + allGreen = false; + } else { + writeLine(` ${colors.yellow}warn${RESET} ${dep.service}/${h.name} — ${result.status} (optional)`); + } + } + } else { + writeLine(` ${colors.dim}skip${RESET} ${dep.service} health check (${opts.skipHealth ? 'skipped' : 'no endpoints'})`); + } + } + } + + writeLine(); + + // Check deploy order from graph + if (graph) { + const order = graph.deploy_order; + let servicePhase = -1; + for (let i = 0; i < order.length; i++) { + if (order[i].includes(serviceName)) { + servicePhase = i; + break; + } + } + + if (servicePhase >= 0) { + writeLine(` ${colors.cyan}Deploy Order${RESET}`); + for (let i = 0; i < order.length; i++) { + const marker = i === servicePhase ? `${colors.green}→${RESET}` : ' '; + const phase = order[i].join(', '); + writeLine(` ${marker} Phase ${i + 1}: ${i === servicePhase ? 
bold : colors.dim}${phase}${RESET}`); + } + writeLine(); + } + } + + // Self health check + if (!opts.skipHealth && service.spec.health.length > 0) { + writeLine(` ${colors.cyan}Self Health${RESET}`); + for (const h of service.spec.health) { + const result = await checkHealth(h.url, h.expect); + if (result.ok) { + writeLine(` ${colors.green}pass${RESET} ${h.name} — ${result.status}`); + } else { + writeLine(` ${colors.yellow}warn${RESET} ${h.name} — ${result.status}`); + } + } + writeLine(); + } + + // Summary + if (allGreen) { + writeLine(` ${colors.green}All checks passed — safe to deploy ${serviceName}${RESET}`); + } else { + writeLine(` ${colors.red}Pre-check failed — fix issues before deploying ${serviceName}${RESET}`); + } + writeLine(); + }); +} diff --git a/src/commands/run.ts b/src/commands/run.ts index 6d484486..4e6e9f1f 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -1,28 +1,20 @@ -import ora from 'ora'; -import { spawn, execSync } from 'child_process'; -import { join, dirname } from 'path'; -import { existsSync, readFileSync, writeFileSync, mkdirSync, cpSync, unlinkSync } from 'fs'; +import { join } from 'path'; +import { existsSync } from 'fs'; import { findSquadsDir, loadSquad, listAgents, - loadAgentDefinition, - parseAgentProvider, listSquads, findSimilarSquads, - EffortLevel, - Squad, } from '../lib/squad-parser.js'; -import { resolveMcpConfigPath } from '../lib/mcp-config.js'; import { - buildContextFromSquad, - validateExecution, - formatViolations, - ExecutionRequest -} from '../lib/permissions.js'; -import { findMemoryDir } from '../lib/memory.js'; + type RunOptions, + TOOL_USE_PROVIDERS, +} from '../lib/run-types.js'; +import { + preflightExecutorCheck, +} from '../lib/execution-engine.js'; import { track, Events, flushEvents } from '../lib/telemetry.js'; -import { parseCooldown } from '../lib/cron.js'; import { colors, bold, @@ -31,955 +23,143 @@ import { icons, writeLine, } from '../lib/terminal.js'; -import { - getCLIConfig, 
- isProviderCLIAvailable, -} from '../lib/llm-clis.js'; -import { detectProviderFromModel } from '../lib/providers.js'; -import { loadSession, isLoggedIn } from '../lib/auth.js'; -import { getApiUrl, getBridgeUrl } from '../lib/env-config.js'; +import { runCloudDispatch } from '../lib/cloud-dispatch.js'; import { runConversation, saveTranscript, type ConversationOptions } from '../lib/workflow.js'; import { reportExecutionStart, reportConversationResult, pushCognitionSignal } from '../lib/api-client.js'; -import { getBotGitEnv, getBotPushUrl, getBotGhEnv, getCoAuthorTrailer } from '../lib/github.js'; -import { - type LoopState, - loadLoopState, - saveLoopState, - getSquadRepos, - scoreSquads, - checkCooldown, - classifyRunOutcome, - pushMemorySignals, - slackNotify, - computePhases, - scoreSquadsForPhase, -} from '../lib/squad-loop.js'; -import { - loadCognitionState, - saveCognitionState, - seedBeliefsIfEmpty, - runCognitionCycle, - -} from '../lib/cognition.js'; -import { - type AgentFrontmatter, - type ContextRole, - parseAgentFrontmatter, - extractMcpServersFromDefinition, - loadSystemProtocol, - gatherSquadContext, -} from '../lib/run-context.js'; -import { classifyAgent } from '../lib/conversation.js'; - -// ── Operational constants (no magic numbers) ────────────────────────── -const CLOUD_POLL_INTERVAL_MS = 3000; -const CLOUD_POLL_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes max poll -const DEFAULT_LEARNINGS_LIMIT = 5; -const EXECUTION_EVENT_TIMEOUT_MS = 5000; -const VERIFICATION_STATE_MAX_CHARS = 2000; -const VERIFICATION_EXEC_TIMEOUT_MS = 30000; -const DRYRUN_DEF_MAX_CHARS = 500; -const DRYRUN_CONTEXT_MAX_CHARS = 800; -const DEFAULT_SCHEDULED_COOLDOWN_MS = 6 * 60 * 60 * 1000; // 6 hours -const DEFAULT_TIMEOUT_MINUTES = 30; -const SOFT_DEADLINE_RATIO = 0.7; -const LOG_FILE_INIT_DELAY_MS = 500; -const VERBOSE_COMMAND_MAX_CHARS = 50; - -interface RunOptions { - verbose?: boolean; - dryRun?: boolean; - agent?: string; - timeout?: number; // minutes, default 30 
- execute?: boolean; - parallel?: boolean; // Run all agents in parallel - lead?: boolean; // Run as lead session using Task tool for parallelization - foreground?: boolean; // Run in foreground (deprecated, now default) - background?: boolean; // Run in background (detached process) - watch?: boolean; // Run in background but tail the log - useApi?: boolean; // Use API credits instead of subscription - effort?: EffortLevel; // Effort level: high, medium, low - skills?: string[]; // Skills to load (skill IDs or local paths) - trigger?: 'manual' | 'scheduled' | 'event' | 'smart'; // Trigger source for telemetry - provider?: string; // LLM provider: anthropic, google, openai, mistral, xai, aider, ollama - model?: string; // Model to use (Claude aliases or full model IDs like gemini-2.5-flash) - verify?: boolean; // Post-execution verification (default true, --no-verify to skip) - cloud?: boolean; // Dispatch to cloud worker via API instead of local execution - conversation?: boolean; // Run squad as multi-agent conversation (default for squad runs) - task?: string; // Founder directive — replaces lead briefing in conversation mode - maxTurns?: number; // Max conversation turns (default: 20) - costCeiling?: number; // Cost ceiling in USD (default: 25) - interval?: number | string; // Autopilot: minutes between cycles - maxParallel?: number | string; // Autopilot: max parallel squad loops - budget?: number | string; // Autopilot: daily budget cap ($) - once?: boolean; // Autopilot: run one cycle then exit - phased?: boolean; // Autopilot: use dependency-based phase ordering - eval?: boolean; // Post-run COO evaluation (default: true, --no-eval to skip) -} - -/** - * Execution context for telemetry tagging - * Passed to Claude via environment variables for per-agent cost tracking - */ -interface ExecutionContext { - squad: string; - agent: string; - taskType: 'evaluation' | 'execution' | 'research' | 'lead'; - trigger: 'manual' | 'scheduled' | 'event' | 'smart'; - 
executionId: string; -} - -/** - * Register execution context with the API for telemetry - * This allows the API to tag incoming OTel data with correct squad/agent info - */ -async function registerContextWithBridge(ctx: ExecutionContext): Promise { - const bridgeUrl = getBridgeUrl(); - - try { - const response = await fetch(`${bridgeUrl}/api/context/register`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - execution_id: ctx.executionId, - squad: ctx.squad, - agent: ctx.agent, - task_type: ctx.taskType, - trigger: ctx.trigger, - }), - signal: AbortSignal.timeout(3000), - }); - - if (!response.ok) { - // Non-fatal - continue even if bridge is unavailable - return false; - } - return true; - } catch (e) { - writeLine(` ${colors.dim}warn: bridge registration failed: ${e instanceof Error ? e.message : String(e)}${RESET}`); - return false; - } -} - -/** - * Pre-execution gate check via bridge API. - * Checks quota (monthly spend) and cooldown before running an agent. - * Fails open (allows execution) if bridge is unavailable. - */ -interface PreflightResult { - allowed: boolean; - gates: { - quota?: { ok: boolean; used: number; limit: number; remaining: number; period: string }; - cooldown?: { ok: boolean; elapsed_sec: number | null; min_gap_sec: number }; - }; - error?: string; -} - -async function checkPreflightGates(squad: string, agent: string): Promise { - const bridgeUrl = getBridgeUrl(); - - try { - const response = await fetch(`${bridgeUrl}/api/execution/preflight`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ squad, agent }), - signal: AbortSignal.timeout(3000), - }); - - if (!response.ok) { - // Fail open if bridge returns error - return { allowed: true, gates: {} }; - } - - return await response.json() as PreflightResult; - } catch (e) { - writeLine(` ${colors.dim}warn: preflight gate check failed (allowing execution): ${e instanceof Error ? 
e.message : String(e)}${RESET}`); - return { allowed: true, gates: {} }; - } -} - -/** - * Fetch relevant learnings from bridge for prompt injection. - * Returns empty array if bridge is unavailable. - */ -interface Learning { - content: string; - importance: string; - created_at: string; -} - -async function fetchLearnings(squad: string, limit = DEFAULT_LEARNINGS_LIMIT): Promise { - const bridgeUrl = getBridgeUrl(); - - try { - const response = await fetch( - `${bridgeUrl}/api/learnings/relevant?squad=${encodeURIComponent(squad)}&limit=${limit}`, - { signal: AbortSignal.timeout(3000) } - ); - - if (!response.ok) { - return []; - } - - const data = await response.json() as { learnings: Learning[] }; - return data.learnings || []; - } catch (e) { - writeLine(` ${colors.dim}warn: learnings fetch failed: ${e instanceof Error ? e.message : String(e)}${RESET}`); - return []; - } -} - -// loadApprovalInstructions, loadPostExecution → moved to src/lib/run-context.ts - -// gatherSquadContext → moved to src/lib/run-context.ts - -/** - * Generate a unique execution ID for telemetry tracking - */ -function generateExecutionId(): string { - const timestamp = Date.now().toString(36); - const random = Math.random().toString(36).substring(2, 8); - return `exec_${timestamp}_${random}`; -} - -/** - * Select MCP config based on squad name and context - * Uses three-tier resolution: - * 1. Squad context.mcp from SQUAD.md frontmatter (dynamic) - * 2. User override at ~/.claude/mcp-configs/{squad}.json - * 3. Legacy hardcoded mapping (backward compatibility) - * 4. 
Fallback to ~/.claude.json - */ -function selectMcpConfig(squadName: string, squad?: Squad | null): string { - // Tier 1 & 2: Use new context-based resolution if squad has context.mcp - if (squad?.context?.mcp && squad.context.mcp.length > 0) { - return resolveMcpConfigPath(squadName, squad.context.mcp); - } - - // Tier 3: Legacy hardcoded mapping (for squads without context block) - const home = process.env.HOME || ''; - const configsDir = join(home, '.claude', 'mcp-configs'); - - const squadConfigs: Record = { - website: 'website.json', - research: 'research.json', - intelligence: 'research.json', - analytics: 'data.json', - engineering: 'data.json', - }; - - const configFile = squadConfigs[squadName.toLowerCase()]; - if (configFile) { - const configPath = join(configsDir, configFile); - if (existsSync(configPath)) { - return configPath; - } - } - - // Tier 4: No MCP config — return empty string to skip --mcp-config flag. - // Previously fell back to ~/.claude.json but that's Claude's settings file, - // not an MCP config, and causes claude to exit silently with no output. - return ''; -} - -/** - * Detect task type from agent name patterns - * - *-eval, *-critic, *-review → evaluation - * - *-lead, *-orchestrator → lead - * - *-research, *-analyst → research - * - everything else → execution - */ -function detectTaskType(agentName: string): ExecutionContext['taskType'] { - const name = agentName.toLowerCase(); - if (name.includes('eval') || name.includes('critic') || name.includes('review') || name.includes('test')) { - return 'evaluation'; - } - if (name.includes('lead') || name.includes('orchestrator')) { - return 'lead'; - } - if (name.includes('research') || name.includes('analyst') || name.includes('intel')) { - return 'research'; - } - return 'execution'; -} - -/** Claude Code --model flag aliases */ -type ClaudeModelAlias = 'opus' | 'sonnet' | 'haiku'; - -/** - * Map full model names to Claude Code --model aliases. 
- * Claude Code only accepts: opus, sonnet, haiku (not full model IDs) - */ -function getClaudeModelAlias(model: string): ClaudeModelAlias | undefined { - const lower = model.toLowerCase(); - - // Direct aliases - if (lower === 'opus' || lower === 'sonnet' || lower === 'haiku') { - return lower as ClaudeModelAlias; - } - - // Full model name mapping - if (lower.includes('opus')) return 'opus'; - if (lower.includes('sonnet')) return 'sonnet'; - if (lower.includes('haiku')) return 'haiku'; - - // Unknown Claude model - let Claude Code handle it - return undefined; -} - -/** - * Resolve model based on squad context and task type. - * Priority: explicit --model flag > squad context routing > undefined (provider default) - * - * Supports multi-provider models: - * - Anthropic: claude-opus-4-5, claude-sonnet-4, claude-3-5-haiku, opus, sonnet, haiku - * - Google: gemini-2.5-flash, gemini-2.5-pro, gemini-2.0-flash - * - Others: model names passed through to provider CLI - * - * Routing logic: - * - evaluation (critics, tests) → cheap model - simple validation - * - research (analysts, intel) → default model - balanced - * - execution (builders, fixers) → default model - balanced - * - lead (orchestrators) → expensive model - complex coordination - */ -function resolveModel( - explicitModel: string | undefined, - squad: Squad | null, - taskType: ExecutionContext['taskType'] -): string | undefined { - // Explicit --model flag always wins - if (explicitModel) { - return explicitModel; - } - - // No squad context = let provider decide - const modelConfig = squad?.context?.model; - if (!modelConfig) { - return undefined; - } - - // Route by task type - switch (taskType) { - case 'evaluation': - // Critics/evals are simple - use cheap model - return modelConfig.cheap || modelConfig.default; - case 'lead': - // Leads need complex reasoning - use expensive model - return modelConfig.expensive || modelConfig.default; - case 'research': - case 'execution': - default: - // Default 
for most tasks - return modelConfig.default; - } -} - -/** - * Ensure the project directory is trusted in Claude's config. - * This prevents the workspace trust dialog from blocking autonomous execution. - */ -function ensureProjectTrusted(projectPath: string): void { - const configPath = join(process.env.HOME || '', '.claude.json'); - - if (!existsSync(configPath)) { - // No Claude config yet - will be created on first interactive run - return; - } - - try { - const config = JSON.parse(readFileSync(configPath, 'utf-8')); - - if (!config.projects) { - config.projects = {}; - } - - if (!config.projects[projectPath]) { - config.projects[projectPath] = {}; - } - - // Mark as trusted for autonomous execution - if (!config.projects[projectPath].hasTrustDialogAccepted) { - config.projects[projectPath].hasTrustDialogAccepted = true; - writeFileSync(configPath, JSON.stringify(config, null, 2)); - } - } catch (e) { - // Don't fail execution if we can't update config — the trust dialog will just appear - writeLine(` ${colors.dim}warn: config update failed: ${e instanceof Error ? 
e.message : String(e)}${RESET}`); - } -} +import { runAgent } from '../lib/agent-runner.js'; +import { runPostEvaluation, runAutopilot, runLeadMode, runSequentialMode } from '../lib/run-modes.js'; -/** - * Get the project root directory (where .agents/ lives) - */ -function getProjectRoot(): string { +export async function runCommand( + target: string | null, + options: RunOptions +): Promise { const squadsDir = findSquadsDir(); - if (squadsDir) { - // .agents/squads -> .agents -> project root - return dirname(dirname(squadsDir)); - } - return process.cwd(); -} - -interface ExecutionRecord { - squadName: string; - agentName: string; - executionId: string; - startTime: string; - endTime?: string; - durationMs?: number; - status: 'running' | 'completed' | 'failed'; - trigger?: 'manual' | 'scheduled' | 'event' | 'smart'; - taskType?: 'evaluation' | 'execution' | 'research' | 'lead'; - outcome?: string; - error?: string; -} - -function getExecutionLogPath(squadName: string, agentName: string): string | null { - const memoryDir = findMemoryDir(); - if (!memoryDir) return null; - return join(memoryDir, squadName, agentName, 'executions.md'); -} - -function logExecution(record: ExecutionRecord): void { - const logPath = getExecutionLogPath(record.squadName, record.agentName); - if (!logPath) return; - - const dir = dirname(logPath); - if (!existsSync(dir)) { - mkdirSync(dir, { recursive: true }); - } - - let content = ''; - if (existsSync(logPath)) { - content = readFileSync(logPath, 'utf-8').trimEnd(); - } else { - content = `# ${record.squadName}/${record.agentName} - Execution Log`; - } - - // Structured entry format for parsing - const entry = ` - ---- - -**${record.startTime}** | Status: ${record.status} -- ID: \`${record.executionId}\` -- Trigger: ${record.trigger || 'manual'} -- Task Type: ${record.taskType || 'execution'} -`; - - writeFileSync(logPath, content + entry); -} - -function updateExecutionStatus( - squadName: string, - agentName: string, - executionId: 
string, - status: 'completed' | 'failed', - details?: { - outcome?: string; - error?: string; - durationMs?: number; - } -): void { - const logPath = getExecutionLogPath(squadName, agentName); - if (!logPath || !existsSync(logPath)) return; - - let content = readFileSync(logPath, 'utf-8'); - const endTime = new Date().toISOString(); - - // Find and update the specific execution by ID - const execMarker = ``; - const markerIndex = content.indexOf(execMarker); - - if (markerIndex === -1) return; - - // Find the next entry marker or end of file - const nextEntryIndex = content.indexOf('\n---\n', markerIndex + 1); - const entryEnd = nextEntryIndex === -1 ? content.length : nextEntryIndex; - - // Extract and update the entry - const entryStart = content.lastIndexOf('\n---\n', markerIndex); - const currentEntry = content.slice(entryStart, entryEnd); - - // Build completion details - const durationStr = details?.durationMs - ? `${(details.durationMs / 1000).toFixed(1)}s` - : 'unknown'; - - let updatedEntry = currentEntry - .replace(/Status: running/, `Status: ${status}`) - + `- Completed: ${endTime} -- Duration: ${durationStr}`; - - if (details?.outcome) { - updatedEntry += `\n- Outcome: ${details.outcome}`; - } - if (details?.error) { - updatedEntry += `\n- Error: ${details.error}`; - } - - // Replace the entry in content - content = content.slice(0, entryStart) + updatedEntry + content.slice(entryEnd); - writeFileSync(logPath, content); -} - -/** - * Auto-commit agent work after execution completes. - * Commits as the Agents Squads bot (if configured), pushes with bot token. - * Falls back to user's git identity if bot not configured. 
- */ -async function autoCommitAgentWork( - squadName: string, - agentName: string, - executionId: string, - provider?: string, -): Promise<{ committed: boolean; message?: string; error?: string }> { - const { execSync } = await import('child_process'); - const { detectGitHubRepo } = await import('../lib/github.js'); - const projectRoot = getProjectRoot(); - - try { - // Check for uncommitted changes - const status = execSync('git status --porcelain', { - encoding: 'utf-8', - cwd: projectRoot, - }).trim(); - - if (!status) { - return { committed: false }; - } - - // Get bot identity for commits - const botEnv = await getBotGitEnv(); - const execOpts = { - cwd: projectRoot, - env: { ...process.env, ...botEnv }, - }; - - // Stage all changes (agent work should be committed) - execSync('git add -A', execOpts); - - // Build commit message with provider-specific co-author - // Write to temp file to avoid shell injection via squad/agent names - const shortExecId = executionId.slice(0, 12); - const coAuthor = getCoAuthorTrailer(provider || 'claude'); - const msgFile = join(projectRoot, '.git', 'SQUADS_COMMIT_MSG'); - writeFileSync(msgFile, `feat(${squadName}/${agentName}): execution ${shortExecId}\n\n${coAuthor}\n`); - - // Commit using --file to avoid shell interpolation - try { - execSync(`git commit --file "${msgFile}"`, execOpts); - } finally { - try { unlinkSync(msgFile); } catch { /* ignore */ } - } - - // Push to origin using bot token - try { - const { spawnSync } = await import('child_process'); - const repo = detectGitHubRepo(projectRoot); - // Validate repo format (org/name) to prevent injection - if (repo && /^[\w.-]+\/[\w.-]+$/.test(repo)) { - const pushUrl = await getBotPushUrl(repo); - if (pushUrl) { - // Use spawnSync with args array to avoid shell injection - spawnSync('git', ['push', pushUrl, 'HEAD'], { ...execOpts, stdio: 'pipe' }); - } else { - spawnSync('git', ['push', 'origin', 'HEAD'], { ...execOpts, stdio: 'pipe' }); - } - } else { - 
spawnSync('git', ['push', 'origin', 'HEAD'], { ...execOpts, stdio: 'pipe' }); - } - } catch (e) { - writeLine(` ${colors.dim}warn: git push failed (commit is still local): ${e instanceof Error ? e.message : String(e)}${RESET}`); - } - return { committed: true, message: `Committed changes from ${agentName}` }; - } catch (error) { - return { committed: false, error: String(error) }; + if (!squadsDir) { + writeLine(` ${colors.red}No .agents/squads directory found${RESET}`); + writeLine(` ${colors.dim}Run \`squads init\` to create one.${RESET}`); + process.exit(1); } -} -/** - * Get the timestamp of the last execution from executions.md - */ -function getLastExecutionTime(squadName: string, agentName: string): Date | null { - const logPath = getExecutionLogPath(squadName, agentName); - if (!logPath || !existsSync(logPath)) return null; - - const content = readFileSync(logPath, 'utf-8'); - - // Find all timestamps in the format **2026-01-21T14:00:02.358Z** - const timestamps = content.match(/\*\*(\d{4}-\d{2}-\d{2}T[\d:.]+Z)\*\*/g); - if (!timestamps || timestamps.length === 0) return null; - - // Get the last (most recent) timestamp - const lastTimestamp = timestamps[timestamps.length - 1].replace(/\*\*/g, ''); - return new Date(lastTimestamp); -} - -/** - * Local cooldown check - works without bridge - * Returns { ok: true } if allowed, { ok: false, ... 
} if blocked - */ -function checkLocalCooldown( - squadName: string, - agentName: string, - cooldownMs: number -): { ok: boolean; elapsedMs?: number; cooldownMs: number } { - const lastExec = getLastExecutionTime(squadName, agentName); - if (!lastExec) return { ok: true, cooldownMs }; - - const elapsedMs = Date.now() - lastExec.getTime(); - if (elapsedMs < cooldownMs) { - return { ok: false, elapsedMs, cooldownMs }; + // Execution is now the default behavior (no --execute flag needed) + // --dry-run disables execution + if (!options.dryRun && options.execute === undefined) { + options.execute = true; } - return { ok: true, elapsedMs, cooldownMs }; -} - -/** - * Format milliseconds as human-readable duration - */ -function formatDuration(ms: number): string { - const hours = Math.floor(ms / (60 * 60 * 1000)); - const minutes = Math.floor((ms % (60 * 60 * 1000)) / (60 * 1000)); - - if (hours >= 24) { - const days = Math.floor(hours / 24); - const remainingHours = hours % 24; - return remainingHours > 0 ? `${days}d ${remainingHours}h` : `${days}d`; - } - if (hours > 0) { - return minutes > 0 ? `${hours}h ${minutes}m` : `${hours}h`; - } - return `${minutes}m`; -} + // MODE 0: Org cycle — run all squads as a coordinated system + if (target === '--org' || options.org) { + const { scanOrg, planOrgCycle, displayOrgScan, displayPlan } = await import('../lib/org-cycle.js'); -// extractMcpServersFromDefinition, AgentFrontmatter, parseAgentFrontmatter → moved to src/lib/run-context.ts + writeLine(); + writeLine(` ${gradient('squads')} ${colors.dim}org cycle${RESET}`); + writeLine(); -/** - * Emit an execution event to the API for tracking and routing. - * Non-blocking and fail-safe — falls back to file if API unavailable. 
- */ -async function emitExecutionEvent( - eventType: 'agent.completed' | 'agent.failed', - data: { squad: string; agent: string; executionId: string; error?: string } -): Promise { - const apiUrl = getApiUrl(); + // Step 1: SCAN + const scan = scanOrg(); + displayOrgScan(scan); - if (apiUrl) { - try { - await fetch(`${apiUrl}/events/ingest`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - source: 'scheduler', - event_type: eventType, - data: { - squad: data.squad, - agent: data.agent, - execution_id: data.executionId, - ...(data.error ? { error: data.error } : {}), - }, - }), - signal: AbortSignal.timeout(EXECUTION_EVENT_TIMEOUT_MS), - }); + // Step 2: PLAN + const plan = planOrgCycle(scan); + if (plan.length === 0) { + writeLine(` ${colors.dim}No squads to run.${RESET}\n`); return; - } catch { - // API unavailable — fall through to file-based event recording - } - } - - // Fallback: write event to memory file - try { - const memDir = findMemoryDir(); - if (!memDir) return; - - const eventsDir = join(memDir, data.squad, data.agent); - if (!existsSync(eventsDir)) { - mkdirSync(eventsDir, { recursive: true }); - } - - const eventsPath = join(eventsDir, 'events.md'); - const timestamp = new Date().toISOString(); - const entry = `\n## ${timestamp}: ${eventType}\n- execution_id: ${data.executionId}\n${data.error ? `- error: ${data.error}\n` : ''}`; - - let existing = ''; - if (existsSync(eventsPath)) { - existing = readFileSync(eventsPath, 'utf-8'); } - writeFileSync(eventsPath, existing + entry); - } catch { - // Truly fail-safe — never block execution - } -} - -/** - * Verify execution against acceptance criteria using a lightweight model. - * Returns pass/fail with reason. Used by the Ralph verification loop. 
- */ -async function verifyExecution( - squadName: string, - agentName: string, - criteria: string, - options: { verbose?: boolean } = {} -): Promise<{ passed: boolean; reason: string }> { - const { execSync } = await import('child_process'); - const projectRoot = getProjectRoot(); + displayPlan(plan); - // Gather evidence: state file + recent commits - let stateContent = ''; - const memDir = findMemoryDir(); - if (memDir) { - const statePath = join(memDir, squadName, agentName, 'state.md'); - if (existsSync(statePath)) { - stateContent = readFileSync(statePath, 'utf-8').slice(0, VERIFICATION_STATE_MAX_CHARS); + if (options.dryRun) { + writeLine(` ${colors.dim}[dry-run] Would run ${plan.length} squad leads in order above.${RESET}\n`); + return; } - } - - let recentCommits = ''; - try { - recentCommits = execSync('git log --oneline -5 --no-color', { - encoding: 'utf-8', - cwd: projectRoot, - }).trim(); - } catch (e) { - if (options.verbose) writeLine(` ${colors.dim}warn: git log failed: ${e instanceof Error ? e.message : String(e)}${RESET}`); - recentCommits = '(no commits found)'; - } - - const verifyPrompt = `You are verifying whether an agent completed its task successfully. - -Agent: ${squadName}/${agentName} - -## Acceptance Criteria -${criteria} -## Evidence - -### Agent State File -${stateContent || '(empty or not found)'} - -### Recent Git Commits -${recentCommits} - -## Instructions -Evaluate whether the acceptance criteria are met based on the evidence. 
-Respond with EXACTLY one line: -PASS: -or -FAIL: `; - - try { - const escapedPrompt = verifyPrompt.replace(/'/g, "'\\''"); - const result = execSync( - `unset CLAUDECODE; claude --print --model haiku -- '${escapedPrompt}'`, - { encoding: 'utf-8', cwd: projectRoot, timeout: VERIFICATION_EXEC_TIMEOUT_MS, shell: '/bin/sh' } - ).trim(); - - if (options.verbose) { - writeLine(` ${colors.dim}Verification: ${result}${RESET}`); - } + // Step 3: EXECUTE — run each lead sequentially + const cycleStart = Date.now(); + const results: Array<{ squad: string; agent: string; status: string; durationMs: number }> = []; - if (result.startsWith('PASS')) { - return { passed: true, reason: result.replace(/^PASS:\s*/, '') }; - } - return { passed: false, reason: result.replace(/^FAIL:\s*/, '') }; - } catch (error) { - if (options.verbose) { - writeLine(` ${colors.dim}Verification error (defaulting to PASS): ${error}${RESET}`); + // Snapshot all goals before execution + const { snapshotGoals, diffGoals } = await import('../lib/observability.js'); + const allGoalsBefore: Record> = {}; + for (const s of plan) { + allGoalsBefore[s.squad] = snapshotGoals(s.squad); } - return { passed: true, reason: 'Verification unavailable — defaulting to pass' }; - } -} - -// ── Cloud Dispatch ───────────────────────────────────────────────────── - -/** - * Dispatch agent execution to cloud worker via API. - * Posts to /agent-dispatch, then polls /agent-executions for status. 
- */ -async function runCloudDispatch( - squadName: string, - agentName: string, - options: RunOptions -): Promise { - const apiUrl = getApiUrl(); - - if (!apiUrl) { - writeLine(` ${colors.red}${icons.error} API URL not configured${RESET}`); - writeLine(` ${colors.dim}Run: squads config use staging (or set SQUADS_API_URL)${RESET}`); - process.exit(1); - } - - // Require auth session - if (!isLoggedIn()) { - writeLine(` ${colors.red}${icons.error} Not logged in${RESET}`); - writeLine(` ${colors.dim}Run \`squads login\` to authenticate before using --cloud${RESET}`); - process.exit(1); - } - - const session = loadSession(); - const headers: Record = { - 'Content-Type': 'application/json', - }; - - // Use access token if available, otherwise use API key - if (session?.accessToken) { - headers['Authorization'] = `Bearer ${session.accessToken}`; - } - const apiKey = process.env.SQUADS_PLATFORM_API_TOKEN || process.env.SCHEDULER_API_KEY; - if (apiKey) { - headers['X-API-Key'] = apiKey; - } - - const spinner = ora(`Dispatching ${squadName}/${agentName} to cloud...`).start(); + for (const s of plan) { + if (!s.lead) continue; + const leadPath = join(squadsDir, s.squad, `${s.lead}.md`); + if (!existsSync(leadPath)) continue; - try { - // 1. Create dispatch request - const dispatchRes = await fetch(`${apiUrl}/agent-dispatch`, { - method: 'POST', - headers, - body: JSON.stringify({ - squad: squadName, - agent: agentName, - trigger_type: 'manual', - trigger_data: { - source: 'cli', - cloud: true, - model: options.model, - provider: options.provider, - effort: options.effort, - }, - }), - }); + writeLine(` ${colors.cyan}Running ${s.squad}/${s.lead}...${RESET}`); + const runStart = Date.now(); + try { + await runAgent(s.lead, leadPath, s.squad, { ...options, execute: true }); + results.push({ squad: s.squad, agent: s.lead, status: 'completed', durationMs: Date.now() - runStart }); + } catch (e) { + const errMsg = e instanceof Error ? 
e.message : String(e); + results.push({ squad: s.squad, agent: s.lead, status: 'failed', durationMs: Date.now() - runStart }); + + // Detect quota limit — if agent fails in <10s, likely quota/rate limit + const failDuration = Date.now() - runStart; + const isQuotaLikely = failDuration < 10000 && errMsg.includes('code 1'); + const isExplicitQuota = errMsg.includes('hit your limit') || errMsg.includes('rate limit') || errMsg.includes('quota'); + + if (isExplicitQuota || isQuotaLikely) { + // Check if previous squad also failed fast — confirms it's quota, not a bug + const prevFailed = results.length >= 2 && + results[results.length - 2]?.status === 'failed' && + (results[results.length - 2]?.durationMs || 0) < 10000; + + if (isExplicitQuota || prevFailed) { + writeLine(` ${colors.red}Quota limit reached — stopping org cycle.${RESET}`); + writeLine(` ${colors.dim}Completed ${results.filter(r => r.status === 'completed').length} squads before hitting limit.${RESET}`); + writeLine(` ${colors.dim}Resume with 'squads run --org' when quota resets.${RESET}`); + break; + } + } - if (!dispatchRes.ok) { - const error = await dispatchRes.text(); - spinner.fail(`Dispatch failed: ${dispatchRes.status}`); - writeLine(` ${colors.dim}${error}${RESET}`); - process.exit(1); + writeLine(` ${colors.red}${s.squad}/${s.lead} failed: ${errMsg}${RESET}`); + } } - const dispatch = await dispatchRes.json() as { dispatch_id: number; status: string }; - spinner.succeed(`Dispatched to cloud`); + // Step 4: REPORT — compare goals before and after + const totalMs = Date.now() - cycleStart; + const completed = results.filter(r => r.status === 'completed').length; + const failed = results.filter(r => r.status === 'failed').length; writeLine(); - writeLine(` ${colors.cyan}Dispatch ID${RESET} ${dispatch.dispatch_id}`); - writeLine(` ${colors.cyan}Squad${RESET} ${squadName}`); - writeLine(` ${colors.cyan}Agent${RESET} ${agentName}`); + writeLine(` ${bold}Org Cycle Complete${RESET}`); + writeLine(` 
Duration: ${Math.round(totalMs / 60000)}m | Squads: ${completed} completed, ${failed} failed | Frozen: ${scan.filter(s => s.status === 'frozen').length} skipped`); writeLine(); - // 2. Poll for execution status - const pollSpinner = ora('Waiting for execution to start...').start(); - const pollStart = Date.now(); - let executionId: string | null = null; - let lastStatus = ''; - - while (Date.now() - pollStart < CLOUD_POLL_TIMEOUT_MS) { - try { - const execRes = await fetch( - `${apiUrl}/agent-executions?squad=${encodeURIComponent(squadName)}&agent=${encodeURIComponent(agentName)}&limit=1`, - { headers }, - ); - - if (execRes.ok) { - const executions = await execRes.json() as Array<{ - execution_id: string; - status: string; - summary?: string; - error?: string; - duration_seconds?: number; - cost_usd?: number; - }>; - - if (executions.length > 0) { - const exec = executions[0]; - - // Only track executions started after our dispatch - if (!executionId && exec.status === 'running') { - executionId = exec.execution_id; - pollSpinner.text = `Running (${exec.execution_id})`; - } - - if (executionId && exec.execution_id === executionId) { - if (exec.status !== lastStatus) { - lastStatus = exec.status; - pollSpinner.text = `Status: ${exec.status}`; - } - - if (exec.status === 'completed') { - pollSpinner.succeed('Execution completed'); - writeLine(); - writeLine(` ${colors.cyan}Execution${RESET} ${exec.execution_id}`); - if (exec.summary) { - writeLine(` ${colors.cyan}Summary${RESET} ${exec.summary}`); - } - if (exec.duration_seconds) { - writeLine(` ${colors.cyan}Duration${RESET} ${Math.round(exec.duration_seconds)}s`); - } - if (exec.cost_usd) { - writeLine(` ${colors.cyan}Cost${RESET} $${exec.cost_usd.toFixed(4)}`); - } - writeLine(); - return; - } - - if (exec.status === 'failed') { - pollSpinner.fail('Execution failed'); - writeLine(); - if (exec.error) { - writeLine(` ${colors.red}Error: ${exec.error}${RESET}`); - } - writeLine(); - process.exit(1); - } - - if 
(exec.status === 'cancelled') { - pollSpinner.warn('Execution cancelled'); - return; - } - } - } + for (const r of results) { + const icon = r.status === 'completed' ? `${colors.green}pass${RESET}` : `${colors.red}fail${RESET}`; + writeLine(` ${icon} ${r.squad}/${r.agent} ${colors.dim}${Math.round(r.durationMs / 1000)}s${RESET}`); + } + + // Goal changes summary + let totalGoalChanges = 0; + const goalSummary: string[] = []; + for (const s of plan) { + const after = snapshotGoals(s.squad); + const changes = diffGoals(allGoalsBefore[s.squad] || {}, after); + if (changes.length > 0) { + totalGoalChanges += changes.length; + for (const c of changes) { + goalSummary.push(` ${colors.green}${s.squad}${RESET}: ${c.name} ${colors.dim}${c.before} → ${c.after}${RESET}`); } - } catch (e) { - if (options.verbose) writeLine(` ${colors.dim}warn: cloud poll failed (retrying): ${e instanceof Error ? e.message : String(e)}${RESET}`); } - - await new Promise(resolve => setTimeout(resolve, CLOUD_POLL_INTERVAL_MS)); } - pollSpinner.warn('Poll timeout — execution may still be running'); - writeLine(` ${colors.dim}Check status: squads trigger status${RESET}`); - if (executionId) { - writeLine(` ${colors.dim}Execution ID: ${executionId}${RESET}`); + if (goalSummary.length > 0) { + writeLine(); + writeLine(` ${bold}Goal Changes${RESET} (${totalGoalChanges})`); + for (const line of goalSummary) writeLine(line); + } else { + writeLine(); + writeLine(` ${colors.dim}No goal changes this cycle.${RESET}`); } - } catch (error) { - spinner.fail('Cloud dispatch failed'); - writeLine(` ${colors.red}${error instanceof Error ? 
error.message : String(error)}${RESET}`); writeLine(); - writeLine(` ${colors.dim}Check your network and SQUADS_API_URL setting${RESET}`); - process.exit(1); - } -} - -export async function runCommand( - target: string | null, - options: RunOptions -): Promise { - const squadsDir = findSquadsDir(); - - if (!squadsDir) { - writeLine(` ${colors.red}No .agents/squads directory found${RESET}`); - writeLine(` ${colors.dim}Run \`squads init\` to create one.${RESET}`); - process.exit(1); - } - // Execution is now the default behavior (no --execute flag needed) - // --dry-run disables execution - if (!options.dryRun && options.execute === undefined) { - options.execute = true; + return; } // MODE 1: Autopilot — no target means run all squads continuously @@ -1167,9 +347,15 @@ async function runSquad( return; } } else { - // Default: Run squad as multi-agent conversation - // Lead briefs → scanners discover → workers execute → lead reviews → converge - if (options.execute) { + // Determine provider for mode selection + const squadProvider = options.provider || squad?.providers?.default || 'anthropic'; + + if (options.execute && !TOOL_USE_PROVIDERS.has(squadProvider)) { + // Sequential mode for providers without tool use (Ollama, Codex, etc.) + await runSequentialMode(squad, squadsDir, squadProvider, options); + } else if (options.execute) { + // Default: Run squad as multi-agent conversation + // Lead briefs → scanners discover → workers execute → lead reviews → converge writeLine(` ${bold}Conversation mode${RESET} ${colors.dim}(lead → scan → work → review → verify)${RESET}`); writeLine(); @@ -1226,13 +412,17 @@ async function runSquad( writeLine(); } else { // Dry-run: show what would happen - writeLine(` ${colors.dim}Default mode: conversation (lead → scan → work → review → verify)${RESET}`); + const squadProvider2 = options.provider || squad?.providers?.default || 'anthropic'; + const modeLabel = TOOL_USE_PROVIDERS.has(squadProvider2) + ? 
'conversation (lead → scan → work → review → verify)' + : `sequential (${squadProvider2} — agents run one at a time)`; + writeLine(` ${colors.dim}Default mode: ${modeLabel}${RESET}`); writeLine(); for (const agent of squad.agents) { writeLine(` ${icons.empty} ${colors.cyan}${agent.name}${RESET} ${colors.dim}${agent.role}${RESET}`); } writeLine(); - writeLine(` ${colors.dim}Run conversation:${RESET}`); + writeLine(` ${colors.dim}Run:${RESET}`); writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squad.name}${RESET}`); writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squad.name}${RESET} --task "review and merge open PRs"`); writeLine(); @@ -1248,1680 +438,6 @@ async function runSquad( writeLine(); } -// ── Post-run evaluation ───────────────────────────────────────────── -// After any squad run, dispatch the COO (company-lead) to evaluate outputs. -// This is the feedback loop that makes the system learn. - -const EVAL_TIMEOUT_MINUTES = 15; - -/** - * Run the COO evaluation after squad execution. - * Dispatches company-lead with a scoped evaluation task for the squads that just ran. - * Generates feedback.md and active-work.md per squad. 
- */ -async function runPostEvaluation( - squadsRun: string[], - options: RunOptions, -): Promise { - // Skip if running company squad itself (prevent recursion) - if (squadsRun.length === 1 && squadsRun[0] === 'company') return; - // Skip if evaluation disabled - if (options.eval === false) return; - // Skip dry-run - if (options.dryRun) return; - // Skip background runs — evaluation needs foreground context - if (options.background) return; - - const squadsDir = findSquadsDir(); - if (!squadsDir) return; - - // Find company-lead agent - const cooPath = join(squadsDir, 'company', 'company-lead.md'); - if (!existsSync(cooPath)) { - if (options.verbose) { - writeLine(` ${colors.dim}Skipping evaluation: company-lead.md not found${RESET}`); - } - return; - } - - const squadList = squadsRun.join(', '); - writeLine(); - writeLine(` ${gradient('eval')} ${colors.dim}COO evaluating: ${squadList}${RESET}`); - - const evalTask = `Post-run evaluation for: ${squadList}. - -## Evaluation Process - -For each squad (${squadList}): - -### 1. Read previous feedback FIRST -Read \`.agents/memory/{squad}/feedback.md\` if it exists. Note the previous grade, identified patterns, and priorities. This is your baseline — you are measuring CHANGE, not just current state. - -### 2. Gather current evidence -- PRs (last 7 days): \`gh pr list --state all --limit 20 --json number,title,state,mergedAt,createdAt\` -- Recent commits (last 7 days): \`gh api repos/{owner}/{repo}/commits?since=YYYY-MM-DDT00:00:00Z&per_page=20 --jq '.[].commit.message'\` -- Open issues: \`gh issue list --state open --limit 15 --json number,title,labels\` -- Read \`.agents/memory/{squad}/priorities.md\` and \`.agents/memory/company/directives.md\` -- Read \`.agents/memory/{squad}/active-work.md\` (previous cycle's work tracking) - -### 3. 
Write feedback.md (APPEND history, don't overwrite) -\`\`\`markdown -# Feedback — {squad} - -## Current Assessment (YYYY-MM-DD): [A-F] -Merge rate: X% | Noise ratio: Y% | Priority alignment: Z% - -## Trajectory: [improving | stable | declining | new] -Previous grade: [grade] → Current: [grade]. [1-line explanation of why] - -## Valuable (continue) -- [specific PR/issue that advanced priorities] - -## Noise (stop) -- [specific anti-pattern observed] - -## Next Cycle Priorities -1. [specific actionable item] - -## History -| Date | Grade | Key Signal | -|------|-------|------------| -| YYYY-MM-DD | X | [what drove this grade] | -[keep last 10 entries, append new row] -\`\`\` - -### 4. Write active-work.md -\`\`\`markdown -# Active Work — {squad} (YYYY-MM-DD) -## Continue (open PRs) -- #{number}: {title} — {status/next action} -## Backlog (assigned issues) -- #{number}: {title} — {priority} -## Do NOT Create -- {description of known duplicate patterns from feedback history} -\`\`\` - -### 5. Commit to hq main -${squadsRun.length > 1 ? ` -### 6. Cross-squad assessment -Evaluate how outputs from ${squadList} connect: -- Duplicated efforts across squads? -- Missing handoffs (one squad's output should feed another)? -- Coordination gaps (conflicting PRs, redundant issues)? -- Combined trajectory: is the org getting more effective or more noisy? -Write cross-squad findings to \`.agents/memory/company/cross-squad-review.md\`. -` : ''} -CRITICAL: You are measuring DIRECTION not just position. A C-grade squad improving from F is better than a B-grade squad declining from A. 
The history table IS the feedback loop — agents read it next cycle.`; - - await runAgent('company-lead', cooPath, 'company', { - ...options, - task: evalTask, - timeout: EVAL_TIMEOUT_MINUTES, - eval: false, // prevent recursion - trigger: 'manual', - }); -} - -// ── Autopilot mode ────────────────────────────────────────────────── -// When `squads run` is called with no target, it becomes the daemon: -// score all squads, dispatch the full loop (scanner→lead→worker→verifier) -// for top-priority squads, push cognition signals, repeat. - -// Default cooldowns per agent role (ms) -const ROLE_COOLDOWNS: Record = { - scanner: 60 * 60 * 1000, // 1h — fast, cheap - lead: 4 * 60 * 60 * 1000, // 4h — orchestration - worker: 30 * 60 * 1000, // 30m — if work exists - verifier: 30 * 60 * 1000, // 30m — follows workers - 'issue-solver': 30 * 60 * 1000, // 30m — default worker -}; - -/** - * Classify an agent's role from its name. - * Uses classifyAgent from conversation.ts, falls back to 'worker'. - */ -function classifyAgentRole(name: string): string { - return classifyAgent(name) ?? 'worker'; -} - -/** - * Autopilot: continuous loop that scores squads and dispatches full squad loops. - * Replaces the daemon command — same state file, same scoring, but dispatches - * the full agent roster instead of just issue-solver. 
- */ -async function runAutopilot( - squadsDir: string, - options: RunOptions, -): Promise { - const interval = parseInt(String(options.interval || '30'), 10); - const maxParallel = parseInt(String(options.maxParallel || '2'), 10); - const budget = parseFloat(String(options.budget || '0')); - const once = !!options.once; - - // Seed cognition beliefs on first run - const cognitionState = loadCognitionState(); - seedBeliefsIfEmpty(cognitionState); - saveCognitionState(cognitionState); - - writeLine(); - writeLine(` ${gradient('squads')} ${colors.dim}autopilot${RESET}`); - writeLine(` ${colors.dim}Interval: ${interval}m | Parallel: ${maxParallel} | Budget: ${budget > 0 ? '$' + budget + '/day' : 'unlimited'}${RESET}`); - writeLine(` ${colors.dim}Cognition: ${cognitionState.beliefs.length} beliefs, ${cognitionState.signals.length} signals${RESET}`); - writeLine(); - - let running = true; - const handleSignal = () => { running = false; }; - process.on('SIGINT', handleSignal); - process.on('SIGTERM', handleSignal); - - while (running) { - const cycleStart = Date.now(); - const state = loadLoopState(); - - // Reset daily cost at midnight - const today = new Date().toISOString().slice(0, 10); - if (state.dailyCostDate !== today) { - state.dailyCost = 0; - state.dailyCostDate = today; - } - - // Budget check - if (budget > 0 && state.dailyCost >= budget) { - writeLine(` ${icons.warning} ${colors.yellow}Daily budget reached ($${state.dailyCost.toFixed(2)}/$${budget})${RESET}`); - saveLoopState(state); - if (once) break; - await sleep(interval * 60 * 1000); - continue; - } - - writeLine(` ${colors.dim}── Cycle ${new Date().toLocaleTimeString()} ──${RESET}`); - - // Get bot env for GitHub API calls - let ghEnv: Record = {}; - try { ghEnv = await getBotGhEnv(); } catch { /* use default */ } - - // Score squads - const squadRepos = getSquadRepos(); - - let dispatchedSquadNames: string[]; - const failed: string[] = []; - const completed: string[] = []; - - if (options.phased) { - 
// ── Phased dispatch: execute squads in dependency order ── - const phases = computePhases(); - const phaseCount = phases.size; - writeLine(` ${colors.dim}Phased mode: ${phaseCount} phase(s)${RESET}`); - - dispatchedSquadNames = []; - - for (const [phaseNum, phaseSquads] of phases) { - writeLine(` ${colors.dim}── Phase ${phaseNum} (${phaseSquads.join(', ')}) ──${RESET}`); - - // Score only squads in this phase - const phaseSignals = scoreSquadsForPhase(phaseSquads, state, squadRepos, ghEnv); - const phaseDispatch = phaseSignals - .filter(s => s.score > 0) - .slice(0, maxParallel); - - if (phaseDispatch.length === 0) { - writeLine(` ${colors.dim}No squads need attention in this phase${RESET}`); - continue; - } - - for (const sig of phaseDispatch) { - writeLine(` ${colors.cyan}${sig.squad}${RESET} (score: ${sig.score}) — ${sig.reason}`); - } - - if (options.dryRun) { - continue; - } - - // Dispatch phase squads in parallel, wait for all before next phase - const phaseResults = await Promise.allSettled( - phaseDispatch.map(sig => { - const squad = loadSquad(sig.squad); - if (!squad) return Promise.resolve(); - return runSquadLoop(squad, squadsDir, state, ghEnv, options); - }) - ); - - for (let i = 0; i < phaseResults.length; i++) { - const name = phaseDispatch[i].squad; - dispatchedSquadNames.push(name); - if (phaseResults[i].status === 'rejected') { - failed.push(name); - state.failCounts[name] = (state.failCounts[name] || 0) + 1; - } else { - completed.push(name); - delete state.failCounts[name]; - } - } - } - - if (options.dryRun) { - writeLine(` ${colors.yellow}[DRY RUN] Would dispatch above squads in phase order${RESET}`); - saveLoopState(state); - if (once) break; - await sleep(interval * 60 * 1000); - continue; - } - } else { - // ── Flat dispatch: score-based, no phase ordering ── - const signals = scoreSquads(state, squadRepos, ghEnv); - - if (signals.length === 0 || signals.every(s => s.score <= 0)) { - writeLine(` ${colors.dim}No squads need 
attention${RESET}`); - saveLoopState(state); - if (once) break; - await sleep(interval * 60 * 1000); - continue; - } - - // Pick top N squads to dispatch - const toDispatch = signals - .filter(s => s.score > 0) - .slice(0, maxParallel); - - writeLine(` ${colors.dim}Dispatching ${toDispatch.length} squad(s):${RESET}`); - for (const sig of toDispatch) { - writeLine(` ${colors.cyan}${sig.squad}${RESET} (score: ${sig.score}) — ${sig.reason}`); - } - - if (options.dryRun) { - writeLine(` ${colors.yellow}[DRY RUN] Would dispatch above squads${RESET}`); - saveLoopState(state); - if (once) break; - await sleep(interval * 60 * 1000); - continue; - } - - // Dispatch squad loops in parallel - const results = await Promise.allSettled( - toDispatch.map(sig => { - const squad = loadSquad(sig.squad); - if (!squad) return Promise.resolve(); - return runSquadLoop(squad, squadsDir, state, ghEnv, options); - }) - ); - - for (let i = 0; i < results.length; i++) { - const r = results[i]; - const name = toDispatch[i].squad; - if (r.status === 'rejected') { - failed.push(name); - state.failCounts[name] = (state.failCounts[name] || 0) + 1; - } else { - completed.push(name); - delete state.failCounts[name]; - } - } - - dispatchedSquadNames = toDispatch.map(s => s.squad); - } - - // Estimate cost (rough: $1 per squad loop) - const cycleCost = dispatchedSquadNames.length * 1.0; - state.dailyCost += cycleCost; - - // Push memory signals for dispatched squads - await pushMemorySignals(dispatchedSquadNames, state, !!options.verbose); - - // Trim and save state - state.recentRuns = state.recentRuns.slice(-100); - state.lastCycle = new Date().toISOString(); - saveLoopState(state); - - // Slack: only on failures - if (failed.length > 0) { - slackNotify([ - `*Autopilot cycle — failures*`, - `Failed: ${failed.join(', ')}`, - `Completed: ${completed.join(', ')}`, - `Daily: $${state.dailyCost.toFixed(2)}${budget > 0 ? 
'/$' + budget : ''}`, - ].join('\n')); - } - - // Escalate persistent failures - for (const [key, count] of Object.entries(state.failCounts)) { - if (count >= 3) { - slackNotify(`🚨 *Escalation*: ${key} has failed ${count} times consecutively.`); - } - } - - // ── Post-run COO evaluation ── - // Evaluate outputs from all dispatched squads (skips if company was the only one) - if (dispatchedSquadNames.length > 0) { - await runPostEvaluation(dispatchedSquadNames, options); - } - - // ── Cognition: learn from this cycle ── - // Ingest memory → synthesize signals → evaluate decisions → reflect - writeLine(` ${colors.dim}Cognition cycle...${RESET}`); - const cognitionResult = await runCognitionCycle(dispatchedSquadNames, !!options.verbose); - if (cognitionResult.signalsIngested > 0 || cognitionResult.beliefsUpdated > 0 || cognitionResult.reflected) { - writeLine(` ${colors.dim}🧠 ${cognitionResult.signalsIngested} signals → ${cognitionResult.beliefsUpdated} beliefs updated${cognitionResult.reflected ? ' → reflected' : ''}${RESET}`); - } - - const elapsed = ((Date.now() - cycleStart) / 1000).toFixed(0); - writeLine(` ${colors.dim}Cycle done in ${elapsed}s | Daily: $${state.dailyCost.toFixed(2)}${RESET}`); - writeLine(); - - if (once) break; - await sleep(interval * 60 * 1000); - } - - process.off('SIGINT', handleSignal); - process.off('SIGTERM', handleSignal); -} - -/** - * Run the full squad loop: scanner → lead → worker → verifier. - * Each step checks cooldowns and pushes cognition signals. - * This is the core intelligence loop. 
- */ -async function runSquadLoop( - squad: NonNullable>, - squadsDir: string, - state: LoopState, - ghEnv: Record, - options: RunOptions, -): Promise { - writeLine(` ${gradient('▸')} ${colors.cyan}${squad.name}${RESET} — full loop`); - - // Discover agents and classify by role - const agentsByRole: Record> = { - scanner: [], - lead: [], - worker: [], - verifier: [], - }; - - for (const agent of squad.agents) { - const role = classifyAgentRole(agent.name); - const agentPath = join(squadsDir, squad.dir, `${agent.name}.md`); - if (existsSync(agentPath)) { - agentsByRole[role].push({ name: agent.name, path: agentPath }); - } - } - - const loopSteps: Array<{ role: string; agents: Array<{ name: string; path: string }> }> = [ - { role: 'scanner', agents: agentsByRole.scanner }, - { role: 'lead', agents: agentsByRole.lead }, - { role: 'worker', agents: agentsByRole.worker }, - { role: 'verifier', agents: agentsByRole.verifier }, - ]; - - for (const step of loopSteps) { - if (step.agents.length === 0) continue; - - for (const agent of step.agents) { - const cooldownMs = ROLE_COOLDOWNS[step.role] || ROLE_COOLDOWNS.worker; - if (!checkCooldown(state, squad.name, agent.name, cooldownMs)) { - if (options.verbose) { - writeLine(` ${colors.dim}↳ ${agent.name} (${step.role}) — in cooldown, skip${RESET}`); - } - continue; - } - - writeLine(` ${colors.dim}↳ ${agent.name} (${step.role})${RESET}`); - - const startMs = Date.now(); - try { - // For workers with no specific agent flag, use conversation mode - // For scanners/leads/verifiers, run as direct agent - if (step.role === 'worker' && step.agents.length > 1) { - // Multiple workers → conversation mode coordinates them - const convOptions: ConversationOptions = { - task: options.task, - maxTurns: options.maxTurns || 20, - costCeiling: options.costCeiling || 25, - verbose: options.verbose, - model: options.model, - }; - await runConversation(squad, convOptions); - } else { - await runAgent(agent.name, agent.path, squad.dir, { - 
...options, - background: false, - watch: false, - execute: true, - }); - } - - const durationMs = Date.now() - startMs; - const outcome = classifyRunOutcome(0, durationMs); - - // Update cooldown - state.cooldowns[`${squad.name}:${agent.name}`] = Date.now(); - - // Record run - state.recentRuns.push({ - squad: squad.name, - agent: agent.name, - at: new Date().toISOString(), - result: outcome === 'skipped' ? 'completed' : outcome, - durationMs, - }); - - // Push cognition signal - pushCognitionSignal({ - source: 'execution', - signal_type: `${step.role}_${outcome}`, - value: durationMs / 1000, - unit: 'seconds', - data: { - squad: squad.name, - agent: agent.name, - role: step.role, - duration_ms: durationMs, - }, - entity_type: 'agent', - entity_id: `${squad.name}/${agent.name}`, - confidence: 0.9, - }); - - if (outcome === 'skipped') { - writeLine(` ${colors.dim}↳ ${agent.name} — phantom (${(durationMs / 1000).toFixed(0)}s), skipped${RESET}`); - } - - // If this was a worker step, break after first conversation - if (step.role === 'worker' && step.agents.length > 1) break; - - } catch (err) { - const durationMs = Date.now() - startMs; - state.cooldowns[`${squad.name}:${agent.name}`] = Date.now(); - state.recentRuns.push({ - squad: squad.name, - agent: agent.name, - at: new Date().toISOString(), - result: 'failed', - durationMs, - }); - - writeLine(` ${colors.red}↳ ${agent.name} failed: ${err instanceof Error ? err.message : 'unknown'}${RESET}`); - } - } - } - - writeLine(` ${colors.dim}↳ ${squad.name} loop complete${RESET}`); -} - -function sleep(ms: number): Promise { - return new Promise(resolve => setTimeout(resolve, ms)); -} - -/** - * Lead mode: Single orchestrator session that uses Task tool for parallel work. 
- * Benefits over --parallel: - * - Single session overhead vs N sessions - * - Lead coordinates and routes work intelligently - * - Task agents share context when needed - * - Better parallelization (Claude's native Task tool) - */ -async function runLeadMode( - squad: ReturnType, - squadsDir: string, - options: RunOptions -): Promise { - if (!squad) return; - - const agentFiles = squad.agents - .map(a => ({ - name: a.name, - path: join(squadsDir, squad.dir, `${a.name}.md`), - role: a.role || '', - })) - .filter(a => existsSync(a.path)); - - if (agentFiles.length === 0) { - writeLine(` ${icons.error} ${colors.red}No agent files found${RESET}`); - return; - } - - writeLine(` ${bold}Lead mode${RESET} ${colors.dim}orchestrating ${agentFiles.length} agents${RESET}`); - writeLine(); - - // List available agents - for (const agent of agentFiles) { - writeLine(` ${icons.empty} ${colors.cyan}${agent.name}${RESET} ${colors.dim}${agent.role}${RESET}`); - } - writeLine(); - - if (!options.execute) { - writeLine(` ${colors.dim}Launch lead session:${RESET}`); - writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squad.name}${RESET} --lead`); - writeLine(); - return; - } - - // Build the lead prompt - const timeoutMins = options.timeout || DEFAULT_TIMEOUT_MINUTES; - const agentList = agentFiles.map(a => `- ${a.name}: ${a.role}`).join('\n'); - const agentPaths = agentFiles.map(a => `- ${a.name}: ${a.path}`).join('\n'); - - const prompt = `You are the Lead of the ${squad.name} squad. - -## Mission -${squad.mission || 'Execute squad operations efficiently.'} - -## Available Agents -${agentList} - -## Agent Definition Files -${agentPaths} - -## Your Role as Lead - -1. **Assess the situation**: Check for pending work: - - Run \`gh issue list --repo agents-squads/hq --label squad:${squad.name}\` for assigned issues - - Check .agents/memory/${squad.dir}/ for squad state and pending tasks - - Review recent activity with \`git log --oneline -10\` - -2. 
**Delegate work using Task tool**: For each piece of work: - - Use the Task tool with subagent_type="general-purpose" - - Include the agent definition file path in the prompt - - Spawn multiple Task agents IN PARALLEL when work is independent - - Example: "Read ${agentFiles[0]?.path || 'agent.md'} and execute its instructions for [specific task]" - -3. **Coordinate parallel execution**: - - Independent tasks → spawn Task agents in parallel (single message, multiple tool calls) - - Dependent tasks → run sequentially - - Monitor progress and handle failures - -4. **Report and update memory**: - - Update .agents/memory/${squad.dir}/state.md with completed work - - Log learnings to learnings.md - - Create issues for follow-up work if needed - -## Time Budget -You have ${timeoutMins} minutes. Prioritize high-impact work. - -## Critical Instructions -- Use Task tool for delegation, NOT direct execution of agent work -- Spawn parallel Task agents when work is independent -- When done, type /exit to end the session -- Do NOT wait for user input - work autonomously - -## Async Mode (CRITICAL) -This is ASYNC execution - Task agents must be fully autonomous: -- **Findings** → Create GitHub issues (gh issue create) -- **Code changes** → Create PRs (gh pr create) -- **Analysis results** → Write to .agents/outputs/ or memory files -- **NEVER wait for human review** - complete the work and move on -- **NEVER ask clarifying questions** - make reasonable decisions - -Instruct each Task agent: "Work autonomously. Output findings to GitHub issues. Output code changes as PRs. Do not wait for review." 
- -Begin by assessing pending work, then delegate to agents via Task tool.`; - - // Determine provider - const provider = options.provider || squad?.providers?.default || 'anthropic'; - const isAnthropic = provider === 'anthropic'; - - if (isAnthropic) { - const claudeAvailable = await checkClaudeCliAvailable(); - if (!claudeAvailable) { - writeLine(` ${colors.yellow}Claude CLI not found${RESET}`); - writeLine(` ${colors.dim}Install: npm install -g @anthropic-ai/claude-code${RESET}`); - return; - } - } else { - if (!isProviderCLIAvailable(provider)) { - const cliConfig = getCLIConfig(provider); - writeLine(` ${colors.yellow}${cliConfig?.displayName || provider} CLI not found${RESET}`); - if (cliConfig?.install) { - writeLine(` ${colors.dim}Install: ${cliConfig.install}${RESET}`); - } - return; - } - } - - // Determine execution mode (foreground is default, background is opt-in) - const isBackground = options.background === true && !options.watch; - const isWatch = options.watch === true; - const isForeground = !isBackground && !isWatch; - - const modeText = isBackground ? ' (background)' : isWatch ? ' (watch)' : ''; - const providerDisplay = isAnthropic ? 
'Claude' : (getCLIConfig(provider)?.displayName || provider); - writeLine(` ${gradient('Launching')} lead session${modeText} with ${providerDisplay}...`); - writeLine(); - - try { - // Find lead agent name from agent files or use default - const leadAgentName = agentFiles.find(a => a.name.includes('lead'))?.name || `${squad.dir}-lead`; - - let result: string; - if (isAnthropic) { - result = await executeWithClaude(prompt, { - verbose: options.verbose, - timeoutMinutes: timeoutMins, - foreground: options.foreground, - background: options.background, - watch: options.watch, - useApi: options.useApi, - effort: options.effort, - skills: options.skills, - trigger: options.trigger || 'manual', - squadName: squad.dir, - agentName: leadAgentName, - model: options.model, - }); - } else { - result = await executeWithProvider(provider, prompt, { - verbose: options.verbose, - foreground: isForeground || isWatch, - squadName: squad.dir, - agentName: leadAgentName, - }); - } - - if (isForeground || isWatch) { - writeLine(); - writeLine(` ${icons.success} Lead session completed`); - } else { - writeLine(` ${icons.success} Lead session launched in background`); - writeLine(` ${colors.dim}${result}${RESET}`); - writeLine(); - writeLine(` ${colors.dim}The lead will:${RESET}`); - writeLine(` ${colors.dim} 1. Assess pending work (issues, memory)${RESET}`); - writeLine(` ${colors.dim} 2. Spawn Task agents for parallel execution${RESET}`); - writeLine(` ${colors.dim} 3. Coordinate and report results${RESET}`); - writeLine(); - writeLine(` ${colors.dim}Monitor: squads workers${RESET}`); - } - } catch (error) { - const msg = error instanceof Error ? 
error.message : String(error); - writeLine(` ${icons.error} ${colors.red}Failed to launch agent${RESET}`); - writeLine(` ${colors.dim}${msg}${RESET}`); - writeLine(` ${colors.dim}Run \`squads doctor\` to check your setup.${RESET}`); - } -} - -async function runAgent( - agentName: string, - agentPath: string, - squadName: string, - options: RunOptions & { execute?: boolean } -): Promise { - const spinner = ora(`Running agent: ${agentName}`).start(); - const startMs = Date.now(); - const startTime = new Date(startMs).toISOString(); - const executionId = generateExecutionId(); - const taskType = detectTaskType(agentName); - - const definition = loadAgentDefinition(agentPath); - - // Fetch learnings from bridge (needed for both dry-run preview and real execution) - const learnings = await fetchLearnings(squadName); - const learningContext = learnings.length > 0 - ? `\n## Learnings from Previous Runs\n${learnings.map(l => `- ${l.content}`).join('\n')}\n` - : ''; - - if (options.dryRun) { - spinner.info(`[DRY RUN] Would run ${agentName}`); - // Show context that would be injected (with role-based gating) - const dryRunAgentRole = classifyAgent(agentName); - const dryRunContextRole: ContextRole = agentName.includes('company-lead') ? 'coo' - : (dryRunAgentRole as ContextRole | null) ?? 
'worker'; - const dryRunContext = gatherSquadContext(squadName, agentName, { - verbose: options.verbose, agentPath, role: dryRunContextRole - }); - if (options.verbose) { - writeLine(` ${colors.dim}Agent definition:${RESET}`); - writeLine(` ${colors.dim}${definition.slice(0, DRYRUN_DEF_MAX_CHARS)}...${RESET}`); - if (learnings.length > 0) { - writeLine(` ${colors.dim}Learnings: ${learnings.length} from bridge${RESET}`); - } - if (dryRunContext || learningContext) { - const fullContext = `${dryRunContext}${learningContext}`; - writeLine(); - writeLine(` ${colors.cyan}Context to inject (${Math.ceil(fullContext.length / 4)} tokens):${RESET}`); - writeLine(` ${colors.dim}${fullContext.slice(0, DRYRUN_CONTEXT_MAX_CHARS)}...${RESET}`); - } - } - return; - } - - // Pre-execution permission validation (Phase 3) - const squadsDir = findSquadsDir(); - if (squadsDir) { - const squadFilePath = join(squadsDir, squadName, 'SQUAD.md'); - if (existsSync(squadFilePath)) { - const squadContent = readFileSync(squadFilePath, 'utf-8'); - const permContext = buildContextFromSquad(squadName, squadContent, agentName); - - // Build execution request from agent definition - // For now, we validate MCP servers mentioned in the agent definition - const mcpServers = extractMcpServersFromDefinition(definition); - const execRequest: ExecutionRequest = { - mcpServers - }; - - const permResult = validateExecution(permContext, execRequest); - - if (permResult.violations.length > 0) { - spinner.stop(); - const violationLines = formatViolations(permResult); - for (const line of violationLines) { - writeLine(` ${line}`); - } - writeLine(); - - if (!permResult.allowed) { - writeLine(` ${colors.red}Execution blocked due to permission violations.${RESET}`); - writeLine(` ${colors.dim}Configure permissions in ${squadFilePath}${RESET}`); - return; - } - } - } - } - - // Preflight gate check (quota, cooldown) via bridge API - const preflight = await checkPreflightGates(squadName, agentName); - - if 
(!preflight.allowed) { - spinner.stop(); - writeLine(); - writeLine(` ${colors.red}${icons.error} Execution blocked by preflight gates${RESET}`); - - if (preflight.gates.quota && !preflight.gates.quota.ok) { - writeLine(` ${colors.dim}Quota: $${preflight.gates.quota.used.toFixed(2)}/$${preflight.gates.quota.limit}/mo limit exceeded${RESET}`); - } - - if (preflight.gates.cooldown && !preflight.gates.cooldown.ok) { - const elapsed = preflight.gates.cooldown.elapsed_sec; - const minGap = preflight.gates.cooldown.min_gap_sec; - writeLine(` ${colors.dim}Cooldown: ${elapsed}s since last run (min: ${minGap}s)${RESET}`); - } - - writeLine(); - return; - } - - // Show preflight status in verbose mode - if (options.verbose && Object.keys(preflight.gates).length > 0) { - writeLine(` ${colors.dim}Preflight: quota ${preflight.gates.quota?.ok ? '✓' : '✗'} cooldown ${preflight.gates.cooldown?.ok ? '✓' : '✗'}${RESET}`); - } - - // Local cooldown check (when bridge is unavailable or has no execution history) - // Skip for manual triggers - only enforce for scheduled/cron runs - const isScheduledRun = options.trigger === 'scheduled' || options.trigger === 'smart'; - const bridgeHasNoHistory = preflight.gates.cooldown?.elapsed_sec === null; - if (isScheduledRun && (!preflight.gates.cooldown || bridgeHasNoHistory)) { - // Read cooldown from agent frontmatter, fall back to default - const frontmatterForCooldown = parseAgentFrontmatter(agentPath); - const cooldownMs = frontmatterForCooldown.cooldown - ? 
(parseCooldown(frontmatterForCooldown.cooldown) || DEFAULT_SCHEDULED_COOLDOWN_MS) - : DEFAULT_SCHEDULED_COOLDOWN_MS; - const localCooldown = checkLocalCooldown(squadName, agentName, cooldownMs); - - if (!localCooldown.ok) { - spinner.stop(); - writeLine(); - writeLine(` ${colors.yellow}${icons.warning} Skipping: cooldown not elapsed${RESET}`); - writeLine(` ${colors.dim}Last run: ${formatDuration(localCooldown.elapsedMs!)} ago (cooldown: ${formatDuration(localCooldown.cooldownMs)})${RESET}`); - writeLine(); - return; - } - - if (options.verbose) { - writeLine(` ${colors.dim}Local cooldown: ✓ (${formatDuration(localCooldown.elapsedMs || 0)} since last run)${RESET}`); - } - } - - // Log execution start - logExecution({ - squadName, - agentName, - executionId, - startTime, - status: 'running', - trigger: options.trigger || 'manual', - taskType, - }); - - if (options.verbose && learnings.length > 0) { - writeLine(` ${colors.dim}Injecting ${learnings.length} learnings${RESET}`); - } - - // Load system protocol (SYSTEM.md, replaces legacy approval + post-execution) - const systemProtocol = loadSystemProtocol(); - const systemContext = systemProtocol ? `\n${systemProtocol}\n` : ''; - - // Derive context role from agent name for role-based context gating - const agentRole = classifyAgent(agentName); - const contextRole: ContextRole = agentName.includes('company-lead') ? 'coo' - : (agentRole as ContextRole | null) ?? 
'worker'; - - // Gather squad context (role-based: scanners get minimal, leads get everything) - const squadContext = gatherSquadContext(squadName, agentName, { - verbose: options.verbose, agentPath, role: contextRole - }); - - // Fetch cognition beliefs for prompt injection (Reflexion pattern) - let cognitionContext = ''; - try { - const { loadSession } = await import('../lib/auth.js'); - const { getApiUrl } = await import('../lib/env-config.js'); - const session = loadSession(); - if (session?.accessToken && session.status === 'active') { - const safeSquadName = encodeURIComponent(squadName); - const res = await fetch(`${getApiUrl()}/cognition/context/squad:${safeSquadName}`, { - headers: { Authorization: `Bearer ${session.accessToken}` }, - signal: AbortSignal.timeout(3000), - }); - if (res.ok) { - const data = await res.json() as { markdown: string }; - if (data.markdown && !data.markdown.includes('No cognition data')) { - cognitionContext = `\n${data.markdown}\n`; - if (options.verbose) { - writeLine(` ${colors.dim}Injecting cognition beliefs${RESET}`); - } - } - } - } - } catch (e) { - if (options.verbose) writeLine(` ${colors.dim}warn: cognition fetch failed: ${e instanceof Error ? e.message : String(e)}${RESET}`); - } - - // Generate the Claude Code prompt with timeout awareness - const timeoutMins = options.timeout || DEFAULT_TIMEOUT_MINUTES; - const taskDirective = options.task - ? `\n## TASK DIRECTIVE (overrides default behavior)\n${options.task}\n` - : ''; - const prompt = `Execute the ${agentName} agent from squad ${squadName}. - -Read the agent definition at ${agentPath} and follow its instructions exactly. 
-${taskDirective} -The agent definition contains: -- Purpose/role -- Tools it can use (MCP servers, skills) -- Step-by-step instructions -- Expected output format - -TOOL PREFERENCE: Always prefer CLI tools over MCP servers when both can accomplish the task: -- Use \`squads\` CLI for squad operations (run, memory, status, feedback) -- Use \`gh\` CLI for GitHub (issues, PRs, repos) -- Use \`git\` CLI for version control -- Use Bash for file operations, builds, tests -- Only use MCP tools when CLI cannot do it or MCP is significantly better -${systemContext}${squadContext}${cognitionContext}${learningContext} -TIME LIMIT: You have ${timeoutMins} minutes. Work efficiently: -- Focus on the most important tasks first -- If a task is taking too long, move on and note it for next run -- Aim to complete within ${Math.floor(timeoutMins * SOFT_DEADLINE_RATIO)} minutes`; - - // Resolve provider with full chain: - // 1. Agent config (from agent file frontmatter/header) - // 2. CLI option (--provider flag) - // 3. Squad default (from SQUAD.md providers.default) - // 4. Fallback to 'anthropic' - const agentProvider = parseAgentProvider(agentPath); - const squad = loadSquad(squadName); - const squadDefaultProvider = squad?.providers?.default; - - const provider = agentProvider || options.provider || squadDefaultProvider || 'anthropic'; - const isAnthropic = provider === 'anthropic'; - - if (options.verbose && (agentProvider || squadDefaultProvider)) { - writeLine(` ${colors.dim}Provider resolution:${RESET}`); - if (agentProvider) writeLine(` ${colors.dim}Agent: ${agentProvider}${RESET}`); - if (options.provider) writeLine(` ${colors.dim}CLI: ${options.provider}${RESET}`); - if (squadDefaultProvider) writeLine(` ${colors.dim}Squad: ${squadDefaultProvider}${RESET}`); - writeLine(` ${colors.dim}→ Using: ${provider}${RESET}`); - } - - // Check CLI availability - const cliAvailable = isAnthropic - ? 
await checkClaudeCliAvailable() - : isProviderCLIAvailable(provider); - - if (options.execute && cliAvailable) { - const cliConfig = getCLIConfig(provider); - const cliName = cliConfig?.displayName || provider; - - // Determine execution mode (foreground is default, background is opt-in) - const isBackground = options.background === true && !options.watch; - const isWatch = options.watch === true; - const isForeground = !isBackground && !isWatch; - - spinner.text = isBackground - ? `Launching ${agentName} with ${cliName} in background...` - : isWatch - ? `Starting ${agentName} with ${cliName} (watch mode)...` - : `Running ${agentName} with ${cliName}...`; - - // Parse frontmatter for verification criteria (Ralph loop) - const frontmatter = parseAgentFrontmatter(agentPath); - const hasCriteria = !!frontmatter.acceptance_criteria && options.verify !== false; - const maxRetries = frontmatter.max_retries ?? 2; - let currentPrompt = prompt; - - for (let attempt = 0; attempt <= (hasCriteria ? 
maxRetries : 0); attempt++) { - try { - let result: string; - - if (isAnthropic) { - result = await executeWithClaude(currentPrompt, { - verbose: options.verbose, - timeoutMinutes: options.timeout || 30, - foreground: options.foreground, - background: options.background, - watch: options.watch, - useApi: options.useApi, - effort: options.effort, - skills: options.skills, - trigger: options.trigger || 'manual', - squadName, - agentName, - model: options.model, - }); - } else { - result = await executeWithProvider(provider, currentPrompt, { - verbose: options.verbose, - foreground: !isBackground, - squadName, - agentName, - }); - } - - // Ralph loop: verify foreground execution against acceptance criteria - if (hasCriteria && (isForeground || isWatch)) { - const verification = await verifyExecution( - squadName, agentName, frontmatter.acceptance_criteria!, { verbose: options.verbose } - ); - if (!verification.passed && attempt < maxRetries) { - writeLine(` ${colors.yellow}Verification: FAIL - ${verification.reason}${RESET}`); - writeLine(` ${colors.dim}Retrying (${attempt + 1}/${maxRetries})...${RESET}`); - currentPrompt = `${prompt}\n\n## PREVIOUS ATTEMPT FAILED\nVerification found: ${verification.reason}\nPlease address this issue and try again.`; - continue; - } - if (verification.passed) { - writeLine(` ${colors.green}Verification: PASS - ${verification.reason}${RESET}`); - } - } - - // Emit completion event (non-blocking) - emitExecutionEvent('agent.completed', { - squad: squadName, agent: agentName, executionId, - }).catch(() => {}); - - if (isForeground || isWatch) { - spinner.succeed(`Agent ${agentName} completed (${cliName})`); - } else { - spinner.succeed(`Agent ${agentName} launched in background (${cliName})`); - writeLine(` ${colors.dim}${result}${RESET}`); - writeLine(); - writeLine(` ${colors.dim}Monitor:${RESET} squads workers`); - writeLine(` ${colors.dim}Memory:${RESET} squads memory show ${squadName}`); - } - break; // Success — exit retry loop - } 
catch (error) { - // Emit failure event (non-blocking) - emitExecutionEvent('agent.failed', { - squad: squadName, agent: agentName, executionId, error: String(error), - }).catch(() => {}); - - spinner.fail(`Agent ${agentName} failed to launch`); - updateExecutionStatus(squadName, agentName, executionId, 'failed', { - error: String(error), - durationMs: Date.now() - startMs, - }); - const msg = error instanceof Error ? error.message : String(error); - const isLikelyBug = error instanceof ReferenceError || error instanceof TypeError || error instanceof SyntaxError; - writeLine(` ${colors.red}${msg}${RESET}`); - writeLine(); - if (isLikelyBug) { - writeLine(` ${colors.yellow}This looks like a bug. Please try:${RESET}`); - writeLine(` ${colors.dim}$${RESET} squads doctor ${colors.dim}— check your setup${RESET}`); - writeLine(` ${colors.dim}$${RESET} squads update ${colors.dim}— get the latest fixes${RESET}`); - writeLine(); - writeLine(` ${colors.dim}If the problem persists, file an issue:${RESET}`); - writeLine(` ${colors.dim}https://github.com/agents-squads/squads-cli/issues${RESET}`); - } else { - writeLine(` ${colors.dim}Run \`squads doctor\` to check your setup, or \`squads run ${agentName} --verbose\` for details.${RESET}`); - } - break; // Error — exit retry loop - } - } - } else { - // Show instructions for manual execution - spinner.succeed(`Agent ${agentName} ready`); - writeLine(` ${colors.dim}Execution logged: ${startTime}${RESET}`); - - if (!cliAvailable) { - const cliConfig = getCLIConfig(provider); - writeLine(); - writeLine(` ${colors.yellow}${cliConfig?.command || provider} CLI not found${RESET}`); - writeLine(` ${colors.dim}Install: ${cliConfig?.install || 'squads providers'}${RESET}`); - } - - writeLine(); - writeLine(` ${colors.dim}To launch as background task:${RESET}`); - writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squadName}${RESET} -a ${colors.cyan}${agentName}${RESET}`); - if (provider !== 'anthropic') { - writeLine(` 
${colors.dim}$${RESET} squads run ${colors.cyan}${squadName}${RESET} -a ${colors.cyan}${agentName}${RESET} --provider=${provider}`); - } - writeLine(); - writeLine(` ${colors.dim}Or run interactively:${RESET}`); - writeLine(` ${colors.dim}$${RESET} Run the ${colors.cyan}${agentName}${RESET} agent from ${agentPath}`); - } -} - -async function checkClaudeCliAvailable(): Promise { - return new Promise((resolve) => { - const check = spawn('which', ['claude'], { stdio: 'pipe' }); - check.on('close', (code) => resolve(code === 0)); - check.on('error', () => resolve(false)); - }); -} - -/** - * Pre-flight check for the executor (Claude Code or other provider CLI). - * Runs once at the start of `squads run` before any agent execution. - * Checks: - * 1. CLI binary is available on PATH - * 2. Authentication looks configured (credentials file or API key) - * Skippable with SQUADS_SKIP_CHECKS=1 env var (for CI/CD). - * Returns true if checks pass (or are skipped), false if execution should abort. - */ -async function preflightExecutorCheck(provider: string): Promise { - // Allow skipping for CI/CD or advanced users - if (process.env.SQUADS_SKIP_CHECKS === '1') { - return true; - } - - const isAnthropic = provider === 'anthropic'; - - // --- Check 1: CLI binary on PATH --- - let cliFound: boolean; - - if (isAnthropic) { - cliFound = await checkClaudeCliAvailable(); - } else { - cliFound = isProviderCLIAvailable(provider); - } - - if (!cliFound) { - const cliConfig = getCLIConfig(provider); - const cliName = cliConfig?.command || provider; - const installCmd = cliConfig?.install || `See ${provider} documentation`; - - writeLine(); - writeLine(` ${icons.error} ${colors.red}${cliName} CLI not found${RESET}`); - writeLine(); - writeLine(` ${colors.dim}The ${cliName} command is required to run agents but was not found on your PATH.${RESET}`); - writeLine(); - writeLine(` ${colors.cyan}Install:${RESET} ${installCmd}`); - writeLine(); - writeLine(` ${colors.dim}Skip this check: 
SQUADS_SKIP_CHECKS=1 squads run ...${RESET}`); - writeLine(); - return false; - } - - // Auth check removed: Claude CLI handles its own auth errors with clear messages. - // Pre-checking here caused false warnings for OAuth users (keychain auth works - // without .credentials.json or ANTHROPIC_API_KEY). See #520. - - return true; -} - -interface ExecuteWithClaudeOptions { - verbose?: boolean; - timeoutMinutes?: number; - foreground?: boolean; // Deprecated, now default - background?: boolean; // Opt-in background mode - watch?: boolean; // Background but tail log - useApi?: boolean; - effort?: EffortLevel; - skills?: string[]; - trigger?: ExecutionContext['trigger']; - squadName: string; - agentName: string; - model?: string; // Model to use (Claude aliases or full model IDs like gemini-2.5-flash) -} - -/** Build agent environment variables for Claude execution */ -function buildAgentEnv( - baseEnv: Record, - execContext: ExecutionContext, - options?: { effort?: EffortLevel; skills?: string[]; includeOtel?: boolean; ghToken?: string } -): Record { - // Strip CLAUDECODE to allow spawning claude from within a Claude Code session - const { CLAUDECODE: _, ...cleanEnv } = baseEnv; - const env: Record = { - ...cleanEnv, - SQUADS_SQUAD: execContext.squad, - SQUADS_AGENT: execContext.agent, - SQUADS_TASK_TYPE: execContext.taskType, - SQUADS_TRIGGER: execContext.trigger, - SQUADS_EXECUTION_ID: execContext.executionId, - BRIDGE_API: getBridgeUrl(), - }; - - // Inject bot GH_TOKEN so agents create PRs/issues as the bot identity, - // not the user's personal gh auth. This enables founder to review/approve. 
- if (options?.ghToken) env.GH_TOKEN = options.ghToken; - - if (options?.includeOtel) { - env.OTEL_RESOURCE_ATTRIBUTES = `squads.squad=${execContext.squad},squads.agent=${execContext.agent},squads.task_type=${execContext.taskType},squads.trigger=${execContext.trigger},squads.execution_id=${execContext.executionId}`; - } - - if (options?.effort) env.CLAUDE_EFFORT = options.effort; - if (options?.skills && options.skills.length > 0) env.CLAUDE_SKILLS = options.skills.join(','); - - return env; -} - -/** Log verbose execution config (shared by foreground and background modes) */ -function logVerboseExecution(config: { - projectRoot: string; - mode: string; - useApi?: boolean; - execContext: ExecutionContext; - effort?: EffortLevel; - skills?: string[]; - resolvedModel?: string; - claudeModelAlias?: string; - explicitModel?: string; - logFile?: string; - mcpConfigPath?: string; -}): void { - writeLine(` ${colors.dim}Project: ${config.projectRoot}${RESET}`); - writeLine(` ${colors.dim}Mode: ${config.mode}${RESET}`); - if (config.logFile) writeLine(` ${colors.dim}Log: ${config.logFile}${RESET}`); - if (config.mcpConfigPath) writeLine(` ${colors.dim}MCP config: ${config.mcpConfigPath}${RESET}`); - if (config.useApi !== undefined) writeLine(` ${colors.dim}Auth: ${config.useApi ? 'API credits' : 'subscription'}${RESET}`); - writeLine(` ${colors.dim}Execution: ${config.execContext.executionId}${RESET}`); - writeLine(` ${colors.dim}Task type: ${config.execContext.taskType}${RESET}`); - writeLine(` ${colors.dim}Trigger: ${config.execContext.trigger}${RESET}`); - if (config.effort) writeLine(` ${colors.dim}Effort: ${config.effort}${RESET}`); - if (config.skills && config.skills.length > 0) writeLine(` ${colors.dim}Skills: ${config.skills.join(', ')}${RESET}`); - if (config.resolvedModel || config.claudeModelAlias) { - const source = config.explicitModel ? 
'explicit' : 'auto-routed'; - const displayModel = config.resolvedModel || config.claudeModelAlias; - writeLine(` ${colors.dim}Model: ${displayModel} (${source})${RESET}`); - } -} - -/** Resolve the target repo root from the squad's repo field (e.g. "org/squads-cli" → sibling dir) */ -function resolveTargetRepoRoot(projectRoot: string, squad: Squad | null): string { - if (!squad?.repo) return projectRoot; - const repoName = squad.repo.split('/').pop(); - if (!repoName) return projectRoot; - const candidatePath = join(projectRoot, '..', repoName); - return existsSync(candidatePath) ? candidatePath : projectRoot; -} - -/** Create an isolated worktree for agent execution (Node.js-based, for foreground mode) */ -function createAgentWorktree(projectRoot: string, squadName: string, agentName: string): string { - const timestamp = Date.now(); - const branchName = `agent/${squadName}/${agentName}-${timestamp}`; - const worktreePath = join(projectRoot, '..', '.worktrees', `${squadName}-${agentName}-${timestamp}`); - - try { - mkdirSync(join(projectRoot, '..', '.worktrees'), { recursive: true }); - execSync(`git worktree add '${worktreePath}' -b '${branchName}' HEAD`, { cwd: projectRoot, stdio: 'pipe' }); - return worktreePath; - } catch (e) { - writeLine(` ${colors.dim}warn: worktree creation failed, using project root: ${e instanceof Error ? 
e.message : String(e)}${RESET}`); - return projectRoot; - } -} - -/** Remove a worktree and its branch after agent execution completes */ -function cleanupWorktree(worktreePath: string, projectRoot: string): void { - if (worktreePath === projectRoot) return; // fallback mode, nothing to clean - - try { - // Extract branch name from worktree before removing - const branchInfo = execSync(`git -C '${projectRoot}' worktree list --porcelain`, { encoding: 'utf-8' }); - let branchName = ''; - const lines = branchInfo.split('\n'); - for (let i = 0; i < lines.length; i++) { - if (lines[i] === `worktree ${worktreePath}` && i + 2 < lines.length) { - const branchLine = lines[i + 2]; // "branch refs/heads/..." - if (branchLine.startsWith('branch refs/heads/')) { - branchName = branchLine.replace('branch refs/heads/', ''); - } - break; - } - } - - // Remove worktree - execSync(`git -C '${projectRoot}' worktree remove '${worktreePath}' --force`, { stdio: 'pipe' }); - - // Delete the agent branch (only agent/* branches, safety check) - if (branchName && branchName.startsWith('agent/')) { - execSync(`git -C '${projectRoot}' branch -D '${branchName}'`, { stdio: 'pipe' }); - } - } catch { - // Non-critical — worktree prune will catch it later - } -} - -/** Build shell script for detached execution with worktree isolation */ -function buildDetachedShellScript(config: { - projectRoot: string; - squadName: string; - agentName: string; - timestamp: number; - claudeModelAlias?: string; - escapedPrompt: string; - logFile: string; - pidFile: string; -}): string { - const modelFlag = config.claudeModelAlias ? 
`--model ${config.claudeModelAlias}` : ''; - const branchName = `agent/${config.squadName}/${config.agentName}-${config.timestamp}`; - const worktreeDir = `${config.projectRoot}/../.worktrees/${config.squadName}-${config.agentName}-${config.timestamp}`; - const cleanup = `if [ "\${WORK_DIR}" != '${config.projectRoot}' ]; then git -C '${config.projectRoot}' worktree remove "\${WORK_DIR}" --force 2>/dev/null; BRANCH='${branchName}'; git -C '${config.projectRoot}' branch -D "\${BRANCH}" 2>/dev/null; fi`; - const script = `mkdir -p '${config.projectRoot}/../.worktrees'; WORK_DIR='${config.projectRoot}'; if git -C '${config.projectRoot}' worktree add '${worktreeDir}' -b '${branchName}' HEAD 2>/dev/null; then WORK_DIR='${worktreeDir}'; fi; cd "\${WORK_DIR}"; unset CLAUDECODE; claude --print --dangerously-skip-permissions --disable-slash-commands ${modelFlag} -- '${config.escapedPrompt}' > '${config.logFile}' 2>&1; ${cleanup}`; - return `echo $$ > '${config.pidFile}'; ${script}`; -} - -/** Prepare log directory and file paths for detached execution */ -function prepareLogFiles(projectRoot: string, squadName: string, agentName: string, timestamp: number): { logDir: string; logFile: string; pidFile: string } { - const logDir = join(projectRoot, '.agents', 'logs', squadName); - const logFile = join(logDir, `${agentName}-${timestamp}.log`); - const pidFile = join(logDir, `${agentName}-${timestamp}.pid`); - - if (!existsSync(logDir)) { - mkdirSync(logDir, { recursive: true }); - } - - return { logDir, logFile, pidFile }; -} - -/** Execute Claude in foreground mode (direct stdio, default) */ -function executeForeground(config: { - prompt: string; - claudeArgs: string[]; - agentEnv: Record; - projectRoot: string; - squadName: string; - agentName: string; - execContext: ExecutionContext; - startMs: number; - provider?: string; -}): Promise { - const workDir = createAgentWorktree(config.projectRoot, config.squadName, config.agentName); - - return new Promise((resolve, reject) => { 
- const claude = spawn('claude', config.claudeArgs, { - stdio: 'inherit', - cwd: workDir, - env: config.agentEnv, - }); - - claude.on('close', async (code) => { - const durationMs = Date.now() - config.startMs; - - if (code === 0) { - updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'completed', { - outcome: 'Session completed successfully', - durationMs, - }); - - const commitResult = await autoCommitAgentWork(config.squadName, config.agentName, config.execContext.executionId, config.provider); - if (commitResult.committed) { - writeLine(); - writeLine(` ${colors.green}Auto-committed agent work${RESET}`); - } - - cleanupWorktree(workDir, config.projectRoot); - resolve('Session completed'); - } else { - updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'failed', { - error: `Claude exited with code ${code}`, - durationMs, - }); - cleanupWorktree(workDir, config.projectRoot); - reject(new Error(`Claude exited with code ${code}`)); - } - }); - - claude.on('error', (err) => { - const durationMs = Date.now() - config.startMs; - updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'failed', { - error: String(err), - durationMs, - }); - cleanupWorktree(workDir, config.projectRoot); - reject(err); - }); - }); -} - -/** Execute Claude in watch mode (background + tail log) */ -async function executeWatch(config: { - projectRoot: string; - agentEnv: Record; - logFile: string; - wrapperScript: string; -}): Promise { - const child = spawn('sh', ['-c', config.wrapperScript], { - cwd: config.projectRoot, - detached: true, - stdio: 'ignore', - env: config.agentEnv, - }); - child.unref(); - - await new Promise(resolve => setTimeout(resolve, LOG_FILE_INIT_DELAY_MS)); - - writeLine(` ${colors.dim}Tailing log (Ctrl+C to stop watching, agent continues)...${RESET}`); - writeLine(); - - const tail = spawn('tail', ['-f', config.logFile], { stdio: 'inherit' }); - - 
process.on('SIGINT', () => { - tail.kill(); - writeLine(); - writeLine(` ${colors.dim}Stopped watching. Agent continues in background.${RESET}`); - writeLine(` ${colors.dim}Resume: tail -f ${config.logFile}${RESET}`); - process.exit(0); - }); - - return new Promise((resolve) => { - tail.on('close', () => { - resolve(`Agent running in background. Log: ${config.logFile}`); - }); - }); -} - -async function executeWithClaude( - prompt: string, - options: ExecuteWithClaudeOptions -): Promise { - const { - verbose, - timeoutMinutes: _timeoutMinutes = 30, - foreground, - background, - watch, - useApi, - effort, - skills, - trigger = 'manual', - squadName, - agentName, - model, - } = options; - - // Determine execution mode - const runInBackground = background === true && !watch; - const runInWatch = watch === true; - const runInForeground = !runInBackground && !runInWatch; - - const startMs = Date.now(); - const projectRoot = getProjectRoot(); - ensureProjectTrusted(projectRoot); - - // Resolve model and provider - const squad = squadName !== 'unknown' ? loadSquad(squadName) : null; - const mcpConfigPath = selectMcpConfig(squadName, squad); - const taskType = detectTaskType(agentName); - const resolvedModel = resolveModel(model, squad, taskType); - const provider = resolvedModel ? detectProviderFromModel(resolvedModel) : 'anthropic'; - - // Resolve target repo for worktree creation (squad.repo → sibling dir) - const targetRepoRoot = resolveTargetRepoRoot(projectRoot, squad); - - // Delegate to non-Anthropic providers - if (provider !== 'anthropic' && provider !== 'unknown') { - if (verbose) { - const source = model ? 'explicit' : 'auto-routed'; - writeLine(` ${colors.dim}Model: ${resolvedModel} (${source})${RESET}`); - writeLine(` ${colors.dim}Provider: ${provider}${RESET}`); - } - return executeWithProvider(provider, prompt, { - verbose, foreground, cwd: targetRepoRoot, squadName, agentName, - }); - } - - const claudeModelAlias = resolvedModel ? 
getClaudeModelAlias(resolvedModel) : undefined; - - const execContext: ExecutionContext = { - squad: squadName, agent: agentName, taskType, trigger, - executionId: generateExecutionId(), - }; - - // Build base env: remove ANTHROPIC_API_KEY unless --use-api, remove CLAUDECODE - const { ANTHROPIC_API_KEY: _apiKey, CLAUDECODE: _claudeCode, ...envWithoutApiKey } = process.env; - const spawnEnv = useApi - ? (() => { const { CLAUDECODE: _, ...rest } = process.env; return rest; })() - : envWithoutApiKey; - - const escapedPrompt = prompt.replace(/'/g, "'\\''"); - - await registerContextWithBridge(execContext); - - // Get bot token so agents create PRs/issues as bot identity (not user's personal gh auth) - let botGhToken: string | undefined; - try { - const ghEnv = await getBotGhEnv(); - botGhToken = ghEnv.GH_TOKEN; - } catch { /* graceful: falls back to user's gh auth */ } - - // ── Foreground mode ────────────────────────────────────────────────── - if (runInForeground) { - if (verbose) { - logVerboseExecution({ - projectRoot, mode: 'foreground', useApi, execContext, - effort, skills, resolvedModel, claudeModelAlias, explicitModel: model, - }); - } - - // Build claude args as array to avoid shell escaping issues with large prompts - const claudeArgs: string[] = []; - if (!process.stdin.isTTY) claudeArgs.push('--print'); - claudeArgs.push('--dangerously-skip-permissions'); - claudeArgs.push('--disable-slash-commands'); - if (mcpConfigPath) claudeArgs.push('--mcp-config', mcpConfigPath); - if (claudeModelAlias) claudeArgs.push('--model', claudeModelAlias); - claudeArgs.push('--', prompt); - - const agentEnv = buildAgentEnv(spawnEnv as Record, execContext, { - effort, skills, includeOtel: true, ghToken: botGhToken, - }); - - return executeForeground({ - prompt, claudeArgs, agentEnv, projectRoot: targetRepoRoot, - squadName, agentName, execContext, startMs, provider, - }); - } - - // ── Detached modes (watch + background) ────────────────────────────── - const timestamp = 
Date.now(); - const { logFile, pidFile } = prepareLogFiles(projectRoot, squadName, agentName, timestamp); - const agentEnv = buildAgentEnv(spawnEnv as Record, execContext, { - effort, skills, includeOtel: !runInWatch, ghToken: botGhToken, - }); - - const wrapperScript = buildDetachedShellScript({ - projectRoot: targetRepoRoot, squadName, agentName, timestamp, - claudeModelAlias, escapedPrompt, logFile, pidFile, - }); - - if (runInWatch) { - if (verbose) { - logVerboseExecution({ - projectRoot, mode: 'watch (background + tail)', - execContext, logFile, - }); - } - - return executeWatch({ projectRoot: targetRepoRoot, agentEnv, logFile, wrapperScript }); - } - - // ── Background mode ────────────────────────────────────────────────── - if (verbose) { - logVerboseExecution({ - projectRoot, mode: 'background', useApi, execContext, - effort, skills, resolvedModel, claudeModelAlias, - explicitModel: model, logFile, mcpConfigPath, - }); - } - - const child = spawn('sh', ['-c', wrapperScript], { - cwd: targetRepoRoot, - detached: true, - stdio: 'ignore', - env: agentEnv, - }); - child.unref(); - - if (verbose) { - writeLine(` ${colors.dim}Monitor: tail -f ${logFile}${RESET}`); - } - - return `Log: ${logFile}. Monitor: tail -f ${logFile}`; -} - -/** - * Execute agent with a non-Anthropic LLM CLI provider. - * - * Supports: google (gemini), openai (codex), mistral (vibe), xai (grok), aider, ollama - * - * Unlike executeWithClaude which has full session management, - * other CLIs run in simpler non-interactive mode. - */ -async function executeWithProvider( - provider: string, - prompt: string, - options: { - verbose?: boolean; - foreground?: boolean; - cwd?: string; - squadName?: string; - agentName?: string; - } -): Promise { - const cliConfig = getCLIConfig(provider); - - if (!cliConfig) { - throw new Error(`Unknown provider: ${provider}. 
Run 'squads providers' to see available providers.`); - } - - if (!isProviderCLIAvailable(provider)) { - throw new Error(`CLI '${cliConfig.command}' not found. Install: ${cliConfig.install}`); - } - - const projectRoot = options.cwd || getProjectRoot(); - const squadName = options.squadName || 'unknown'; - const agentName = options.agentName || 'unknown'; - const timestamp = Date.now(); - - // Build clean env: remove CLAUDECODE to allow nesting, pass squad context - const { CLAUDECODE: _claudeCode, ...cleanEnv } = process.env; - const providerEnv = { - ...cleanEnv, - SQUADS_SQUAD: squadName, - SQUADS_AGENT: agentName, - SQUADS_PROVIDER: provider, - }; - - // Create isolated worktree for this agent (same pattern as executeWithClaude) - const branchName = `agent/${squadName}/${agentName}-${timestamp}`; - const worktreePath = join(projectRoot, '..', '.worktrees', `${squadName}-${agentName}-${timestamp}`); - let workDir = projectRoot; - try { - mkdirSync(join(projectRoot, '..', '.worktrees'), { recursive: true }); - execSync(`git worktree add '${worktreePath}' -b '${branchName}' HEAD`, { cwd: projectRoot, stdio: 'pipe' }); - workDir = worktreePath; - } catch (e) { - writeLine(` ${colors.dim}warn: worktree creation failed, using project root: ${e instanceof Error ? e.message : String(e)}${RESET}`); - } - - // Copy .agents directory into worktree so sandboxed providers can access - // agent definitions, memory, and config files. Providers like Gemini restrict - // file reads to the workspace directory, so these must be local. - let effectivePrompt = prompt; - if (workDir !== projectRoot) { - const agentsDir = join(projectRoot, '.agents'); - const targetAgentsDir = join(workDir, '.agents'); - if (existsSync(agentsDir) && !existsSync(targetAgentsDir)) { - try { - cpSync(agentsDir, targetAgentsDir, { recursive: true }); - } catch (e) { - writeLine(` ${colors.dim}warn: .agents copy failed: ${e instanceof Error ? 
e.message : String(e)}${RESET}`); - } - } - // Rewrite absolute paths in prompt so sandboxed providers can resolve them - effectivePrompt = prompt.replaceAll(projectRoot, workDir); - } - - const args = cliConfig.buildArgs(effectivePrompt); - - if (options.verbose) { - writeLine(` ${colors.dim}Provider: ${cliConfig.displayName}${RESET}`); - writeLine(` ${colors.dim}Command: ${cliConfig.command} ${args.join(' ').slice(0, VERBOSE_COMMAND_MAX_CHARS)}...${RESET}`); - writeLine(` ${colors.dim}CWD: ${workDir}${RESET}`); - if (workDir !== projectRoot) { - writeLine(` ${colors.dim}Worktree: ${branchName}${RESET}`); - } - } - - // Foreground mode: run directly in terminal - if (options.foreground) { - return new Promise((resolve, reject) => { - const proc = spawn(cliConfig.command, args, { - stdio: 'inherit', - cwd: workDir, - env: providerEnv, - }); - - proc.on('close', (code) => { - cleanupWorktree(workDir, projectRoot); - if (code === 0) { - resolve('Session completed'); - } else { - reject(new Error(`${cliConfig.command} exited with code ${code}`)); - } - }); - - proc.on('error', (err) => { - cleanupWorktree(workDir, projectRoot); - reject(err); - }); - }); - } - - // Background mode: run detached with log file (matches executeWithClaude pattern) - const logDir = join(projectRoot, '.agents', 'logs', squadName); - const logFile = join(logDir, `${agentName}-${timestamp}.log`); - const pidFile = join(logDir, `${agentName}-${timestamp}.pid`); - - if (!existsSync(logDir)) { - mkdirSync(logDir, { recursive: true }); - } - - const escapedPrompt = effectivePrompt.replace(/'/g, "'\\''"); - const providerArgs = cliConfig.buildArgs(escapedPrompt).map(a => `'${a}'`).join(' '); - const cleanupCmd = workDir !== projectRoot - ? 
`; git -C '${projectRoot}' worktree remove '${workDir}' --force 2>/dev/null; git -C '${projectRoot}' branch -D '${branchName}' 2>/dev/null` - : ''; - const shellScript = `cd '${workDir}' && ${cliConfig.command} ${providerArgs} > '${logFile}' 2>&1${cleanupCmd}`; - const wrapperScript = `echo $$ > '${pidFile}'; ${shellScript}`; - - const child = spawn('sh', ['-c', wrapperScript], { - cwd: workDir, - detached: true, - stdio: 'ignore', - env: providerEnv, - }); - - child.unref(); - - if (options.verbose) { - writeLine(` ${colors.dim}Log: ${logFile}${RESET}`); - writeLine(` ${colors.dim}PID file: ${pidFile}${RESET}`); - } - - return `Log: ${logFile}. Monitor: tail -f ${logFile}`; -} - export async function runSquadCommand( squadName: string, options: RunOptions diff --git a/src/commands/services.ts b/src/commands/services.ts new file mode 100644 index 00000000..a7ca1c11 --- /dev/null +++ b/src/commands/services.ts @@ -0,0 +1,184 @@ +/** + * squads services — manage Tier 2 local infrastructure. 
+ * + * squads services up Start Docker services, switch to local config + * squads services down Stop services, fall back to standalone + * squads services status Show running containers and health + */ + +import { Command } from 'commander'; +import { execSync } from 'child_process'; +import { existsSync } from 'fs'; +import { join } from 'path'; +import { detectTier } from '../lib/tier-detect.js'; +import { colors, bold, RESET, writeLine } from '../lib/terminal.js'; + +function exec(cmd: string, opts?: { cwd?: string }): string | null { + try { + return execSync(cmd, { encoding: 'utf-8', timeout: 30000, stdio: ['pipe', 'pipe', 'pipe'], ...opts }).trim(); + } catch { + return null; + } +} + +function findComposeFile(): string | null { + // Search for docker-compose in known locations + const home = process.env.HOME || ''; + const candidates = [ + join(home, 'agents-squads', 'engineering', 'docker', 'docker-compose.yml'), + join(home, 'agents-squads', 'engineering', 'docker', 'docker-compose.yaml'), + join(process.cwd(), '..', 'engineering', 'docker', 'docker-compose.yml'), + ]; + + for (const candidate of candidates) { + if (existsSync(candidate)) return candidate; + } + return null; +} + +function dockerAvailable(): boolean { + return exec('docker --version') !== null; +} + +function dockerComposeAvailable(): boolean { + return exec('docker compose version') !== null; +} + +export function registerServicesCommands(program: Command): void { + const services = program + .command('services') + .description('Manage Tier 2 local services (Postgres, Redis, API, Bridge)'); + + // ── services up ── + services + .command('up') + .description('Start local services (Docker required)') + .option('--webhooks', 'Also start ngrok tunnel for GitHub webhooks') + .option('--telemetry', 'Also start OpenTelemetry collector') + .action(async (opts) => { + if (!dockerAvailable()) { + writeLine(`\n ${colors.red}Docker not found.${RESET}`); + writeLine(` ${colors.dim}Install Docker 
Desktop: https://www.docker.com/products/docker-desktop${RESET}\n`); + return; + } + if (!dockerComposeAvailable()) { + writeLine(`\n ${colors.red}Docker Compose not found.${RESET}\n`); + return; + } + + const composeFile = findComposeFile(); + if (!composeFile) { + writeLine(`\n ${colors.red}docker-compose.yml not found.${RESET}`); + writeLine(` ${colors.dim}Expected at: ~/agents-squads/engineering/docker/docker-compose.yml${RESET}\n`); + return; + } + + const composeDir = join(composeFile, '..'); + writeLine(`\n ${bold}Starting Tier 2 services...${RESET}\n`); + + // Build profile args + let profileArgs = ''; + if (opts.webhooks) profileArgs += ' --profile webhooks'; + if (opts.telemetry) profileArgs += ' --profile telemetry'; + + try { + writeLine(` ${colors.dim}docker compose up -d${profileArgs}${RESET}`); + execSync(`docker compose${profileArgs} up -d`, { + cwd: composeDir, + stdio: 'inherit', + timeout: 120000, + }); + + writeLine(); + writeLine(` ${colors.green}Services started.${RESET} Waiting for health checks...`); + + // Wait for API to be healthy + let healthy = false; + for (let i = 0; i < 15; i++) { + await new Promise(r => setTimeout(r, 2000)); + const info = await detectTier(); + if (info.services.api) { + healthy = true; + break; + } + } + + if (healthy) { + writeLine(` ${colors.green}Tier 2 active.${RESET} All services healthy.\n`); + writeLine(` ${colors.dim}API: http://localhost:8090${RESET}`); + writeLine(` ${colors.dim}Bridge: http://localhost:8088${RESET}`); + writeLine(` ${colors.dim}Postgres: localhost:5432${RESET}`); + writeLine(` ${colors.dim}Redis: localhost:6379${RESET}`); + } else { + writeLine(` ${colors.yellow}Services started but API not healthy yet. Run 'squads services status' to check.${RESET}`); + } + writeLine(); + } catch (e) { + writeLine(`\n ${colors.red}Failed to start services: ${e instanceof Error ? 
e.message : String(e)}${RESET}\n`); + } + }); + + // ── services down ── + services + .command('down') + .description('Stop local services') + .action(() => { + const composeFile = findComposeFile(); + if (!composeFile) { + writeLine(`\n ${colors.dim}No docker-compose.yml found. Nothing to stop.${RESET}\n`); + return; + } + + const composeDir = join(composeFile, '..'); + writeLine(`\n ${bold}Stopping Tier 2 services...${RESET}\n`); + + try { + execSync('docker compose down', { + cwd: composeDir, + stdio: 'inherit', + timeout: 60000, + }); + writeLine(`\n ${colors.dim}Services stopped. Falling back to Tier 1 (file-based).${RESET}\n`); + } catch (e) { + writeLine(`\n ${colors.red}Failed to stop services: ${e instanceof Error ? e.message : String(e)}${RESET}\n`); + } + }); + + // ── services status ── + services + .command('status') + .description('Show running services and health') + .action(async () => { + const info = await detectTier(); + + writeLine(); + writeLine(` ${bold}Services${RESET} (Tier ${info.tier})\n`); + + const containers = exec('docker ps --filter name=squads --format "{{.Names}}\\t{{.Status}}\\t{{.Ports}}"'); + if (!containers) { + writeLine(` ${colors.dim}No Docker containers running.${RESET}\n`); + return; + } + + for (const line of containers.split('\n').filter(Boolean)) { + const [name, status, ports] = line.split('\t'); + const healthy = status?.includes('healthy') || status?.includes('Up'); + const icon = healthy ? `${colors.green}up${RESET}` : `${colors.red}down${RESET}`; + const portStr = ports ? 
` ${colors.dim}${ports.split(',')[0]}${RESET}` : ''; + writeLine(` ${icon} ${bold}${name}${RESET}${portStr}`); + } + + writeLine(); + + // Show DB stats + const jobCount = exec("docker exec squads-postgres psql -U squads -d squads -t -c 'SELECT count(*) FROM procrastinate_jobs;'"); + const execCount = exec("docker exec squads-postgres psql -U squads -d squads -t -c 'SELECT count(*) FROM agent_executions;'"); + + if (jobCount || execCount) { + writeLine(` ${colors.cyan}Database${RESET}`); + if (jobCount) writeLine(` Procrastinate jobs: ${jobCount.trim()}`); + if (execCount) writeLine(` Agent executions: ${execCount.trim()}`); + writeLine(); + } + }); +} diff --git a/src/commands/tier.ts b/src/commands/tier.ts new file mode 100644 index 00000000..d5ec337b --- /dev/null +++ b/src/commands/tier.ts @@ -0,0 +1,81 @@ +/** + * squads tier — show active infrastructure tier and available services. + */ + +import { Command } from 'commander'; +import { detectTier } from '../lib/tier-detect.js'; +import { queryExecutions } from '../lib/observability.js'; +import { loadCatalog } from '../lib/idp/catalog-loader.js'; +import { findIdpDir } from '../lib/idp/resolver.js'; +import { findMemoryDir } from '../lib/memory.js'; +import { findSquadsDir } from '../lib/squad-parser.js'; +import { colors, bold, RESET, writeLine } from '../lib/terminal.js'; +import { existsSync, readdirSync } from 'fs'; + +export function registerTierCommand(program: Command): void { + program + .command('tier') + .description('Show active infrastructure tier and available services') + .option('--json', 'Output as JSON') + .action(async (opts) => { + const info = await detectTier(); + + if (opts.json) { + console.log(JSON.stringify(info, null, 2)); + return; + } + + writeLine(); + if (info.tier === 1) { + writeLine(` ${bold}Tier 1${RESET} ${colors.dim}(file-based)${RESET}`); + } else { + writeLine(` ${bold}Tier 2${RESET} ${colors.green}(local services)${RESET}`); + } + writeLine(); + + // Data sources + 
writeLine(` ${colors.cyan}Data${RESET}`); + const executions = queryExecutions({ limit: 1000 }); + writeLine(` Observability: ${executions.length} executions logged`); + + const squadsDir = findSquadsDir(); + if (squadsDir) { + const squads = readdirSync(squadsDir).filter(f => { + try { return existsSync(`${squadsDir}/${f}/SQUAD.md`); } catch { return false; } + }); + writeLine(` Squads: ${squads.length} defined`); + } + + const memoryDir = findMemoryDir(); + if (memoryDir) { + writeLine(` Memory: ${memoryDir}`); + } + + const idpDir = findIdpDir(); + if (idpDir) { + const catalog = loadCatalog(); + writeLine(` IDP: ${catalog.length} catalog entries`); + } else { + writeLine(` IDP: not configured`); + } + writeLine(); + + // Services + writeLine(` ${colors.cyan}Services${RESET}`); + const svc = info.services; + const icon = (ok: boolean) => ok ? `${colors.green}up${RESET}` : `${colors.dim}—${RESET}`; + writeLine(` API: ${icon(svc.api)}${svc.api ? ` ${info.urls.api}` : ''}`); + writeLine(` Bridge: ${icon(svc.bridge)}${svc.bridge ? ` ${info.urls.bridge}` : ''}`); + writeLine(` Postgres: ${icon(svc.postgres)}`); + writeLine(` Redis: ${icon(svc.redis)}`); + writeLine(); + + if (info.tier === 1) { + writeLine(` ${colors.dim}Upgrade: run 'squads services up' for Tier 2${RESET}`); + writeLine(` ${colors.dim}(smart triggers, Postgres, webhooks, budget enforcement)${RESET}`); + } else { + writeLine(` ${colors.dim}All local services healthy. Data syncs to Postgres.${RESET}`); + } + writeLine(); + }); +} diff --git a/src/lib/agent-runner.ts b/src/lib/agent-runner.ts new file mode 100644 index 00000000..df2f62de --- /dev/null +++ b/src/lib/agent-runner.ts @@ -0,0 +1,446 @@ +/** + * Agent runner: the core function that prepares and dispatches a single agent. + * Extracted from commands/run.ts to reduce its size. 
// ── Operational constants (no magic numbers) ──────────────────────────

/** Max characters of the agent definition echoed by a verbose dry run. */
export const DRYRUN_DEF_MAX_CHARS = 500;

// Parsed once at module load; a non-numeric override would otherwise become NaN
// and `slice(0, NaN)` would print an empty preview.
const parsedDryRunContextMaxChars = Number.parseInt(process.env.SQUADS_DRYRUN_MAX_CHARS || '800', 10);

/**
 * Max characters of injected context echoed by a verbose dry run.
 * Overridable with the SQUADS_DRYRUN_MAX_CHARS environment variable; falls back
 * to 800 when the override is not a number.
 */
export const DRYRUN_CONTEXT_MAX_CHARS = Number.isNaN(parsedDryRunContextMaxChars)
  ? 800
  : parsedDryRunContextMaxChars;

/**
 * Prepare and dispatch a single agent run.
 *
 * Steps, in order:
 *  1. repo-layout enforcement (`checkAndReport`),
 *  2. fetch learnings, and — for `options.dryRun` — print a preview and stop,
 *  3. permission validation against the squad's SQUAD.md,
 *  4. preflight gates (quota / cooldown) plus a local cooldown fallback for
 *     scheduled or smart triggers,
 *  5. log the execution start,
 *  6. assemble the prompt (system protocol, squad context, cognition beliefs,
 *     learnings, optional task directive),
 *  7. resolve the provider and, when `options.execute` is set and the provider
 *     CLI is available, execute — retrying against `acceptance_criteria` from
 *     the agent frontmatter when present; otherwise print manual instructions.
 *
 * Every blocked path reports to the terminal and returns; the function does
 * not throw for expected failures (execution errors are caught and reported).
 *
 * @param agentName - Agent identifier (also used to detect the task type).
 * @param agentPath - Path to the agent definition file.
 * @param squadName - Squad the agent belongs to.
 * @param options - Run options; `execute` enables actual CLI execution.
 */
export async function runAgent(
  agentName: string,
  agentPath: string,
  squadName: string,
  options: RunOptions & { execute?: boolean }
): Promise<void> {
  const spinner = ora(`Running agent: ${agentName}`).start();
  const startMs = Date.now();
  const startTime = new Date(startMs).toISOString();
  const executionId = generateExecutionId();
  const taskType = detectTaskType(agentName);

  const definition = loadAgentDefinition(agentPath);

  // Enforce repo layout before execution
  const { checkAndReport } = await import('./repo-enforcement.js');
  if (!checkAndReport(squadName, { verbose: options.verbose })) {
    spinner.fail(`Repo enforcement failed for ${squadName} — fix errors above before running`);
    return;
  }

  // Fetch learnings from bridge (needed for both dry-run preview and real execution)
  const learnings = await fetchLearnings(squadName);
  const learningContext = learnings.length > 0
    ? `\n## Learnings from Previous Runs\n${learnings.map(l => `- ${l.content}`).join('\n')}\n`
    : '';

  if (options.dryRun) {
    spinner.info(`[DRY RUN] Would run ${agentName}`);
    // Show context that would be injected (with role-based gating)
    const dryRunContextRole: ContextRole = agentName.includes('company-lead')
      ? 'coo'
      : resolveContextRoleFromAgent(agentPath, agentName);
    const dryRunContext = gatherSquadContext(squadName, agentName, {
      verbose: options.verbose, agentPath, role: dryRunContextRole
    });
    if (options.verbose) {
      writeLine(` ${colors.dim}Agent definition:${RESET}`);
      writeLine(` ${colors.dim}${definition.slice(0, DRYRUN_DEF_MAX_CHARS)}...${RESET}`);
      if (learnings.length > 0) {
        writeLine(` ${colors.dim}Learnings: ${learnings.length} from bridge${RESET}`);
      }
      if (dryRunContext || learningContext) {
        const fullContext = `${dryRunContext}${learningContext}`;
        writeLine();
        // Rough token estimate: ~4 characters per token.
        writeLine(` ${colors.cyan}Context to inject (${Math.ceil(fullContext.length / 4)} tokens):${RESET}`);
        writeLine(` ${colors.dim}${fullContext.slice(0, DRYRUN_CONTEXT_MAX_CHARS)}...${RESET}`);
      }
    }
    return;
  }

  // Pre-execution permission validation (Phase 3)
  const squadsDir = findSquadsDir();
  if (squadsDir) {
    const squadFilePath = join(squadsDir, squadName, 'SQUAD.md');
    if (existsSync(squadFilePath)) {
      const squadContent = readFileSync(squadFilePath, 'utf-8');
      const permContext = buildContextFromSquad(squadName, squadContent, agentName);

      // Build execution request from agent definition
      // For now, we validate MCP servers mentioned in the agent definition
      const mcpServers = extractMcpServersFromDefinition(definition);
      const execRequest: ExecutionRequest = {
        mcpServers
      };

      const permResult = validateExecution(permContext, execRequest);

      if (permResult.violations.length > 0) {
        spinner.stop();
        const violationLines = formatViolations(permResult);
        for (const line of violationLines) {
          writeLine(` ${line}`);
        }
        writeLine();

        // Violations may be warnings only; block solely when validation says so.
        if (!permResult.allowed) {
          writeLine(` ${colors.red}Execution blocked due to permission violations.${RESET}`);
          writeLine(` ${colors.dim}Configure permissions in ${squadFilePath}${RESET}`);
          return;
        }
      }
    }
  }

  // Preflight gate check (quota, cooldown) via bridge API
  const preflight = await checkPreflightGates(squadName, agentName);

  if (!preflight.allowed) {
    spinner.stop();
    writeLine();
    writeLine(` ${colors.red}${icons.error} Execution blocked by preflight gates${RESET}`);

    if (preflight.gates.quota && !preflight.gates.quota.ok) {
      writeLine(` ${colors.dim}Quota: $${preflight.gates.quota.used.toFixed(2)}/$${preflight.gates.quota.limit}/mo limit exceeded${RESET}`);
    }

    if (preflight.gates.cooldown && !preflight.gates.cooldown.ok) {
      const elapsed = preflight.gates.cooldown.elapsed_sec;
      const minGap = preflight.gates.cooldown.min_gap_sec;
      writeLine(` ${colors.dim}Cooldown: ${elapsed}s since last run (min: ${minGap}s)${RESET}`);
    }

    writeLine();
    return;
  }

  // Show preflight status in verbose mode
  if (options.verbose && Object.keys(preflight.gates).length > 0) {
    writeLine(` ${colors.dim}Preflight: quota ${preflight.gates.quota?.ok ? '✓' : '✗'} cooldown ${preflight.gates.cooldown?.ok ? '✓' : '✗'}${RESET}`);
  }

  // Local cooldown check (when bridge is unavailable or has no execution history)
  // Skip for manual triggers - only enforce for scheduled/cron runs
  const isScheduledRun = options.trigger === 'scheduled' || options.trigger === 'smart';
  const bridgeHasNoHistory = preflight.gates.cooldown?.elapsed_sec === null;
  if (isScheduledRun && (!preflight.gates.cooldown || bridgeHasNoHistory)) {
    // Read cooldown from agent frontmatter, fall back to default
    const frontmatterForCooldown = parseAgentFrontmatter(agentPath);
    const cooldownMs = frontmatterForCooldown.cooldown
      ? (parseCooldown(frontmatterForCooldown.cooldown) || DEFAULT_SCHEDULED_COOLDOWN_MS)
      : DEFAULT_SCHEDULED_COOLDOWN_MS;
    const localCooldown = checkLocalCooldown(squadName, agentName, cooldownMs);

    if (!localCooldown.ok) {
      spinner.stop();
      writeLine();
      writeLine(` ${colors.yellow}${icons.warning} Skipping: cooldown not elapsed${RESET}`);
      writeLine(` ${colors.dim}Last run: ${formatDuration(localCooldown.elapsedMs!)} ago (cooldown: ${formatDuration(localCooldown.cooldownMs)})${RESET}`);
      writeLine();
      return;
    }

    if (options.verbose) {
      writeLine(` ${colors.dim}Local cooldown: ✓ (${formatDuration(localCooldown.elapsedMs || 0)} since last run)${RESET}`);
    }
  }

  // Log execution start
  logExecution({
    squadName,
    agentName,
    executionId,
    startTime,
    status: 'running',
    trigger: options.trigger || 'manual',
    taskType,
  });

  if (options.verbose && learnings.length > 0) {
    writeLine(` ${colors.dim}Injecting ${learnings.length} learnings${RESET}`);
  }

  // Load system protocol (SYSTEM.md, replaces legacy approval + post-execution)
  const systemProtocol = loadSystemProtocol();
  const systemContext = systemProtocol ? `\n${systemProtocol}\n` : '';

  // Derive context role from the agent's own YAML frontmatter `role:` free-text.
  // Company COO override remains explicit.
  const contextRole: ContextRole = agentName.includes('company-lead')
    ? 'coo'
    : resolveContextRoleFromAgent(agentPath, agentName);

  // Gather squad context (role-based: scanners get minimal, leads get everything)
  const squadContext = gatherSquadContext(squadName, agentName, {
    verbose: options.verbose, agentPath, role: contextRole
  });

  // Fetch cognition beliefs for prompt injection (Reflexion pattern)
  // Only attempts when API is available (Tier 2). Silent skip in Tier 1.
  let cognitionContext = '';
  const apiUrl = getApiUrl();
  if (apiUrl) {
    try {
      const session = loadSession();
      if (session?.accessToken && session.status === 'active') {
        const safeSquadName = encodeURIComponent(squadName);
        const res = await fetch(`${apiUrl}/cognition/context/squad:${safeSquadName}`, {
          headers: { Authorization: `Bearer ${session.accessToken}` },
          signal: AbortSignal.timeout(3000),
        });
        if (res.ok) {
          const data = await res.json() as { markdown: string };
          if (data.markdown && !data.markdown.includes('No cognition data')) {
            cognitionContext = `\n${data.markdown}\n`;
            if (options.verbose) {
              writeLine(` ${colors.dim}Injecting cognition beliefs${RESET}`);
            }
          }
        }
      }
    } catch {
      // Silent — API not available or auth not configured
    }
  }

  // Generate the Claude Code prompt with timeout awareness.
  // The same value is announced in the prompt and handed to the executor below,
  // so the agent is never told a limit that differs from the enforced one.
  const timeoutMins = options.timeout || DEFAULT_TIMEOUT_MINUTES;
  const taskDirective = options.task
    ? `\n## TASK DIRECTIVE (overrides default behavior)\n${options.task}\n`
    : '';
  const prompt = `You are ${agentName} from squad ${squadName}.
${taskDirective}
Your full context follows — read it top-to-bottom. Each layer builds on the previous:
- SYSTEM.md: how the system works (already loaded)
- Company: who we are and why
- Priorities: where to focus now
- Goals: what to achieve (measurable targets)
- Agent: your specific role and instructions
- State: where you left off
${systemContext}${squadContext}${cognitionContext}${learningContext}
TIME LIMIT: ${timeoutMins} minutes. Focus on priorities first. If blocked, note it in state.md and move on.`;

  // Resolve provider with full chain:
  // 1. Agent config (from agent file frontmatter/header)
  // 2. CLI option (--provider flag)
  // 3. Squad default (from SQUAD.md providers.default)
  // 4. Fallback to 'anthropic'
  const agentProvider = parseAgentProvider(agentPath);
  const squad = loadSquad(squadName);
  const squadDefaultProvider = squad?.providers?.default;

  const provider = agentProvider || options.provider || squadDefaultProvider || 'anthropic';
  const isAnthropic = provider === 'anthropic';

  // Trace the resolution whenever any explicit source contributed (including --provider alone).
  if (options.verbose && (agentProvider || options.provider || squadDefaultProvider)) {
    writeLine(` ${colors.dim}Provider resolution:${RESET}`);
    if (agentProvider) writeLine(` ${colors.dim}Agent: ${agentProvider}${RESET}`);
    if (options.provider) writeLine(` ${colors.dim}CLI: ${options.provider}${RESET}`);
    if (squadDefaultProvider) writeLine(` ${colors.dim}Squad: ${squadDefaultProvider}${RESET}`);
    writeLine(` ${colors.dim}→ Using: ${provider}${RESET}`);
  }

  // Check CLI availability
  const cliAvailable = isAnthropic
    ? await checkClaudeCliAvailable()
    : isProviderCLIAvailable(provider);

  if (options.execute && cliAvailable) {
    const cliConfig = getCLIConfig(provider);
    const cliName = cliConfig?.displayName || provider;

    // Determine execution mode (foreground is default, background is opt-in)
    const isBackground = options.background === true && !options.watch;
    const isWatch = options.watch === true;
    const isForeground = !isBackground && !isWatch;

    spinner.text = isBackground
      ? `Launching ${agentName} with ${cliName} in background...`
      : isWatch
        ? `Starting ${agentName} with ${cliName} (watch mode)...`
        : `Running ${agentName} with ${cliName}...`;

    // Parse frontmatter for verification criteria (Ralph loop)
    const frontmatter = parseAgentFrontmatter(agentPath);
    const hasCriteria = !!frontmatter.acceptance_criteria && options.verify !== false;
    const maxRetries = frontmatter.max_retries ?? 2;
    let currentPrompt = prompt;

    // Without acceptance criteria the loop body runs exactly once.
    for (let attempt = 0; attempt <= (hasCriteria ? maxRetries : 0); attempt++) {
      try {
        let result: string;

        if (isAnthropic) {
          result = await executeWithClaude(currentPrompt, {
            verbose: options.verbose,
            timeoutMinutes: timeoutMins,
            foreground: options.foreground,
            background: options.background,
            watch: options.watch,
            useApi: options.useApi,
            effort: options.effort,
            skills: options.skills,
            trigger: options.trigger || 'manual',
            squadName,
            agentName,
            model: options.model,
          });
        } else {
          result = await executeWithProvider(provider, currentPrompt, {
            verbose: options.verbose,
            foreground: !isBackground,
            squadName,
            agentName,
          });
        }

        // Ralph loop: verify foreground execution against acceptance criteria
        if (hasCriteria && (isForeground || isWatch)) {
          const verification = await verifyExecution(
            squadName, agentName, frontmatter.acceptance_criteria!, { verbose: options.verbose }
          );
          if (verification.passed) {
            writeLine(` ${colors.green}Verification: PASS - ${verification.reason}${RESET}`);
          } else if (attempt < maxRetries) {
            writeLine(` ${colors.yellow}Verification: FAIL - ${verification.reason}${RESET}`);
            writeLine(` ${colors.dim}Retrying (${attempt + 1}/${maxRetries})...${RESET}`);
            currentPrompt = `${prompt}\n\n## PREVIOUS ATTEMPT FAILED\nVerification found: ${verification.reason}\nPlease address this issue and try again.`;
            continue;
          } else {
            // Retries exhausted: surface the failed verdict instead of silently reporting success.
            writeLine(` ${colors.yellow}Verification: FAIL - ${verification.reason} (retries exhausted)${RESET}`);
          }
        }

        // Emit completion event (non-blocking)
        emitExecutionEvent('agent.completed', {
          squad: squadName, agent: agentName, executionId,
        }).catch(() => {});

        if (isForeground || isWatch) {
          spinner.succeed(`Agent ${agentName} completed (${cliName})`);
        } else {
          spinner.succeed(`Agent ${agentName} launched in background (${cliName})`);
          writeLine(` ${colors.dim}${result}${RESET}`);
          writeLine();
          writeLine(` ${colors.dim}Monitor:${RESET} squads workers`);
          writeLine(` ${colors.dim}Memory:${RESET} squads memory show ${squadName}`);
        }
        break; // Success — exit retry loop
      } catch (error) {
        // Emit failure event (non-blocking)
        emitExecutionEvent('agent.failed', {
          squad: squadName, agent: agentName, executionId, error: String(error),
        }).catch(() => {});

        spinner.fail(`Agent ${agentName} failed to launch`);
        updateExecutionStatus(squadName, agentName, executionId, 'failed', {
          error: String(error),
          durationMs: Date.now() - startMs,
        });
        const msg = error instanceof Error ? error.message : String(error);
        // Programming errors get a "report a bug" hint; everything else gets setup hints.
        const isLikelyBug = error instanceof ReferenceError || error instanceof TypeError || error instanceof SyntaxError;
        writeLine(` ${colors.red}${msg}${RESET}`);
        writeLine();
        if (isLikelyBug) {
          writeLine(` ${colors.yellow}This looks like a bug. Please try:${RESET}`);
          writeLine(` ${colors.dim}$${RESET} squads doctor ${colors.dim}— check your setup${RESET}`);
          writeLine(` ${colors.dim}$${RESET} squads update ${colors.dim}— get the latest fixes${RESET}`);
          writeLine();
          writeLine(` ${colors.dim}If the problem persists, file an issue:${RESET}`);
          writeLine(` ${colors.dim}https://github.com/agents-squads/squads-cli/issues${RESET}`);
        } else {
          writeLine(` ${colors.dim}Run \`squads doctor\` to check your setup, or \`squads run ${agentName} --verbose\` for details.${RESET}`);
        }
        break; // Error — exit retry loop
      }
    }
  } else {
    // Show instructions for manual execution
    spinner.succeed(`Agent ${agentName} ready`);
    writeLine(` ${colors.dim}Execution logged: ${startTime}${RESET}`);

    if (!cliAvailable) {
      const cliConfig = getCLIConfig(provider);
      writeLine();
      writeLine(` ${colors.yellow}${cliConfig?.command || provider} CLI not found${RESET}`);
      writeLine(` ${colors.dim}Install: ${cliConfig?.install || 'squads providers'}${RESET}`);
    }

    writeLine();
    writeLine(` ${colors.dim}To launch as background task:${RESET}`);
    writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squadName}${RESET} -a ${colors.cyan}${agentName}${RESET}`);
    if (provider !== 'anthropic') {
      writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squadName}${RESET} -a ${colors.cyan}${agentName}${RESET} --provider=${provider}`);
    }
    writeLine();
    writeLine(` ${colors.dim}Or run interactively:${RESET}`);
    writeLine(` ${colors.dim}$${RESET} Run the ${colors.cyan}${agentName}${RESET} agent from ${agentPath}`);
  }
}
/** Delay between two status polls. */
const CLOUD_POLL_INTERVAL_MS = 3000;
/** Upper bound on the whole polling phase. */
const CLOUD_POLL_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes max poll
/** Per-request timeout so a single hung HTTP call cannot outlive the poll budget. */
const CLOUD_REQUEST_TIMEOUT_MS = 15000;

/**
 * Dispatch agent execution to cloud worker via API.
 * Posts to /agent-dispatch, then polls /agent-executions for status.
 *
 * Terminates the process with exit code 1 when the API URL is not configured,
 * the user is not logged in, the dispatch request fails, or the execution
 * reports `failed`. Returns normally on `completed`, `cancelled`, or when the
 * poll budget (CLOUD_POLL_TIMEOUT_MS) runs out.
 *
 * @param squadName - Squad to dispatch.
 * @param agentName - Agent within the squad.
 * @param options - Run options; `model`, `provider` and `effort` are forwarded
 *   in the dispatch payload, `verbose` enables poll warnings.
 */
export async function runCloudDispatch(
  squadName: string,
  agentName: string,
  options: RunOptions
): Promise<void> {
  const apiUrl = getApiUrl();

  if (!apiUrl) {
    writeLine(` ${colors.red}${icons.error} API URL not configured${RESET}`);
    writeLine(` ${colors.dim}Run: squads config use staging (or set SQUADS_API_URL)${RESET}`);
    process.exit(1);
  }

  // Require auth session
  if (!isLoggedIn()) {
    writeLine(` ${colors.red}${icons.error} Not logged in${RESET}`);
    writeLine(` ${colors.dim}Run \`squads login\` to authenticate before using --cloud${RESET}`);
    process.exit(1);
  }

  const session = loadSession();
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  };

  // Send the session access token when present, and additionally an API key
  // when one is configured in the environment (both headers may be set).
  if (session?.accessToken) {
    headers['Authorization'] = `Bearer ${session.accessToken}`;
  }

  const apiKey = process.env.SQUADS_PLATFORM_API_TOKEN || process.env.SCHEDULER_API_KEY;
  if (apiKey) {
    headers['X-API-Key'] = apiKey;
  }

  const spinner = ora(`Dispatching ${squadName}/${agentName} to cloud...`).start();

  try {
    // 1. Create dispatch request
    const dispatchRes = await fetch(`${apiUrl}/agent-dispatch`, {
      method: 'POST',
      headers,
      signal: AbortSignal.timeout(CLOUD_REQUEST_TIMEOUT_MS),
      body: JSON.stringify({
        squad: squadName,
        agent: agentName,
        trigger_type: 'manual',
        trigger_data: {
          source: 'cli',
          cloud: true,
          model: options.model,
          provider: options.provider,
          effort: options.effort,
        },
      }),
    });

    if (!dispatchRes.ok) {
      const error = await dispatchRes.text();
      spinner.fail(`Dispatch failed: ${dispatchRes.status}`);
      writeLine(` ${colors.dim}${error}${RESET}`);
      process.exit(1);
    }

    const dispatch = await dispatchRes.json() as { dispatch_id: number; status: string };
    spinner.succeed(`Dispatched to cloud`);

    writeLine();
    writeLine(` ${colors.cyan}Dispatch ID${RESET} ${dispatch.dispatch_id}`);
    writeLine(` ${colors.cyan}Squad${RESET} ${squadName}`);
    writeLine(` ${colors.cyan}Agent${RESET} ${agentName}`);
    writeLine();

    // 2. Poll for execution status
    const pollSpinner = ora('Waiting for execution to start...').start();
    const pollStart = Date.now();
    let executionId: string | null = null;
    let lastStatus = '';

    while (Date.now() - pollStart < CLOUD_POLL_TIMEOUT_MS) {
      try {
        const execRes = await fetch(
          `${apiUrl}/agent-executions?squad=${encodeURIComponent(squadName)}&agent=${encodeURIComponent(agentName)}&limit=1`,
          { headers, signal: AbortSignal.timeout(CLOUD_REQUEST_TIMEOUT_MS) },
        );

        if (execRes.ok) {
          const executions = await execRes.json() as Array<{
            execution_id: string;
            status: string;
            summary?: string;
            error?: string;
            duration_seconds?: number;
            cost_usd?: number;
          }>;

          if (executions.length > 0) {
            const exec = executions[0];

            // Latch onto the first execution seen in `running` state.
            // NOTE(review): this does not verify that the execution was started by
            // *this* dispatch — a run already in flight for the same squad/agent
            // would be tracked instead. Confirm whether the API exposes a
            // dispatch id or start timestamp that could be matched here.
            if (!executionId && exec.status === 'running') {
              executionId = exec.execution_id;
              pollSpinner.text = `Running (${exec.execution_id})`;
            }

            if (executionId && exec.execution_id === executionId) {
              if (exec.status !== lastStatus) {
                lastStatus = exec.status;
                pollSpinner.text = `Status: ${exec.status}`;
              }

              if (exec.status === 'completed') {
                pollSpinner.succeed('Execution completed');
                writeLine();
                writeLine(` ${colors.cyan}Execution${RESET} ${exec.execution_id}`);
                if (exec.summary) {
                  writeLine(` ${colors.cyan}Summary${RESET} ${exec.summary}`);
                }
                // `!= null` rather than truthiness so a legitimate 0 is still shown.
                if (exec.duration_seconds != null) {
                  writeLine(` ${colors.cyan}Duration${RESET} ${Math.round(exec.duration_seconds)}s`);
                }
                if (exec.cost_usd != null) {
                  writeLine(` ${colors.cyan}Cost${RESET} $${exec.cost_usd.toFixed(4)}`);
                }
                writeLine();
                return;
              }

              if (exec.status === 'failed') {
                pollSpinner.fail('Execution failed');
                writeLine();
                if (exec.error) {
                  writeLine(` ${colors.red}Error: ${exec.error}${RESET}`);
                }
                writeLine();
                process.exit(1);
              }

              if (exec.status === 'cancelled') {
                pollSpinner.warn('Execution cancelled');
                return;
              }
            }
          }
        } else if (options.verbose) {
          // A persistent 401/5xx would otherwise be invisible for the whole poll window.
          writeLine(` ${colors.dim}warn: cloud poll returned HTTP ${execRes.status} (retrying)${RESET}`);
        }
      } catch (e) {
        // Network errors and per-request timeouts are transient: keep polling.
        if (options.verbose) writeLine(` ${colors.dim}warn: cloud poll failed (retrying): ${e instanceof Error ? e.message : String(e)}${RESET}`);
      }

      await new Promise(resolve => setTimeout(resolve, CLOUD_POLL_INTERVAL_MS));
    }

    pollSpinner.warn('Poll timeout — execution may still be running');
    writeLine(` ${colors.dim}Check status: squads trigger status${RESET}`);
    if (executionId) {
      writeLine(` ${colors.dim}Execution ID: ${executionId}${RESET}`);
    }
  } catch (error) {
    spinner.fail('Cloud dispatch failed');
    writeLine(` ${colors.red}${error instanceof Error ? error.message : String(error)}${RESET}`);
    writeLine();
    writeLine(` ${colors.dim}Check your network and SQUADS_API_URL setting${RESET}`);
    process.exit(1);
  }
}
+ */ + +import { spawn, execSync } from 'child_process'; +import { join } from 'path'; +import { existsSync, readFileSync, writeFileSync, mkdirSync, cpSync, unlinkSync } from 'fs'; +import { + loadSquad, + type EffortLevel, + type Squad, +} from './squad-parser.js'; +import { + type ExecutionContext, +} from './run-types.js'; +import { + selectMcpConfig, + detectTaskType, + getClaudeModelAlias, + resolveModel, + ensureProjectTrusted, + getProjectRoot, + generateExecutionId, + checkClaudeCliAvailable, +} from './run-utils.js'; +import { + registerContextWithBridge, + updateExecutionStatus, +} from './execution-log.js'; +import { logObservability, captureSessionUsage, snapshotGoals, diffGoals, type ObservabilityRecord } from './observability.js'; +import { findMemoryDir } from './memory.js'; +import { detectProviderFromModel } from './providers.js'; +import { getBridgeUrl } from './env-config.js'; +import { getBotGitEnv, getBotPushUrl, getCoAuthorTrailer, getBotGhEnv } from './github.js'; +import { + colors, + RESET, + icons, + writeLine, +} from './terminal.js'; +import { + getCLIConfig, + isProviderCLIAvailable, +} from './llm-clis.js'; + +// ── Operational constants (no magic numbers) ────────────────────────── +export const VERIFICATION_STATE_MAX_CHARS = 2000; +export const VERIFICATION_EXEC_TIMEOUT_MS = 30000; +export const LOG_FILE_INIT_DELAY_MS = 500; +export const VERBOSE_COMMAND_MAX_CHARS = 50; + +// ── Interfaces ──────────────────────────────────────────────────────── + +export interface ExecuteWithClaudeOptions { + verbose?: boolean; + timeoutMinutes?: number; + foreground?: boolean; // Deprecated, now default + background?: boolean; // Opt-in background mode + watch?: boolean; // Background but tail log + useApi?: boolean; + effort?: EffortLevel; + skills?: string[]; + trigger?: ExecutionContext['trigger']; + squadName: string; + agentName: string; + model?: string; // Model to use (Claude aliases or full model IDs like gemini-2.5-flash) +} + +// ── 
// ── Auto-commit ──────────────────────────────────────────────────────

/**
 * Auto-commit agent work after execution completes.
 * Commits as the Agents Squads bot (if configured), pushes with bot token.
 * Falls back to user's git identity if bot not configured.
 *
 * The commit message is piped to `git commit --file -` on stdin, so no
 * temporary file is needed (the previous `<root>/.git/SQUADS_COMMIT_MSG` path
 * does not exist when `.git` is a file, as in linked worktrees) and squad/agent
 * names never pass through a shell.
 *
 * @param squadName - Squad name, used in the commit subject.
 * @param agentName - Agent name, used in the commit subject.
 * @param executionId - Execution id; its first 12 characters go in the subject.
 * @param provider - Provider used for the co-author trailer (defaults to 'claude').
 * @returns `committed: false` when the tree is clean or the commit failed
 *   (with `error` set in the latter case); `committed: true` once the commit
 *   exists locally, even if the subsequent push fails.
 */
export async function autoCommitAgentWork(
  squadName: string,
  agentName: string,
  executionId: string,
  provider?: string,
): Promise<{ committed: boolean; message?: string; error?: string }> {
  const { execSync, spawnSync } = await import('child_process');
  const { detectGitHubRepo } = await import('./github.js');
  const projectRoot = getProjectRoot();

  try {
    // Check for uncommitted changes
    const status = execSync('git status --porcelain', {
      encoding: 'utf-8',
      cwd: projectRoot,
    }).trim();

    if (!status) {
      return { committed: false };
    }

    // Get bot identity for commits
    const botEnv = await getBotGitEnv();
    const execOpts = {
      cwd: projectRoot,
      env: { ...process.env, ...botEnv },
    };

    // Stage all changes (agent work should be committed)
    execSync('git add -A', execOpts);

    // Build commit message with provider-specific co-author
    const shortExecId = executionId.slice(0, 12);
    const coAuthor = getCoAuthorTrailer(provider || 'claude');
    const commitMessage = `feat(${squadName}/${agentName}): execution ${shortExecId}\n\n${coAuthor}\n`;

    // Commit with an argument array and the message on stdin: no shell, no temp file.
    const commit = spawnSync('git', ['commit', '--file', '-'], {
      ...execOpts,
      input: commitMessage,
      encoding: 'utf-8',
    });
    if (commit.error) throw commit.error;
    if (commit.status !== 0) {
      throw new Error(`git commit exited with code ${commit.status}: ${(commit.stderr || '').trim()}`);
    }

    // Push to origin using bot token
    try {
      const repo = detectGitHubRepo(projectRoot);
      // Validate repo format (org/name) to prevent injection
      const pushUrl = repo && /^[\w.-]+\/[\w.-]+$/.test(repo)
        ? await getBotPushUrl(repo)
        : null;

      // Use spawnSync with args array to avoid shell injection
      const push = spawnSync('git', ['push', pushUrl || 'origin', 'HEAD'], { ...execOpts, stdio: 'pipe' });

      // spawnSync reports failure through its result instead of throwing, so it
      // must be inspected explicitly. The message deliberately omits the push
      // URL and git's stderr, either of which could contain the bot token.
      if (push.error) throw push.error;
      if (push.status !== 0) {
        throw new Error(`git push exited with code ${push.status}`);
      }
    } catch (e) {
      writeLine(` ${colors.dim}warn: git push failed (commit is still local): ${e instanceof Error ? e.message : String(e)}${RESET}`);
    }

    return { committed: true, message: `Committed changes from ${agentName}` };
  } catch (error) {
    return { committed: false, error: String(error) };
  }
}

// ── Verification ─────────────────────────────────────────────────────

/**
 * Verify execution against acceptance criteria using a lightweight model.
 * Returns pass/fail with reason. Used by the Ralph verification loop.
 *
 * Evidence handed to the model: the first VERIFICATION_STATE_MAX_CHARS
 * characters of the agent's `state.md` (when found) and the last five commit
 * subjects. If the `claude` CLI cannot be run (missing binary, timeout,
 * non-zero exit), verification is treated as unavailable and reported as a pass.
 *
 * @param squadName - Squad owning the agent.
 * @param agentName - Agent whose work is verified.
 * @param criteria - Acceptance criteria text, inserted verbatim into the prompt.
 * @param options - `verbose` prints the raw verdict and non-fatal warnings.
 */
export async function verifyExecution(
  squadName: string,
  agentName: string,
  criteria: string,
  options: { verbose?: boolean } = {}
): Promise<{ passed: boolean; reason: string }> {
  const { execSync, execFileSync } = await import('child_process');
  const projectRoot = getProjectRoot();

  // Gather evidence: state file + recent commits
  let stateContent = '';
  const memDir = findMemoryDir();
  if (memDir) {
    const statePath = join(memDir, squadName, agentName, 'state.md');
    if (existsSync(statePath)) {
      stateContent = readFileSync(statePath, 'utf-8').slice(0, VERIFICATION_STATE_MAX_CHARS);
    }
  }

  let recentCommits = '';
  try {
    recentCommits = execSync('git log --oneline -5 --no-color', {
      encoding: 'utf-8',
      cwd: projectRoot,
    }).trim();
  } catch (e) {
    if (options.verbose) writeLine(` ${colors.dim}warn: git log failed: ${e instanceof Error ? e.message : String(e)}${RESET}`);
    recentCommits = '(no commits found)';
  }

  const verifyPrompt = `You are verifying whether an agent completed its task successfully.

Agent: ${squadName}/${agentName}

## Acceptance Criteria
${criteria}

## Evidence

### Agent State File
${stateContent || '(empty or not found)'}

### Recent Git Commits
${recentCommits}

## Instructions
Evaluate whether the acceptance criteria are met based on the evidence.
Respond with EXACTLY one line:
PASS: <reason>
or
FAIL: <reason>`;

  try {
    // Strip CLAUDECODE so claude can be spawned from within a Claude Code session.
    const { CLAUDECODE: _, ...verifyEnv } = process.env;

    // Argument array instead of a shell string: the prompt embeds agent-written
    // state and commit subjects, which must never be interpreted by a shell.
    const result = execFileSync(
      'claude',
      ['--print', '--model', 'haiku', '--', verifyPrompt],
      { encoding: 'utf-8', cwd: projectRoot, timeout: VERIFICATION_EXEC_TIMEOUT_MS, env: verifyEnv }
    ).trim();

    if (options.verbose) {
      writeLine(` ${colors.dim}Verification: ${result}${RESET}`);
    }

    // Tolerate a preamble before the verdict: use the first line that starts
    // with PASS or FAIL, falling back to the whole output.
    const verdict = result
      .split('\n')
      .map(line => line.trim())
      .find(line => /^(PASS|FAIL)\b/.test(line)) ?? result;

    if (verdict.startsWith('PASS')) {
      return { passed: true, reason: verdict.replace(/^PASS:\s*/, '') };
    }
    return { passed: false, reason: verdict.replace(/^FAIL:\s*/, '') };
  } catch (error) {
    // Deliberate best-effort: an unavailable verifier must not fail the run.
    if (options.verbose) {
      writeLine(` ${colors.dim}Verification error (defaulting to PASS): ${error}${RESET}`);
    }
    return { passed: true, reason: 'Verification unavailable — defaulting to pass' };
  }
}
+ */ +export async function preflightExecutorCheck(provider: string): Promise { + // Allow skipping for CI/CD or advanced users + if (process.env.SQUADS_SKIP_CHECKS === '1') { + return true; + } + + const isAnthropic = provider === 'anthropic'; + + // --- Check 1: CLI binary on PATH --- + let cliFound: boolean; + + if (isAnthropic) { + cliFound = await checkClaudeCliAvailable(); + } else { + cliFound = isProviderCLIAvailable(provider); + } + + if (!cliFound) { + const cliConfig = getCLIConfig(provider); + const cliName = cliConfig?.command || provider; + const installCmd = cliConfig?.install || `See ${provider} documentation`; + + writeLine(); + writeLine(` ${icons.error} ${colors.red}${cliName} CLI not found${RESET}`); + writeLine(); + writeLine(` ${colors.dim}The ${cliName} command is required to run agents but was not found on your PATH.${RESET}`); + writeLine(); + writeLine(` ${colors.cyan}Install:${RESET} ${installCmd}`); + writeLine(); + writeLine(` ${colors.dim}Skip this check: SQUADS_SKIP_CHECKS=1 squads run ...${RESET}`); + writeLine(); + return false; + } + + // Auth check removed: Claude CLI handles its own auth errors with clear messages. + // Pre-checking here caused false warnings for OAuth users (keychain auth works + // without .credentials.json or ANTHROPIC_API_KEY). See #520. 
+ + return true; +} + +// ── Environment & logging helpers ──────────────────────────────────── + +/** Build agent environment variables for Claude execution */ +export function buildAgentEnv( + baseEnv: Record, + execContext: ExecutionContext, + options?: { effort?: EffortLevel; skills?: string[]; includeOtel?: boolean; ghToken?: string } +): Record { + // Strip CLAUDECODE to allow spawning claude from within a Claude Code session + const { CLAUDECODE: _, ...cleanEnv } = baseEnv; + const env: Record = { + ...cleanEnv, + SQUADS_SQUAD: execContext.squad, + SQUADS_AGENT: execContext.agent, + SQUADS_TASK_TYPE: execContext.taskType, + SQUADS_TRIGGER: execContext.trigger, + SQUADS_EXECUTION_ID: execContext.executionId, + BRIDGE_API: getBridgeUrl(), + }; + + // Inject bot GH_TOKEN so agents create PRs/issues as the bot identity, + // not the user's personal gh auth. This enables founder to review/approve. + if (options?.ghToken) env.GH_TOKEN = options.ghToken; + + if (options?.includeOtel) { + env.OTEL_RESOURCE_ATTRIBUTES = `squads.squad=${execContext.squad},squads.agent=${execContext.agent},squads.task_type=${execContext.taskType},squads.trigger=${execContext.trigger},squads.execution_id=${execContext.executionId}`; + } + + if (options?.effort) env.CLAUDE_EFFORT = options.effort; + if (options?.skills && options.skills.length > 0) env.CLAUDE_SKILLS = options.skills.join(','); + + return env; +} + +/** Log verbose execution config (shared by foreground and background modes) */ +export function logVerboseExecution(config: { + projectRoot: string; + mode: string; + useApi?: boolean; + execContext: ExecutionContext; + effort?: EffortLevel; + skills?: string[]; + resolvedModel?: string; + claudeModelAlias?: string; + explicitModel?: string; + logFile?: string; + mcpConfigPath?: string; +}): void { + writeLine(` ${colors.dim}Project: ${config.projectRoot}${RESET}`); + writeLine(` ${colors.dim}Mode: ${config.mode}${RESET}`); + if (config.logFile) writeLine(` ${colors.dim}Log: 
${config.logFile}${RESET}`); + if (config.mcpConfigPath) writeLine(` ${colors.dim}MCP config: ${config.mcpConfigPath}${RESET}`); + if (config.useApi !== undefined) writeLine(` ${colors.dim}Auth: ${config.useApi ? 'API credits' : 'subscription'}${RESET}`); + writeLine(` ${colors.dim}Execution: ${config.execContext.executionId}${RESET}`); + writeLine(` ${colors.dim}Task type: ${config.execContext.taskType}${RESET}`); + writeLine(` ${colors.dim}Trigger: ${config.execContext.trigger}${RESET}`); + if (config.effort) writeLine(` ${colors.dim}Effort: ${config.effort}${RESET}`); + if (config.skills && config.skills.length > 0) writeLine(` ${colors.dim}Skills: ${config.skills.join(', ')}${RESET}`); + if (config.resolvedModel || config.claudeModelAlias) { + const source = config.explicitModel ? 'explicit' : 'auto-routed'; + const displayModel = config.resolvedModel || config.claudeModelAlias; + writeLine(` ${colors.dim}Model: ${displayModel} (${source})${RESET}`); + } +} + +// ── Worktree management ────────────────────────────────────────────── + +/** Resolve the target repo root from the squad's repo field (e.g. "org/squads-cli" → sibling dir) */ +export function resolveTargetRepoRoot(projectRoot: string, squad: Squad | null): string { + if (!squad?.repo) return projectRoot; + const repoName = squad.repo.split('/').pop(); + if (!repoName) return projectRoot; + const candidatePath = join(projectRoot, '..', repoName); + return existsSync(candidatePath) ? 
candidatePath : projectRoot; +} + +/** Create an isolated worktree for agent execution (Node.js-based, for foreground mode) */ +export function createAgentWorktree(projectRoot: string, squadName: string, agentName: string): string { + const timestamp = Date.now(); + const branchName = `agent/${squadName}/${agentName}-${timestamp}`; + const worktreePath = join(projectRoot, '..', '.worktrees', `${squadName}-${agentName}-${timestamp}`); + + try { + mkdirSync(join(projectRoot, '..', '.worktrees'), { recursive: true }); + execSync(`git worktree add '${worktreePath}' -b '${branchName}' HEAD`, { cwd: projectRoot, stdio: 'pipe' }); + return worktreePath; + } catch (e) { + writeLine(` ${colors.dim}warn: worktree creation failed, using project root: ${e instanceof Error ? e.message : String(e)}${RESET}`); + return projectRoot; + } +} + +/** Remove a worktree and its branch after agent execution completes */ +export function cleanupWorktree(worktreePath: string, projectRoot: string): void { + if (worktreePath === projectRoot) return; // fallback mode, nothing to clean + + try { + // Extract branch name from worktree before removing + const branchInfo = execSync(`git -C '${projectRoot}' worktree list --porcelain`, { encoding: 'utf-8' }); + let branchName = ''; + const lines = branchInfo.split('\n'); + for (let i = 0; i < lines.length; i++) { + if (lines[i] === `worktree ${worktreePath}` && i + 2 < lines.length) { + const branchLine = lines[i + 2]; // "branch refs/heads/..." 
+ if (branchLine.startsWith('branch refs/heads/')) { + branchName = branchLine.replace('branch refs/heads/', ''); + } + break; + } + } + + // Remove worktree + execSync(`git -C '${projectRoot}' worktree remove '${worktreePath}' --force`, { stdio: 'pipe' }); + + // Delete the agent branch (only agent/* branches, safety check) + if (branchName && branchName.startsWith('agent/')) { + execSync(`git -C '${projectRoot}' branch -D '${branchName}'`, { stdio: 'pipe' }); + } + } catch { + // Non-critical — worktree prune will catch it later + } +} + +// ── Detached execution helpers ─────────────────────────────────────── + +/** Build shell script for detached execution with worktree isolation */ +export function buildDetachedShellScript(config: { + projectRoot: string; + squadName: string; + agentName: string; + timestamp: number; + claudeModelAlias?: string; + escapedPrompt: string; + logFile: string; + pidFile: string; +}): string { + const modelFlag = config.claudeModelAlias ? `--model ${config.claudeModelAlias}` : ''; + const branchName = `agent/${config.squadName}/${config.agentName}-${config.timestamp}`; + const worktreeDir = `${config.projectRoot}/../.worktrees/${config.squadName}-${config.agentName}-${config.timestamp}`; + const cleanup = `if [ "\${WORK_DIR}" != '${config.projectRoot}' ]; then git -C '${config.projectRoot}' worktree remove "\${WORK_DIR}" --force 2>/dev/null; BRANCH='${branchName}'; git -C '${config.projectRoot}' branch -D "\${BRANCH}" 2>/dev/null; fi`; + const script = `mkdir -p '${config.projectRoot}/../.worktrees'; WORK_DIR='${config.projectRoot}'; if git -C '${config.projectRoot}' worktree add '${worktreeDir}' -b '${branchName}' HEAD 2>/dev/null; then WORK_DIR='${worktreeDir}'; fi; cd "\${WORK_DIR}"; unset CLAUDECODE; claude --print --dangerously-skip-permissions --disable-slash-commands ${modelFlag} -- '${config.escapedPrompt}' > '${config.logFile}' 2>&1; ${cleanup}`; + return `echo $$ > '${config.pidFile}'; ${script}`; +} + +/** Prepare log 
directory and file paths for detached execution */ +export function prepareLogFiles(projectRoot: string, squadName: string, agentName: string, timestamp: number): { logDir: string; logFile: string; pidFile: string } { + const logDir = join(projectRoot, '.agents', 'logs', squadName); + const logFile = join(logDir, `${agentName}-${timestamp}.log`); + const pidFile = join(logDir, `${agentName}-${timestamp}.pid`); + + if (!existsSync(logDir)) { + mkdirSync(logDir, { recursive: true }); + } + + return { logDir, logFile, pidFile }; +} + +// ── Execution modes ────────────────────────────────────────────────── + +/** Execute Claude in foreground mode (direct stdio, default) */ +export function executeForeground(config: { + prompt: string; + claudeArgs: string[]; + agentEnv: Record; + projectRoot: string; + squadName: string; + agentName: string; + execContext: ExecutionContext; + startMs: number; + provider?: string; +}): Promise { + const workDir = createAgentWorktree(config.projectRoot, config.squadName, config.agentName); + + // Snapshot goals before execution + const goalsBefore = snapshotGoals(config.squadName); + + return new Promise((resolve, reject) => { + const claude = spawn('claude', config.claudeArgs, { + stdio: 'inherit', + cwd: workDir, + env: config.agentEnv, + }); + + claude.on('close', async (code) => { + const durationMs = Date.now() - config.startMs; + + // Capture token usage from Claude Code's session JSONL + const sessionUsage = captureSessionUsage(config.startMs); + + // Snapshot goals after execution and diff + const goalsAfter = snapshotGoals(config.squadName); + const goalsChanged = diffGoals(goalsBefore, goalsAfter); + + const obsRecord: ObservabilityRecord = { + ts: new Date().toISOString(), + id: config.execContext.executionId, + squad: config.squadName, + agent: config.agentName, + provider: config.provider || 'anthropic', + model: sessionUsage?.model || config.agentEnv.SQUADS_MODEL || 'unknown', + trigger: (config.execContext.trigger || 
'manual') as ObservabilityRecord['trigger'], + status: code === 0 ? 'completed' : 'failed', + duration_ms: durationMs, + input_tokens: sessionUsage?.input_tokens || 0, + output_tokens: sessionUsage?.output_tokens || 0, + cache_read_tokens: sessionUsage?.cache_read_tokens || 0, + cache_write_tokens: sessionUsage?.cache_write_tokens || 0, + cost_usd: sessionUsage?.cost_usd || 0, + context_tokens: 0, + error: code !== 0 ? `Claude exited with code ${code}` : undefined, + goals_before: Object.keys(goalsBefore).length > 0 ? goalsBefore : undefined, + goals_after: Object.keys(goalsAfter).length > 0 ? goalsAfter : undefined, + goals_changed: goalsChanged.length > 0 ? goalsChanged : undefined, + }; + logObservability(obsRecord); + + if (code === 0) { + updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'completed', { + outcome: `Session completed (${sessionUsage?.input_tokens || 0} in / ${sessionUsage?.output_tokens || 0} out, $${(sessionUsage?.cost_usd || 0).toFixed(3)})`, + durationMs, + }); + + const commitResult = await autoCommitAgentWork(config.squadName, config.agentName, config.execContext.executionId, config.provider); + if (commitResult.committed) { + writeLine(); + writeLine(` ${colors.green}Auto-committed agent work${RESET}`); + } + + cleanupWorktree(workDir, config.projectRoot); + resolve('Session completed'); + } else { + updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'failed', { + error: `Claude exited with code ${code}`, + durationMs, + }); + cleanupWorktree(workDir, config.projectRoot); + reject(new Error(`Claude exited with code ${code}`)); + } + }); + + claude.on('error', (err) => { + const durationMs = Date.now() - config.startMs; + + logObservability({ + ts: new Date().toISOString(), + id: config.execContext.executionId, + squad: config.squadName, + agent: config.agentName, + provider: config.provider || 'anthropic', + model: 'unknown', + trigger: 
(config.execContext.trigger || 'manual') as ObservabilityRecord['trigger'], + status: 'failed', + duration_ms: durationMs, + input_tokens: 0, output_tokens: 0, cache_read_tokens: 0, cache_write_tokens: 0, + cost_usd: 0, context_tokens: 0, + error: String(err), + }); + + updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'failed', { + error: String(err), + durationMs, + }); + cleanupWorktree(workDir, config.projectRoot); + reject(err); + }); + }); +} + +/** Execute Claude in watch mode (background + tail log) */ +export async function executeWatch(config: { + projectRoot: string; + agentEnv: Record; + logFile: string; + wrapperScript: string; +}): Promise { + const child = spawn('sh', ['-c', config.wrapperScript], { + cwd: config.projectRoot, + detached: true, + stdio: 'ignore', + env: config.agentEnv, + }); + child.unref(); + + await new Promise(resolve => setTimeout(resolve, LOG_FILE_INIT_DELAY_MS)); + + writeLine(` ${colors.dim}Tailing log (Ctrl+C to stop watching, agent continues)...${RESET}`); + writeLine(); + + const tail = spawn('tail', ['-f', config.logFile], { stdio: 'inherit' }); + + process.on('SIGINT', () => { + tail.kill(); + writeLine(); + writeLine(` ${colors.dim}Stopped watching. Agent continues in background.${RESET}`); + writeLine(` ${colors.dim}Resume: tail -f ${config.logFile}${RESET}`); + process.exit(0); + }); + + return new Promise((resolve) => { + tail.on('close', () => { + resolve(`Agent running in background. 
Log: ${config.logFile}`); + }); + }); +} + +// ── Main execution functions ───────────────────────────────────────── + +export async function executeWithClaude( + prompt: string, + options: ExecuteWithClaudeOptions +): Promise { + const { + verbose, + timeoutMinutes: _timeoutMinutes = 30, + foreground, + background, + watch, + useApi, + effort, + skills, + trigger = 'manual', + squadName, + agentName, + model, + } = options; + + // Determine execution mode + const runInBackground = background === true && !watch; + const runInWatch = watch === true; + const runInForeground = !runInBackground && !runInWatch; + + const startMs = Date.now(); + const projectRoot = getProjectRoot(); + ensureProjectTrusted(projectRoot); + + // Resolve model and provider + const squad = squadName !== 'unknown' ? loadSquad(squadName) : null; + const mcpConfigPath = selectMcpConfig(squadName, squad); + const taskType = detectTaskType(agentName); + const resolvedModel = resolveModel(model, squad, taskType); + const provider = resolvedModel ? detectProviderFromModel(resolvedModel) : 'anthropic'; + + // Resolve target repo for worktree creation (squad.repo → sibling dir) + const targetRepoRoot = resolveTargetRepoRoot(projectRoot, squad); + + // Delegate to non-Anthropic providers + if (provider !== 'anthropic' && provider !== 'unknown') { + if (verbose) { + const source = model ? 'explicit' : 'auto-routed'; + writeLine(` ${colors.dim}Model: ${resolvedModel} (${source})${RESET}`); + writeLine(` ${colors.dim}Provider: ${provider}${RESET}`); + } + return executeWithProvider(provider, prompt, { + verbose, foreground, cwd: targetRepoRoot, squadName, agentName, + }); + } + + const claudeModelAlias = resolvedModel ? 
getClaudeModelAlias(resolvedModel) : undefined; + + const execContext: ExecutionContext = { + squad: squadName, agent: agentName, taskType, trigger, + executionId: generateExecutionId(), + }; + + // Build base env: remove ANTHROPIC_API_KEY unless --use-api, remove CLAUDECODE + const { ANTHROPIC_API_KEY: _apiKey, CLAUDECODE: _claudeCode, ...envWithoutApiKey } = process.env; + const spawnEnv = useApi + ? (() => { const { CLAUDECODE: _, ...rest } = process.env; return rest; })() + : envWithoutApiKey; + + const escapedPrompt = prompt.replace(/'/g, "'\\''"); + + await registerContextWithBridge(execContext); + + // Get bot token so agents create PRs/issues as bot identity (not user's personal gh auth) + let botGhToken: string | undefined; + try { + const ghEnv = await getBotGhEnv(); + botGhToken = ghEnv.GH_TOKEN; + } catch { /* graceful: falls back to user's gh auth */ } + + // ── Foreground mode ────────────────────────────────────────────────── + if (runInForeground) { + if (verbose) { + logVerboseExecution({ + projectRoot, mode: 'foreground', useApi, execContext, + effort, skills, resolvedModel, claudeModelAlias, explicitModel: model, + }); + } + + // Build claude args as array to avoid shell escaping issues with large prompts + const claudeArgs: string[] = []; + if (!process.stdin.isTTY) claudeArgs.push('--print'); + + // Permission model: scoped allowed tools instead of blanket skip + // Agents can: read/write files, run git/gh/npm/bash, use tools + // Agents cannot: bypass to arbitrary system access + if (process.env.SQUADS_SKIP_PERMISSIONS === '1') { + // Explicit opt-in for sandboxed environments (Docker, CI) + claudeArgs.push('--dangerously-skip-permissions'); + } else { + claudeArgs.push('--allowedTools', + 'Read', 'Write', 'Edit', 'Glob', 'Grep', + 'Bash(git:*)', 'Bash(gh:*)', 'Bash(npm:*)', 'Bash(npx:*)', + 'Bash(node:*)', 'Bash(python3:*)', 'Bash(curl:*)', + 'Bash(docker:*)', 'Bash(duckdb:*)', + 'Bash(ls:*)', 'Bash(mkdir:*)', 'Bash(cp:*)', 'Bash(mv:*)', + 
'Bash(cat:*)', 'Bash(head:*)', 'Bash(tail:*)', 'Bash(wc:*)', + 'Bash(echo:*)', 'Bash(chmod:*)', 'Bash(date:*)', + 'Bash(squads:*)', + 'Agent', 'WebFetch', 'WebSearch', + ); + } + claudeArgs.push('--disable-slash-commands'); + if (mcpConfigPath) claudeArgs.push('--mcp-config', mcpConfigPath); + if (claudeModelAlias) claudeArgs.push('--model', claudeModelAlias); + claudeArgs.push('--', prompt); + + const agentEnv = buildAgentEnv(spawnEnv as Record, execContext, { + effort, skills, includeOtel: true, ghToken: botGhToken, + }); + + return executeForeground({ + prompt, claudeArgs, agentEnv, projectRoot: targetRepoRoot, + squadName, agentName, execContext, startMs, provider, + }); + } + + // ── Detached modes (watch + background) ────────────────────────────── + const timestamp = Date.now(); + const { logFile, pidFile } = prepareLogFiles(projectRoot, squadName, agentName, timestamp); + const agentEnv = buildAgentEnv(spawnEnv as Record, execContext, { + effort, skills, includeOtel: !runInWatch, ghToken: botGhToken, + }); + + const wrapperScript = buildDetachedShellScript({ + projectRoot: targetRepoRoot, squadName, agentName, timestamp, + claudeModelAlias, escapedPrompt, logFile, pidFile, + }); + + if (runInWatch) { + if (verbose) { + logVerboseExecution({ + projectRoot, mode: 'watch (background + tail)', + execContext, logFile, + }); + } + + return executeWatch({ projectRoot: targetRepoRoot, agentEnv, logFile, wrapperScript }); + } + + // ── Background mode ────────────────────────────────────────────────── + if (verbose) { + logVerboseExecution({ + projectRoot, mode: 'background', useApi, execContext, + effort, skills, resolvedModel, claudeModelAlias, + explicitModel: model, logFile, mcpConfigPath, + }); + } + + const child = spawn('sh', ['-c', wrapperScript], { + cwd: targetRepoRoot, + detached: true, + stdio: 'ignore', + env: agentEnv, + }); + child.unref(); + + if (verbose) { + writeLine(` ${colors.dim}Monitor: tail -f ${logFile}${RESET}`); + } + + return `Log: 
${logFile}. Monitor: tail -f ${logFile}`; +} + +/** + * Execute agent with a non-Anthropic LLM CLI provider. + * + * Supports: google (gemini), openai (codex), mistral (vibe), xai (grok), aider, ollama + * + * Unlike executeWithClaude which has full session management, + * other CLIs run in simpler non-interactive mode. + */ +export async function executeWithProvider( + provider: string, + prompt: string, + options: { + verbose?: boolean; + foreground?: boolean; + cwd?: string; + squadName?: string; + agentName?: string; + model?: string; + } +): Promise { + const cliConfig = getCLIConfig(provider); + + if (!cliConfig) { + throw new Error(`Unknown provider: ${provider}. Run 'squads providers' to see available providers.`); + } + + if (!isProviderCLIAvailable(provider)) { + throw new Error(`CLI '${cliConfig.command}' not found. Install: ${cliConfig.install}`); + } + + const projectRoot = options.cwd || getProjectRoot(); + const squadName = options.squadName || 'unknown'; + const agentName = options.agentName || 'unknown'; + const timestamp = Date.now(); + + // Build clean env: remove CLAUDECODE to allow nesting, pass squad context + const { CLAUDECODE: _claudeCode, ...cleanEnv } = process.env; + const providerEnv = { + ...cleanEnv, + SQUADS_SQUAD: squadName, + SQUADS_AGENT: agentName, + SQUADS_PROVIDER: provider, + }; + + // Create isolated worktree for this agent (same pattern as executeWithClaude) + const branchName = `agent/${squadName}/${agentName}-${timestamp}`; + const worktreePath = join(projectRoot, '..', '.worktrees', `${squadName}-${agentName}-${timestamp}`); + let workDir = projectRoot; + try { + mkdirSync(join(projectRoot, '..', '.worktrees'), { recursive: true }); + execSync(`git worktree add '${worktreePath}' -b '${branchName}' HEAD`, { cwd: projectRoot, stdio: 'pipe' }); + workDir = worktreePath; + } catch (e) { + writeLine(` ${colors.dim}warn: worktree creation failed, using project root: ${e instanceof Error ? 
e.message : String(e)}${RESET}`); + } + + // Copy .agents directory into worktree so sandboxed providers can access + // agent definitions, memory, and config files. Providers like Gemini restrict + // file reads to the workspace directory, so these must be local. + let effectivePrompt = prompt; + if (workDir !== projectRoot) { + const agentsDir = join(projectRoot, '.agents'); + const targetAgentsDir = join(workDir, '.agents'); + if (existsSync(agentsDir) && !existsSync(targetAgentsDir)) { + try { + cpSync(agentsDir, targetAgentsDir, { recursive: true }); + } catch (e) { + writeLine(` ${colors.dim}warn: .agents copy failed: ${e instanceof Error ? e.message : String(e)}${RESET}`); + } + } + // Rewrite absolute paths in prompt so sandboxed providers can resolve them + effectivePrompt = prompt.replaceAll(projectRoot, workDir); + } + + const buildOpts = options.model ? { model: options.model } : undefined; + const args = cliConfig.buildArgs(effectivePrompt, buildOpts); + + if (options.verbose) { + writeLine(` ${colors.dim}Provider: ${cliConfig.displayName}${RESET}`); + writeLine(` ${colors.dim}Command: ${cliConfig.command} ${args.join(' ').slice(0, VERBOSE_COMMAND_MAX_CHARS)}...${RESET}`); + writeLine(` ${colors.dim}CWD: ${workDir}${RESET}`); + if (workDir !== projectRoot) { + writeLine(` ${colors.dim}Worktree: ${branchName}${RESET}`); + } + if (cliConfig.stdinPrompt) { + writeLine(` ${colors.dim}Prompt delivery: stdin${RESET}`); + } + } + + // Foreground mode: run directly in terminal + if (options.foreground) { + return new Promise((resolve, reject) => { + const proc = spawn(cliConfig.command, args, { + stdio: cliConfig.stdinPrompt ? ['pipe', 'inherit', 'inherit'] : 'inherit', + cwd: workDir, + env: providerEnv, + }); + + // For stdinPrompt providers (e.g. 
Ollama), pipe the prompt via stdin + if (cliConfig.stdinPrompt && proc.stdin) { + proc.stdin.write(effectivePrompt); + proc.stdin.end(); + } + + proc.on('close', (code) => { + cleanupWorktree(workDir, projectRoot); + if (code === 0) { + resolve('Session completed'); + } else { + reject(new Error(`${cliConfig.command} exited with code ${code}`)); + } + }); + + proc.on('error', (err) => { + cleanupWorktree(workDir, projectRoot); + reject(err); + }); + }); + } + + // Background mode: run detached with log file (matches executeWithClaude pattern) + const logDir = join(projectRoot, '.agents', 'logs', squadName); + const logFile = join(logDir, `${agentName}-${timestamp}.log`); + const pidFile = join(logDir, `${agentName}-${timestamp}.pid`); + + if (!existsSync(logDir)) { + mkdirSync(logDir, { recursive: true }); + } + + const escapedPrompt = effectivePrompt.replace(/'/g, "'\\''"); + const providerArgs = cliConfig.buildArgs(escapedPrompt).map(a => `'${a}'`).join(' '); + const cleanupCmd = workDir !== projectRoot + ? `; git -C '${projectRoot}' worktree remove '${workDir}' --force 2>/dev/null; git -C '${projectRoot}' branch -D '${branchName}' 2>/dev/null` + : ''; + const shellScript = `cd '${workDir}' && ${cliConfig.command} ${providerArgs} > '${logFile}' 2>&1${cleanupCmd}`; + const wrapperScript = `echo $$ > '${pidFile}'; ${shellScript}`; + + const child = spawn('sh', ['-c', wrapperScript], { + cwd: workDir, + detached: true, + stdio: 'ignore', + env: providerEnv, + }); + + child.unref(); + + if (options.verbose) { + writeLine(` ${colors.dim}Log: ${logFile}${RESET}`); + writeLine(` ${colors.dim}PID file: ${pidFile}${RESET}`); + } + + return `Log: ${logFile}. Monitor: tail -f ${logFile}`; +} diff --git a/src/lib/execution-log.ts b/src/lib/execution-log.ts new file mode 100644 index 00000000..f870cb25 --- /dev/null +++ b/src/lib/execution-log.ts @@ -0,0 +1,318 @@ +/** + * Execution logging, cooldown tracking, and bridge/API communication. 
+ * Extracted from src/commands/run.ts to reduce its size. + */ + +import { join, dirname } from 'path'; +import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs'; +import { findMemoryDir } from './memory.js'; +import { getApiUrl, getBridgeUrl } from './env-config.js'; +import { colors, RESET, writeLine } from './terminal.js'; +import { type ExecutionContext } from './run-types.js'; + +// ── Constants ──────────────────────────────────────────────────────── +export const DEFAULT_LEARNINGS_LIMIT = 5; +export const EXECUTION_EVENT_TIMEOUT_MS = 5000; +export const DEFAULT_SCHEDULED_COOLDOWN_MS = 6 * 60 * 60 * 1000; // 6 hours + +// ── Interfaces ─────────────────────────────────────────────────────── + +export interface PreflightResult { + allowed: boolean; + gates: { + quota?: { ok: boolean; used: number; limit: number; remaining: number; period: string }; + cooldown?: { ok: boolean; elapsed_sec: number | null; min_gap_sec: number }; + }; + error?: string; +} + +export interface Learning { + content: string; + importance: string; + created_at: string; +} + +export interface ExecutionRecord { + squadName: string; + agentName: string; + executionId: string; + startTime: string; + endTime?: string; + durationMs?: number; + status: 'running' | 'completed' | 'failed'; + trigger?: 'manual' | 'scheduled' | 'event' | 'smart'; + taskType?: 'evaluation' | 'execution' | 'research' | 'lead'; + outcome?: string; + error?: string; +} + +// ── Bridge/API helpers ─────────────────────────────────────────────── + +/** + * Register execution context with the API for telemetry + * This allows the API to tag incoming OTel data with correct squad/agent info + */ +export async function registerContextWithBridge(ctx: ExecutionContext): Promise { + const bridgeUrl = getBridgeUrl(); + if (!bridgeUrl) return false; // Tier 1: no bridge, skip silently + + try { + const response = await fetch(`${bridgeUrl}/api/context/register`, { + method: 'POST', + headers: { 'Content-Type': 
'application/json' }, + body: JSON.stringify({ + execution_id: ctx.executionId, + squad: ctx.squad, + agent: ctx.agent, + task_type: ctx.taskType, + trigger: ctx.trigger, + }), + signal: AbortSignal.timeout(3000), + }); + + if (!response.ok) return false; + return true; + } catch { + // Tier 2 bridge down — silent, non-fatal + return false; + } +} + +/** + * Pre-execution gate check via bridge API. + * Checks quota (monthly spend) and cooldown before running an agent. + * Fails open (allows execution) if bridge is unavailable. + */ +export async function checkPreflightGates(squad: string, agent: string): Promise { + const bridgeUrl = getBridgeUrl(); + if (!bridgeUrl) return { allowed: true, gates: {} }; // Tier 1: no gates, allow + + try { + const response = await fetch(`${bridgeUrl}/api/execution/preflight`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ squad, agent }), + signal: AbortSignal.timeout(3000), + }); + + if (!response.ok) return { allowed: true, gates: {} }; + return await response.json() as PreflightResult; + } catch { + return { allowed: true, gates: {} }; // Silent fail-open + } +} + +/** + * Fetch relevant learnings from bridge for prompt injection. + * Returns empty array if bridge is unavailable. + */ +export async function fetchLearnings(squad: string, limit = DEFAULT_LEARNINGS_LIMIT): Promise { + const bridgeUrl = getBridgeUrl(); + + try { + const response = await fetch( + `${bridgeUrl}/api/learnings/relevant?squad=${encodeURIComponent(squad)}&limit=${limit}`, + { signal: AbortSignal.timeout(3000) } + ); + + if (!response.ok) { + return []; + } + + const data = await response.json() as { learnings: Learning[] }; + return data.learnings || []; + } catch (e) { + writeLine(` ${colors.dim}warn: learnings fetch failed: ${e instanceof Error ? 
e.message : String(e)}${RESET}`); + return []; + } +} + +// ── Execution logging ──────────────────────────────────────────────── + +export function getExecutionLogPath(squadName: string, agentName: string): string | null { + const memoryDir = findMemoryDir(); + if (!memoryDir) return null; + return join(memoryDir, squadName, agentName, 'executions.md'); +} + +export function logExecution(record: ExecutionRecord): void { + const logPath = getExecutionLogPath(record.squadName, record.agentName); + if (!logPath) return; + + const dir = dirname(logPath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + + let content = ''; + if (existsSync(logPath)) { + content = readFileSync(logPath, 'utf-8').trimEnd(); + } else { + content = `# ${record.squadName}/${record.agentName} - Execution Log`; + } + + // Structured entry format for parsing + const entry = ` + +--- + +**${record.startTime}** | Status: ${record.status} +- ID: \`${record.executionId}\` +- Trigger: ${record.trigger || 'manual'} +- Task Type: ${record.taskType || 'execution'} +`; + + writeFileSync(logPath, content + entry); +} + +export function updateExecutionStatus( + squadName: string, + agentName: string, + executionId: string, + status: 'completed' | 'failed', + details?: { + outcome?: string; + error?: string; + durationMs?: number; + } +): void { + const logPath = getExecutionLogPath(squadName, agentName); + if (!logPath || !existsSync(logPath)) return; + + let content = readFileSync(logPath, 'utf-8'); + const endTime = new Date().toISOString(); + + // Find and update the specific execution by ID + const execMarker = ``; + const markerIndex = content.indexOf(execMarker); + + if (markerIndex === -1) return; + + // Find the next entry marker or end of file + const nextEntryIndex = content.indexOf('\n---\n', markerIndex + 1); + const entryEnd = nextEntryIndex === -1 ? 
content.length : nextEntryIndex; + + // Extract and update the entry + const entryStart = content.lastIndexOf('\n---\n', markerIndex); + const currentEntry = content.slice(entryStart, entryEnd); + + // Build completion details + const durationStr = details?.durationMs + ? `${(details.durationMs / 1000).toFixed(1)}s` + : 'unknown'; + + let updatedEntry = currentEntry + .replace(/Status: running/, `Status: ${status}`) + + `- Completed: ${endTime} +- Duration: ${durationStr}`; + + if (details?.outcome) { + updatedEntry += `\n- Outcome: ${details.outcome}`; + } + if (details?.error) { + updatedEntry += `\n- Error: ${details.error}`; + } + + // Replace the entry in content + content = content.slice(0, entryStart) + updatedEntry + content.slice(entryEnd); + writeFileSync(logPath, content); +} + +// ── Cooldown tracking ──────────────────────────────────────────────── + +/** + * Get the timestamp of the last execution from executions.md + */ +export function getLastExecutionTime(squadName: string, agentName: string): Date | null { + const logPath = getExecutionLogPath(squadName, agentName); + if (!logPath || !existsSync(logPath)) return null; + + const content = readFileSync(logPath, 'utf-8'); + + // Find all timestamps in the format **2026-01-21T14:00:02.358Z** + const timestamps = content.match(/\*\*(\d{4}-\d{2}-\d{2}T[\d:.]+Z)\*\*/g); + if (!timestamps || timestamps.length === 0) return null; + + // Get the last (most recent) timestamp + const lastTimestamp = timestamps[timestamps.length - 1].replace(/\*\*/g, ''); + return new Date(lastTimestamp); +} + +/** + * Local cooldown check - works without bridge + * Returns { ok: true } if allowed, { ok: false, ... 
/**
 * Emit an execution event to the API for tracking and routing.
 *
 * Fail-safe: never throws. When no API URL is configured, the request fails,
 * or the API answers with a non-2xx status, the event is appended to
 * `<memory>/<squad>/<agent>/events.md` instead so it is not lost.
 *
 * @param eventType - Which lifecycle event to record.
 * @param data - Squad, agent and execution identifiers, plus an optional error message.
 */
export async function emitExecutionEvent(
  eventType: 'agent.completed' | 'agent.failed',
  data: { squad: string; agent: string; executionId: string; error?: string }
): Promise<void> {
  const apiUrl = getApiUrl();

  if (apiUrl) {
    try {
      const response = await fetch(`${apiUrl}/events/ingest`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          source: 'scheduler',
          event_type: eventType,
          data: {
            squad: data.squad,
            agent: data.agent,
            execution_id: data.executionId,
            ...(data.error ? { error: data.error } : {}),
          },
        }),
        signal: AbortSignal.timeout(EXECUTION_EVENT_TIMEOUT_MS),
      });
      // fetch() only rejects on network errors; a 4xx/5xx answer means the
      // event was NOT ingested, so only a 2xx response counts as delivered.
      if (response.ok) return;
    } catch {
      // API unavailable — fall through to file-based event recording
    }
  }

  // Fallback: write event to memory file
  try {
    const memDir = findMemoryDir();
    if (!memDir) return;

    const eventsDir = join(memDir, data.squad, data.agent);
    if (!existsSync(eventsDir)) {
      mkdirSync(eventsDir, { recursive: true });
    }

    const eventsPath = join(eventsDir, 'events.md');
    const timestamp = new Date().toISOString();
    const errorLine = data.error ? `- error: ${data.error}\n` : '';
    const entry = `\n## ${timestamp}: ${eventType}\n- execution_id: ${data.executionId}\n${errorLine}`;

    const existing = existsSync(eventsPath) ? readFileSync(eventsPath, 'utf-8') : '';
    writeFileSync(eventsPath, existing + entry);
  } catch {
    // Truly fail-safe — never block execution
  }
}
Load a scorecard definition by name */ +export function loadScorecard(name: string): ScorecardDefinition | null { + const idpDir = findIdpDir(); + if (!idpDir) return null; + + const filePath = join(idpDir, 'scorecards', `${name}.yaml`); + return loadYaml(filePath); +} + +/** Load the dependency graph */ +export function loadDependencyGraph(): DependencyGraph | null { + const idpDir = findIdpDir(); + if (!idpDir) return null; + + return loadYaml(join(idpDir, 'dependencies', 'graph.yaml')); +} diff --git a/src/lib/idp/resolver.ts b/src/lib/idp/resolver.ts new file mode 100644 index 00000000..35202570 --- /dev/null +++ b/src/lib/idp/resolver.ts @@ -0,0 +1,45 @@ +/** + * IDP directory resolver — finds the IDP repo/directory. + * + * Resolution order: + * 1. SQUADS_IDP_PATH env var (explicit override) + * 2. .agents/idp/ in project root (co-located) + * 3. ../idp/ sibling repo (our setup) + * 4. ~/agents-squads/idp/ (absolute fallback) + */ + +import { existsSync } from 'fs'; +import { join, resolve } from 'path'; +import { findProjectRoot } from '../squad-parser.js'; + +export function findIdpDir(): string | null { + // 1. Explicit env var + const envPath = process.env.SQUADS_IDP_PATH; + if (envPath && existsSync(envPath)) { + return resolve(envPath); + } + + // 2. Co-located in project + const projectRoot = findProjectRoot(); + if (projectRoot) { + const colocated = join(projectRoot, '.agents', 'idp'); + if (existsSync(join(colocated, 'catalog'))) { + return colocated; + } + + // 3. Sibling repo + const sibling = join(projectRoot, '..', 'idp'); + if (existsSync(join(sibling, 'catalog'))) { + return resolve(sibling); + } + } + + // 4. 
/**
 * Run a shell command and return its trimmed stdout.
 * Returns null when the command exits non-zero, exceeds the 15s timeout,
 * or cannot be spawned.
 */
function exec(cmd: string, cwd?: string): string | null {
  try {
    return execSync(cmd, { encoding: 'utf-8', timeout: 15000, cwd, stdio: ['pipe', 'pipe', 'pipe'] }).trim();
  } catch {
    return null;
  }
}

/** Cached result of the `gh --version` probe; undefined until first asked. */
let ghProbe: boolean | undefined;

/** Whether the GitHub CLI can be executed. Probed once instead of once per check. */
function ghAvailable(): boolean {
  if (ghProbe === undefined) {
    ghProbe = exec('gh --version') !== null;
  }
  return ghProbe;
}

interface CheckResult {
  name: string;
  passed: boolean;
  weight: number;
  detail: string;
}

/** `owner/name` slugs that are safe to interpolate into a shell command. */
const SAFE_REPO_SLUG = /^[\w.-]+\/[\w.-]+$/;

/** Branch names that are safe to interpolate into a shell command and a jq filter. */
const SAFE_BRANCH_NAME = /^[\w./-]+$/;

/** Checks that can only be answered through the GitHub CLI. */
const GH_ONLY_CHECKS = new Set([
  'ci-passing',
  'no-security-alerts',
  'branch-protection',
  'deploy-frequency',
  'stale-prs',
  'no-stale-prs',
]);

const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;

/**
 * Count open PRs whose last update is older than `maxAgeSeconds`.
 * Returns null when `gh` fails or prints something that is not a number, so
 * callers can tell "zero stale PRs" apart from "could not check".
 */
function countStalePrs(repo: string, maxAgeSeconds: number): number | null {
  const out = exec(
    `gh pr list --repo '${repo}' --state open --json updatedAt --jq '[.[] | select((now - (.updatedAt | fromdateiso8601)) > ${maxAgeSeconds})] | length'`
  );
  if (out === null) return null;
  const count = parseInt(out, 10);
  return Number.isNaN(count) ? null : count;
}

/**
 * Evaluate one scorecard check for a service.
 *
 * @param check - Check definition; `name` selects the probe, `weight` is copied to the result.
 * @param service - Catalog entry supplying the repo slug, default branch and CI commands.
 * @param repoPath - Local checkout of the repo, or null when none was found.
 * @returns The result with `passed` and a human-readable `detail`. Unknown check
 *   names and probes that could not be run are reported as not passed.
 */
function runCheck(
  check: ScorecardDefinition['checks'][0],
  service: CatalogEntry,
  repoPath: string | null
): CheckResult {
  const result: CheckResult = { name: check.name, passed: false, weight: check.weight, detail: 'unknown' };
  const repo = service.metadata.repo;
  const pass = (detail: string): void => { result.passed = true; result.detail = detail; };
  const fail = (detail: string): void => { result.detail = detail; };

  if (GH_ONLY_CHECKS.has(check.name)) {
    if (!ghAvailable()) { fail('gh CLI not available'); return result; }
    // The slug is spliced into shell commands below — refuse anything unexpected.
    if (!SAFE_REPO_SLUG.test(repo)) { fail(`invalid repo slug: ${repo}`); return result; }
  }

  switch (check.name) {
    case 'ci-passing': {
      // The URL must be quoted: an unquoted `&` ends the shell command early.
      const conclusion = exec(
        `gh api 'repos/${repo}/actions/runs?per_page=1&status=completed' --jq '.workflow_runs[0].conclusion // empty'`
      );
      if (conclusion === 'success') pass('latest run: success');
      else if (conclusion) fail(`latest run: ${conclusion}`);
      else fail('no CI runs found');
      break;
    }

    case 'test-coverage': {
      // Would need CI output parsing — for v0.1, check if test command exists
      const testCommand = service.spec.ci.test_command;
      if (testCommand && testCommand !== 'null') pass(`test command defined: ${testCommand}`);
      else fail('no test command configured');
      break;
    }

    case 'build-succeeds': {
      const buildCommand = service.spec.ci.build_command;
      if (repoPath && buildCommand) {
        // The build command comes from the catalog and is run as-is inside the checkout.
        if (exec(buildCommand, repoPath) !== null) pass('build passed');
        else fail('build failed');
      } else {
        fail(repoPath ? 'no build command' : 'repo not found locally');
      }
      break;
    }

    case 'no-security-alerts': {
      const alerts = exec(
        `gh api 'repos/${repo}/dependabot/alerts' --jq '[.[] | select(.state=="open" and (.security_advisory.severity=="high" or .security_advisory.severity=="critical"))] | length'`
      );
      if (alerts === '0') pass('no high/critical alerts');
      else if (alerts) fail(`${alerts} high/critical alerts`);
      else fail('could not check alerts');
      break;
    }

    case 'readme-exists': {
      if (!repoPath) { fail('repo not found locally'); break; }
      const readmePath = join(repoPath, 'README.md');
      if (!existsSync(readmePath)) { fail('README.md not found'); break; }
      const size = statSync(readmePath).size;
      if (size > 100) pass(`README.md (${size} bytes)`);
      else fail(`README.md too short (${size} bytes)`);
      break;
    }

    case 'branch-protection': {
      const branch = service.spec.branches.default;
      if (!SAFE_BRANCH_NAME.test(branch)) { fail(`invalid branch name: ${branch}`); break; }
      const protection = exec(
        `gh api 'repos/${repo}/branches/${branch}/protection' --jq '.required_status_checks.strict // false'`
      );
      if (protection && protection !== 'null') pass('branch protection enabled');
      else fail('no branch protection');
      break;
    }

    case 'deploy-frequency': {
      const branch = service.spec.branches.default;
      if (!SAFE_BRANCH_NAME.test(branch)) { fail(`invalid branch name: ${branch}`); break; }
      const runs = exec(
        `gh api 'repos/${repo}/actions/runs' --jq '[.workflow_runs[] | select(.event=="push" and .head_branch=="${branch}")] | length'`
      );
      if (runs === null) { fail('could not check deploys'); break; }
      const count = parseInt(runs, 10);
      if (count > 0) pass(`${count} deploys recently`);
      else fail('no recent deploys');
      break;
    }

    case 'stale-prs': {
      // 1209600s = 14 days
      const count = countStalePrs(repo, 1209600);
      if (count === null) fail('could not check PRs');
      else if (count === 0) pass('no stale PRs');
      else fail(`${count} PRs stale >14d`);
      break;
    }

    case 'recent-activity': {
      if (repoPath) {
        const log = exec('git log --since="30 days ago" --oneline', repoPath);
        const count = log ? log.split('\n').length : 0;
        if (count > 0) pass(`${count} commits in last 30d`);
        else fail('no commits in 30 days');
      } else if (ghAvailable()) {
        if (!SAFE_REPO_SLUG.test(repo)) { fail(`invalid repo slug: ${repo}`); break; }
        const lastCommit = exec(
          `gh api 'repos/${repo}/commits?per_page=1' --jq '.[0].commit.committer.date // empty'`
        );
        // Apply the same 30-day window as the local branch; any commit at all is not "recent".
        const ageMs = lastCommit ? Date.now() - new Date(lastCommit).getTime() : Number.NaN;
        if (lastCommit && ageMs <= THIRTY_DAYS_MS) pass(`last commit: ${lastCommit.slice(0, 10)}`);
        else fail('no recent commits');
      } else {
        fail('repo not found locally');
      }
      break;
    }

    case 'no-stale-prs': {
      // 604800s = 7 days
      const count = countStalePrs(repo, 604800);
      if (count === null) fail('could not check PRs');
      else if (count === 0) pass('no stale PRs');
      else fail(`${count} PRs stale >7d`);
      break;
    }

    case 'clean-structure': {
      // For domain repos — check no binaries or misplaced files in root
      pass('check not implemented (v0.2)');
      break;
    }

    default:
      fail(`unknown check: ${check.name}`);
  }

  return result;
}

/**
 * Find the local checkout for an `owner/name` repo.
 *
 * Looks in `<home>/agents-squads/<name>` and then in `<cwd>/../<name>`. The
 * home candidate is skipped when neither HOME nor USERPROFILE is set, so an
 * empty home never turns into a path relative to the working directory.
 *
 * @returns The first existing candidate, or null.
 */
function findRepoPath(repoFullName: string): string | null {
  const repoName = repoFullName.split('/')[1];
  if (!repoName) return null;

  const home = process.env.HOME || process.env.USERPROFILE;
  const candidates = [
    ...(home ? [join(home, 'agents-squads', repoName)] : []),
    join(process.cwd(), '..', repoName),
  ];

  return candidates.find(candidate => existsSync(candidate)) ?? null;
}
Math.round((earnedWeight / totalWeight) * 100) : 0; + + // Determine grade + let grade = 'F'; + const sortedGrades = Object.entries(scorecard.grades).sort((a, b) => b[1].min - a[1].min); + for (const [g, { min }] of sortedGrades) { + if (score >= min) { grade = g; break; } + } + + return { + service: service.metadata.name, + scorecard: scorecard.metadata.name, + score, + grade, + checks, + timestamp: new Date().toISOString(), + }; +} diff --git a/src/lib/idp/types.ts b/src/lib/idp/types.ts new file mode 100644 index 00000000..18dc1d1a --- /dev/null +++ b/src/lib/idp/types.ts @@ -0,0 +1,113 @@ +/** + * IDP type definitions — mirrors the YAML schema in the idp/ repo. + */ + +export interface CatalogEntry { + apiVersion: string; + kind: 'Service'; + metadata: { + name: string; + description: string; + owner: string; + repo: string; + tags: string[]; + }; + spec: { + type: 'product' | 'domain'; + stack: string; + framework?: string; + runtime?: string; + language_version?: string; + branches: { + default: string; + development?: string | null; + workflow: 'pr-to-develop' | 'direct-to-main'; + }; + ci: { + template: string | null; + required_checks: string[]; + test_command?: string | null; + build_command?: string | null; + coverage_threshold?: number; + }; + deploy?: { + target: string; + trigger: string; + pipeline?: string; + environments?: Array<{ + name: string; + url: string; + }>; + } | null; + health: Array<{ + name: string; + url: string; + type: 'http' | 'json'; + expect: number; + }>; + dependencies: { + runtime: Array<{ + service: string; + version?: string; + type?: string; + required?: boolean; + description: string; + }>; + }; + scorecard: string; + }; +} + +export interface ScorecardDefinition { + apiVersion: string; + kind: 'Scorecard'; + metadata: { + name: string; + description: string; + }; + checks: Array<{ + name: string; + description: string; + weight: number; + source: string; + severity: 'critical' | 'high' | 'medium' | 'low'; + threshold?: { 
+ min: number; + unit: string; + }; + }>; + grades: Record; +} + +export interface DependencyGraph { + apiVersion: string; + kind: 'DependencyGraph'; + metadata: { + name: string; + description: string; + updated: string; + }; + edges: Array<{ + consumer: string; + provider: string; + type: string; + required?: boolean; + contract?: string; + description: string; + }>; + deploy_order: string[][]; +} + +export interface ScorecardResult { + service: string; + scorecard: string; + score: number; + grade: string; + checks: Array<{ + name: string; + passed: boolean; + weight: number; + detail: string; + }>; + timestamp: string; +} diff --git a/src/lib/llm-clis.ts b/src/lib/llm-clis.ts index bd7df9f8..11a0a1b5 100644 --- a/src/lib/llm-clis.ts +++ b/src/lib/llm-clis.ts @@ -24,6 +24,9 @@ export interface CLIConfig { /** Build non-interactive args for execution */ buildArgs: (prompt: string, options?: RunOptions) => string[]; + + /** If true, pipe prompt via stdin instead of CLI arg (avoids shell arg length limits) */ + stdinPrompt?: boolean; } export interface RunOptions { @@ -107,7 +110,8 @@ export const LLM_CLIS: Record = { displayName: 'Ollama (Local)', command: 'ollama', install: 'brew install ollama', - buildArgs: (prompt, opts) => ['run', opts?.model || 'llama3.1', prompt], + buildArgs: (_prompt, opts) => ['run', opts?.model || 'llama3.1'], + stdinPrompt: true, }, }; diff --git a/src/lib/observability.ts b/src/lib/observability.ts new file mode 100644 index 00000000..8e72c0d8 --- /dev/null +++ b/src/lib/observability.ts @@ -0,0 +1,384 @@ +/** + * Local observability — execution logging to JSONL with token capture. + * + * Every squads run appends one record to .agents/observability/executions.jsonl. + * Token/cost data is captured from Claude Code's session JSONL files after run. + * + * No external dependencies. Git-tracked. Readable by agents and humans. 
+ */ + +import { existsSync, readFileSync, appendFileSync, mkdirSync, readdirSync, statSync } from 'fs'; +import { join, dirname } from 'path'; +import { findProjectRoot } from './squad-parser.js'; + +// ── Types ──────────────────────────────────────────────────────────── + +export interface GoalChange { + name: string; + before: string; // status before run + after: string; // status after run +} + +export interface ObservabilityRecord { + ts: string; + id: string; + squad: string; + agent: string; + provider: string; + model: string; + trigger: 'manual' | 'scheduled' | 'event' | 'smart'; + status: 'completed' | 'failed' | 'timeout'; + duration_ms: number; + input_tokens: number; + output_tokens: number; + cache_read_tokens: number; + cache_write_tokens: number; + cost_usd: number; + context_tokens: number; + error?: string; + task?: string; + // Goal tracking + goals_before?: Record; // name → status before run + goals_after?: Record; // name → status after run + goals_changed?: GoalChange[]; // what moved + // Quality scoring (from COO eval) + grade?: string; // A/B/C/D/F + grade_score?: number; // 0-100 +} + +export interface QueryOptions { + squad?: string; + agent?: string; + status?: string; + since?: string; + limit?: number; +} + +export interface CostSummary { + period: string; + total_cost: number; + total_runs: number; + total_input_tokens: number; + total_output_tokens: number; + by_squad: Record; + by_model: Record; +} + +// ── Model Pricing (per 1M tokens) ──────────────────────────────────── + +const MODEL_PRICING: Record = { + 'claude-opus-4-6': { input: 15.0, output: 75.0, cache_read: 1.5, cache_write: 18.75 }, + 'claude-opus-4-5-20251101': { input: 15.0, output: 75.0, cache_read: 1.5, cache_write: 18.75 }, + 'claude-sonnet-4-6': { input: 3.0, output: 15.0, cache_read: 0.3, cache_write: 3.75 }, + 'claude-sonnet-4-5-20250514': { input: 3.0, output: 15.0, cache_read: 0.3, cache_write: 3.75 }, + 'claude-sonnet-4-20250514': { input: 3.0, output: 
15.0, cache_read: 0.3, cache_write: 3.75 }, + 'claude-haiku-4-5-20251001': { input: 0.80, output: 4.0, cache_read: 0.08, cache_write: 1.0 }, + 'default': { input: 3.0, output: 15.0, cache_read: 0.3, cache_write: 3.75 }, +}; + +// ── Paths ──────────────────────────────────────────────────────────── + +function getObservabilityDir(): string | null { + const root = findProjectRoot(); + if (!root) return null; + return join(root, '.agents', 'observability'); +} + +function getLogPath(): string | null { + const dir = getObservabilityDir(); + if (!dir) return null; + return join(dir, 'executions.jsonl'); +} + +// ── Claude Code Session Parsing ────────────────────────────────────── + +interface SessionUsage { + model: string; + input_tokens: number; + output_tokens: number; + cache_read_tokens: number; + cache_write_tokens: number; + cost_usd: number; + messages: number; +} + +/** + * Find the most recently modified Claude Code session JSONL file. + * Claude Code writes sessions to ~/.claude/projects//*.jsonl + */ +function findRecentSessionFile(afterTimestamp: number): string | null { + const home = process.env.HOME || ''; + const projectsDir = join(home, '.claude', 'projects'); + if (!existsSync(projectsDir)) return null; + + let newest: { path: string; mtime: number } | null = null; + + try { + for (const projDir of readdirSync(projectsDir)) { + const projPath = join(projectsDir, projDir); + try { + if (!statSync(projPath).isDirectory()) continue; + } catch { continue; } + + for (const file of readdirSync(projPath)) { + if (!file.endsWith('.jsonl')) continue; + const filePath = join(projPath, file); + try { + const mtime = statSync(filePath).mtimeMs; + // Only consider files modified after the run started + if (mtime > afterTimestamp && (!newest || mtime > newest.mtime)) { + newest = { path: filePath, mtime }; + } + } catch { continue; } + } + } + } catch { /* projects dir read error */ } + + return newest?.path || null; +} + +/** + * Parse a Claude Code session 
/**
 * Read a Claude Code session JSONL file and total its token usage.
 *
 * Only `assistant` records that report input or output tokens are counted as
 * messages. `costUSD` values found on any record are summed; when none are
 * present the cost is derived from the token totals and the model's pricing.
 * Returns null when the file cannot be read or holds no counted messages.
 */
function parseSessionUsage(sessionPath: string): SessionUsage | null {
  try {
    const totals: SessionUsage = {
      model: 'unknown',
      input_tokens: 0,
      output_tokens: 0,
      cache_read_tokens: 0,
      cache_write_tokens: 0,
      cost_usd: 0,
      messages: 0,
    };

    const rawEntries = readFileSync(sessionPath, 'utf-8').split('\n').filter(Boolean);

    rawEntries.forEach((rawEntry) => {
      try {
        const entry = JSON.parse(rawEntry);

        if (entry.type === 'assistant') {
          const message = entry.message || {};
          const tokenCounts = message.usage || {};

          const reportsTokens = Boolean(tokenCounts.input_tokens || tokenCounts.output_tokens);
          if (reportsTokens) {
            totals.messages += 1;
            totals.input_tokens += tokenCounts.input_tokens || 0;
            totals.output_tokens += tokenCounts.output_tokens || 0;
            totals.cache_read_tokens += tokenCounts.cache_read_input_tokens || 0;
            totals.cache_write_tokens += tokenCounts.cache_creation_input_tokens || 0;
          }

          // First assistant record that names a model wins.
          const modelKnown = Boolean(totals.model) && totals.model !== 'unknown';
          if (!modelKnown) {
            totals.model = message.model || 'unknown';
          }
        }

        // Capture cost if directly available
        if (entry.costUSD) {
          totals.cost_usd += entry.costUSD;
        }
      } catch {
        /* skip malformed lines */
      }
    });

    if (totals.messages === 0) {
      return null;
    }

    // No direct cost recorded — price the token totals instead.
    if (totals.cost_usd === 0) {
      const rates = MODEL_PRICING[totals.model] || MODEL_PRICING['default'];
      const inputCost = (totals.input_tokens / 1_000_000) * rates.input;
      const outputCost = (totals.output_tokens / 1_000_000) * rates.output;
      const cacheReadCost = (totals.cache_read_tokens / 1_000_000) * rates.cache_read;
      const cacheWriteCost = (totals.cache_write_tokens / 1_000_000) * rates.cache_write;
      totals.cost_usd = inputCost + outputCost + cacheReadCost + cacheWriteCost;
    }

    return totals;
  } catch {
    return null;
  }
}
/**
 * Compare two goal snapshots (goal name → status) and list what changed.
 *
 * - A goal present in both with a different status is reported with both statuses.
 * - A goal only in `after` is reported with `before: 'new'`.
 * - A goal only in `before` is reported with `after: 'removed'`.
 *
 * Lookups use own properties only, so goals that happen to be named like
 * `Object.prototype` members (`toString`, `constructor`, …) are diffed
 * correctly.
 *
 * @param before - Snapshot taken before the run.
 * @param after - Snapshot taken after the run.
 * @returns Changed goals in `after` order, followed by removed goals in `before` order.
 */
export function diffGoals(
  before: Record<string, string>,
  after: Record<string, string>
): GoalChange[] {
  const hasOwn = (snapshot: Record<string, string>, name: string): boolean =>
    Object.prototype.hasOwnProperty.call(snapshot, name);

  const changes: GoalChange[] = [];

  for (const [name, afterStatus] of Object.entries(after)) {
    const beforeStatus = (hasOwn(before, name) && before[name]) || 'new';
    if (beforeStatus !== afterStatus) {
      changes.push({ name, before: beforeStatus, after: afterStatus });
    }
  }

  // Goals that disappeared (moved to achieved/abandoned)
  for (const [name, beforeStatus] of Object.entries(before)) {
    if (!hasOwn(after, name)) {
      changes.push({ name, before: beforeStatus, after: 'removed' });
    }
  }

  return changes;
}
+ */ +async function pushToApi(record: ObservabilityRecord): Promise { + try { + const { isTier2, getTierSync } = await import('./tier-detect.js'); + if (!isTier2()) return; + + const apiUrl = getTierSync().urls.api; + if (!apiUrl) return; + + await fetch(`${apiUrl}/agent-executions`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + execution_id: record.id, + squad: record.squad, + agent: record.agent, + model: record.model, + status: record.status, + input_tokens: record.input_tokens, + output_tokens: record.output_tokens, + cache_read_tokens: record.cache_read_tokens, + cache_write_tokens: record.cache_write_tokens, + cost_usd: record.cost_usd, + duration_seconds: Math.round(record.duration_ms / 1000), + error_message: record.error || null, + metadata: { trigger: record.trigger, provider: record.provider }, + }), + signal: AbortSignal.timeout(5000), + }); + } catch { + // Silent — Tier 2 API down, JSONL is the fallback + } +} + +export function logObservability(record: ObservabilityRecord): void { + const logPath = getLogPath(); + if (!logPath) return; + + const dir = dirname(logPath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + + appendFileSync(logPath, JSON.stringify(record) + '\n'); + + // Dual-write: also push to API when Tier 2 is active (fire-and-forget) + pushToApi(record).catch(() => {}); +} + +// ── Read ───────────────────────────────────────────────────────────── + +export function queryExecutions(opts: QueryOptions = {}): ObservabilityRecord[] { + const logPath = getLogPath(); + if (!logPath || !existsSync(logPath)) return []; + + const lines = readFileSync(logPath, 'utf-8').trim().split('\n').filter(Boolean); + let records: ObservabilityRecord[] = []; + + for (const line of lines) { + try { records.push(JSON.parse(line)); } catch { /* skip */ } + } + + if (opts.squad) records = records.filter(r => r.squad === opts.squad); + if (opts.agent) records = records.filter(r => 
/**
 * Aggregate logged executions into cost and token totals.
 *
 * @param period - Time window ending now: `'today'` is the trailing 24 hours,
 *   `'7d'` / `'30d'` the trailing 7 / 30 days, `'all'` every logged record.
 * @returns Totals for the window plus per-squad (with average cost per run)
 *   and per-model breakdowns.
 */
export function calculateCostSummary(period: 'today' | '7d' | '30d' | 'all' = '7d'): CostSummary {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const now = Date.now();
  const cutoffs: Record<string, number> = {
    'today': now - DAY_MS,
    '7d': now - 7 * DAY_MS,
    '30d': now - 30 * DAY_MS,
    'all': 0,
  };

  // `??`, not `||`: the cutoff for 'all' is 0, which `||` would silently
  // replace with the 7-day window.
  const since = new Date(cutoffs[period] ?? cutoffs['7d']).toISOString();
  const records = queryExecutions({ since });

  const bySquad: CostSummary['by_squad'] = {};
  const byModel: CostSummary['by_model'] = {};
  let totalCost = 0;
  let totalInput = 0;
  let totalOutput = 0;

  for (const r of records) {
    totalCost += r.cost_usd;
    totalInput += r.input_tokens;
    totalOutput += r.output_tokens;

    if (!bySquad[r.squad]) bySquad[r.squad] = { cost: 0, runs: 0, avg_cost: 0 };
    bySquad[r.squad].cost += r.cost_usd;
    bySquad[r.squad].runs += 1;

    if (!byModel[r.model]) byModel[r.model] = { cost: 0, runs: 0 };
    byModel[r.model].cost += r.cost_usd;
    byModel[r.model].runs += 1;
  }

  for (const squad of Object.values(bySquad)) {
    squad.avg_cost = squad.runs > 0 ? squad.cost / squad.runs : 0;
  }

  return {
    period,
    total_cost: totalCost,
    total_runs: records.length,
    total_input_tokens: totalInput,
    total_output_tokens: totalOutput,
    by_squad: bySquad,
    by_model: byModel,
  };
}
SCAN: Check all squads — priorities freshness, goal progress, scorecard grades + * 2. PLAN: Decide what to run — skip frozen, prioritize by staleness + score + * 3. EXECUTE: Run leads in dependency order (phased) + * 4. EVALUATE: COO reviews all outputs + * 5. REPORT: Org-level summary to observability + */ + +import { existsSync, readFileSync, readdirSync, statSync } from 'fs'; +import { join } from 'path'; +import { findSquadsDir, loadSquad } from './squad-parser.js'; +import { findMemoryDir } from './memory.js'; +import { colors, bold, RESET, writeLine } from './terminal.js'; +import { logObservability, type ObservabilityRecord } from './observability.js'; + +export interface OrgScanResult { + squad: string; + status: 'active' | 'frozen' | 'stale' | 'healthy'; + prioritiesAge: number; // days since last update + goalsActive: number; + lastExecution: string | null; + lead: string | null; + repo: string | null; + reason: string; +} + +/** + * Scan all squads and return their health status. 
+ */ +export function scanOrg(): OrgScanResult[] { + const squadsDir = findSquadsDir(); + const memoryDir = findMemoryDir(); + if (!squadsDir || !memoryDir) return []; + + const results: OrgScanResult[] = []; + const now = Date.now(); + + for (const squadName of readdirSync(squadsDir).sort()) { + const squadPath = join(squadsDir, squadName); + if (!statSync(squadPath).isDirectory()) continue; + if (!existsSync(join(squadPath, 'SQUAD.md'))) continue; + + const squad = loadSquad(squadName); + const result: OrgScanResult = { + squad: squadName, + status: 'healthy', + prioritiesAge: 999, + goalsActive: 0, + lastExecution: null, + lead: null, + repo: squad?.repo || null, + reason: '', + }; + + // Find lead agent + for (const file of readdirSync(squadPath)) { + if (file.endsWith('-lead.md') || file === 'coo.md' || file.startsWith('web-lead') || file.startsWith('intel-lead') || file.startsWith('eng-lead')) { + result.lead = file.replace('.md', ''); + break; + } + } + + // Check if frozen + const prioritiesPath = join(memoryDir, squadName, 'priorities.md'); + if (existsSync(prioritiesPath)) { + const content = readFileSync(prioritiesPath, 'utf-8'); + if (content.includes('frozen')) { + result.status = 'frozen'; + result.reason = 'Squad frozen — no work until trigger'; + results.push(result); + continue; + } + + // Check freshness from frontmatter + const updatedMatch = content.match(/updated:\s*"?(\d{4}-\d{2}-\d{2})"?/); + if (updatedMatch) { + const updated = new Date(updatedMatch[1]).getTime(); + result.prioritiesAge = Math.round((now - updated) / (24 * 60 * 60 * 1000)); + } + } + + // Check goals + const goalsPath = join(memoryDir, squadName, 'goals.md'); + if (existsSync(goalsPath)) { + const content = readFileSync(goalsPath, 'utf-8'); + const activeMatches = content.match(/status: (in-progress|not-started)/g); + result.goalsActive = activeMatches?.length || 0; + } + + // Determine status + if (result.prioritiesAge > 14) { + result.status = 'stale'; + result.reason = 
/**
 * Turn scan results into an execution order.
 *
 * Frozen squads and squads without a lead are left out. Of the rest, stale
 * squads come first; within each group the squad with more active goals runs
 * earlier. The input array is not modified.
 */
export function planOrgCycle(scan: OrgScanResult[]): OrgScanResult[] {
  const staleRank = (entry: OrgScanResult): number => (entry.status === 'stale' ? 0 : 1);

  const runnable = scan.filter(entry => entry.status !== 'frozen' && entry.lead !== null);

  return runnable.sort((left, right) => {
    const rankGap = staleRank(left) - staleRank(right);
    if (rankGap !== 0) {
      return rankGap;
    }
    // Same group: more active goals means more work waiting.
    return right.goalsActive - left.goalsActive;
  });
}
+ */ +export function displayPlan(plan: OrgScanResult[]): void { + writeLine(` ${bold}Execution Plan${RESET} (${plan.length} squads)\n`); + for (let i = 0; i < plan.length; i++) { + const s = plan[i]; + const statusIcon = s.status === 'stale' ? `${colors.yellow}stale${RESET}` : `${colors.green}ready${RESET}`; + writeLine(` ${i + 1}. ${bold}${s.squad}${RESET} → ${s.lead} ${colors.dim}(${statusIcon}, ${s.goalsActive} goals)${RESET}`); + } + writeLine(); +} diff --git a/src/lib/repo-enforcement.ts b/src/lib/repo-enforcement.ts new file mode 100644 index 00000000..ddcfbb16 --- /dev/null +++ b/src/lib/repo-enforcement.ts @@ -0,0 +1,96 @@ +/** + * Repo enforcement — validates workspace layout before agent execution. + * + * Checks: + * 1. SQUAD.md repo: field points to an existing sibling repo + * 2. No nested .git directories inside hq (prevents clone-inside-hq) + * 3. Agent definitions exist in hq only (not in domain repos) + * + * Called by agent-runner before spawning Claude Code. + * Warns on mismatches, blocks on critical (nested .git). + */ + +import { existsSync, readdirSync, statSync } from 'fs'; +import { join, dirname, resolve } from 'path'; +import { findProjectRoot, loadSquad } from './squad-parser.js'; +import { colors, RESET, writeLine } from './terminal.js'; + +export interface EnforcementResult { + ok: boolean; + warnings: string[]; + errors: string[]; +} + +/** + * Validate workspace layout for a squad before execution. + */ +export function enforceRepoLayout(squadName: string, options?: { verbose?: boolean }): EnforcementResult { + const result: EnforcementResult = { ok: true, warnings: [], errors: [] }; + const projectRoot = findProjectRoot(); + if (!projectRoot) return result; // Can't validate without project root + + const parentDir = dirname(projectRoot); + + // 1. 
Check SQUAD.md repo: field points to existing sibling + try { + const squad = loadSquad(squadName); + if (squad?.repo) { + const repoName = squad.repo.split('/').pop(); + if (repoName) { + const siblingPath = join(parentDir, repoName); + if (!existsSync(siblingPath)) { + result.warnings.push(`Target repo '${squad.repo}' not found locally at ${siblingPath}`); + } else if (options?.verbose) { + writeLine(` ${colors.dim}Target repo: ${siblingPath}${RESET}`); + } + } + } + } catch { + // Squad not found — not our problem here + } + + // 2. Check for nested .git directories inside project root + // Only check top-level dirs (not .agents/memory/* which are fine) + try { + const topLevelDirs = readdirSync(projectRoot).filter(f => { + if (f.startsWith('.')) return false; // Skip hidden dirs + try { return statSync(join(projectRoot, f)).isDirectory(); } catch { return false; } + }); + + for (const dir of topLevelDirs) { + const nestedGit = join(projectRoot, dir, '.git'); + if (existsSync(nestedGit)) { + result.errors.push(`Nested git repo found at ${dir}/ — this breaks 'git add'. Remove it or move to a sibling directory.`); + result.ok = false; + } + } + } catch { + // Can't read directory — skip + } + + // 3. Check .agents/idp/ doesn't have a .git (the bug we hit) + const idpGit = join(projectRoot, '.agents', 'idp', '.git'); + if (existsSync(idpGit)) { + result.errors.push(`Nested git repo in .agents/idp/ — remove it. IDP instance data should be part of hq, not a separate clone.`); + result.ok = false; + } + + return result; +} + +/** + * Run enforcement and display results. Returns false if blocked. 
+ */ +export function checkAndReport(squadName: string, options?: { verbose?: boolean }): boolean { + const result = enforceRepoLayout(squadName, options); + + for (const warning of result.warnings) { + writeLine(` ${colors.yellow}warn${RESET}: ${warning}`); + } + + for (const error of result.errors) { + writeLine(` ${colors.red}error${RESET}: ${error}`); + } + + return result.ok; +} diff --git a/src/lib/run-context.ts b/src/lib/run-context.ts index 5e0a420d..9248aaf0 100644 --- a/src/lib/run-context.ts +++ b/src/lib/run-context.ts @@ -1,54 +1,54 @@ /** * run-context.ts * - * Helpers for building agent execution context and parsing agent definitions. - * Extracted from src/commands/run.ts to reduce its size. + * Squad Context System — context assembly for agent execution. * - * Context cascade (role-based, priority-ordered): - * SYSTEM.md (immutable, outside budget) - * 1. SQUAD.md — mission + goals + output format - * 2. priorities.md — current operational priorities - * 3. directives.md — company-wide strategic overlay - * 4. feedback.md — last cycle evaluation - * 5. state.md — agent's memory from last execution - * 6. active-work.md — open PRs and issues - * 7. Agent briefs — agent-level briefing files - * 8. Squad briefs — squad-level briefing files - * 9. Daily briefing — org-wide daily briefing - * 10. Cross-squad learnings — shared learnings from other squads + * Layers flow from general to particular (no overrides, each answers a different question): + * L0: SYSTEM.md — How (system, tools, principles — immutable, outside budget) + * L1: company.md — Why (company identity, alignment) + * L2: priorities.md — Where (current focus, urgency) + * L3: goals.md — What (measurable targets) + * L4: agent.md — You (agent role, specific instructions) + * L5: state.md — Memory (continuity from last run) + * L6+: Supporting — feedback, daily-briefing, cross-squad learnings * - * Sections load in priority order. When budget is exhausted, later sections drop. 
- * Role determines which sections are included and the total token budget. + * SQUAD.md is metadata only (repo, agents, config) — NOT injected into prompt. + * Each layer adds a unique dimension. No layer contradicts another. + * Role determines which layers are included and the total token budget. */ import { join, dirname } from 'path'; import { existsSync, readFileSync, readdirSync } from 'fs'; +import { execSync } from 'child_process'; import { findSquadsDir } from './squad-parser.js'; import { findMemoryDir } from './memory.js'; import { colors, RESET, writeLine } from './terminal.js'; // ── Types ──────────────────────────────────────────────────────────── -export type ContextRole = 'scanner' | 'worker' | 'lead' | 'coo'; +export type ContextRole = 'scanner' | 'worker' | 'lead' | 'coo' | 'verifier'; // ── Token Budgets (chars, ~4 chars/token) ──────────────────────────── const ROLE_BUDGETS: Record = { - scanner: 4000, // ~1000 tokens — identity + priorities + state - worker: 12000, // ~3000 tokens — + directives, feedback, active-work - lead: 24000, // ~6000 tokens — all sections - coo: 32000, // ~8000 tokens — all sections + expanded + scanner: 4000, // ~1000 tokens — company + priorities + goals + agent + state + worker: 12000, // ~3000 tokens — + feedback + lead: 24000, // ~6000 tokens — all layers + coo: 32000, // ~8000 tokens — all layers + expanded + verifier: 12000, // similar needs to worker }; /** - * Which sections each role gets access to. - * Numbers correspond to section order in the cascade. + * Which layers each role gets access to. 
+ * Numbers correspond to layer order in the Squad Context System: + * 1=company, 2=priorities, 3=goals, 4=agent, 5=state, 6=feedback, 7=daily-briefing, 8=cross-squad */ const ROLE_SECTIONS: Record> = { - scanner: new Set([1, 2, 5]), // SQUAD.md, priorities, state - worker: new Set([1, 2, 3, 4, 5, 6]), // + directives, feedback, active-work - lead: new Set([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), // all sections - coo: new Set([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), // all sections + expanded budget + scanner: new Set([1, 2, 3, 4, 5]), // identity + focus + role + memory + worker: new Set([1, 2, 3, 4, 5, 6]), // + feedback + lead: new Set([1, 2, 3, 4, 5, 6, 7, 8]), // + daily briefing + cross-squad + coo: new Set([1, 2, 3, 4, 5, 6, 7, 8]), // all layers + expanded budget + verifier: new Set([1, 2, 3, 4, 5, 6]), // same as worker }; // ── Agent Frontmatter ───────────────────────────────────────────────── @@ -61,6 +61,11 @@ export interface AgentFrontmatter { acceptance_criteria?: string; max_retries?: number; cooldown?: string; + /** + * `role:` field from agent YAML frontmatter (free text). + * Used as the primary signal for context-role selection. + */ + agent_role?: string; } /** @@ -121,6 +126,25 @@ export function parseAgentFrontmatter(agentPath: string): AgentFrontmatter { result.cooldown = cooldownMatch[1].trim(); } + // role: + // Primary signal for mapping to context role (scanner/worker/lead/verifier). + for (const line of yamlLines) { + const trimmed = line.trim(); + if (!trimmed.startsWith('role:')) continue; + let value = trimmed.slice('role:'.length).trim(); + // Strip wrapping quotes if present. 
+ if ( + (value.startsWith('"') && value.endsWith('"')) || + (value.startsWith('\'') && value.endsWith('\'')) + ) { + value = value.slice(1, -1).trim(); + } + if (value) { + result.agent_role = value; + } + break; + } + return result; } @@ -187,18 +211,43 @@ function readAgentsFile(relativePath: string, warnLabel: string): string { // ── System Protocol ─────────────────────────────────────────────────── /** - * Load SYSTEM.md — the single base protocol for all agents. - * Replaces the old approval-instructions.md + post-execution.md split. - * Falls back to legacy approval-instructions.md if SYSTEM.md doesn't exist. + * Load SYSTEM.md (L0) — the immutable base protocol for all agents. + * Path: .agents/SYSTEM.md (top-level, next to squads/ and memory/) + * Falls back to legacy config/SYSTEM.md, then approval-instructions.md. */ export function loadSystemProtocol(): string { - const systemMd = readAgentsFile('config/SYSTEM.md', 'SYSTEM.md'); + // Primary: .agents/SYSTEM.md + const systemMd = readAgentsFile('SYSTEM.md', 'SYSTEM.md'); if (systemMd) return systemMd; - // Fallback to legacy approval-instructions.md + // Fallback: legacy path + const legacyMd = readAgentsFile('config/SYSTEM.md', 'SYSTEM.md (legacy)'); + if (legacyMd) return legacyMd; + return loadApprovalInstructions(); } +/** + * Load company.md (L1) — company context and strategic direction. + * Path: .agents/company.md + * This is the "why" layer — frames everything that follows. + */ +export function loadCompanyContext(): string { + // Primary: .agents/company.md + const companyMd = readAgentsFile('company.md', 'company.md'); + if (companyMd) return companyMd; + + // Fallback: legacy directives.md (for backward compat during migration) + const memoryDir = findMemoryDir(); + if (memoryDir) { + const directivesFile = join(memoryDir, 'company', 'directives.md'); + const content = safeRead(directivesFile); + if (content) return content; + } + + return ''; +} + /** * Legacy: load approval instructions. 
Kept for backward compat — prefer SYSTEM.md. * @deprecated Absorbed into SYSTEM.md. Used as fallback when SYSTEM.md absent. @@ -232,6 +281,112 @@ function safeRead(path: string): string { } } +function stripYamlFrontmatter(markdown: string): string { + const lines = markdown.split('\n'); + let dashCount = 0; + let endIdx = -1; + for (let i = 0; i < lines.length; i++) { + if (lines[i].trim() === '---') { + dashCount++; + if (dashCount === 2) { + endIdx = i; + break; + } + } + } + if (endIdx >= 0) return lines.slice(endIdx + 1).join('\n').trim(); + return markdown.trim(); +} + +function scoreByTokens(text: string, tokens: string[]): number { + const lower = text.toLowerCase(); + let score = 0; + for (const t of tokens) { + if (!t) continue; + if (lower.includes(t)) score += 1; + } + return score; +} + +/** + * Primary context-role resolver. + * + * Uses the agent YAML frontmatter `role:` free-text as the signal. + * Only when ambiguous and enabled (env var) will it ask an LLM to pick + * one of: scanner | worker | lead | verifier. + */ +export function resolveContextRoleFromAgent(agentPath: string, agentName: string): ContextRole { + const fm = parseAgentFrontmatter(agentPath); + const roleText = fm.agent_role || ''; + const normalized = roleText.trim().toLowerCase(); + + // Direct match — new structured schema uses exact role values + const directRoles: ContextRole[] = ['scanner', 'worker', 'lead', 'verifier']; + for (const r of directRoles) { + if (normalized === r) return r; + } + // COO is a lead with expanded budget + if (normalized === 'coo') return 'coo'; + + // Deterministic mapping from role text. Avoids brittle regex coupling. 
+ const scannerTokens = ['scan', 'monitor', 'detect', 'find', 'opportun', 'scout', 'gap', 'bottleneck']; + const workerTokens = ['execute', 'implement', 'write', 'create', 'build', 'prototype', 'file', 'issue', 'worker']; + const leadTokens = ['lead', 'orchestrate', 'own', 'strategy', 'roadmap', 'coordinate', 'triage', 'review', 'mvp']; + const verifierTokens = ['verify', 'validation', 'compliance', 'audit', 'approve', 'reject', 'check', 'test', 'critic', 'verifier']; + + const scored: Array<[ContextRole, number]> = [ + ['scanner', scoreByTokens(normalized, scannerTokens)], + ['worker', scoreByTokens(normalized, workerTokens)], + ['lead', scoreByTokens(normalized, leadTokens)], + ['verifier', scoreByTokens(normalized, verifierTokens)], + ]; + + scored.sort((a, b) => b[1] - a[1]); + const best = scored[0]; + const second = scored[1]; + + // Clean mapping => unique non-zero best score. + const clean = best[1] > 0 && (!second || second[1] === 0); + if (clean) return best[0]; + + const llmEnabled = process.env.SQUADS_CONTEXT_ROLE_LLM === '1'; + if (!llmEnabled) return 'worker'; + + // LLM fallback: best-effort classification. If it fails, return worker. 
+ try { + const raw = safeRead(agentPath); + const body = stripYamlFrontmatter(raw); + const excerpt = body.slice(0, 1600); + + const prompt = [ + 'Classify the agent into exactly ONE Agents Squads context role.', + 'Return EXACTLY one token from: scanner, worker, lead, verifier.', + '', + `Agent name: ${agentName}`, + `Agent frontmatter role: ${roleText || '(missing)'}`, + '', + 'Agent definition excerpt:', + excerpt, + ].join('\n'); + + const escapedPrompt = prompt.replace(/'/g, "'\\''"); + const model = process.env.SQUADS_CONTEXT_ROLE_LLM_MODEL || 'claude-haiku-4-5'; + const out = execSync( + `claude --print --dangerously-skip-permissions --disable-slash-commands --model ${model} -- '${escapedPrompt}'`, + { encoding: 'utf-8', timeout: 60_000, maxBuffer: 2 * 1024 * 1024 } + ).trim().toLowerCase(); + + const tokens: ContextRole[] = ['scanner', 'worker', 'lead', 'verifier']; + for (const t of tokens) { + if (out === t || out.includes(t)) return t; + } + + return 'worker'; + } catch { + return 'worker'; + } +} + /** Read all .md files from a directory, concatenated */ function readDirMd(dirPath: string, maxChars: number): string { if (!existsSync(dirPath)) return ''; @@ -252,13 +407,22 @@ function readDirMd(dirPath: string, maxChars: number): string { } } -// ── Squad Context Assembly ──────────────────────────────────────────── +// ── Squad Context System Assembly ───────────────────────────────────── /** - * Gather squad context for prompt injection. + * Gather context for agent execution. + * + * Layers flow general → particular (each adds a unique dimension): + * 1. company.md — Why (company identity, alignment) + * 2. priorities.md — Where (current focus, urgency) + * 3. goals.md — What (measurable targets) + * 4. agent.md — You (agent role, instructions) + * 5. state.md — Memory (continuity from last run) + * 6. feedback.md — Supporting (squad feedback) + * 7. daily-briefing — Supporting (org pulse, leads+coo only) + * 8. 
cross-squad — Supporting (learnings from other squads) * - * Role-based context cascade (10 sections, priority-ordered): - * Sections load in order until the token budget is exhausted. + * SQUAD.md is NOT injected — it's metadata for the CLI (repo, agents, config). * Missing files are skipped gracefully — no crashes on first run or new squads. */ export function gatherSquadContext( @@ -271,26 +435,26 @@ export function gatherSquadContext( const memoryDir = findMemoryDir(); const role = options.role || 'worker'; - const budget = options.maxTokens ? options.maxTokens * 4 : ROLE_BUDGETS[role]; - const allowedSections = ROLE_SECTIONS[role]; + const budget = options.maxTokens ? options.maxTokens * 4 : (ROLE_BUDGETS[role] ?? ROLE_BUDGETS.worker); + const allowedSections = ROLE_SECTIONS[role] ?? ROLE_SECTIONS.worker; const sections: string[] = []; let usedChars = 0; - /** Try to add a section. Returns true if added, false if budget exceeded or not allowed. */ - function addSection(sectionNum: number, header: string, content: string, maxChars?: number): boolean { - if (!allowedSections.has(sectionNum)) return false; + /** Try to add a layer. Returns true if added, false if budget exceeded or not allowed. */ + function addLayer(layerNum: number, header: string, content: string, maxChars?: number): boolean { + if (!allowedSections.has(layerNum)) return false; if (!content) return false; let text = content; - const cap = maxChars || (budget - usedChars); + const remaining = Math.max(0, budget - usedChars); + const cap = maxChars !== undefined ? 
Math.min(maxChars, remaining) : remaining; if (text.length > cap) { text = text.substring(0, cap) + '\n...'; } if (usedChars + text.length > budget) { - // Budget exhausted — drop this and all later sections if (options.verbose) { - writeLine(` ${colors.dim}Context budget exhausted at section ${sectionNum} (${header})${RESET}`); + writeLine(` ${colors.dim}Context budget exhausted at layer ${layerNum} (${header})${RESET}`); } return false; } @@ -300,99 +464,72 @@ export function gatherSquadContext( return true; } - // ── Section 1: SQUAD.md ── - const squadFile = join(squadsDir, squadName, 'SQUAD.md'); - if (existsSync(squadFile)) { - try { - const content = readFileSync(squadFile, 'utf-8'); - // Extract mission section; fall back to first N chars - const missionMatch = content.match(/## Mission[\s\S]*?(?=\n## |$)/i); - const squad = missionMatch ? missionMatch[0] : content.substring(0, 2000); - addSection(1, `Squad: ${squadName}`, squad.trim()); - } catch (e) { - if (options.verbose) writeLine(` ${colors.dim}warn: failed reading SQUAD.md: ${e instanceof Error ? e.message : String(e)}${RESET}`); - } + // ── L1: company.md — Why (company identity, alignment) ── + const companyContext = loadCompanyContext(); + if (companyContext) { + addLayer(1, 'Company', stripYamlFrontmatter(companyContext)); } - // ── Section 2: priorities.md (fallback to goals.md for backward compat) ── + // ── L2: priorities.md — Where (current focus, urgency) ── if (memoryDir) { const prioritiesFile = join(memoryDir, squadName, 'priorities.md'); - const goalsFile = join(memoryDir, squadName, 'goals.md'); - const file = existsSync(prioritiesFile) ? 
prioritiesFile : goalsFile; - const content = safeRead(file); + const content = safeRead(prioritiesFile); if (content) { - addSection(2, 'Priorities', content); + addLayer(2, 'Priorities', stripYamlFrontmatter(content)); } } - // ── Section 3: directives.md ── + // ── L3: goals.md — What (measurable targets) ── if (memoryDir) { - const directivesFile = join(memoryDir, 'company', 'directives.md'); - const content = safeRead(directivesFile); + const goalsFile = join(memoryDir, squadName, 'goals.md'); + const content = safeRead(goalsFile); if (content) { - addSection(3, 'Directives', content); + addLayer(3, 'Goals', stripYamlFrontmatter(content)); } } - // ── Section 4: feedback.md ── - if (memoryDir) { - const feedbackFile = join(memoryDir, squadName, 'feedback.md'); - const content = safeRead(feedbackFile); - if (content) { - addSection(4, 'Feedback', content); + // ── L4: agent.md — You (agent role, instructions) ── + if (options.agentPath) { + const agentContent = safeRead(options.agentPath); + if (agentContent) { + // Strip YAML frontmatter — inject the markdown body only + const body = stripYamlFrontmatter(agentContent); + addLayer(4, `Agent: ${agentName}`, body); } } - // ── Section 5: state.md ── + // ── L5: state.md — Memory (continuity from last run) ── if (memoryDir) { const stateFile = join(memoryDir, squadName, agentName, 'state.md'); const content = safeRead(stateFile); if (content) { - // Scanner gets capped state, lead/coo get full - const stateCap = role === 'scanner' ? 
2000 : undefined; - addSection(5, 'Previous State', content, stateCap); - } - } - - // ── Section 6: active-work.md ── - if (memoryDir) { - const activeWorkFile = join(memoryDir, squadName, 'active-work.md'); - const content = safeRead(activeWorkFile); - if (content) { - addSection(6, 'Active Work', content); + // Strip frontmatter — LLM gets the body (Current/Blockers/Carry Forward) + const body = stripYamlFrontmatter(content); + const stateCap = (role === 'scanner' || role === 'verifier') ? 2000 : undefined; + addLayer(5, 'Previous State', body, stateCap); } } - // ── Section 7: Agent briefs ── + // ── L6: feedback.md — Supporting (squad-level feedback) ── if (memoryDir) { - const briefsDir = join(memoryDir, squadName, agentName, 'briefs'); - const content = readDirMd(briefsDir, 3000); - if (content) { - addSection(7, 'Agent Briefs', content); - } - } - - // ── Section 8: Squad briefs ── - if (memoryDir) { - const briefsDir = join(memoryDir, squadName, '_briefs'); - const content = readDirMd(briefsDir, 3000); + const feedbackFile = join(memoryDir, squadName, 'feedback.md'); + const content = safeRead(feedbackFile); if (content) { - addSection(8, 'Squad Briefs', content); + addLayer(6, 'Feedback', content); } } - // ── Section 9: Daily briefing ── + // ── L7: Daily briefing — Supporting (org pulse, leads+coo only) ── if (memoryDir) { const dailyFile = join(memoryDir, 'daily-briefing.md'); const content = safeRead(dailyFile); if (content) { - addSection(9, 'Daily Briefing', content); + addLayer(7, 'Daily Briefing', content); } } - // ── Section 10: Cross-squad learnings ── + // ── L8: Cross-squad learnings — Supporting (from context_from agents) ── if (memoryDir) { - // Load from context_from squads if defined in agent frontmatter const frontmatter = options.agentPath ? 
parseAgentFrontmatter(options.agentPath) : {}; const contextSquads = frontmatter.context_from || []; const learningParts: string[] = []; @@ -401,17 +538,19 @@ export function gatherSquadContext( const content = safeRead(learningsFile); if (content) { learningParts.push(`### ${ctx}\n${content}`); + } else if (options.verbose) { + writeLine(` ${colors.dim}context_from: no learnings found for squad '${ctx}'${RESET}`); } } if (learningParts.length > 0) { - addSection(10, 'Cross-Squad Learnings', learningParts.join('\n\n')); + addLayer(8, 'Cross-Squad Learnings', learningParts.join('\n\n')); } } if (sections.length === 0) return ''; if (options.verbose) { - writeLine(` ${colors.dim}Context: ${sections.length} sections, ~${Math.ceil(usedChars / 4)} tokens (${role} role, budget: ~${Math.ceil(budget / 4)})${RESET}`); + writeLine(` ${colors.dim}Context: ${sections.length} layers, ~${Math.ceil(usedChars / 4)} tokens (${role} role, budget: ~${Math.ceil(budget / 4)})${RESET}`); } return `\n# CONTEXT\n${sections.join('\n\n')}\n`; diff --git a/src/lib/run-modes.ts b/src/lib/run-modes.ts new file mode 100644 index 00000000..3d269f10 --- /dev/null +++ b/src/lib/run-modes.ts @@ -0,0 +1,870 @@ +/** + * Squad execution modes: autopilot, squad loop, lead mode, and post-evaluation. + * Extracted from commands/run.ts to reduce its size. 
+ */ + +import { spawn } from 'child_process'; +import { join } from 'path'; +import { existsSync, readFileSync } from 'fs'; +import { + type RunOptions, + DEFAULT_TIMEOUT_MINUTES, + TOOL_USE_PROVIDERS, +} from './run-types.js'; +import { + checkClaudeCliAvailable, + getProjectRoot, +} from './run-utils.js'; +import { + executeWithClaude, + executeWithProvider, +} from './execution-engine.js'; +import { + checkLocalCooldown, + DEFAULT_SCHEDULED_COOLDOWN_MS, +} from './execution-log.js'; +import { runAgent } from './agent-runner.js'; +import { + findSquadsDir, + loadSquad, +} from './squad-parser.js'; +import { + type LoopState, + loadLoopState, + saveLoopState, + getSquadRepos, + scoreSquads, + checkCooldown, + classifyRunOutcome, + pushMemorySignals, + slackNotify, + computePhases, + scoreSquadsForPhase, +} from './squad-loop.js'; +import { + loadCognitionState, + saveCognitionState, + seedBeliefsIfEmpty, + runCognitionCycle, +} from './cognition.js'; +import { + runConversation, + saveTranscript, + type ConversationOptions, +} from './workflow.js'; +import { + reportExecutionStart, + reportConversationResult, + pushCognitionSignal, +} from './api-client.js'; +import { getBotGhEnv } from './github.js'; +import { + colors, + bold, + RESET, + gradient, + icons, + writeLine, +} from './terminal.js'; +import { + getCLIConfig, + isProviderCLIAvailable, +} from './llm-clis.js'; +import { getBridgeUrl } from './env-config.js'; +import { classifyAgent } from './conversation.js'; +import ora from 'ora'; + +// ── Post-run evaluation ───────────────────────────────────────────── +// After any squad run, dispatch the COO (company-lead) to evaluate outputs. +// This is the feedback loop that makes the system learn. + +const EVAL_TIMEOUT_MINUTES = 15; + +/** + * Run the COO evaluation after squad execution. + * Dispatches company-lead with a scoped evaluation task for the squads that just ran. + * Generates feedback.md and active-work.md per squad. 
+ */ +export async function runPostEvaluation( + squadsRun: string[], + options: RunOptions, +): Promise { + // Skip if running company squad itself (prevent recursion) + if (squadsRun.length === 1 && squadsRun[0] === 'company') return; + // Skip if evaluation disabled + if (options.eval === false) return; + // Skip dry-run + if (options.dryRun) return; + // Skip background runs — evaluation needs foreground context + if (options.background) return; + + const squadsDir = findSquadsDir(); + if (!squadsDir) return; + + // Find company-lead agent + const cooPath = join(squadsDir, 'company', 'company-lead.md'); + if (!existsSync(cooPath)) { + if (options.verbose) { + writeLine(` ${colors.dim}Skipping evaluation: company-lead.md not found${RESET}`); + } + return; + } + + const squadList = squadsRun.join(', '); + writeLine(); + writeLine(` ${gradient('eval')} ${colors.dim}COO evaluating: ${squadList}${RESET}`); + + const evalTask = `Post-run evaluation for: ${squadList}. + +## Evaluation Process + +For each squad (${squadList}): + +### 1. Read previous feedback FIRST +Read \`.agents/memory/{squad}/feedback.md\` if it exists. Note the previous grade, identified patterns, and priorities. This is your baseline — you are measuring CHANGE, not just current state. + +### 2. Gather current evidence +- PRs (last 7 days): \`gh pr list --state all --limit 20 --json number,title,state,mergedAt,createdAt\` +- Recent commits (last 7 days): \`gh api repos/{owner}/{repo}/commits?since=YYYY-MM-DDT00:00:00Z&per_page=20 --jq '.[].commit.message'\` +- Open issues: \`gh issue list --state open --limit 15 --json number,title,labels\` +- Read \`.agents/memory/{squad}/priorities.md\` and \`.agents/memory/company/directives.md\` +- Read \`.agents/memory/{squad}/active-work.md\` (previous cycle's work tracking) + +### 3. 
Write feedback.md (APPEND history, don't overwrite) +\`\`\`markdown +# Feedback — {squad} + +## Current Assessment (YYYY-MM-DD): [A-F] +Merge rate: X% | Noise ratio: Y% | Priority alignment: Z% + +## Trajectory: [improving | stable | declining | new] +Previous grade: [grade] → Current: [grade]. [1-line explanation of why] + +## Valuable (continue) +- [specific PR/issue that advanced priorities] + +## Noise (stop) +- [specific anti-pattern observed] + +## Next Cycle Priorities +1. [specific actionable item] + +## History +| Date | Grade | Key Signal | +|------|-------|------------| +| YYYY-MM-DD | X | [what drove this grade] | +[keep last 10 entries, append new row] +\`\`\` + +### 4. Write active-work.md +\`\`\`markdown +# Active Work — {squad} (YYYY-MM-DD) +## Continue (open PRs) +- #{number}: {title} — {status/next action} +## Backlog (assigned issues) +- #{number}: {title} — {priority} +## Do NOT Create +- {description of known duplicate patterns from feedback history} +\`\`\` + +### 5. Commit to hq main +${squadsRun.length > 1 ? ` +### 6. Cross-squad assessment +Evaluate how outputs from ${squadList} connect: +- Duplicated efforts across squads? +- Missing handoffs (one squad's output should feed another)? +- Coordination gaps (conflicting PRs, redundant issues)? +- Combined trajectory: is the org getting more effective or more noisy? +Write cross-squad findings to \`.agents/memory/company/cross-squad-review.md\`. +` : ''} +CRITICAL: You are measuring DIRECTION not just position. A C-grade squad improving from F is better than a B-grade squad declining from A. 
The history table IS the feedback loop — agents read it next cycle.`; + + await runAgent('company-lead', cooPath, 'company', { + ...options, + task: evalTask, + timeout: EVAL_TIMEOUT_MINUTES, + eval: false, // prevent recursion + trigger: 'manual', + }); +} + +// ── Autopilot mode ────────────────────────────────────────────────── +// When `squads run` is called with no target, it becomes the daemon: +// score all squads, dispatch the full loop (scanner→lead→worker→verifier) +// for top-priority squads, push cognition signals, repeat. + +// Default cooldowns per agent role (ms) +const ROLE_COOLDOWNS: Record = { + scanner: 60 * 60 * 1000, // 1h — fast, cheap + lead: 4 * 60 * 60 * 1000, // 4h — orchestration + worker: 30 * 60 * 1000, // 30m — if work exists + verifier: 30 * 60 * 1000, // 30m — follows workers + 'issue-solver': 30 * 60 * 1000, // 30m — default worker +}; + +/** + * Classify an agent's role from its name. + * Uses classifyAgent from conversation.ts, falls back to 'worker'. + */ +function classifyAgentRole(name: string): string { + return classifyAgent(name) ?? 'worker'; +} + +/** + * Autopilot: continuous loop that scores squads and dispatches full squad loops. + * Replaces the daemon command — same state file, same scoring, but dispatches + * the full agent roster instead of just issue-solver. 
/**
 * Autopilot: repeatedly score squads, dispatch the top candidates, record
 * cost/outcomes and run the post-cycle evaluation and cognition steps.
 *
 * The loop ends after one cycle when `options.once` is set, or once SIGINT /
 * SIGTERM has been received. A signal never aborts a cycle in progress, but it
 * does cut the wait between cycles short (checked once per second).
 *
 * @param squadsDir - Squads directory, forwarded to each squad loop.
 * @param options - Run options; `interval` (minutes), `maxParallel`, `budget`
 *   ($/day, 0 = unlimited), `once`, `phased` and `dryRun` are read here.
 */
export async function runAutopilot(
  squadsDir: string,
  options: RunOptions,
): Promise<void> {
  // A value that parses to NaN, zero or a negative number would either turn the
  // wait into a zero-delay busy loop (interval) or make `slice(0, n)` select
  // nothing / the wrong squads (maxParallel) — fall back to the defaults.
  const positiveOr = (value: number, fallback: number): number =>
    Number.isFinite(value) && value > 0 ? value : fallback;

  const interval = positiveOr(parseInt(String(options.interval || '30'), 10), 30);
  const maxParallel = positiveOr(parseInt(String(options.maxParallel || '2'), 10), 2);
  const budget = parseFloat(String(options.budget || '0'));
  const once = !!options.once;
  const intervalMs = interval * 60 * 1000;

  // Seed cognition beliefs on first run
  const cognitionState = loadCognitionState();
  seedBeliefsIfEmpty(cognitionState);
  saveCognitionState(cognitionState);

  writeLine();
  writeLine(` ${gradient('squads')} ${colors.dim}autopilot${RESET}`);
  writeLine(` ${colors.dim}Interval: ${interval}m | Parallel: ${maxParallel} | Budget: ${budget > 0 ? '$' + budget + '/day' : 'unlimited'}${RESET}`);
  writeLine(` ${colors.dim}Cognition: ${cognitionState.beliefs.length} beliefs, ${cognitionState.signals.length} signals${RESET}`);
  writeLine();

  let running = true;
  const handleSignal = () => { running = false; };
  process.on('SIGINT', handleSignal);
  process.on('SIGTERM', handleSignal);

  // Wait out the interval in one-second slices so a shutdown signal is
  // honoured within about a second instead of after the whole interval.
  const waitForNextCycle = async (): Promise<void> => {
    const deadline = Date.now() + intervalMs;
    while (running && Date.now() < deadline) {
      await sleep(Math.min(1000, deadline - Date.now()));
    }
  };

  try {
    while (running) {
      const cycleStart = Date.now();
      const state = loadLoopState();

      // Reset daily cost when the (UTC) calendar date changes
      const today = new Date().toISOString().slice(0, 10);
      if (state.dailyCostDate !== today) {
        state.dailyCost = 0;
        state.dailyCostDate = today;
      }

      // Budget check
      if (budget > 0 && state.dailyCost >= budget) {
        writeLine(` ${icons.warning} ${colors.yellow}Daily budget reached ($${state.dailyCost.toFixed(2)}/$${budget})${RESET}`);
        saveLoopState(state);
        if (once) break;
        await waitForNextCycle();
        continue;
      }

      writeLine(` ${colors.dim}── Cycle ${new Date().toLocaleTimeString()} ──${RESET}`);

      // Get bot env for GitHub API calls
      let ghEnv: Record<string, string> = {};
      try { ghEnv = await getBotGhEnv(); } catch { /* use default */ }

      // Score squads
      const squadRepos = getSquadRepos();

      let dispatchedSquadNames: string[];
      const failed: string[] = [];
      const completed: string[] = [];

      if (options.phased) {
        // ── Phased dispatch: execute squads in dependency order ──
        const phases = computePhases();
        const phaseCount = phases.size;
        writeLine(` ${colors.dim}Phased mode: ${phaseCount} phase(s)${RESET}`);

        dispatchedSquadNames = [];

        for (const [phaseNum, phaseSquads] of phases) {
          writeLine(` ${colors.dim}── Phase ${phaseNum} (${phaseSquads.join(', ')}) ──${RESET}`);

          // Score only squads in this phase
          const phaseSignals = scoreSquadsForPhase(phaseSquads, state, squadRepos, ghEnv);
          const phaseDispatch = phaseSignals
            .filter(s => s.score > 0)
            .slice(0, maxParallel);

          if (phaseDispatch.length === 0) {
            writeLine(` ${colors.dim}No squads need attention in this phase${RESET}`);
            continue;
          }

          for (const sig of phaseDispatch) {
            writeLine(` ${colors.cyan}${sig.squad}${RESET} (score: ${sig.score}) — ${sig.reason}`);
          }

          if (options.dryRun) {
            continue;
          }

          // Dispatch phase squads in parallel, wait for all before next phase
          const phaseResults = await Promise.allSettled(
            phaseDispatch.map(sig => {
              const squad = loadSquad(sig.squad);
              if (!squad) return Promise.resolve();
              return runSquadLoop(squad, squadsDir, state, ghEnv, options);
            })
          );

          for (let i = 0; i < phaseResults.length; i++) {
            const name = phaseDispatch[i].squad;
            dispatchedSquadNames.push(name);
            if (phaseResults[i].status === 'rejected') {
              failed.push(name);
              state.failCounts[name] = (state.failCounts[name] || 0) + 1;
            } else {
              completed.push(name);
              delete state.failCounts[name];
            }
          }
        }

        if (options.dryRun) {
          writeLine(` ${colors.yellow}[DRY RUN] Would dispatch above squads in phase order${RESET}`);
          saveLoopState(state);
          if (once) break;
          await waitForNextCycle();
          continue;
        }
      } else {
        // ── Flat dispatch: score-based, no phase ordering ──
        const signals = scoreSquads(state, squadRepos, ghEnv);

        if (signals.length === 0 || signals.every(s => s.score <= 0)) {
          writeLine(` ${colors.dim}No squads need attention${RESET}`);
          saveLoopState(state);
          if (once) break;
          await waitForNextCycle();
          continue;
        }

        // Pick top N squads to dispatch
        const toDispatch = signals
          .filter(s => s.score > 0)
          .slice(0, maxParallel);

        writeLine(` ${colors.dim}Dispatching ${toDispatch.length} squad(s):${RESET}`);
        for (const sig of toDispatch) {
          writeLine(` ${colors.cyan}${sig.squad}${RESET} (score: ${sig.score}) — ${sig.reason}`);
        }

        if (options.dryRun) {
          writeLine(` ${colors.yellow}[DRY RUN] Would dispatch above squads${RESET}`);
          saveLoopState(state);
          if (once) break;
          await waitForNextCycle();
          continue;
        }

        // Dispatch squad loops in parallel
        const results = await Promise.allSettled(
          toDispatch.map(sig => {
            const squad = loadSquad(sig.squad);
            if (!squad) return Promise.resolve();
            return runSquadLoop(squad, squadsDir, state, ghEnv, options);
          })
        );

        for (let i = 0; i < results.length; i++) {
          const r = results[i];
          const name = toDispatch[i].squad;
          if (r.status === 'rejected') {
            failed.push(name);
            state.failCounts[name] = (state.failCounts[name] || 0) + 1;
          } else {
            completed.push(name);
            delete state.failCounts[name];
          }
        }

        dispatchedSquadNames = toDispatch.map(s => s.squad);
      }

      // Estimate cost (rough: $1 per squad loop)
      const cycleCost = dispatchedSquadNames.length * 1.0;
      state.dailyCost += cycleCost;

      // Push memory signals for dispatched squads
      await pushMemorySignals(dispatchedSquadNames, state, !!options.verbose);

      // Trim and save state
      state.recentRuns = state.recentRuns.slice(-100);
      state.lastCycle = new Date().toISOString();
      saveLoopState(state);

      // Slack: only on failures
      if (failed.length > 0) {
        slackNotify([
          `*Autopilot cycle — failures*`,
          `Failed: ${failed.join(', ')}`,
          `Completed: ${completed.join(', ')}`,
          `Daily: $${state.dailyCost.toFixed(2)}${budget > 0 ? '/$' + budget : ''}`,
        ].join('\n'));
      }

      // Escalate persistent failures
      for (const [key, count] of Object.entries(state.failCounts)) {
        if (count >= 3) {
          slackNotify(`🚨 *Escalation*: ${key} has failed ${count} times consecutively.`);
        }
      }

      // ── Post-run COO evaluation ──
      // Evaluate outputs from all dispatched squads (skips if company was the only one)
      if (dispatchedSquadNames.length > 0) {
        await runPostEvaluation(dispatchedSquadNames, options);
      }

      // ── Cognition: learn from this cycle ──
      // Ingest memory → synthesize signals → evaluate decisions → reflect
      writeLine(` ${colors.dim}Cognition cycle...${RESET}`);
      const cognitionResult = await runCognitionCycle(dispatchedSquadNames, !!options.verbose);
      if (cognitionResult.signalsIngested > 0 || cognitionResult.beliefsUpdated > 0 || cognitionResult.reflected) {
        writeLine(` ${colors.dim}🧠 ${cognitionResult.signalsIngested} signals → ${cognitionResult.beliefsUpdated} beliefs updated${cognitionResult.reflected ? ' → reflected' : ''}${RESET}`);
      }

      const elapsed = ((Date.now() - cycleStart) / 1000).toFixed(0);
      writeLine(` ${colors.dim}Cycle done in ${elapsed}s | Daily: $${state.dailyCost.toFixed(2)}${RESET}`);
      writeLine();

      if (once) break;
      await waitForNextCycle();
    }
  } finally {
    // Always detach the handlers, including when a cycle throws, so later
    // SIGINT/SIGTERM are not absorbed by a loop that has already ended.
    process.off('SIGINT', handleSignal);
    process.off('SIGTERM', handleSignal);
  }
}
/**
 * Run the full squad loop in role order: scanner → lead → worker → verifier.
 *
 * Agents are grouped by `classifyAgentRole`; only agents whose `<name>.md`
 * file exists under the squad directory take part. Each agent is skipped while
 * `checkCooldown` reports it is still cooling down. Every attempt updates the
 * cooldown timestamp and appends to `state.recentRuns`; successful attempts
 * also push a cognition signal.
 *
 * A failing agent does not stop the remaining agents or steps, but the
 * failure is reported: after the loop has finished the function throws, so a
 * caller using `Promise.allSettled` sees the squad as rejected.
 *
 * @param squad - Parsed squad definition.
 * @param squadsDir - Directory that contains the squad folders.
 * @param state - Loop state; `cooldowns` and `recentRuns` are mutated.
 * @param ghEnv - GitHub environment supplied by the caller (not read here).
 * @param options - Run options forwarded to the agent / conversation runners.
 * @throws Error when one or more agent runs failed during this loop.
 */
async function runSquadLoop(
  squad: NonNullable<ReturnType<typeof loadSquad>>,
  squadsDir: string,
  state: LoopState,
  ghEnv: Record<string, string>,
  options: RunOptions,
): Promise<void> {
  writeLine(` ${gradient('▸')} ${colors.cyan}${squad.name}${RESET} — full loop`);

  // Discover agents and classify by role
  const agentsByRole: Record<string, Array<{ name: string; path: string }>> = {
    scanner: [],
    lead: [],
    worker: [],
    verifier: [],
  };

  for (const agent of squad.agents) {
    const role = classifyAgentRole(agent.name);
    const agentPath = join(squadsDir, squad.dir, `${agent.name}.md`);
    if (existsSync(agentPath)) {
      agentsByRole[role].push({ name: agent.name, path: agentPath });
    }
  }

  const loopSteps: Array<{ role: string; agents: Array<{ name: string; path: string }> }> = [
    { role: 'scanner', agents: agentsByRole.scanner },
    { role: 'lead', agents: agentsByRole.lead },
    { role: 'worker', agents: agentsByRole.worker },
    { role: 'verifier', agents: agentsByRole.verifier },
  ];

  // Names of agents whose run threw; reported to the caller once the loop ends.
  const failedAgents: string[] = [];

  for (const step of loopSteps) {
    if (step.agents.length === 0) continue;

    // Multiple workers → one conversation coordinates all of them, so the
    // worker step must run at most once regardless of how it ends.
    const useConversation = step.role === 'worker' && step.agents.length > 1;

    for (const agent of step.agents) {
      const cooldownMs = ROLE_COOLDOWNS[step.role] || ROLE_COOLDOWNS.worker;
      if (!checkCooldown(state, squad.name, agent.name, cooldownMs)) {
        if (options.verbose) {
          writeLine(` ${colors.dim}↳ ${agent.name} (${step.role}) — in cooldown, skip${RESET}`);
        }
        continue;
      }

      writeLine(` ${colors.dim}↳ ${agent.name} (${step.role})${RESET}`);

      const startMs = Date.now();
      try {
        if (useConversation) {
          const convOptions: ConversationOptions = {
            task: options.task,
            maxTurns: options.maxTurns || 20,
            costCeiling: options.costCeiling || 25,
            verbose: options.verbose,
            model: options.model,
          };
          await runConversation(squad, convOptions);
        } else {
          // Scanners, leads, verifiers and a lone worker run as a direct agent
          await runAgent(agent.name, agent.path, squad.dir, {
            ...options,
            background: false,
            watch: false,
            execute: true,
          });
        }

        const durationMs = Date.now() - startMs;
        const outcome = classifyRunOutcome(0, durationMs);

        // Update cooldown
        state.cooldowns[`${squad.name}:${agent.name}`] = Date.now();

        // Record run
        state.recentRuns.push({
          squad: squad.name,
          agent: agent.name,
          at: new Date().toISOString(),
          result: outcome === 'skipped' ? 'completed' : outcome,
          durationMs,
        });

        // Push cognition signal
        pushCognitionSignal({
          source: 'execution',
          signal_type: `${step.role}_${outcome}`,
          value: durationMs / 1000,
          unit: 'seconds',
          data: {
            squad: squad.name,
            agent: agent.name,
            role: step.role,
            duration_ms: durationMs,
          },
          entity_type: 'agent',
          entity_id: `${squad.name}/${agent.name}`,
          confidence: 0.9,
        });

        if (outcome === 'skipped') {
          writeLine(` ${colors.dim}↳ ${agent.name} — phantom (${(durationMs / 1000).toFixed(0)}s), skipped${RESET}`);
        }

        // If this was a worker step, break after first conversation
        if (useConversation) break;

      } catch (err) {
        const durationMs = Date.now() - startMs;
        state.cooldowns[`${squad.name}:${agent.name}`] = Date.now();
        state.recentRuns.push({
          squad: squad.name,
          agent: agent.name,
          at: new Date().toISOString(),
          result: 'failed',
          durationMs,
        });
        failedAgents.push(agent.name);

        writeLine(` ${colors.red}↳ ${agent.name} failed: ${err instanceof Error ? err.message : 'unknown'}${RESET}`);

        // A failed conversation already covered every worker; do not start the
        // same conversation again for each remaining worker.
        if (useConversation) break;
      }
    }
  }

  writeLine(` ${colors.dim}↳ ${squad.name} loop complete${RESET}`);

  // Surface failures only now, after every step had its chance to run, so the
  // caller's failure counting and escalation can actually trigger.
  if (failedAgents.length > 0) {
    throw new Error(`${squad.name}: ${failedAgents.length} agent run(s) failed (${failedAgents.join(', ')})`);
  }
}

/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}
/**
 * Lead mode: start one orchestrator session whose prompt tells it to delegate
 * work to the squad's agents through the Task tool.
 *
 * Returns without launching when the squad is missing, no agent definition
 * file exists, the provider is not in `TOOL_USE_PROVIDERS`, `options.execute`
 * is not set (a hint is printed instead), or the provider CLI is unavailable.
 *
 * @param squad - Parsed squad, or a falsy value when it could not be loaded.
 * @param squadsDir - Directory that contains the squad folders.
 * @param options - Run options (provider, timeout, execution mode, model, …).
 */
export async function runLeadMode(
  squad: ReturnType<typeof loadSquad>,
  squadsDir: string,
  options: RunOptions
): Promise<void> {
  if (!squad) return;

  // Only agents whose definition file is present on disk can be delegated to.
  const roster = squad.agents
    .map(member => ({
      name: member.name,
      path: join(squadsDir, squad.dir, `${member.name}.md`),
      role: member.role || '',
    }))
    .filter(member => existsSync(member.path));

  if (roster.length === 0) {
    writeLine(` ${icons.error} ${colors.red}No agent files found${RESET}`);
    return;
  }

  // Explicit flag wins, then the squad default, then Anthropic.
  const provider = options.provider || squad.providers?.default || 'anthropic';

  // Block lead mode for providers without tool use support
  if (!TOOL_USE_PROVIDERS.has(provider)) {
    const providerName = getCLIConfig(provider)?.displayName || provider;
    writeLine(` ${icons.warning} ${colors.yellow}Lead mode requires tool-use support (Claude, Gemini)${RESET}`);
    writeLine(` ${colors.dim}${providerName} cannot spawn sub-agents via Task tool.${RESET}`);
    writeLine();
    writeLine(` ${colors.dim}Options:${RESET}`);
    writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squad.name}${RESET} --provider ${provider} ${colors.dim}← sequential mode (recommended)${RESET}`);
    writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squad.name}/${roster[0]?.name || 'agent'}${RESET} --provider ${provider} ${colors.dim}← single agent${RESET}`);
    writeLine();
    return;
  }

  writeLine(` ${bold}Lead mode${RESET} ${colors.dim}orchestrating ${roster.length} agents${RESET}`);
  writeLine();

  // List available agents
  roster.forEach(member => {
    writeLine(` ${icons.empty} ${colors.cyan}${member.name}${RESET} ${colors.dim}${member.role}${RESET}`);
  });
  writeLine();

  // Without --execute this is a preview: show how to launch and stop.
  if (!options.execute) {
    writeLine(` ${colors.dim}Launch lead session:${RESET}`);
    writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squad.name}${RESET} --lead`);
    writeLine();
    return;
  }

  // Build the lead prompt
  const timeoutMins = options.timeout || DEFAULT_TIMEOUT_MINUTES;
  const agentList = roster.map(member => `- ${member.name}: ${member.role}`).join('\n');
  const agentPaths = roster.map(member => `- ${member.name}: ${member.path}`).join('\n');

  const prompt = `You are the Lead of the ${squad.name} squad.

## Mission
${squad.mission || 'Execute squad operations efficiently.'}

## Available Agents
${agentList}

## Agent Definition Files
${agentPaths}

## Your Role as Lead

1. **Assess the situation**: Check for pending work:
 - Run \`gh issue list --repo agents-squads/hq --label squad:${squad.name}\` for assigned issues
 - Check .agents/memory/${squad.dir}/ for squad state and pending tasks
 - Review recent activity with \`git log --oneline -10\`

2. **Delegate work using Task tool**: For each piece of work:
 - Use the Task tool with subagent_type="general-purpose"
 - Include the agent definition file path in the prompt
 - Spawn multiple Task agents IN PARALLEL when work is independent
 - Example: "Read ${roster[0]?.path || 'agent.md'} and execute its instructions for [specific task]"

3. **Coordinate parallel execution**:
 - Independent tasks → spawn Task agents in parallel (single message, multiple tool calls)
 - Dependent tasks → run sequentially
 - Monitor progress and handle failures

4. **Report and update memory**:
 - Update .agents/memory/${squad.dir}/state.md with completed work
 - Log learnings to learnings.md
 - Create issues for follow-up work if needed

## Time Budget
You have ${timeoutMins} minutes. Prioritize high-impact work.

## Critical Instructions
- Use Task tool for delegation, NOT direct execution of agent work
- Spawn parallel Task agents when work is independent
- When done, type /exit to end the session
- Do NOT wait for user input - work autonomously

## Async Mode (CRITICAL)
This is ASYNC execution - Task agents must be fully autonomous:
- **Findings** → Create GitHub issues (gh issue create)
- **Code changes** → Create PRs (gh pr create)
- **Analysis results** → Write to .agents/outputs/ or memory files
- **NEVER wait for human review** - complete the work and move on
- **NEVER ask clarifying questions** - make reasonable decisions

Instruct each Task agent: "Work autonomously. Output findings to GitHub issues. Output code changes as PRs. Do not wait for review."

Begin by assessing pending work, then delegate to agents via Task tool.`;

  // Make sure the CLI for the chosen provider is installed before launching.
  const isAnthropic = provider === 'anthropic';
  if (isAnthropic) {
    if (!(await checkClaudeCliAvailable())) {
      writeLine(` ${colors.yellow}Claude CLI not found${RESET}`);
      writeLine(` ${colors.dim}Install: npm install -g @anthropic-ai/claude-code${RESET}`);
      return;
    }
  } else if (!isProviderCLIAvailable(provider)) {
    const cliConfig = getCLIConfig(provider);
    writeLine(` ${colors.yellow}${cliConfig?.displayName || provider} CLI not found${RESET}`);
    if (cliConfig?.install) {
      writeLine(` ${colors.dim}Install: ${cliConfig.install}${RESET}`);
    }
    return;
  }

  // Determine execution mode (foreground is default, background is opt-in)
  const isWatch = options.watch === true;
  const isBackground = options.background === true && !options.watch;
  const isForeground = !isBackground && !isWatch;

  let modeText = '';
  if (isBackground) {
    modeText = ' (background)';
  } else if (isWatch) {
    modeText = ' (watch)';
  }
  const providerDisplay = isAnthropic ? 'Claude' : (getCLIConfig(provider)?.displayName || provider);
  writeLine(` ${gradient('Launching')} lead session${modeText} with ${providerDisplay}...`);
  writeLine();

  try {
    // Find lead agent name from agent files or use default
    const leadAgentName = roster.find(member => member.name.includes('lead'))?.name || `${squad.dir}-lead`;

    const result: string = isAnthropic
      ? await executeWithClaude(prompt, {
          verbose: options.verbose,
          timeoutMinutes: timeoutMins,
          foreground: options.foreground,
          background: options.background,
          watch: options.watch,
          useApi: options.useApi,
          effort: options.effort,
          skills: options.skills,
          trigger: options.trigger || 'manual',
          squadName: squad.dir,
          agentName: leadAgentName,
          model: options.model,
        })
      : await executeWithProvider(provider, prompt, {
          verbose: options.verbose,
          foreground: isForeground || isWatch,
          squadName: squad.dir,
          agentName: leadAgentName,
        });

    if (isForeground || isWatch) {
      writeLine();
      writeLine(` ${icons.success} Lead session completed`);
      return;
    }

    writeLine(` ${icons.success} Lead session launched in background`);
    writeLine(` ${colors.dim}${result}${RESET}`);
    writeLine();
    writeLine(` ${colors.dim}The lead will:${RESET}`);
    writeLine(` ${colors.dim} 1. Assess pending work (issues, memory)${RESET}`);
    writeLine(` ${colors.dim} 2. Spawn Task agents for parallel execution${RESET}`);
    writeLine(` ${colors.dim} 3. Coordinate and report results${RESET}`);
    writeLine();
    writeLine(` ${colors.dim}Monitor: squads workers${RESET}`);
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    writeLine(` ${icons.error} ${colors.red}Failed to launch agent${RESET}`);
    writeLine(` ${colors.dim}${msg}${RESET}`);
    writeLine(` ${colors.dim}Run \`squads doctor\` to check your setup.${RESET}`);
  }
}
// ── Sequential mode ──────────────────────────────────────────────────
// For providers without tool-use: agents run one after another and no output
// is passed from one agent to the next.

/**
 * Run every squad agent that has a definition file, one at a time, in the
 * foreground, through `executeWithProvider`.
 *
 * Each agent's prompt is its definition file followed by the context returned
 * by `gatherSquadContext`. A failing agent is counted and reported, and the
 * run continues with the next agent. Without `options.execute` only the agent
 * list and a launch hint are printed.
 *
 * @param squad - Parsed squad definition.
 * @param squadsDir - Directory that contains the squad folders.
 * @param provider - Provider id handed to `executeWithProvider`.
 * @param options - Run options (`execute`, `verbose`, `model` are read here).
 */
export async function runSequentialMode(
  squad: NonNullable<ReturnType<typeof loadSquad>>,
  squadsDir: string,
  provider: string,
  options: RunOptions,
): Promise<void> {
  const providerName = getCLIConfig(provider)?.displayName || provider;

  const roster = squad.agents
    .map(member => ({
      name: member.name,
      role: member.role || '',
      path: join(squadsDir, squad.dir, `${member.name}.md`),
    }))
    .filter(member => existsSync(member.path));

  if (roster.length === 0) {
    writeLine(` ${icons.error} ${colors.red}No agent files found${RESET}`);
    return;
  }

  writeLine(` ${bold}Sequential mode${RESET} ${colors.dim}(${providerName} — agents run one at a time)${RESET}`);
  writeLine();

  roster.forEach(member => {
    writeLine(` ${icons.empty} ${colors.cyan}${member.name}${RESET} ${colors.dim}${member.role}${RESET}`);
  });
  writeLine();

  // Preview only: show the command that would run the squad.
  if (!options.execute) {
    writeLine(` ${colors.dim}Run sequentially:${RESET}`);
    writeLine(` ${colors.dim}$${RESET} squads run ${colors.cyan}${squad.name}${RESET} --provider ${provider}`);
    writeLine();
    return;
  }

  const startedAt = Date.now();
  const tally = { ok: 0, failed: 0 };

  for (const [index, member] of roster.entries()) {
    const label = `[${index + 1}/${roster.length}]`;
    writeLine(` ${colors.dim}${label}${RESET} Running ${colors.cyan}${member.name}${RESET}...`);

    try {
      // Prompt = agent definition + squad context
      const definition = readFileSync(member.path, 'utf-8');
      const { gatherSquadContext } = await import('./run-context.js');
      const context = gatherSquadContext(squad.dir, member.name, {
        verbose: options.verbose,
        agentPath: member.path,
      });

      await executeWithProvider(provider, `${definition}\n${context}`, {
        verbose: options.verbose,
        foreground: true,
        squadName: squad.dir,
        agentName: member.name,
        model: options.model,
      });

      tally.ok += 1;
      writeLine(` ${icons.success} ${colors.dim}${label}${RESET} ${member.name} ${colors.green}complete${RESET}`);
    } catch (err) {
      tally.failed += 1;
      writeLine(` ${icons.error} ${colors.dim}${label}${RESET} ${member.name} ${colors.red}failed: ${err instanceof Error ? err.message : String(err)}${RESET}`);
    }

    writeLine();
  }

  const elapsed = Math.round((Date.now() - startedAt) / 1000);
  writeLine(` ${gradient('Sequential run complete')} ${colors.dim}(${tally.ok} ok, ${tally.failed} failed, ${elapsed}s)${RESET}`);
  writeLine();
}
+ */ +import type { EffortLevel } from './squad-parser.js'; + +// ── Constants ──────────────────────────────────────────────────────── +export const DEFAULT_TIMEOUT_MINUTES = 30; +export const SOFT_DEADLINE_RATIO = 0.7; + +/** Providers that support tool use (sub-agent spawning, conversation orchestration) */ +export const TOOL_USE_PROVIDERS = new Set(['anthropic', 'google']); + +// ── Interfaces ─────────────────────────────────────────────────────── + +export interface RunOptions { + verbose?: boolean; + dryRun?: boolean; + agent?: string; + timeout?: number; // minutes, default 30 + execute?: boolean; + parallel?: boolean; // Run all agents in parallel + lead?: boolean; // Run as lead session using Task tool for parallelization + foreground?: boolean; // Run in foreground (deprecated, now default) + background?: boolean; // Run in background (detached process) + watch?: boolean; // Run in background but tail the log + useApi?: boolean; // Use API credits instead of subscription + effort?: EffortLevel; // Effort level: high, medium, low + skills?: string[]; // Skills to load (skill IDs or local paths) + trigger?: 'manual' | 'scheduled' | 'event' | 'smart'; // Trigger source for telemetry + provider?: string; // LLM provider: anthropic, google, openai, mistral, xai, aider, ollama + model?: string; // Model to use (Claude aliases or full model IDs like gemini-2.5-flash) + verify?: boolean; // Post-execution verification (default true, --no-verify to skip) + cloud?: boolean; // Dispatch to cloud worker via API instead of local execution + conversation?: boolean; // Run squad as multi-agent conversation (default for squad runs) + task?: string; // Founder directive — replaces lead briefing in conversation mode + maxTurns?: number; // Max conversation turns (default: 20) + costCeiling?: number; // Cost ceiling in USD (default: 25) + interval?: number | string; // Autopilot: minutes between cycles + maxParallel?: number | string; // Autopilot: max parallel squad loops 
/**
 * Build an execution ID for telemetry tracking.
 *
 * Format: `exec_<time>_<random>`, where `<time>` is the current epoch
 * milliseconds in base 36 and `<random>` is up to six base-36 characters taken
 * from `Math.random()`.
 */
export function generateExecutionId(): string {
  const timePart = Date.now().toString(36);
  // Drop the leading "0." of the base-36 fraction and keep at most six digits.
  const randomPart = Math.random().toString(36).slice(2, 8);
  return ['exec', timePart, randomPart].join('_');
}
/**
 * Select the MCP config file for a squad.
 *
 * Resolution order:
 * 1. If the squad declares `context.mcp`, delegate to `resolveMcpConfigPath`.
 * 2. Otherwise use the legacy squad-name mapping, looking for the mapped file
 *    under `$HOME/.claude/mcp-configs/`; it is used only if it exists.
 * 3. Otherwise return an empty string, which tells the caller to omit the
 *    `--mcp-config` flag.
 *
 * @param squadName - Squad name; matched case-insensitively in the legacy map.
 * @param squad - Parsed squad, if available.
 * @returns Path to an MCP config file, or `''` when none applies.
 */
export function selectMcpConfig(squadName: string, squad?: Squad | null): string {
  // Context-based resolution when the squad declares its MCP servers
  if (squad?.context?.mcp && squad.context.mcp.length > 0) {
    return resolveMcpConfigPath(squadName, squad.context.mcp);
  }

  // Legacy hardcoded mapping (for squads without a context block). A Map is
  // used instead of a plain object so that names such as "constructor" or
  // "toString" cannot resolve to inherited Object.prototype members, which
  // would make path.join throw on a non-string argument.
  const legacyConfigs = new Map<string, string>([
    ['website', 'website.json'],
    ['research', 'research.json'],
    ['intelligence', 'research.json'],
    ['analytics', 'data.json'],
    ['engineering', 'data.json'],
  ]);

  const configFile = legacyConfigs.get(squadName.toLowerCase());
  if (configFile) {
    const home = process.env.HOME || '';
    const configPath = join(home, '.claude', 'mcp-configs', configFile);
    if (existsSync(configPath)) {
      return configPath;
    }
  }

  // No MCP config — return empty string to skip the --mcp-config flag.
  // Previously fell back to ~/.claude.json but that's Claude's settings file,
  // not an MCP config, and causes claude to exit silently with no output.
  return '';
}
// ── Task type detection ──────────────────────────────────────────────

/**
 * Classify an agent by keywords in its (lower-cased) name. The first matching
 * group wins, checked in this order:
 * - `eval`, `critic`, `review`, `test` → evaluation
 * - `lead`, `orchestrator` → lead
 * - `research`, `analyst`, `intel` → research
 * - anything else → execution
 */
export function detectTaskType(agentName: string): ExecutionContext['taskType'] {
  const name = agentName.toLowerCase();
  const mentions = (...keywords: string[]): boolean =>
    keywords.some(keyword => name.includes(keyword));

  if (mentions('eval', 'critic', 'review', 'test')) return 'evaluation';
  if (mentions('lead', 'orchestrator')) return 'lead';
  if (mentions('research', 'analyst', 'intel')) return 'research';
  return 'execution';
}

// ── Model resolution ─────────────────────────────────────────────────

/** Claude Code --model flag aliases */
export type ClaudeModelAlias = 'opus' | 'sonnet' | 'haiku';

/**
 * Map a model name to a Claude Code `--model` alias.
 *
 * The name is lower-cased and matched by substring, trying `opus`, then
 * `sonnet`, then `haiku`; an exact alias therefore maps to itself.
 *
 * @returns The alias, or `undefined` when the name contains none of them.
 */
export function getClaudeModelAlias(model: string): ClaudeModelAlias | undefined {
  const lowered = model.toLowerCase();
  const aliases: readonly ClaudeModelAlias[] = ['opus', 'sonnet', 'haiku'];
  return aliases.find(alias => lowered.includes(alias));
}
/**
 * Pick the model for a run.
 *
 * Priority: explicit `--model` value > the squad's `context.model` routing >
 * `undefined` (the provider chooses its own default). The explicit value is
 * returned unchanged, so names for any provider pass straight through.
 *
 * Routing by task type when the squad has a model config:
 * - evaluation → `cheap`, falling back to `default`
 * - lead → `expensive`, falling back to `default`
 * - research, execution and anything else → `default`
 *
 * @param explicitModel - Value of the `--model` flag, if any.
 * @param squad - Parsed squad, or `null` when there is no squad context.
 * @param taskType - Task type used for routing.
 */
export function resolveModel(
  explicitModel: string | undefined,
  squad: Squad | null,
  taskType: ExecutionContext['taskType']
): string | undefined {
  // Explicit --model flag always wins
  if (explicitModel) return explicitModel;

  // No routing table on the squad → let the provider decide
  const routing = squad?.context?.model;
  if (!routing) return undefined;

  if (taskType === 'evaluation') {
    return routing.cheap || routing.default;
  }
  if (taskType === 'lead') {
    return routing.expensive || routing.default;
  }
  return routing.default;
}
/**
 * Mark `projectPath` as trusted in `$HOME/.claude.json` by setting
 * `projects[projectPath].hasTrustDialogAccepted = true`.
 *
 * Does nothing when the config file does not exist or the flag is already
 * set. The file is rewritten (2-space JSON) only when the flag changes. Any
 * read/parse/write error is reported as a warning line and otherwise ignored.
 *
 * @param projectPath - Project directory, used as the key under `projects`.
 */
export function ensureProjectTrusted(projectPath: string): void {
  const configPath = join(process.env.HOME || '', '.claude.json');

  // No Claude config yet - nothing to update
  if (!existsSync(configPath)) return;

  try {
    const config = JSON.parse(readFileSync(configPath, 'utf-8'));

    if (!config.projects) {
      config.projects = {};
    }
    const entry = config.projects[projectPath] || (config.projects[projectPath] = {});

    // Already trusted → leave the file untouched
    if (entry.hasTrustDialogAccepted) return;

    entry.hasTrustDialogAccepted = true;
    writeFileSync(configPath, JSON.stringify(config, null, 2));
  } catch (e) {
    // Best effort: a failure here must not stop the run
    writeLine(` ${colors.dim}warn: config update failed: ${e instanceof Error ? e.message : String(e)}${RESET}`);
  }
}

// ── Project root ─────────────────────────────────────────────────────

/**
 * Get the project root: two directory levels above the squads directory
 * reported by `findSquadsDir()`, or the current working directory when no
 * squads directory is found.
 */
export function getProjectRoot(): string {
  const squadsDir = findSquadsDir();
  // <root>/.agents/squads → <root>
  return squadsDir ? dirname(dirname(squadsDir)) : process.cwd();
}
// ── Duration formatting ──────────────────────────────────────────────

/**
 * Format milliseconds as a short human-readable duration.
 *
 * - 24 hours or more: `Nd` or `Nd Nh` (minutes are dropped)
 * - 1 hour or more: `Nh` or `Nh Nm`
 * - otherwise: `Nm` (whole minutes, so anything under a minute is `0m`)
 */
export function formatDuration(ms: number): string {
  const MS_PER_MINUTE = 60 * 1000;
  const MS_PER_HOUR = 60 * MS_PER_MINUTE;

  const hours = Math.floor(ms / MS_PER_HOUR);
  const minutes = Math.floor((ms % MS_PER_HOUR) / MS_PER_MINUTE);

  // Join only the non-empty parts, e.g. ['1d', ''] → '1d'
  const joinParts = (major: string, minor: string): string =>
    [major, minor].filter(Boolean).join(' ');

  if (hours >= 24) {
    const days = Math.floor(hours / 24);
    const restHours = hours % 24;
    return joinParts(`${days}d`, restHours > 0 ? `${restHours}h` : '');
  }
  if (hours > 0) {
    return joinParts(`${hours}h`, minutes > 0 ? `${minutes}m` : '');
  }
  return `${minutes}m`;
}

// ── CLI availability check ───────────────────────────────────────────

/**
 * Check whether a `claude` binary is on PATH by running `which claude`.
 *
 * @returns `true` when `which` exits with code 0; `false` on any other exit
 *   code or when the `which` process itself cannot be spawned.
 */
export async function checkClaudeCliAvailable(): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    const lookup = spawn('which', ['claude'], { stdio: 'pipe' });
    lookup.on('error', () => resolve(false));
    lookup.on('close', (exitCode) => resolve(exitCode === 0));
  });
}
+ */ +export async function fetchTriggersFromApi(): Promise { + try { + const { isTier2, getTierSync } = await import('./tier-detect.js'); + if (!isTier2()) return null; + + const apiUrl = getTierSync().urls.api; + if (!apiUrl) return null; + + const response = await fetch(`${apiUrl}/triggers/pending`, { + signal: AbortSignal.timeout(5000), + }); + + if (!response.ok) return null; + + const triggers = await response.json() as Array<{ + squad: string; + agent: string; + trigger_name: string; + priority: number; + context: Record; + }>; + + return triggers.map(t => ({ + squad: t.squad, + score: t.priority * 10, + reason: `Trigger: ${t.trigger_name}`, + issues: [], + agent: t.agent, + context: t.context, + })); + } catch { + return null; // API unavailable — caller should fall back to local scoring + } +} diff --git a/src/lib/tier-detect.ts b/src/lib/tier-detect.ts new file mode 100644 index 00000000..7e8954aa --- /dev/null +++ b/src/lib/tier-detect.ts @@ -0,0 +1,96 @@ +/** + * Tier detection — determines which infrastructure tier is active. + * + * Tier 1: File-based only (JSONL, markdown, git). Zero dependencies. + * Tier 2: Local services (Postgres, Redis, API, Bridge via Docker). + * + * Cached per process. First call probes services (async), subsequent + * calls return cached result (sync). 
+ */ + +export interface TierInfo { + tier: 1 | 2; + services: { + api: boolean; + bridge: boolean; + postgres: boolean; + redis: boolean; + }; + urls: { + api: string | null; + bridge: string | null; + }; +} + +const DEFAULT_API_URL = 'http://localhost:8090'; +const DEFAULT_BRIDGE_URL = 'http://localhost:8088'; +const PROBE_TIMEOUT_MS = 1500; + +let cached: TierInfo | null = null; + +/** Probe a URL for health (returns true if 2xx) */ +async function probe(url: string): Promise { + try { + const response = await fetch(`${url}/health`, { + signal: AbortSignal.timeout(PROBE_TIMEOUT_MS), + }); + return response.ok; + } catch { + return false; + } +} + +/** + * Detect the active tier. First call probes services (async). + * Subsequent calls return cached result. + */ +export async function detectTier(): Promise { + if (cached) return cached; + + // Probe API and Bridge in parallel + const [apiOk, bridgeOk] = await Promise.all([ + probe(DEFAULT_API_URL), + probe(DEFAULT_BRIDGE_URL), + ]); + + // Tier 2 requires at least the API to be healthy + const tier = apiOk ? 2 : 1; + + cached = { + tier, + services: { + api: apiOk, + bridge: bridgeOk, + postgres: apiOk, // If API is up, Postgres is up (API depends on it) + redis: apiOk, // If API is up, Redis is up (API depends on it) + }, + urls: { + api: apiOk ? DEFAULT_API_URL : null, + bridge: bridgeOk ? DEFAULT_BRIDGE_URL : null, + }, + }; + + return cached; +} + +/** + * Get cached tier info synchronously. Returns Tier 1 if not yet detected. + * Use this in hot paths where async is not possible. 
+ */ +export function getTierSync(): TierInfo { + return cached || { + tier: 1, + services: { api: false, bridge: false, postgres: false, redis: false }, + urls: { api: null, bridge: null }, + }; +} + +/** Check if Tier 2 services are available. Reads only the cached result via getTierSync(), so this is false until detectTier() has populated the cache. */ +export function isTier2(): boolean { + return getTierSync().tier === 2; +} + +/** Reset cache (for testing). Clears the cached TierInfo so the next detectTier() call probes again. */ +export function resetTierCache(): void { + cached = null; +} diff --git a/templates/seed/config/SYSTEM.md b/templates/seed/config/SYSTEM.md index ffa9a11d..b2c3dc11 100644 --- a/templates/seed/config/SYSTEM.md +++ b/templates/seed/config/SYSTEM.md @@ -1,3 +1,9 @@ +--- +version: "1.0" +scope: "all-agents" +authority: "squads-cli" +--- + # System Protocol Immutable rules for all agent executions. Every agent reads this before starting work. diff --git a/templates/seed/idp/catalog/service.yaml.template b/templates/seed/idp/catalog/service.yaml.template new file mode 100644 index 00000000..e4c9a7cd --- /dev/null +++ b/templates/seed/idp/catalog/service.yaml.template @@ -0,0 +1,25 @@ +apiVersion: squads/v1 +kind: Service +metadata: + name: "{{SERVICE_NAME}}" + description: "{{BUSINESS_DESCRIPTION}}" + owner: "{{OWNER_SQUAD}}" + repo: "{{REPO_NAME}}" + tags: [] +spec: + type: "{{SERVICE_TYPE}}" + stack: "{{SERVICE_STACK}}" + branches: + default: main + development: {{BRANCHES_DEVELOPMENT}} + workflow: "{{BRANCHES_WORKFLOW}}" + ci: + template: {{CI_TEMPLATE}} + required_checks: [] + build_command: {{BUILD_COMMAND}} + test_command: {{TEST_COMMAND}} + deploy: null + health: [] + dependencies: + runtime: [] + scorecard: "{{SERVICE_SCORECARD}}" diff --git a/templates/seed/memory/_squad/goals.md b/templates/seed/memory/_squad/goals.md new file mode 100644 index 00000000..8ead206c --- /dev/null +++ b/templates/seed/memory/_squad/goals.md @@ -0,0 +1,23 @@ +--- +squad: "{{SQUAD_NAME}}" +updated: "{{CURRENT_DATE}}" +review_by: "{{REVIEW_DATE}}" +owner: "{{SQUAD_LEAD}}" +--- + +# {{SQUAD_LABEL}} Goals + +## Active + +(No goals 
set yet — add your first goal here) + +Example format: +1. **Goal name** — metric: what_to_measure | baseline: unknown | target: X | deadline: YYYY-MM-DD | status: not-started + +## Achieved + +(none yet) + +## Abandoned + +(none yet) diff --git a/templates/seed/memory/_squad/priorities.md b/templates/seed/memory/_squad/priorities.md new file mode 100644 index 00000000..11b37974 --- /dev/null +++ b/templates/seed/memory/_squad/priorities.md @@ -0,0 +1,25 @@ +--- +squad: "{{SQUAD_NAME}}" +updated: "{{CURRENT_DATE}}" +review_by: "{{REVIEW_DATE}}" +owner: "{{SQUAD_LEAD}}" +--- + +# {{SQUAD_LABEL}} Priorities + +## Focus + +1. **Deliver first results** — produce at least one concrete output per cycle +2. **Learn the context** — read BUSINESS_BRIEF.md and recent squad state before acting +3. **Collaborate** — coordinate with other squads through memory, not direct calls + +## Not Now + +- Deep refactoring without a clear need +- Experimental features not tied to business goals + +## Standing Rules + +- Always read state.md before starting — don't repeat work +- Always write state.md after completing — enable the next run +- Escalate blockers immediately — don't spin in place diff --git a/templates/seed/memory/company/company.md b/templates/seed/memory/company/company.md new file mode 100644 index 00000000..d95db9e0 --- /dev/null +++ b/templates/seed/memory/company/company.md @@ -0,0 +1,31 @@ +--- +type: "company-context" +updated: "{{CURRENT_DATE}}" +owner: "manager" +--- + +# Company Context + +## Mission + +{{BUSINESS_DESCRIPTION}} + +## What We Build + +(Add product or service details here) + +## Who Uses It + +(Add target customer details here) + +## Product + +| Offering | Role | +|----------|------| +| (Add your products/services) | | + +## Alignment + +- Results over promises — ship working things +- Simple over clever — prefer straightforward solutions +- Customer zero — use your own product diff --git a/templates/seed/skills/squads-cli/SKILL.md 
b/templates/seed/skills/squads-cli/SKILL.md index 255d0a6b..8ea47e21 100644 --- a/templates/seed/skills/squads-cli/SKILL.md +++ b/templates/seed/skills/squads-cli/SKILL.md @@ -1,84 +1,329 @@ -# squads-cli — Operations Manual +--- +name: squads-cli +description: Squads CLI reference for autonomous agents — run squads, manage memory, check status, set goals, and operate the AI workforce. TRIGGER when using squads commands, dispatching agents, reading/writing memory, checking squad status, or operating the autonomous loop. +context: fork +--- -You have access to the `squads` CLI for managing your AI workforce. +# Squads CLI -## Execute (Daily Operations) +The `squads` CLI is the operating system for your AI workforce. Agents are the primary users — they call these commands during execution to understand context, persist learnings, and coordinate with other squads. + +## Core Concepts + +| Concept | Description | +|---------|-------------| +| **Squad** | A team of agents in `.agents/squads/{name}/` — defined by `SQUAD.md` | +| **Agent** | A markdown file (`{agent}.md`) inside a squad directory | +| **Memory** | Persistent state in `.agents/memory/{squad}/{agent}/` — survives across runs | +| **Target** | `squad/agent` notation (e.g., `engineering/issue-solver`) | +| **Context cascade** | Layered context injection: SYSTEM → SQUAD → priorities → directives → state | + +## File Structure + +``` +.agents/ +├── config/SYSTEM.md # Immutable rules (all agents) +├── squads/{squad}/ +│ ├── SQUAD.md # Squad identity, goals, KPIs +│ └── {agent}.md # Agent definition +└── memory/ + ├── {squad}/ + │ ├── priorities.md # Current operational focus + │ ├── feedback.md # Last cycle evaluation + │ ├── active-work.md # Open PRs/issues + │ └── {agent}/ + │ ├── state.md # Agent's persistent state + │ └── learnings.md # Accumulated insights + ├── company/directives.md # Strategic overlay + └── daily-briefing.md # Cross-squad context +``` + +--- + +## Running Agents + +### Single Agent + 
+```bash +# Run specific agent (two equivalent notations) +squads run engineering/issue-solver +squads run engineering -a issue-solver + +# With founder directive (replaces lead briefing) +squads run engineering/issue-solver --task "Fix CI pipeline for PR #593" + +# Dry run — preview without executing +squads run engineering --dry-run + +# Background execution +squads run engineering/scanner -b # Detached +squads run engineering/scanner -w # Detached but tail logs + +# Use different LLM provider +squads run research/analyst --provider=google +squads run research/analyst --provider=google --model=gemini-2.5-flash +``` + +### Squad Conversation + +Run an entire squad as a coordinated team. Lead briefs → workers execute → lead reviews → iterate until convergence. + +```bash +squads run research # Sequential conversation +squads run research --parallel # All agents in parallel (tmux) +squads run research --lead # Single orchestrator with Task tool +squads run research --max-turns 10 # Limit conversation turns +squads run research --cost-ceiling 15 # Budget cap in USD +``` + +### Autopilot (Autonomous Dispatch) + +No target = autopilot mode. CLI scores squads by priority, dispatches automatically. 
+ +```bash +squads run # Start autopilot +squads run --once # Single cycle then exit +squads run --once --dry-run # Preview what would dispatch +squads run -i 15 --budget 50 # 15-min cycles, $50/day cap +squads run --phased # Respect depends_on ordering +squads run --max-parallel 3 # Up to 3 squads simultaneously +``` + +### Execution Options + +| Flag | Purpose | +|------|---------| +| `--verbose` | Detailed output with context sections logged | +| `--timeout <minutes>` | Execution timeout (default: 30 min) | +| `--effort <level>` | `high`, `medium`, `low` (default: from SQUAD.md or high) | +| `--skills <skills...>` | Load additional skills | +| `--cloud` | Dispatch to cloud worker (requires `squads login`) | +| `--no-verify` | Skip post-execution verification | +| `--no-eval` | Skip post-run COO evaluation | +| `--json` | Machine-readable output | + +--- + +## Memory Operations + +Memory is how agents persist knowledge across runs. Files-first — everything is markdown on disk. + +### Read Memory + +```bash +# View all memory for a squad +squads memory read engineering + +# Search across ALL squad memory +squads memory query "CI pipeline failures" +squads memory query "agent performance" +``` + +### Write Memory + +```bash +# Write insight to squad memory +squads memory write research "MCP adoption rate at 15% — up from 8% last month" + +# Write to specific agent +squads memory write engineering --agent issue-solver "PR #593 blocked by flaky test" +``` + +### Capture Learnings + +```bash +# Quick learning capture +squads learn "Google blocks headless Chrome OAuth — use cookie injection" \ + --squad engineering --category pattern --tags "auth,chrome,e2e" + +# View learnings +squads learnings +squads learnings --squad engineering +``` + +### Sync Memory + +```bash +squads sync # Pull remote changes +squads sync --push # Pull + push local changes +squads sync --postgres # Also sync to Postgres +``` + +--- + +## Status & Monitoring + +### Squad Status ```bash -squads run / # Execute a specific 
agent -squads run --lead # Orchestrate full squad -squads run --parallel # Run all agents in parallel -squads list # Discover squads and agents -squads exec list # Recent execution history -squads exec stats # Execution statistics -squads orchestrate # Multi-agent coordination -squads env prompt -a # Get agent prompt +squads status # All squads overview +squads status engineering # Specific squad details +squads status -v # Verbose with agent details +squads status --json # Machine-readable ``` -## Understand (Situational Awareness) +### Dashboards ```bash -squads dash --json # Full dashboard (use --json for parsing) -squads status [squad] # Quick status snapshot -squads context --json # Business context feed -squads cost # Cost tracking -squads budget # Budget check -squads history # Execution history +squads dash # Overview dashboard +squads dash engineering # Squad-specific dashboard +squads dash --ceo # Executive summary +squads dash --full # Include GitHub PR/issue stats (~30s) +squads dash --list # List available dashboards ``` -## Track (Objectives + Metrics) +### Execution History ```bash -squads goal set "" # Set a business objective -squads goal list [squad] # List goals -squads goal complete # Mark goal done -squads kpi list # List all KPIs -squads kpi show # Squad KPIs -squads kpi trend # Show trend -squads feedback add <1-5> "" # Rate output -squads autonomy # Self-assessment +squads exec list # Recent executions +squads exec list --squad eng # Filter by squad +squads exec show # Execution details +squads exec stats # Aggregate statistics ``` -## Learn (Memory + Knowledge) +### Cost Tracking ```bash -squads memory read # Load squad memory -squads memory write "" # Persist learning -squads memory search "" # Search all memory -squads memory list # List all entries -squads learn "" # Capture learning -squads learnings show # View learnings -squads sync --push # Sync memory to git +squads cost # Today + this week +squads cost --squad research # 
Squad-specific costs +squads cost --json # Machine-readable ``` -## Schedule (Automation) +### Health & Readiness ```bash -squads trigger list # View triggers -squads trigger fire # Manual trigger -squads autonomous start # Start scheduler -squads approval list # Check pending approvals +squads doctor # Check tools, auth, project readiness +squads doctor -v # Verbose with install hints +squads eval engineering/scanner # Agent readiness score ``` -## Decision Framework +--- -- **Read before act**: Always `squads context --json` or `squads memory read` first -- **Track everything**: Use `goal progress`, `kpi record`, `feedback add` -- **Persist learnings**: `squads memory write` after discoveries -- **Use JSON**: Add `--json` when parsing output programmatically -- **Escalate wisely**: High cost or unclear scope → ask the human +## Goals & Priorities -## JSON Output +Goals are aspirational (in SQUAD.md). Priorities are operational (in priorities.md). -All key commands support `--json` for structured output: -```json -{ - "ok": true, - "command": "status", - "data": { ... }, - "error": null, - "meta": { "duration_ms": 1230, "connected": true } -} +### Set Goals + +```bash +squads goal set engineering "Zero CI failures on main branch" +squads goal list # All squads +squads goal list engineering # Specific squad +squads goal complete engineering 1 # Mark done +squads goal progress engineering 1 "75%" +``` + +### Business Context + +```bash +squads context # Full business context +squads context --squad engineering # Squad-focused context +squads context --topic "pricing" # Topic-focused search +squads context --json # Agent-consumable format +``` + +--- + +## Environment & Configuration + +### Execution Environment + +```bash +squads env show engineering # View MCP servers, skills, model, budget +squads env show engineering --json # Machine-readable +squads env prompt engineering # Ready-to-use prompt for Claude Code ``` -When piped (non-TTY), commands auto-output JSON. 
+### Provider Management + +```bash +squads providers # List available LLM CLI providers +``` + +### Sessions + +```bash +squads sessions # Active Claude Code sessions +squads session start # Start new session +squads session end # End current session +``` + +--- + +## Autonomous Scheduling + +The daemon runs agents on configured schedules without human intervention. + +```bash +squads auto start # Start scheduling daemon +squads auto stop # Stop daemon +squads auto status # Show daemon status + next runs +squads auto pause "quota exhausted" # Pause with reason +squads auto resume # Resume after pause +``` + +--- + +## Common Patterns + +### Agent Self-Context (during execution) + +Agents call these to understand their environment: + +```bash +# What am I working with? +squads env show ${SQUAD_NAME} --json + +# What do I know? +squads memory read ${SQUAD_NAME} + +# What's happening across the org? +squads status --json + +# What's the business context? +squads context --squad ${SQUAD_NAME} --json +``` + +### Post-Execution Memory Update + +```bash +# Persist what you learned +squads memory write ${SQUAD_NAME} "Key finding from this run" +squads learn "Pattern discovered: X causes Y" --squad ${SQUAD_NAME} --category pattern + +# Sync to remote +squads sync --push +``` + +### Dispatch Another Agent + +```bash +# From within an agent, trigger another +squads run engineering/issue-solver --task "Fix the bug I found in #461" -b +``` + +### Check Before Creating + +Before creating issues/PRs, check what exists: + +```bash +squads status engineering -v # See active work +squads memory read engineering # See known issues +squads context --squad engineering --json # Full context +``` + +## Full Command Reference + +See `references/commands.md` for complete command listing with all flags. 
+ +## Troubleshooting + +| Problem | Fix | +|---------|-----| +| `squads: command not found` | `npm install -g squads-cli` | +| No squads found | Run `squads init` to create `.agents/` | +| Agent not found | Check path: `.agents/squads/{squad}/{agent}.md` | +| Memory not persisting | Check `.agents/memory/` exists, run `squads sync` | +| Wrong provider | Set `--provider` flag or `provider:` in SQUAD.md frontmatter | +| API quota exhausted | `squads auto pause "quota"`, switch provider, or wait | +| Context too large | Use `--effort low` or reduce context layers | diff --git a/templates/seed/skills/squads-cli/references/commands.md b/templates/seed/skills/squads-cli/references/commands.md new file mode 100644 index 00000000..c2974eb2 --- /dev/null +++ b/templates/seed/skills/squads-cli/references/commands.md @@ -0,0 +1,181 @@ +# Squads CLI — Full Command Reference + +## All Commands + +| Command | Description | +|---------|-------------| +| `squads init` | Create `.agents/` directory with starter squads | +| `squads add ` | Add a new squad with directory structure | +| `squads run [target]` | Run squad, agent, or autopilot (no target = autopilot) | +| `squads orchestrate ` | Run squad with lead agent orchestration | +| `squads status [squad]` | Show squad status and state | +| `squads env show ` | View execution environment (MCP, skills, model) | +| `squads env prompt ` | Output ready-to-use prompt for execution | +| `squads context` | Get business context for alignment | +| `squads dashboard [name]` | Show dashboards (`squads dash` alias) | +| `squads exec list` | List recent executions | +| `squads exec show ` | Show execution details | +| `squads exec stats` | Show execution statistics | +| `squads cost` | Show cost summary (today, week, by squad) | +| `squads budget ` | Check budget status for a squad | +| `squads health` | Quick health check for infrastructure | +| `squads doctor` | Check local tools, auth, project readiness | +| `squads history` | Show recent 
agent execution history | +| `squads results [squad]` | Show git activity + KPI goals vs actuals | +| `squads goal set <squad> <description>` | Set a goal for a squad | +| `squads goal list [squad]` | List goals | +| `squads goal complete <squad> <index>` | Mark goal completed | +| `squads goal progress <squad> <index> <progress>
` | Update goal progress | +| `squads kpi` | Track and analyze squad KPIs | +| `squads progress` | Track active and completed agent tasks | +| `squads feedback` | Record and view execution feedback | +| `squads autonomy` | Show autonomy score and confidence metrics | +| `squads stats [squad]` | Agent outcome scorecards: merge rate, waste | +| `squads memory query ` | Search across all squad memory | +| `squads memory read ` | Show memory for a squad | +| `squads memory write ` | Add to squad memory | +| `squads memory list` | List all memory entries | +| `squads memory sync` | Sync memory from git | +| `squads memory search ` | Search stored conversations (requires login) | +| `squads memory extract` | Extract memories from recent conversations | +| `squads learn ` | Capture a learning for future sessions | +| `squads learnings` | View and search learnings | +| `squads sync` | Git memory synchronization | +| `squads trigger` | Manage smart triggers | +| `squads approval` | Manage approval requests | +| `squads auto start` | Start autonomous scheduling daemon | +| `squads auto stop` | Stop scheduling daemon | +| `squads auto status` | Show daemon status and next runs | +| `squads auto pause [reason]` | Pause daemon | +| `squads auto resume` | Resume paused daemon | +| `squads sessions` | Show active Claude Code sessions | +| `squads session` | Manage current session lifecycle | +| `squads detect-squad` | Detect squad from cwd (for hooks) | +| `squads login` | Log in to Squads platform | +| `squads logout` | Log out | +| `squads whoami` | Show current user | +| `squads eval ` | Evaluate agent readiness | +| `squads deploy` | Deploy agents to Squads platform | +| `squads cognition` | Business cognition engine | +| `squads providers` | Show available LLM CLI providers | +| `squads update` | Check for and install updates | +| `squads version` | Show version information | + +## `squads run` — Full Options + +``` +squads run [target] [options] + +Target formats: + (none) 
Autopilot mode + Squad conversation + / Single agent + -a Single agent (flag notation) + +Agent execution: + -v, --verbose Detailed output + -d, --dry-run Preview without executing + -t, --timeout Timeout in minutes (default: 30) + -b, --background Run detached + -w, --watch Run detached but tail logs + --task Founder directive + --effort high | medium | low + --skills Additional skills to load + --provider anthropic | google | openai | mistral | xai | aider | ollama + --model opus | sonnet | haiku | gemini-2.5-flash | gpt-4o | etc. + --cloud Dispatch to cloud worker + --no-verify Skip post-execution verification + --no-eval Skip COO evaluation + --use-api Use API credits instead of subscription + +Squad conversation: + -p, --parallel All agents in parallel (tmux) + -l, --lead Single orchestrator with Task tool + --max-turns Max conversation turns (default: 20) + --cost-ceiling Cost ceiling in USD (default: 25) + +Autopilot: + -i, --interval Minutes between cycles (default: 30) + --max-parallel Max parallel squad loops (default: 2) + --budget Daily budget cap (default: 0 = unlimited) + --once Single cycle then exit + --phased Use depends_on phase ordering + +Output: + -j, --json Machine-readable output +``` + +## `squads init` — Full Options + +``` +squads init [options] + + -p, --provider LLM provider (claude, gemini, openai, ollama, none) + --pack Additional squads: engineering, marketing, operations, all + --skip-infra Skip infrastructure setup + --force Skip requirement checks + -y, --yes Accept defaults (non-interactive) + -q, --quick Files only, skip prompts +``` + +## `squads memory` — Full Options + +``` +squads memory query Search all memory +squads memory read Show squad memory +squads memory write Write to memory + --agent Target specific agent +squads memory list List all entries +squads memory sync [options] Sync from git + --push Also push changes +squads memory search Search conversations (requires login) +squads memory extract Extract from recent 
conversations +``` + +## `squads context` — Full Options + +``` +squads context [options] + + -s, --squad Focus on specific squad + -t, --topic Search memory for topic + -a, --agent JSON for agent consumption + -j, --json JSON output + -v, --verbose Additional details +``` + +## Global Patterns + +Every command supports: +- `--json` or `-j` for machine-readable output +- `--verbose` or `-v` for detailed output +- `--help` or `-h` for usage information + +## SQUAD.md Frontmatter + +Squads are configured via YAML frontmatter in SQUAD.md: + +```yaml +--- +name: engineering +repo: agents-squads/engineering +provider: anthropic +model: opus +effort: high +depends_on: [data] +kpis: + merge_rate: + target: ">80%" + unit: percentage +--- +``` + +| Field | Purpose | +|-------|---------| +| `name` | Squad identifier | +| `repo` | GitHub repo (org/repo format) | +| `provider` | Default LLM provider | +| `model` | Default model | +| `effort` | Default effort level | +| `depends_on` | Phase ordering dependencies | +| `kpis` | KPI definitions for tracking | diff --git a/templates/seed/squads/company/company-critic.md b/templates/seed/squads/company/company-critic.md index cedb9c46..7211a458 100644 --- a/templates/seed/squads/company/company-critic.md +++ b/templates/seed/squads/company/company-critic.md @@ -1,8 +1,14 @@ --- name: Company Critic -role: critic +role: evaluator +squad: "company" +provider: "{{PROVIDER}}" model: sonnet effort: medium +trigger: "event" +cooldown: "1h" +timeout: 1800 +max_retries: 1 tools: - Read - Write @@ -10,16 +16,18 @@ tools: # Company Critic +## Role + Find what's broken in how the workforce operates. Challenge assumptions, identify waste, propose fixes. -## Instructions +## How You Work 1. Read the evaluator's scores from `.agents/memory/company/company-eval/state.md` 2. Read squad states from `.agents/memory/{squad}/*/state.md` 3. Look for patterns: repeated failures, duplicate work, misaligned effort 4. 
Write critique to `.agents/memory/company/company-critic/state.md` -## Output Format (REQUIRED) +## Output ```markdown # Workforce Critique — {date} @@ -41,7 +49,7 @@ Work that produced no business value. Be specific. Decisions only a human can make. ``` -## Rules +## Constraints - Critique the process, not the agents — agents follow instructions - Every issue needs evidence from memory files, not speculation diff --git a/templates/seed/squads/company/company-eval.md b/templates/seed/squads/company/company-eval.md index 6a581dec..473241a0 100644 --- a/templates/seed/squads/company/company-eval.md +++ b/templates/seed/squads/company/company-eval.md @@ -1,8 +1,14 @@ --- name: Company Evaluator role: evaluator +squad: "company" +provider: "{{PROVIDER}}" model: sonnet effort: medium +trigger: "event" +cooldown: "1h" +timeout: 1800 +max_retries: 1 tools: - Read - Write @@ -10,9 +16,11 @@ tools: # Company Evaluator -Evaluate squad outputs against business goals. Your job is to answer: "Did the squads produce value, or noise?" +## Role -## Instructions +Evaluate squad outputs against business goals. Answer: "Did the squads produce value, or noise?" + +## How You Work 1. Read business goals from `.agents/BUSINESS_BRIEF.md` 2. Read directives from `.agents/memory/company/directives.md` @@ -20,7 +28,7 @@ Evaluate squad outputs against business goals. Your job is to answer: "Did the s 4. Score each squad's output using the rubric below 5. Write evaluation to `.agents/memory/company/company-eval/state.md` -## Output Format (REQUIRED) +## Output ```markdown # Squad Evaluation — {date} @@ -40,7 +48,7 @@ Evaluate squad outputs against business goals. Your job is to answer: "Did the s What each squad should focus on next cycle, ranked by business impact. ``` -## Rules +## Constraints - Score against BUSINESS_BRIEF.md goals, not general quality - "Relevance" = does this advance the business focus? 
diff --git a/templates/seed/squads/company/event-dispatcher.md b/templates/seed/squads/company/event-dispatcher.md index ea8bee18..f36a04b7 100644 --- a/templates/seed/squads/company/event-dispatcher.md +++ b/templates/seed/squads/company/event-dispatcher.md @@ -1,8 +1,14 @@ --- name: Event Dispatcher -role: doer +role: worker +squad: "company" +provider: "{{PROVIDER}}" model: haiku effort: medium +trigger: "event" +cooldown: "30m" +timeout: 1800 +max_retries: 2 tools: - Read - Write @@ -12,14 +18,18 @@ tools: Route events to the right squad. You're a traffic controller, not a decision maker. -## Instructions +## Role + +Route events to the right squad. You're a traffic controller, not a decision maker. + +## How You Work 1. Read pending events from `.agents/memory/company/event-dispatcher/state.md` 2. Check for new activity: `squads status --json` 3. For each event, determine which squad owns it 4. Log the routing decision and update state -## Output Format (REQUIRED) +## Output ```markdown # Event Dispatch — {date} @@ -36,7 +46,7 @@ Events that don't clearly belong to any squad. If nothing new happened, say so and stop. ``` -## Rules +## Constraints - Route, don't act — dispatchers don't do the work - When unclear, route to the manager for triage diff --git a/templates/seed/squads/company/goal-tracker.md b/templates/seed/squads/company/goal-tracker.md index 37874620..6ec0e5cf 100644 --- a/templates/seed/squads/company/goal-tracker.md +++ b/templates/seed/squads/company/goal-tracker.md @@ -1,8 +1,14 @@ --- name: Goal Tracker -role: doer +role: worker +squad: "company" +provider: "{{PROVIDER}}" model: haiku effort: medium +trigger: "schedule" +cooldown: "1h" +timeout: 1800 +max_retries: 2 tools: - Read - Write @@ -10,16 +16,18 @@ tools: # Goal Tracker +## Role + Track whether squads are making progress toward their goals or spinning wheels. -## Instructions +## How You Work 1. Read squad goals from each `.agents/squads/{squad}/SQUAD.md` (## Goals section) 2. 
Read squad states from `.agents/memory/{squad}/*/state.md` 3. Compare goals vs actual output — is the squad advancing or stalled? 4. Write progress report to `.agents/memory/company/goal-tracker/state.md` -## Output Format (REQUIRED) +## Output ```markdown # Goal Progress — {date} @@ -36,7 +44,7 @@ Goals with no progress since last check. Flag for manager. Goals that can be checked off or replaced. ``` -## Rules +## Constraints - "On Track" needs evidence — a state.md update, a commit, a report - "Stalled" means no observable progress, not "I didn't check" diff --git a/templates/seed/squads/company/manager.md b/templates/seed/squads/company/manager.md index 0ffd2ddf..827df424 100644 --- a/templates/seed/squads/company/manager.md +++ b/templates/seed/squads/company/manager.md @@ -1,17 +1,25 @@ --- name: Manager role: lead +squad: "company" +provider: "{{PROVIDER}}" model: sonnet effort: high +trigger: "schedule" +cooldown: "1h" +timeout: 3600 +max_retries: 2 skills: - squads-cli --- # Manager Agent -You are the AI manager of this workforce. You orchestrate all squads, coordinate work, and report to the human operator. +## Role -## Your Job +Orchestrate all squads, coordinate work, and report to the human operator. + +## How You Work 1. **Understand** — Read BUSINESS_BRIEF.md and squad state 2. **Plan** — Identify what needs doing based on goals and context @@ -19,8 +27,6 @@ You are the AI manager of this workforce. You orchestrate all squads, coordinate 4. **Track** — Record progress and outcomes 5. **Learn** — Persist insights for future sessions -## Daily Operations - ```bash # 1. 
Understand current state squads status --json @@ -39,16 +45,16 @@ squads goal list squads memory write "" ``` -## Coordination Rules - -- Git is the sync layer — commit and push all changes -- Memory persists via `.agents/memory/` — always read before acting -- Escalate to human when: spend > $50, scope unclear, destructive action needed -- Report daily: what ran, what succeeded, what needs attention - ## Output After each session, update: - `.agents/memory/company/manager/state.md` — current state snapshot - Squad goals via `squads goal progress` - Any new learnings via `squads memory write` + +## Constraints + +- Git is the sync layer — commit and push all changes +- Memory persists via `.agents/memory/` — always read before acting +- Escalate to human when: spend > $50, scope unclear, destructive action needed +- Report daily: what ran, what succeeded, what needs attention diff --git a/templates/seed/squads/engineering/code-reviewer.md b/templates/seed/squads/engineering/code-reviewer.md index 8d087881..dfefbcf7 100644 --- a/templates/seed/squads/engineering/code-reviewer.md +++ b/templates/seed/squads/engineering/code-reviewer.md @@ -1,15 +1,23 @@ --- name: Code Reviewer role: evaluator +squad: "engineering" +provider: "{{PROVIDER}}" model: sonnet effort: medium +trigger: "event" +cooldown: "30m" +timeout: 1800 +max_retries: 2 --- # Code Reviewer +## Role + Adversarial code reviewer. Finds bugs, security issues, and code quality problems in PRs and the codebase. -## Instructions +## How You Work 1. **Find PRs** to review: ```bash @@ -40,6 +48,10 @@ Adversarial code reviewer. Finds bugs, security issues, and code quality problem - Identify missing error handling - Create issues for findings +## Output + +Review comments on PRs. Issues created for codebase findings. + ## Evaluation Criteria | Check | Severity | Action | @@ -49,7 +61,7 @@ Adversarial code reviewer. 
Finds bugs, security issues, and code quality problem | No tests for new code | Medium | Comment, suggest | | Style inconsistency | Low | Skip unless pervasive | -## Anti-Patterns +## Constraints - NEVER approve without reading the full diff - NEVER report style issues as security issues diff --git a/templates/seed/squads/engineering/issue-solver.md b/templates/seed/squads/engineering/issue-solver.md index 14b78bd9..35453acd 100644 --- a/templates/seed/squads/engineering/issue-solver.md +++ b/templates/seed/squads/engineering/issue-solver.md @@ -1,8 +1,14 @@ --- name: Issue Solver role: lead +squad: "engineering" +provider: "{{PROVIDER}}" model: sonnet effort: high +trigger: "schedule" +cooldown: "1h" +timeout: 3600 +max_retries: 2 skills: - squads-cli - gh @@ -10,9 +16,11 @@ skills: # Issue Solver +## Role + Autonomously solve GitHub issues by reading the issue, understanding the codebase, and creating PRs with fixes. -## Instructions +## How You Work 1. **Discover** open issues: ```bash @@ -46,7 +54,7 @@ Autonomously solve GitHub issues by reading the issue, understanding the codebas - Check for regressions - Ensure the PR description explains the change -## Anti-Patterns +## Constraints - NEVER create a PR without understanding the root cause - NEVER skip running existing tests diff --git a/templates/seed/squads/engineering/test-writer.md b/templates/seed/squads/engineering/test-writer.md index aebed122..b59b0f7a 100644 --- a/templates/seed/squads/engineering/test-writer.md +++ b/templates/seed/squads/engineering/test-writer.md @@ -1,15 +1,23 @@ --- name: Test Writer -role: doer +role: worker +squad: "engineering" +provider: "{{PROVIDER}}" model: haiku effort: medium +trigger: "event" +cooldown: "30m" +timeout: 1800 +max_retries: 2 --- # Test Writer +## Role + Writes tests for code that lacks coverage. Focuses on critical paths first. -## Instructions +## How You Work 1. 
**Identify** untested code: - Read existing test files to understand patterns @@ -36,15 +44,17 @@ Writes tests for code that lacks coverage. Focuses on critical paths first. gh pr create --title "test: add coverage for {module}" ``` -## Principles +## Output + +PRs adding test coverage to untested code paths. + +## Constraints - Tests should be readable — a test is documentation - One assertion per test when possible - Mock external dependencies, test your logic - Test behavior, not implementation details -## Anti-Patterns - - NEVER write tests that test the framework, not your code - NEVER skip running tests after writing them - NEVER write flaky tests (random data, timing dependencies) diff --git a/templates/seed/squads/intelligence/intel-critic.md b/templates/seed/squads/intelligence/intel-critic.md index 7fb2cd44..cc23f261 100644 --- a/templates/seed/squads/intelligence/intel-critic.md +++ b/templates/seed/squads/intelligence/intel-critic.md @@ -1,8 +1,14 @@ --- name: Intel Critic -role: critic +role: evaluator +squad: "intelligence" +provider: "{{PROVIDER}}" model: haiku effort: medium +trigger: "event" +cooldown: "1h" +timeout: 1800 +max_retries: 1 tools: - Read - Write @@ -10,9 +16,11 @@ tools: # Intel Critic +## Role + Challenge the intelligence brief. Find what's missing, what's assumed, what's wrong. -## Instructions +## How You Work 1. Read the latest intel brief from `.agents/memory/intelligence/intel-lead/output.md` 2. For each section, ask: @@ -34,3 +42,12 @@ Challenge the intelligence brief. Find what's missing, what's assumed, what's wr 3. Save critique to `.agents/memory/intelligence/intel-critic/output.md` 4. Record patterns in `.agents/memory/intelligence/intel-critic/learnings.md` + +## Output + +Critique saved to `.agents/memory/intelligence/intel-critic/output.md`. 
+ +## Constraints + +- Challenge assumptions, don't just validate the brief +- Every critique must suggest a better alternative, not just flag the problem diff --git a/templates/seed/squads/intelligence/intel-eval.md b/templates/seed/squads/intelligence/intel-eval.md index 89a0c8e5..e8428006 100644 --- a/templates/seed/squads/intelligence/intel-eval.md +++ b/templates/seed/squads/intelligence/intel-eval.md @@ -1,8 +1,14 @@ --- name: Intel Eval role: evaluator +squad: "intelligence" +provider: "{{PROVIDER}}" model: haiku effort: medium +trigger: "event" +cooldown: "1h" +timeout: 1800 +max_retries: 1 tools: - Read - Write @@ -10,9 +16,11 @@ tools: # Intel Evaluator +## Role + Evaluate intelligence brief quality. Score the Know / Don't Know / Playbook output. -## Instructions +## How You Work 1. Read the latest intel brief from `.agents/memory/intelligence/intel-lead/output.md` 2. Score each section: @@ -29,3 +37,12 @@ Evaluate intelligence brief quality. Score the Know / Don't Know / Playbook outp 3. Save evaluation to `.agents/memory/intelligence/intel-eval/output.md` 4. If overall score < 3, flag specific improvements needed + +## Output + +Evaluation scores saved to `.agents/memory/intelligence/intel-eval/output.md`. + +## Constraints + +- Score based on evidence quality, not content agreement +- Flag improvements as specific suggestions, not vague critiques diff --git a/templates/seed/squads/intelligence/intel-lead.md b/templates/seed/squads/intelligence/intel-lead.md index 2279e154..f3c48600 100644 --- a/templates/seed/squads/intelligence/intel-lead.md +++ b/templates/seed/squads/intelligence/intel-lead.md @@ -1,8 +1,14 @@ --- name: Intel Lead role: lead +squad: "intelligence" +provider: "{{PROVIDER}}" model: sonnet effort: high +trigger: "schedule" +cooldown: "1h" +timeout: 3600 +max_retries: 2 tools: - WebSearch - WebFetch @@ -12,9 +18,11 @@ tools: # Intel Lead +## Role + Synthesize information into actionable intelligence. 
Your output is always three sections: What We Know, What We Don't Know, and the Playbook. -## Instructions +## How You Work 1. Read business context from `.agents/BUSINESS_BRIEF.md` 2. Read your previous state from `.agents/memory/intelligence/intel-lead/state.md` @@ -24,7 +32,7 @@ Synthesize information into actionable intelligence. Your output is always three 6. Save brief to `.agents/memory/intelligence/intel-lead/output.md` 7. Update state: `.agents/memory/intelligence/intel-lead/state.md` -## Output Format (REQUIRED) +## Output Every run produces this structure: @@ -53,7 +61,7 @@ Concrete actions. Who does what, by when, why. | P1 | {action} | {squad/role} | {date} | {why now} | ``` -## Rules +## Constraints - "What We Know" = ONLY facts with sources. No speculation. - "What We Don't Know" = gaps that MATTER. Things that block decisions. @@ -62,7 +70,7 @@ Concrete actions. Who does what, by when, why. - Confidence levels: CONFIRMED > LIKELY > POSSIBLE > SPECULATIVE - Every claim needs a source (URL, document, or data point) -## Quality Check +## Quality Checklist Before outputting, ask yourself: - Is every "Know" item actually backed by a source? diff --git a/templates/seed/squads/marketing/content-drafter.md b/templates/seed/squads/marketing/content-drafter.md index a296d6e2..c1023bfe 100644 --- a/templates/seed/squads/marketing/content-drafter.md +++ b/templates/seed/squads/marketing/content-drafter.md @@ -1,17 +1,25 @@ --- name: Content Drafter role: lead +squad: "marketing" +provider: "{{PROVIDER}}" model: haiku effort: medium +trigger: "schedule" +cooldown: "2h" +timeout: 1800 +max_retries: 2 skills: - squads-cli --- # Content Drafter +## Role + Creates first drafts for blog posts, social content, and marketing materials. Focuses on getting ideas on paper quickly — editing comes later. -## Instructions +## How You Work 1. 
**Read context**: - `.agents/BUSINESS_BRIEF.md` for business context @@ -57,15 +65,17 @@ Creates first drafts for blog posts, social content, and marketing materials. Fo squads memory write marketing "Drafted: [title] - [type]" ``` -## Principles +## Output + +Drafted content saved to memory. Handed off to social-poster for distribution. + +## Constraints - Lead with problems, not features - Match tone to the audience (technical vs executive) - Every piece needs a clear CTA - Good enough beats perfect — get it written, then edit -## Anti-Patterns - - NEVER use generic openings ("In today's fast-paced world...") - NEVER dump feature lists — focus on benefits and outcomes - NEVER skip the CTA — every piece of content should lead somewhere diff --git a/templates/seed/squads/marketing/growth-analyst.md b/templates/seed/squads/marketing/growth-analyst.md index a6235a42..0d5a0adf 100644 --- a/templates/seed/squads/marketing/growth-analyst.md +++ b/templates/seed/squads/marketing/growth-analyst.md @@ -1,15 +1,23 @@ --- name: Growth Analyst role: evaluator +squad: "marketing" +provider: "{{PROVIDER}}" model: haiku effort: low +trigger: "schedule" +cooldown: "4h" +timeout: 900 +max_retries: 2 --- # Growth Analyst +## Role + Tracks marketing metrics, identifies what's working, and suggests improvements. The feedback loop that makes marketing better over time. -## Instructions +## How You Work 1. **Gather metrics**: - Website traffic and sources @@ -33,6 +41,10 @@ Tracks marketing metrics, identifies what's working, and suggests improvements. - Suggest new content angles based on data - Identify underperforming channels to improve or drop +## Output + +Growth insights and recommendations saved to memory. Shared with content-drafter for next cycle. + ## Metrics Framework | Metric | Stage | Why It Matters | @@ -42,7 +54,7 @@ Tracks marketing metrics, identifies what's working, and suggests improvements. | Click-through | Consideration | Are they curious enough to visit? 
| | Signups/Downloads | Conversion | Are they taking action? | -## Anti-Patterns +## Constraints - NEVER report vanity metrics without context (followers mean nothing without engagement) - NEVER recommend changes without data to support them diff --git a/templates/seed/squads/marketing/social-poster.md b/templates/seed/squads/marketing/social-poster.md index cd2f0edc..db2b9710 100644 --- a/templates/seed/squads/marketing/social-poster.md +++ b/templates/seed/squads/marketing/social-poster.md @@ -1,15 +1,23 @@ --- name: Social Poster -role: doer +role: worker +squad: "marketing" +provider: "{{PROVIDER}}" model: haiku effort: low +trigger: "schedule" +cooldown: "2h" +timeout: 900 +max_retries: 2 --- # Social Poster +## Role + Manages social media posting schedule and community engagement. Takes drafted content and distributes it across channels. -## Instructions +## How You Work 1. **Check** for ready content: - Read drafts from content-drafter @@ -30,6 +38,10 @@ Manages social media posting schedule and community engagement. Takes drafted co squads memory write marketing "Posted: [platform] - [topic] - [engagement notes]" ``` +## Output + +Posts published across configured channels. Engagement data recorded in memory. + ## Posting Guidelines | Platform | Frequency | Best Times | Style | @@ -37,7 +49,7 @@ Manages social media posting schedule and community engagement. 
Takes drafted co | LinkedIn | 2-3x/week | Tue-Thu 9-11am | Professional, data-driven | | Twitter/X | 3-5x/week | Mon-Fri 8-10am | Concise, opinionated | -## Anti-Patterns +## Constraints - NEVER post the same content on multiple platforms without adapting - NEVER post more than once per platform per day diff --git a/templates/seed/squads/operations/finance-tracker.md b/templates/seed/squads/operations/finance-tracker.md index 2b0e29a5..23145640 100644 --- a/templates/seed/squads/operations/finance-tracker.md +++ b/templates/seed/squads/operations/finance-tracker.md @@ -1,15 +1,23 @@ --- name: Finance Tracker -role: doer +role: worker +squad: "operations" +provider: "{{PROVIDER}}" model: haiku effort: low +trigger: "schedule" +cooldown: "4h" +timeout: 900 +max_retries: 2 --- # Finance Tracker +## Role + Tracks revenue, expenses, runway, and financial health. Provides visibility into the business finances. -## Instructions +## How You Work 1. **Track revenue**: - Record invoices sent and payments received @@ -40,7 +48,7 @@ Tracks revenue, expenses, runway, and financial health. Provides visibility into Monthly financial summary in `.agents/memory/operations/finance-tracker/state.md` -## Anti-Patterns +## Constraints - NEVER guess numbers — use actual records - NEVER skip tracking small expenses — they add up diff --git a/templates/seed/squads/operations/goal-tracker.md b/templates/seed/squads/operations/goal-tracker.md index d91c71cf..af8eea33 100644 --- a/templates/seed/squads/operations/goal-tracker.md +++ b/templates/seed/squads/operations/goal-tracker.md @@ -1,15 +1,23 @@ --- name: Goal Tracker role: evaluator +squad: "operations" +provider: "{{PROVIDER}}" model: haiku effort: low +trigger: "schedule" +cooldown: "2h" +timeout: 900 +max_retries: 2 --- # Goal Tracker +## Role + Monitors business objectives, tracks progress, and flags at-risk goals before they become problems. -## Instructions +## How You Work 1. 
**Read goals** from squad definitions: ```bash @@ -32,6 +40,10 @@ Monitors business objectives, tracks progress, and flags at-risk goals before th squads memory write operations "Goal check: [summary of at-risk items]" ``` +## Output + +Goal status report saved to `.agents/memory/operations/goal-tracker/state.md`. At-risk goals flagged to ops-lead. + ## Risk Framework | Status | Criteria | Action | @@ -41,7 +53,7 @@ Monitors business objectives, tracks progress, and flags at-risk goals before th | Blocked | External dependency, needs human decision | Escalate immediately | | Stale | No progress 4+ weeks, no one working on it | Recommend closing or reassigning | -## Anti-Patterns +## Constraints - NEVER mark a goal as "on track" without evidence of recent progress - NEVER create goals without measurable criteria diff --git a/templates/seed/squads/operations/ops-lead.md b/templates/seed/squads/operations/ops-lead.md index 72b396e5..325d22c9 100644 --- a/templates/seed/squads/operations/ops-lead.md +++ b/templates/seed/squads/operations/ops-lead.md @@ -1,17 +1,25 @@ --- name: Ops Lead role: lead +squad: "operations" +provider: "{{PROVIDER}}" model: sonnet effort: high +trigger: "schedule" +cooldown: "1h" +timeout: 3600 +max_retries: 2 skills: - squads-cli --- # Ops Lead +## Role + Runs daily operations. Reads all squad states, identifies what needs attention, and briefs the founder on what matters. -## Instructions +## How You Work 1. **Read all squad states**: ```bash @@ -34,6 +42,10 @@ Runs daily operations. Reads all squad states, identifies what needs attention, squads memory write company "Ops briefing: [summary]" ``` +## Output + +Daily operational briefing for the founder. Only what needs attention. + ## Decision Framework | Signal | Action | @@ -43,15 +55,13 @@ Runs daily operations. 
Reads all squad states, identifies what needs attention, | Deadline approaching | Flag in Risks | | Squad running normally | Skip — silence means healthy | -## Principles +## Constraints - The founder's attention is the scarcest resource — filter ruthlessly - Never repeat what you already reported - Silence means everything is fine - Decisions, not status updates -## Anti-Patterns - - NEVER post "no updates" or "system healthy" — silence IS the signal - NEVER include memory update noise — that's internal bookkeeping - NEVER repeat information from the last briefing diff --git a/templates/seed/squads/product/lead.md b/templates/seed/squads/product/lead.md index a75560b7..364241cd 100644 --- a/templates/seed/squads/product/lead.md +++ b/templates/seed/squads/product/lead.md @@ -1,8 +1,14 @@ --- name: Product Lead role: lead +squad: "product" +provider: "{{PROVIDER}}" model: sonnet effort: high +trigger: "schedule" +cooldown: "1h" +timeout: 3600 +max_retries: 2 tools: - Read - Write @@ -10,9 +16,11 @@ tools: # Product Lead +## Role + Own the product roadmap. Turn intelligence and research insights into prioritized decisions about what to build, improve, or stop. -## Instructions +## How You Work 1. Read business context from `.agents/BUSINESS_BRIEF.md` 2. Read your previous state from `.agents/memory/product/lead/state.md` @@ -23,7 +31,7 @@ Own the product roadmap. Turn intelligence and research insights into prioritize 7. Brief the `scanner` on what signals to watch and the `worker` on what specs to write 8. Save roadmap to `.agents/memory/product/lead/state.md` -## Output Format (REQUIRED) +## Output ```markdown # Product Roadmap — {date} @@ -48,7 +56,7 @@ What the scanner should monitor this cycle. What the worker should draft this cycle. 
``` -## Rules +## Constraints - Every roadmap item must trace back to a business need, research finding, or user feedback - "Parked" is as important as "This Cycle" — saying no prevents scope creep diff --git a/templates/seed/squads/product/scanner.md b/templates/seed/squads/product/scanner.md index 45506930..98281833 100644 --- a/templates/seed/squads/product/scanner.md +++ b/templates/seed/squads/product/scanner.md @@ -1,8 +1,14 @@ --- name: Product Scanner -role: doer +role: worker +squad: "product" +provider: "{{PROVIDER}}" model: haiku effort: medium +trigger: "schedule" +cooldown: "2h" +timeout: 1800 +max_retries: 2 tools: - WebSearch - WebFetch @@ -12,9 +18,11 @@ tools: # Product Scanner +## Role + Monitor user feedback, competitor moves, and market signals. Surface what matters to the Product Lead. -## Instructions +## How You Work 1. Read signals the lead wants watched from `.agents/memory/product/lead/state.md` 2. Read your previous scan from `.agents/memory/product/scanner/state.md` @@ -22,7 +30,7 @@ Monitor user feedback, competitor moves, and market signals. Surface what matter 4. Filter signal from noise — only report what affects product decisions 5. Save scan results to `.agents/memory/product/scanner/state.md` -## Output Format (REQUIRED) +## Output ```markdown # Product Scan — {date} @@ -42,7 +50,7 @@ Themes from user feedback, support channels, or community. Top 1-2 things the Product Lead should know about right now. 
``` -## Rules +## Constraints - Quality over quantity — 3 high-signal items beat 20 low-signal ones - Always include the source URL diff --git a/templates/seed/squads/product/worker.md b/templates/seed/squads/product/worker.md index dd9be5cf..7c87f108 100644 --- a/templates/seed/squads/product/worker.md +++ b/templates/seed/squads/product/worker.md @@ -1,8 +1,14 @@ --- name: Product Worker -role: doer +role: worker +squad: "product" +provider: "{{PROVIDER}}" model: sonnet effort: high +trigger: "event" +cooldown: "30m" +timeout: 1800 +max_retries: 2 tools: - Read - Write @@ -10,16 +16,18 @@ tools: # Product Worker +## Role + Write product specs, user stories, and feature documentation. Turn the lead's roadmap decisions into buildable documents. -## Instructions +## How You Work 1. Read specs needed from `.agents/memory/product/lead/state.md` 2. Read your previous work from `.agents/memory/product/worker/state.md` 3. For each assigned feature, produce a spec in the REQUIRED FORMAT 4. Save specs to `.agents/memory/product/worker/state.md` -## Output Format (REQUIRED) +## Output ```markdown # Product Spec: {Feature Name} @@ -47,7 +55,7 @@ What needs to exist before this can be built? Decisions that need human input before building. ``` -## Rules +## Constraints - Write for the builder, not the boardroom — be specific - Acceptance criteria must be testable (yes/no, not "improved" or "better") diff --git a/templates/seed/squads/research/analyst.md b/templates/seed/squads/research/analyst.md index da4b537b..8226c1fa 100644 --- a/templates/seed/squads/research/analyst.md +++ b/templates/seed/squads/research/analyst.md @@ -1,8 +1,14 @@ --- name: Analyst -role: doer +role: worker +squad: "research" +provider: "{{PROVIDER}}" model: sonnet effort: high +trigger: "event" +cooldown: "30m" +timeout: 1800 +max_retries: 2 tools: - WebSearch - WebFetch @@ -12,9 +18,11 @@ tools: # Research Analyst +## Role + Conduct deep research on assigned topics. 
Produce findings with sources, not opinions. -## Instructions +## How You Work 1. Read research agenda from `.agents/memory/research/lead/state.md` 2. Read your previous findings from `.agents/memory/research/analyst/state.md` @@ -22,7 +30,7 @@ Conduct deep research on assigned topics. Produce findings with sources, not opi 4. For each finding, record the source URL and confidence level 5. Save findings to `.agents/memory/research/analyst/state.md` -## Output Format (REQUIRED) +## Output ```markdown # Research Findings — {date} @@ -41,7 +49,7 @@ What this means for our business (2-3 sentences). What we still don't know and where to look next. ``` -## Rules +## Constraints - Every finding needs a source. No source = no finding. - Confidence levels: CONFIRMED (multiple sources) > LIKELY (single credible source) > POSSIBLE (inferred) diff --git a/templates/seed/squads/research/lead.md b/templates/seed/squads/research/lead.md index b1a7fca1..a3a9cd57 100644 --- a/templates/seed/squads/research/lead.md +++ b/templates/seed/squads/research/lead.md @@ -1,8 +1,14 @@ --- name: Research Lead role: lead +squad: "research" +provider: "{{PROVIDER}}" model: sonnet effort: high +trigger: "schedule" +cooldown: "1h" +timeout: 3600 +max_retries: 2 tools: - WebSearch - WebFetch @@ -12,9 +18,11 @@ tools: # Research Lead +## Role + Define the research agenda, coordinate the analyst and synthesizer, and ensure research outputs are actionable — not academic. -## Instructions +## How You Work 1. Read business context from `.agents/BUSINESS_BRIEF.md` 2. Read your previous state from `.agents/memory/research/lead/state.md` @@ -24,9 +32,7 @@ Define the research agenda, coordinate the analyst and synthesizer, and ensure r 6. Review outputs and ensure they answer: "So what? What should we do?" 7. 
Update state: `.agents/memory/research/lead/state.md` -## Output Format (REQUIRED) - -Every cycle produces a research direction: +## Output ```markdown # Research Agenda — {date} @@ -44,7 +50,7 @@ Every cycle produces a research direction: Questions we need answered this cycle, ranked by business impact. ``` -## Rules +## Constraints - Every research topic must tie to a business need from BUSINESS_BRIEF.md - "Interesting" is not enough — research must be actionable diff --git a/templates/seed/squads/research/synthesizer.md b/templates/seed/squads/research/synthesizer.md index 5afc2897..2db06dca 100644 --- a/templates/seed/squads/research/synthesizer.md +++ b/templates/seed/squads/research/synthesizer.md @@ -1,8 +1,14 @@ --- name: Synthesizer -role: doer +role: worker +squad: "research" +provider: "{{PROVIDER}}" model: sonnet effort: high +trigger: "event" +cooldown: "30m" +timeout: 1800 +max_retries: 2 tools: - Read - Write @@ -10,9 +16,11 @@ tools: # Research Synthesizer +## Role + Turn raw findings from the analyst into a cohesive report that a human can act on in 5 minutes. -## Instructions +## How You Work 1. Read the analyst's findings from `.agents/memory/research/analyst/state.md` 2. Read the research agenda from `.agents/memory/research/lead/state.md` @@ -20,7 +28,7 @@ Turn raw findings from the analyst into a cohesive report that a human can act o 4. Produce a synthesis report in the REQUIRED FORMAT below 5. Save report to `.agents/memory/research/synthesizer/state.md` -## Output Format (REQUIRED) +## Output ```markdown # Research Synthesis — {date} @@ -50,7 +58,7 @@ What should we actually do? Ranked by impact. | P1 | {action} | {rationale} | ``` -## Rules +## Constraints - The executive summary is the most important section — if someone reads nothing else, they get the picture - Don't parrot findings — synthesize. 
Connect dots the analyst didn't diff --git a/test/docker/Dockerfile.fresh-user b/test/docker/Dockerfile.fresh-user new file mode 100644 index 00000000..6859495d --- /dev/null +++ b/test/docker/Dockerfile.fresh-user @@ -0,0 +1,32 @@ +# Simulates a brand new user installing squads-cli for the first time. +# No config, no .agents dir, no history — completely clean environment. +# Builds from local source so tests exercise the current codebase. + +FROM node:22-slim + +RUN apt-get update && apt-get install -y git curl && rm -rf /var/lib/apt/lists/* + +# Build squads-cli from local source and install globally +WORKDIR /app +COPY . . +RUN npm ci && npm run build && npm pack +RUN npm install -g squads-cli-*.tgz + +# Verify it's installed +RUN which squads && squads --version || echo "squads not found after install" + +# Create non-root user +RUN useradd -m -s /bin/bash developer +USER developer +WORKDIR /home/developer + +RUN git config --global user.name "Test User" && \ + git config --global user.email "test@example.com" && \ + git config --global init.defaultBranch main + +# Fresh project +RUN mkdir -p /home/developer/my-project +WORKDIR /home/developer/my-project +RUN git init + +CMD ["/bin/bash"] diff --git a/test/docker/test-fresh-user.sh b/test/docker/test-fresh-user.sh new file mode 100755 index 00000000..db6d80d3 --- /dev/null +++ b/test/docker/test-fresh-user.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +# +# Test squads-cli as a brand new user in a clean Docker container. +# Runs the full first-run flow and reports pass/fail for each step. +# +# Usage: +# ./test/docker/test-fresh-user.sh # Interactive mode +# ./test/docker/test-fresh-user.sh --auto # Automated test suite +# +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +CLI_DIR="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" + +echo "=== Building fresh-user Docker image ===" +docker build -f "$SCRIPT_DIR/Dockerfile.fresh-user" -t squads-fresh-user "$CLI_DIR" + +if [ "${1:-}" = "--auto" ]; then + echo "" + echo "=== Running automated first-run test suite ===" + + docker run --rm squads-fresh-user bash -c ' + PASS=0 + FAIL=0 + + test_step() { + local name="$1" + shift + if "$@" > /tmp/output.txt 2>&1; then + echo " PASS $name" + PASS=$((PASS + 1)) + else + echo " FAIL $name" + echo " $(tail -3 /tmp/output.txt | head -3)" + FAIL=$((FAIL + 1)) + fi + } + + echo "" + echo "--- Step 1: squads --version ---" + test_step "version" squads --version + + echo "--- Step 2: squads --help ---" + test_step "help" squads --help + + echo "--- Step 3: squads init ---" + test_step "init" squads init + + echo "--- Step 4: .agents directory created ---" + test_step "agents-dir" test -d .agents/squads + + echo "--- Step 5: squads status ---" + test_step "status" squads status + + echo "--- Step 6: squads list ---" + test_step "list" squads list + + echo "--- Step 7: squads catalog list ---" + test_step "catalog-list" squads catalog list + + echo "--- Step 8: squads doctor ---" + test_step "doctor" squads doctor + + echo "--- Step 9: squads tier (Tier 1 in Docker) ---" + test_step "tier" squads tier + + echo "--- Step 10: squads obs history (empty, no crash) ---" + test_step "obs-history" squads obs history + + echo "--- Step 11: squads obs cost (empty, no crash) ---" + test_step "obs-cost" squads obs cost + + echo "--- Step 12: squads services status (graceful without Docker) ---" + test_step "services-status" squads services status + + echo "--- Step 13: unknown command errors gracefully ---" + if squads nonexistent > /tmp/output.txt 2>&1; then + echo " FAIL unknown-cmd (should have errored)" + FAIL=$((FAIL + 1)) + else + echo " PASS unknown-cmd" + PASS=$((PASS + 1)) + fi + + echo "" + echo "=== Results: $PASS passed, $FAIL failed ===" + + if [ "$FAIL" -gt 0 ]; then + exit 1 + fi + ' +else + echo 
"" + echo "=== Starting interactive fresh-user container ===" + echo "You are a new user. Try:" + echo " squads --version" + echo " squads init" + echo " squads status" + echo " squads catalog list" + echo "" + docker run --rm -it squads-fresh-user +fi diff --git a/test/init.test.ts b/test/init.test.ts index 4233edf9..d1922684 100644 --- a/test/init.test.ts +++ b/test/init.test.ts @@ -1,22 +1,131 @@ -import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { describe, it, expect, beforeEach, afterEach, vi, type Mock } from 'vitest'; import { mkdirSync, rmSync, existsSync, readFileSync, writeFileSync } from 'fs'; import { join } from 'path'; import { tmpdir } from 'os'; + +// --- Mocks (must be before imports that use them) --- + +vi.mock('ora', () => ({ + default: vi.fn(() => ({ + start: vi.fn().mockReturnThis(), + stop: vi.fn().mockReturnThis(), + fail: vi.fn().mockReturnThis(), + succeed: vi.fn().mockReturnThis(), + text: '', + })), +})); + +vi.mock('chalk', () => { + const passthrough = (s: string) => s; + const chain: Record = {}; + const handler: ProxyHandler = { + get: () => new Proxy(passthrough, handler), + apply: (_target, _thisArg, args) => args[0], + }; + return { default: new Proxy(passthrough, handler) }; +}); + +vi.mock('../src/lib/terminal.js', () => ({ + writeLine: vi.fn(), + colors: {}, + bold: '', + RESET: '', +})); + +vi.mock('../src/lib/telemetry.js', () => ({ + track: vi.fn().mockResolvedValue(undefined), + Events: { CLI_INIT: 'cli.init' }, +})); + +const mockCheckGitStatus = vi.fn(); +const mockGetRepoName = vi.fn(); +vi.mock('../src/lib/git.js', () => ({ + checkGitStatus: (...args: unknown[]) => mockCheckGitStatus(...args), + getRepoName: (...args: unknown[]) => mockGetRepoName(...args), +})); + +const mockRunAuthChecks = vi.fn(); +const mockCheckGhCli = vi.fn(); +const mockDisplayCheckResults = vi.fn(); +vi.mock('../src/lib/setup-checks.js', () => ({ + PROVIDERS: { + claude: { id: 'claude', name: 'Claude Code', 
requiresSubscription: true, requiresApiKey: false }, + gemini: { id: 'gemini', name: 'Gemini', requiresSubscription: false, requiresApiKey: true }, + openai: { id: 'openai', name: 'OpenAI GPT', requiresSubscription: false, requiresApiKey: true }, + ollama: { id: 'ollama', name: 'Ollama', requiresSubscription: false, requiresApiKey: false }, + cursor: { id: 'cursor', name: 'Cursor', requiresSubscription: true, requiresApiKey: false }, + aider: { id: 'aider', name: 'Aider', requiresSubscription: false, requiresApiKey: true }, + none: { id: 'none', name: 'None', requiresSubscription: false, requiresApiKey: false }, + }, + runAuthChecks: (...args: unknown[]) => mockRunAuthChecks(...args), + checkGhCli: (...args: unknown[]) => mockCheckGhCli(...args), + displayCheckResults: (...args: unknown[]) => mockDisplayCheckResults(...args), +})); + +const mockLoadTemplate = vi.fn(); +vi.mock('../src/lib/templates.js', () => ({ + loadTemplate: (...args: unknown[]) => mockLoadTemplate(...args), +})); + +vi.mock('child_process', async (importOriginal) => { + const actual = await importOriginal<typeof import('child_process')>(); + return { + ...actual, + execSync: vi.fn(), + }; +}); + +// --- Now import the module under test --- +import { initCommand, type InitOptions } from '../src/commands/init.js'; +import { track } from '../src/lib/telemetry.js'; import { execSync } from 'child_process'; -// Test the init command's file creation logic -// Note: We test the underlying functions rather than the full command -// because the command has interactive prompts +// ---- Helpers ---- + +function setupDefaults(): void { + mockCheckGitStatus.mockReturnValue({ + isGitRepo: true, + hasRemote: true, + remoteUrl: 'https://github.com/test-org/test-repo.git', + branch: 'main', + isDirty: false, + uncommittedCount: 0, + }); + mockGetRepoName.mockReturnValue('test-org/test-repo'); + mockRunAuthChecks.mockReturnValue([ + { name: 'Claude CLI', status: 'ok' }, + ]); + mockCheckGhCli.mockReturnValue({ name: 'GitHub CLI', status: 'ok'
}); + mockDisplayCheckResults.mockReturnValue({ hasErrors: false, hasWarnings: false, errorChecks: [], warningChecks: [] }); + + // loadTemplate returns the template path as content (easy to assert which templates were loaded) + mockLoadTemplate.mockImplementation((tplPath: string, vars: Record<string, string>) => { + const name = vars?.['BUSINESS_NAME'] || 'test-project'; + return `# Template: ${tplPath}\n# Business: ${name}\n`; + }); + + (execSync as Mock).mockImplementation(() => Buffer.from('')); +} -describe('init command file generation', () => { +// ---- Tests ---- + +describe('initCommand', () => { let testDir: string; let originalCwd: string; + let exitSpy: ReturnType<typeof vi.spyOn>; beforeEach(() => { - testDir = join(tmpdir(), 'squads-init-test-' + Date.now()); + testDir = join(tmpdir(), `squads-init-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`); mkdirSync(testDir, { recursive: true }); originalCwd = process.cwd(); process.chdir(testDir); + + vi.clearAllMocks(); + setupDefaults(); + + exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => { + throw new Error('process.exit called'); + }) as never); }); afterEach(() => { @@ -24,216 +133,623 @@ describe('init command file generation', () => { if (existsSync(testDir)) { rmSync(testDir, { recursive: true, force: true }); } + exitSpy.mockRestore(); }); - describe('directory structure creation', () => { - it('creates required directories', () => { - // Simulate init directory creation - const dirs = [ - '.agents/squads/demo', - '.agents/memory', - '.agents/config', - '.agents/outputs', + // ---------- Core structure ---------- + + describe('directory structure', () => { + it('creates all 4 core squad directories', async () => { + await initCommand({ yes: true, force: true }); + + for (const squad of ['company', 'research', 'intelligence', 'product']) { + expect(existsSync(join(testDir, `.agents/squads/${squad}`))).toBe(true); + } + }); + + it('creates memory directories for core squads', async () => { + await initCommand({
yes: true, force: true }); + + const expectedMemoryDirs = [ + '.agents/memory/company/manager', + '.agents/memory/company/event-dispatcher', + '.agents/memory/company/goal-tracker', + '.agents/memory/company/company-eval', + '.agents/memory/company/company-critic', + '.agents/memory/research/lead', + '.agents/memory/research/analyst', + '.agents/memory/research/synthesizer', + '.agents/memory/intelligence/intel-lead', + '.agents/memory/intelligence/intel-eval', + '.agents/memory/intelligence/intel-critic', + '.agents/memory/product/lead', ]; - for (const dir of dirs) { - mkdirSync(join(testDir, dir), { recursive: true }); + for (const dir of expectedMemoryDirs) { + expect(existsSync(join(testDir, dir))).toBe(true); } + }); + + it('creates skills directories', async () => { + await initCommand({ yes: true, force: true }); + + expect(existsSync(join(testDir, '.agents/skills/squads-cli'))).toBe(true); + expect(existsSync(join(testDir, '.agents/skills/gh'))).toBe(true); + }); + + it('creates config directory', async () => { + await initCommand({ yes: true, force: true }); - expect(existsSync(join(testDir, '.agents/squads'))).toBe(true); - expect(existsSync(join(testDir, '.agents/memory'))).toBe(true); expect(existsSync(join(testDir, '.agents/config'))).toBe(true); - expect(existsSync(join(testDir, '.agents/outputs'))).toBe(true); }); + }); - it('creates demo squad with SQUAD.md', () => { - const demoDir = join(testDir, '.agents', 'squads', 'demo'); - mkdirSync(demoDir, { recursive: true }); + // ---------- Squad files ---------- - const squadContent = `# Demo Squad + describe('squad file creation', () => { + it('writes core squad definition files from templates', async () => { + await initCommand({ yes: true, force: true }); -Research squad that creates a competitive analysis of enterprise AI agent frameworks. 
+ // Check that loadTemplate was called for core squads + const templateCalls = mockLoadTemplate.mock.calls.map((c: unknown[]) => c[0]); -## Goals + expect(templateCalls).toContain('seed/squads/company/SQUAD.md'); + expect(templateCalls).toContain('seed/squads/company/manager.md'); + expect(templateCalls).toContain('seed/squads/research/SQUAD.md'); + expect(templateCalls).toContain('seed/squads/research/lead.md'); + expect(templateCalls).toContain('seed/squads/intelligence/SQUAD.md'); + expect(templateCalls).toContain('seed/squads/intelligence/intel-lead.md'); + expect(templateCalls).toContain('seed/squads/product/SQUAD.md'); + expect(templateCalls).toContain('seed/squads/product/lead.md'); + }); -- [ ] Generate a report comparing enterprise agent frameworks + it('writes squad files to disk', async () => { + await initCommand({ yes: true, force: true }); -## Agents + // Files should exist on disk with template content + const squadMd = readFileSync(join(testDir, '.agents/squads/company/SQUAD.md'), 'utf-8'); + expect(squadMd).toContain('Template: seed/squads/company/SQUAD.md'); + }); + }); + + // ---------- Config and skills ---------- + + describe('config and skills', () => { + it('creates provider.yaml', async () => { + await initCommand({ yes: true, force: true }); + + const templateCalls = mockLoadTemplate.mock.calls.map((c: unknown[]) => c[0]); + expect(templateCalls).toContain('seed/config/provider.yaml'); + expect(existsSync(join(testDir, '.agents/config/provider.yaml'))).toBe(true); + }); -| Agent | Purpose | -|-------|---------| -| researcher | Researches enterprise AI agent frameworks | -| reporter | Creates a markdown report from research | + it('creates SYSTEM.md', async () => { + await initCommand({ yes: true, force: true }); -## Pipeline + const templateCalls = mockLoadTemplate.mock.calls.map((c: unknown[]) => c[0]); + expect(templateCalls).toContain('seed/config/SYSTEM.md'); + expect(existsSync(join(testDir, '.agents/config/SYSTEM.md'))).toBe(true); + 
}); + + it('creates squads-cli skill files', async () => { + await initCommand({ yes: true, force: true }); + + expect(existsSync(join(testDir, '.agents/skills/squads-cli/SKILL.md'))).toBe(true); + expect(existsSync(join(testDir, '.agents/skills/squads-cli/references/commands.md'))).toBe(true); + }); -\`researcher\` → \`reporter\` -`; - writeFileSync(join(demoDir, 'SQUAD.md'), squadContent); + it('creates gh skill', async () => { + await initCommand({ yes: true, force: true }); - expect(existsSync(join(demoDir, 'SQUAD.md'))).toBe(true); - const content = readFileSync(join(demoDir, 'SQUAD.md'), 'utf-8'); - expect(content).toContain('Demo Squad'); - expect(content).toContain('researcher'); - expect(content).toContain('reporter'); + expect(existsSync(join(testDir, '.agents/skills/gh/SKILL.md'))).toBe(true); }); + }); + + // ---------- Memory and state ---------- + + describe('memory state files', () => { + it('creates core memory state files', async () => { + await initCommand({ yes: true, force: true }); + + const templateCalls = mockLoadTemplate.mock.calls.map((c: unknown[]) => c[0]); + expect(templateCalls).toContain('seed/memory/company/manager/state.md'); + expect(templateCalls).toContain('seed/memory/research/lead/state.md'); + expect(templateCalls).toContain('seed/memory/intelligence/intel-lead/state.md'); + expect(templateCalls).toContain('seed/memory/product/lead/state.md'); + }); + + it('creates priorities.md and goals.md for each squad', async () => { + await initCommand({ yes: true, force: true }); + + const templateCalls = mockLoadTemplate.mock.calls.map((c: unknown[]) => c[0]); + expect(templateCalls).toContain('seed/memory/_squad/priorities.md'); + expect(templateCalls).toContain('seed/memory/_squad/goals.md'); + + for (const squad of ['company', 'research', 'intelligence', 'product']) { + expect(existsSync(join(testDir, `.agents/memory/${squad}/priorities.md`))).toBe(true); + expect(existsSync(join(testDir, 
`.agents/memory/${squad}/goals.md`))).toBe(true); + } + }); + + it('does not overwrite existing state files on re-run', async () => { + // First run + await initCommand({ yes: true, force: true }); + + // Write custom content to a state file + const statePath = join(testDir, '.agents/memory/company/manager/state.md'); + writeFileSync(statePath, '# Custom state'); + + // Second run + await initCommand({ yes: true, force: true }); - it('creates agent definition files', () => { - const demoDir = join(testDir, '.agents', 'squads', 'demo'); - mkdirSync(demoDir, { recursive: true }); + const content = readFileSync(statePath, 'utf-8'); + expect(content).toBe('# Custom state'); + }); - const researcherContent = `# Researcher Agent + it('does not overwrite existing priorities on re-run', async () => { + // First run + await initCommand({ yes: true, force: true }); -## Purpose -Research enterprise AI agent frameworks. + const prioPath = join(testDir, '.agents/memory/company/priorities.md'); + writeFileSync(prioPath, '# Custom priorities'); -## Tools -- WebSearch -- WebFetch -`; - writeFileSync(join(demoDir, 'researcher.md'), researcherContent); + // Second run + await initCommand({ yes: true, force: true }); - expect(existsSync(join(demoDir, 'researcher.md'))).toBe(true); - const content = readFileSync(join(demoDir, 'researcher.md'), 'utf-8'); - expect(content).toContain('Researcher Agent'); - expect(content).toContain('WebSearch'); + const content = readFileSync(prioPath, 'utf-8'); + expect(content).toBe('# Custom priorities'); }); }); - describe('AGENTS.md generation', () => { - it('creates AGENTS.md at project root', () => { - const agentsContent = `# AI Agent Configuration + // ---------- Root-level files ---------- -This file configures Claude Code's behavior for this project. 
-`; - writeFileSync(join(testDir, 'AGENTS.md'), agentsContent); + describe('root-level files', () => { + it('creates AGENTS.md', async () => { + await initCommand({ yes: true, force: true }); expect(existsSync(join(testDir, 'AGENTS.md'))).toBe(true); }); - it('does not overwrite existing AGENTS.md', () => { - const originalContent = '# My Existing Config'; - writeFileSync(join(testDir, 'AGENTS.md'), originalContent); + it('does not overwrite existing AGENTS.md', async () => { + writeFileSync(join(testDir, 'AGENTS.md'), '# Existing'); + await initCommand({ yes: true, force: true }); + + expect(readFileSync(join(testDir, 'AGENTS.md'), 'utf-8')).toBe('# Existing'); + }); + + it('creates BUSINESS_BRIEF.md', async () => { + await initCommand({ yes: true, force: true }); + + expect(existsSync(join(testDir, '.agents/BUSINESS_BRIEF.md'))).toBe(true); + }); + + it('creates company.md context', async () => { + await initCommand({ yes: true, force: true }); + + expect(existsSync(join(testDir, '.agents/memory/company/company.md'))).toBe(true); + }); + + it('creates directives.md', async () => { + await initCommand({ yes: true, force: true }); + + expect(existsSync(join(testDir, '.agents/memory/company/directives.md'))).toBe(true); + }); + + it('creates README.md when none exists', async () => { + await initCommand({ yes: true, force: true }); + + expect(existsSync(join(testDir, 'README.md'))).toBe(true); + }); + + it('does not overwrite existing README.md with real content', async () => { + writeFileSync(join(testDir, 'README.md'), '# My Project\n\nDescription here.\n'); + await initCommand({ yes: true, force: true }); + + expect(readFileSync(join(testDir, 'README.md'), 'utf-8')).toBe('# My Project\n\nDescription here.\n'); + }); + + it('overwrites stub README.md (single-line heading only)', async () => { + writeFileSync(join(testDir, 'README.md'), '# test-repo\n'); + await initCommand({ yes: true, force: true }); + + const content = readFileSync(join(testDir, 'README.md'), 
'utf-8'); + expect(content).toContain('Template: seed/README.md.template'); + }); + }); + + // ---------- Claude provider ---------- + + describe('Claude provider', () => { + it('creates .claude directory', async () => { + await initCommand({ yes: true, force: true, provider: 'claude' }); + + expect(existsSync(join(testDir, '.claude'))).toBe(true); + }); + + it('creates CLAUDE.md', async () => { + await initCommand({ yes: true, force: true, provider: 'claude' }); + + expect(existsSync(join(testDir, 'CLAUDE.md'))).toBe(true); + }); + + it('creates .claude/settings.json', async () => { + await initCommand({ yes: true, force: true, provider: 'claude' }); + + expect(existsSync(join(testDir, '.claude/settings.json'))).toBe(true); + }); + + it('does not create .claude dir for non-Claude provider', async () => { + await initCommand({ yes: true, force: true, provider: 'gemini' }); + + expect(existsSync(join(testDir, '.claude'))).toBe(false); + expect(existsSync(join(testDir, 'CLAUDE.md'))).toBe(false); + }); + }); + + // ---------- Pack support ---------- + + describe('pack support', () => { + it('--pack engineering adds engineering squad', async () => { + await initCommand({ yes: true, force: true, pack: ['engineering'] }); + + expect(existsSync(join(testDir, '.agents/squads/engineering'))).toBe(true); + expect(existsSync(join(testDir, '.agents/memory/engineering/issue-solver'))).toBe(true); + + const templateCalls = mockLoadTemplate.mock.calls.map((c: unknown[]) => c[0]); + expect(templateCalls).toContain('seed/squads/engineering/SQUAD.md'); + expect(templateCalls).toContain('seed/squads/engineering/issue-solver.md'); + }); + + it('--pack marketing adds marketing squad', async () => { + await initCommand({ yes: true, force: true, pack: ['marketing'] }); + + expect(existsSync(join(testDir, '.agents/squads/marketing'))).toBe(true); + expect(existsSync(join(testDir, '.agents/memory/marketing/content-drafter'))).toBe(true); + }); + + it('--pack operations adds operations 
squad', async () => { + await initCommand({ yes: true, force: true, pack: ['operations'] }); + + expect(existsSync(join(testDir, '.agents/squads/operations'))).toBe(true); + expect(existsSync(join(testDir, '.agents/memory/operations/ops-lead'))).toBe(true); + }); + + it('--pack all adds all three squads', async () => { + await initCommand({ yes: true, force: true, pack: ['all'] }); - // Simulate init behavior - check before writing - if (!existsSync(join(testDir, 'AGENTS.md'))) { - writeFileSync(join(testDir, 'AGENTS.md'), '# New Content'); + for (const squad of ['engineering', 'marketing', 'operations']) { + expect(existsSync(join(testDir, `.agents/squads/${squad}`))).toBe(true); } + }); + + it('deduplicates squads when same pack specified twice', async () => { + await initCommand({ yes: true, force: true, pack: ['engineering', 'engineering'] }); + + // Should not fail — dedup works + expect(existsSync(join(testDir, '.agents/squads/engineering'))).toBe(true); + + // Count how many times engineering SQUAD.md template was loaded + const engineeringSquadCalls = mockLoadTemplate.mock.calls + .filter((c: unknown[]) => c[0] === 'seed/squads/engineering/SQUAD.md'); + expect(engineeringSquadCalls.length).toBe(1); + }); + + it('creates priorities and goals for pack squads', async () => { + await initCommand({ yes: true, force: true, pack: ['engineering'] }); - const content = readFileSync(join(testDir, 'AGENTS.md'), 'utf-8'); - expect(content).toBe(originalContent); + expect(existsSync(join(testDir, '.agents/memory/engineering/priorities.md'))).toBe(true); + expect(existsSync(join(testDir, '.agents/memory/engineering/goals.md'))).toBe(true); }); }); - describe('Claude-specific setup', () => { - it('creates .claude directory for Claude provider', () => { - const claudeDir = join(testDir, '.claude'); - mkdirSync(claudeDir, { recursive: true }); + // ---------- IDP catalog ---------- + + describe('IDP catalog', () => { + it('creates IDP catalog entry', async () => { + await 
initCommand({ yes: true, force: true }); + + const idpDir = join(testDir, '.agents/idp/catalog'); + expect(existsSync(idpDir)).toBe(true); + + const templateCalls = mockLoadTemplate.mock.calls.map((c: unknown[]) => c[0]); + expect(templateCalls).toContain('seed/idp/catalog/service.yaml.template'); + }); + + it('skips IDP catalog if .agents/idp/catalog already exists', async () => { + mkdirSync(join(testDir, '.agents/idp/catalog'), { recursive: true }); + await initCommand({ yes: true, force: true }); - expect(existsSync(claudeDir)).toBe(true); + const templateCalls = mockLoadTemplate.mock.calls.map((c: unknown[]) => c[0]); + expect(templateCalls).not.toContain('seed/idp/catalog/service.yaml.template'); }); - it('creates settings.json with hooks', () => { - const claudeDir = join(testDir, '.claude'); - mkdirSync(claudeDir, { recursive: true }); + it('detects Node stack from package.json', async () => { + writeFileSync(join(testDir, 'package.json'), JSON.stringify({ + name: 'my-app', + dependencies: { react: '^18.0.0' }, + })); - const settings = { - hooks: { - SessionStart: [{ - hooks: [{ - type: 'command', - command: 'squads status', - timeout: 10, - }], - }], - }, - }; + await initCommand({ yes: true, force: true }); - writeFileSync( - join(claudeDir, 'settings.json'), - JSON.stringify(settings, null, 2) + // Check the variables passed to the IDP template + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', ); + expect(idpCall).toBeDefined(); + const vars = idpCall![1] as Record; + expect(vars['SERVICE_STACK']).toBe('react'); + expect(vars['SERVICE_TYPE']).toBe('product'); + expect(vars['BUILD_COMMAND']).toBe('npm run build'); + expect(vars['TEST_COMMAND']).toBe('npm test'); + }); + + it('detects Go stack from go.mod', async () => { + writeFileSync(join(testDir, 'go.mod'), 'module example.com/myapp\n\ngo 1.21\n'); - expect(existsSync(join(claudeDir, 'settings.json'))).toBe(true); - const content 
= JSON.parse(readFileSync(join(claudeDir, 'settings.json'), 'utf-8')); - expect(content.hooks.SessionStart).toBeDefined(); + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record; + expect(vars['SERVICE_STACK']).toBe('go'); + expect(vars['BUILD_COMMAND']).toBe('go build ./...'); + expect(vars['TEST_COMMAND']).toBe('go test ./...'); }); - it('creates CLAUDE.md for Claude provider', () => { - const claudeContent = `# Claude Code Instructions + it('detects Python stack from requirements.txt', async () => { + writeFileSync(join(testDir, 'requirements.txt'), 'flask==2.0\n'); -You are working in a Squads-managed project. -`; - writeFileSync(join(testDir, 'CLAUDE.md'), claudeContent); + await initCommand({ yes: true, force: true }); - expect(existsSync(join(testDir, 'CLAUDE.md'))).toBe(true); - const content = readFileSync(join(testDir, 'CLAUDE.md'), 'utf-8'); - expect(content).toContain('Claude Code'); + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record; + expect(vars['SERVICE_STACK']).toBe('python'); + expect(vars['TEST_COMMAND']).toBe('pytest'); + }); + + it('detects Rust stack from Cargo.toml', async () => { + writeFileSync(join(testDir, 'Cargo.toml'), '[package]\nname = "myapp"\n'); + + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record; + expect(vars['SERVICE_STACK']).toBe('rust'); + expect(vars['BUILD_COMMAND']).toBe('cargo build'); + expect(vars['TEST_COMMAND']).toBe('cargo test'); + }); + + it('detects Ruby stack from Gemfile', async () => { + writeFileSync(join(testDir, 'Gemfile'), "source 'https://rubygems.org'\n"); + + await 
initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record; + expect(vars['SERVICE_STACK']).toBe('ruby'); + expect(vars['TEST_COMMAND']).toBe('bundle exec rspec'); + }); + + it('defaults to unknown stack when no project files found', async () => { + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record; + expect(vars['SERVICE_STACK']).toBe('unknown'); + expect(vars['SERVICE_TYPE']).toBe('domain'); + }); + + it('uses repo name from git remote', async () => { + await initCommand({ yes: true, force: true }); + + const idpCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/idp/catalog/service.yaml.template', + ); + const vars = idpCall![1] as Record; + expect(vars['REPO_NAME']).toBe('test-org/test-repo'); + expect(vars['SERVICE_NAME']).toBe('test-repo'); }); }); - describe('provider configuration', () => { - it('creates provider.yaml in config', () => { - const configDir = join(testDir, '.agents', 'config'); - mkdirSync(configDir, { recursive: true }); + // ---------- Template variables ---------- + + describe('template variables', () => { + it('passes correct business variables in --yes mode', async () => { + await initCommand({ yes: true, force: true }); + + // Find the BUSINESS_BRIEF template call + const briefCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/BUSINESS_BRIEF.md.template', + ); + expect(briefCall).toBeDefined(); + const vars = briefCall![1] as Record; + // In --yes mode, business name = directory basename + expect(vars['BUSINESS_NAME']).toBe(testDir.split('/').pop()); + expect(vars['BUSINESS_DESCRIPTION']).toContain('AI smart capabilities'); + expect(vars['PROVIDER']).toBe('claude'); // default + 
}); - const providerYaml = `# Provider Configuration -provider: claude -version: 1.0.0 -`; - writeFileSync(join(configDir, 'provider.yaml'), providerYaml); + it('passes provider name to templates', async () => { + await initCommand({ yes: true, force: true, provider: 'gemini' }); - expect(existsSync(join(configDir, 'provider.yaml'))).toBe(true); - const content = readFileSync(join(configDir, 'provider.yaml'), 'utf-8'); - expect(content).toContain('provider: claude'); + const briefCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/BUSINESS_BRIEF.md.template', + ); + const vars = briefCall![1] as Record; + expect(vars['PROVIDER']).toBe('gemini'); + expect(vars['PROVIDER_NAME']).toBe('Gemini'); + }); + + it('includes CURRENT_DATE in variables', async () => { + await initCommand({ yes: true, force: true }); + + const briefCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/BUSINESS_BRIEF.md.template', + ); + const vars = briefCall![1] as Record; + expect(vars['CURRENT_DATE']).toMatch(/^\d{4}-\d{2}-\d{2}$/); }); }); -}); -describe('git hook installation', () => { - let testDir: string; - let originalCwd: string; + // ---------- Auto-commit ---------- - beforeEach(() => { - testDir = join(tmpdir(), 'squads-hooks-test-' + Date.now()); - mkdirSync(testDir, { recursive: true }); - originalCwd = process.cwd(); - process.chdir(testDir); + describe('auto-commit', () => { + it('attempts git add + commit after scaffolding', async () => { + await initCommand({ yes: true, force: true }); + + expect(execSync).toHaveBeenCalledWith( + expect.stringContaining('git add -A && git commit'), + expect.objectContaining({ stdio: 'ignore' }), + ); + }); + + it('does not fail if auto-commit fails', async () => { + (execSync as Mock).mockImplementation(() => { + throw new Error('nothing to commit'); + }); + + // Should not throw + await initCommand({ yes: true, force: true }); + }); }); - afterEach(() => { - process.chdir(originalCwd); - if 
(existsSync(testDir)) { - rmSync(testDir, { recursive: true, force: true }); - } + // ---------- Telemetry ---------- + + describe('telemetry', () => { + it('tracks CLI_INIT event on success', async () => { + await initCommand({ yes: true, force: true }); + + expect(track).toHaveBeenCalledWith('cli.init', expect.objectContaining({ + success: true, + provider: 'claude', + hasGit: true, + hasRemote: true, + })); + }); + + it('tracks agent and squad counts', async () => { + await initCommand({ yes: true, force: true, pack: ['all'] }); + + expect(track).toHaveBeenCalledWith('cli.init', expect.objectContaining({ + agentCount: expect.any(Number), + squadCount: expect.any(Number), + })); + + const call = (track as Mock).mock.calls[0]; + const props = call[1] as Record; + // Core: 14 agents + engineering(3) + marketing(3) + operations(3) = 23 + expect(props['agentCount']).toBe(23); + // Core: 4 squads + 3 pack squads = 7 + expect(props['squadCount']).toBe(7); + }); }); - it('creates post-commit hook when git repo exists', () => { - // Initialize git repo - try { - execSync('git init', { cwd: testDir, stdio: 'ignore' }); - } catch { - // Skip test if git not available - return; - } + // ---------- Prerequisite checks ---------- + + describe('prerequisite checks', () => { + it('exits when checks fail without --force', async () => { + mockDisplayCheckResults.mockReturnValue({ + hasErrors: true, + hasWarnings: false, + errorChecks: [{ name: 'Claude CLI', status: 'missing' }], + warningChecks: [], + }); + + await expect(initCommand({ yes: true })).rejects.toThrow('process.exit'); + expect(exitSpy).toHaveBeenCalledWith(1); + }); + + it('continues when checks fail with --force', async () => { + mockDisplayCheckResults.mockReturnValue({ + hasErrors: true, + hasWarnings: false, + errorChecks: [{ name: 'Claude CLI', status: 'missing' }], + warningChecks: [], + }); + + // Should NOT throw + await initCommand({ yes: true, force: true }); + expect(existsSync(join(testDir, 
'.agents/squads/company'))).toBe(true); + }); - const hooksDir = join(testDir, '.git', 'hooks'); - mkdirSync(hooksDir, { recursive: true }); + it('adds git repo check when not a git repo', async () => { + mockCheckGitStatus.mockReturnValue({ + isGitRepo: false, + hasRemote: false, + isDirty: false, + uncommittedCount: 0, + }); + mockDisplayCheckResults.mockReturnValue({ hasErrors: false, hasWarnings: false, errorChecks: [], warningChecks: [] }); + + await initCommand({ yes: true, force: true }); + + // displayCheckResults should receive a check with 'Git Repository' name + const checksArg = mockDisplayCheckResults.mock.calls[0][0] as Array<{ name: string; status: string }>; + const gitCheck = checksArg.find(c => c.name === 'Git Repository'); + expect(gitCheck).toBeDefined(); + expect(gitCheck!.status).toBe('missing'); + }); - const hookContent = `#!/bin/bash -# Auto-sync Slack channels when SQUAD.md files change -CHANGED_SQUADS=$(git diff-tree --no-commit-id --name-only -r HEAD | grep "SQUAD.md" || true) -`; - writeFileSync(join(hooksDir, 'post-commit'), hookContent, { mode: 0o755 }); + it('shows git repo as ok when in a git repo', async () => { + await initCommand({ yes: true, force: true }); - expect(existsSync(join(hooksDir, 'post-commit'))).toBe(true); - const content = readFileSync(join(hooksDir, 'post-commit'), 'utf-8'); - expect(content).toContain('SQUAD.md'); + const checksArg = mockDisplayCheckResults.mock.calls[0][0] as Array<{ name: string; status: string }>; + const gitCheck = checksArg.find(c => c.name === 'Git Repository'); + expect(gitCheck).toBeDefined(); + expect(gitCheck!.status).toBe('ok'); + }); }); - it('skips hook installation when not a git repo', () => { - // No git init - should skip gracefully - const hooksDir = join(testDir, '.git', 'hooks'); - expect(existsSync(hooksDir)).toBe(false); + // ---------- Non-interactive mode ---------- + + describe('non-interactive mode (--yes)', () => { + it('uses directory name as business name', async () 
=> { + await initCommand({ yes: true, force: true }); + + const briefCall = mockLoadTemplate.mock.calls.find( + (c: unknown[]) => c[0] === 'seed/BUSINESS_BRIEF.md.template', + ); + const vars = briefCall![1] as Record; + expect(vars['BUSINESS_NAME']).toBe(testDir.split('/').pop()); + }); + + it('selects custom use case (core squads only)', async () => { + await initCommand({ yes: true, force: true }); + + // No engineering/marketing/operations unless --pack is used + expect(existsSync(join(testDir, '.agents/squads/engineering'))).toBe(false); + expect(existsSync(join(testDir, '.agents/squads/marketing'))).toBe(false); + expect(existsSync(join(testDir, '.agents/squads/operations'))).toBe(false); + }); + }); + + // ---------- Error handling ---------- + + describe('error handling', () => { + it('exits with code 1 on template loading failure', async () => { + mockLoadTemplate.mockImplementation(() => { + throw Object.assign(new Error('EACCES: permission denied'), { code: 'EACCES' }); + }); + + await expect(initCommand({ yes: true, force: true })).rejects.toThrow('process.exit'); + expect(exitSpy).toHaveBeenCalledWith(1); + }); + + it('handles ENOENT errors gracefully', async () => { + mockLoadTemplate.mockImplementation(() => { + throw Object.assign(new Error('ENOENT: no such file'), { code: 'ENOENT', path: '/missing/template' }); + }); + + await expect(initCommand({ yes: true, force: true })).rejects.toThrow('process.exit'); + }); }); }); diff --git a/test/setup-checks.test.ts b/test/setup-checks.test.ts index c5b0d1d5..6b4977b4 100644 --- a/test/setup-checks.test.ts +++ b/test/setup-checks.test.ts @@ -296,12 +296,12 @@ describe('setup-checks', () => { expect(result.message).toContain('Unknown provider'); }); - it('returns missing when provider CLI not installed', () => { - // commandExists('claude') -> false + it('returns warning when provider CLI not installed', () => { + // commandExists('claude') -> false — treated as warning, not error 
mockedExecSync.mockImplementationOnce(() => { throw new Error(); }); const result = checkProviderAuth('claude'); - expect(result.status).toBe('missing'); + expect(result.status).toBe('warning'); expect(result.fixCommand).toBeDefined(); }); @@ -338,11 +338,11 @@ describe('setup-checks', () => { } }); - it('returns missing when ollama CLI not installed', () => { + it('returns warning when ollama CLI not installed', () => { mockedExecSync.mockImplementationOnce(() => { throw new Error(); }); const result = checkProviderAuth('ollama'); - expect(result.status).toBe('missing'); + expect(result.status).toBe('warning'); }); }); diff --git a/test/templates.test.ts b/test/templates.test.ts index ed07775d..2a1c1c5b 100644 --- a/test/templates.test.ts +++ b/test/templates.test.ts @@ -6,7 +6,7 @@ import { templateExists, } from '../src/lib/templates'; import { formatLocalStatus } from '../src/lib/local'; -import { mkdtempSync, rmSync, mkdirSync, writeFileSync } from 'fs'; +import { mkdtempSync, rmSync, mkdirSync, writeFileSync, readFileSync } from 'fs'; import { join } from 'path'; import { tmpdir } from 'os'; @@ -99,6 +99,24 @@ describe('templates utilities', () => { expect(['repo', 'global', 'bundled']).toContain(source.type); }); }); + + describe('CURRENT_DATE template variable substitution', () => { + it('state.md seed templates contain {{CURRENT_DATE}} placeholder', () => { + const templatePath = join(__dirname, '..', 'templates', 'seed', 'memory', 'research', 'lead', 'state.md'); + const content = readFileSync(templatePath, 'utf-8'); + expect(content).toContain('{{CURRENT_DATE}}'); + }); + + it('{{CURRENT_DATE}} is replaced when substitution is applied', () => { + const templatePath = join(__dirname, '..', 'templates', 'seed', 'memory', 'research', 'lead', 'state.md'); + let content = readFileSync(templatePath, 'utf-8'); + const today = new Date().toISOString().split('T')[0]; + // Replicate the substitution logic from loadTemplateFromPath + content = 
content.replace(/\{\{CURRENT_DATE\}\}/g, today); + expect(content).toContain(`Last update: ${today}`); + expect(content).not.toContain('{{CURRENT_DATE}}'); + }); + }); }); describe('local utilities', () => {