From 8b7a53dcdd17b314549ad5554c8219e55ace0dcf Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 11:06:28 -0400 Subject: [PATCH 01/33] feat(agy): add support for antigravity cli and curated model validation --- src/cli/auth.ts | 1 + src/cli/config.ts | 39 ++++++- src/cli/install.ts | 50 +++++++- src/cli/uninstall.ts | 98 ++++++++++++---- src/providers/agy.ts | 148 ++++++++++++++++++++++++ src/providers/gemini.ts | 8 +- src/providers/index.ts | 3 + src/services/stall/log-path-resolver.ts | 4 + src/tools/execute-prompt.ts | 13 ++- src/tools/register-member.ts | 12 +- src/tools/update-member.ts | 13 ++- src/types.ts | 5 +- tests/install-multi-provider.test.ts | 4 +- tests/log-path-resolver.test.ts | 11 ++ tests/providers.test.ts | 65 ++++++++++- tests/security-hardening.test.ts | 38 ++++++ tests/tool-provider.test.ts | 20 +++- 17 files changed, 485 insertions(+), 47 deletions(-) create mode 100644 src/providers/agy.ts diff --git a/src/cli/auth.ts b/src/cli/auth.ts index 04b45ead..c22823d7 100644 --- a/src/cli/auth.ts +++ b/src/cli/auth.ts @@ -9,6 +9,7 @@ const PROVIDER_AUTH_ENV: Record = { gemini: 'GEMINI_API_KEY', codex: 'OPENAI_API_KEY', copilot: 'COPILOT_GITHUB_TOKEN', + agy: 'GEMINI_API_KEY', }; export async function runAuth(args: string[]): Promise { diff --git a/src/cli/config.ts b/src/cli/config.ts index 32e11066..20eff0a2 100644 --- a/src/cli/config.ts +++ b/src/cli/config.ts @@ -12,11 +12,40 @@ export const SCRIPTS_DIR = path.join(FLEET_BASE, 'scripts'); export const DATA_DIR = path.join(FLEET_BASE, 'data'); export const INSTALL_CONFIG_PATH = path.join(DATA_DIR, 'install-config.json'); +export const CURATED_CHEAP_MODELS = [ + 'gpt-oss-120b', + 'gpt-120', + 'gemini-3.5-flash-lite', + 'gemini-3.1-flash-lite-preview', + 'claude-haiku-4-5', + 'gpt-5.4-mini', +] as const; + +export const CURATED_STANDARD_MODELS = [ + 'gemini-3.5-flash', + 'gpt-oss-120b', + 'gpt-120', + 'claude-sonnet-4.6', + 'claude-sonnet-4-5', + 'gemini-3-flash-preview', + 'gpt-5.4', +] as const; + +export const CURATED_PREMIUM_MODELS = [ + 'claude-sonnet-4.6', + 'claude-opus-4.6', + 'claude-opus-4-6', + 'claude-opus-4-7', + 'gpt-oss-120b', + 'gemini-3.1-pro-preview', +] as const; + export const PROVIDER_STANDARD_MODELS: Record = { claude: 'claude-sonnet-4-6', - gemini: 'gemini-3-flash-preview', + gemini: 'gemini-3.5-flash', codex: 'gpt-5.4', copilot: 'claude-sonnet-4-5', + agy: 'gemini-3.5-flash', }; export interface ProviderInstallConfig { @@ -36,6 +65,14 @@ export interface MultiProviderInstallConfig { export function getProviderInstallConfig(provider: LlmProvider): ProviderInstallConfig { switch (provider) { + case 'agy': + return { + configDir: path.join(home, '.gemini', 'antigravity-cli'), + settingsFile: path.join(home, '.gemini', 'antigravity-cli', 'settings.json'), + skillsDir: path.join(home, '.gemini', 'antigravity-cli', 'skills', 'pm'), + fleetSkillsDir: path.join(home, '.gemini', 'antigravity-cli', 'skills', 'fleet'), + name: 'Antigravity', + }; case 'gemini': return { configDir: path.join(home, '.gemini'), diff --git a/src/cli/install.ts b/src/cli/install.ts index b6ac7be8..29a7b7b5 100644 --- a/src/cli/install.ts +++ b/src/cli/install.ts @@ -161,7 +161,23 @@ const GEMINI_HOOK_NAME_MAP: Record = { }; function mergeHooksConfig(paths: ProviderInstallConfig, hooksConfig: any, provider: LlmProvider): void { - const settings = readConfig(paths); + let settingsFile = paths.settingsFile; + const isAgy = provider === 'agy'; + + let settings: any = {}; + if (isAgy) { + const configDir = path.join(os.homedir(), '.gemini', 'config'); + fs.mkdirSync(configDir, { recursive: true }); + settingsFile = path.join(configDir, 'hooks.json'); + if (fs.existsSync(settingsFile)) { + try { + settings = JSON.parse(fs.readFileSync(settingsFile, 'utf-8')); + } catch {} + } + } else { + settings = readConfig(paths); + } + settings.hooks = settings.hooks || {}; for (const [claudeName, hookEntries] of Object.entries(hooksConfig.hooks || {})) { @@ -188,7 +204,11 @@ function mergeHooksConfig(paths: ProviderInstallConfig, hooksConfig: any, provid } } - writeConfig(paths, settings); + if (isAgy) { + fs.writeFileSync(settingsFile, JSON.stringify(settings, null, 2) + '\n'); + } else { + writeConfig(paths, settings); + } } @@ -242,6 +262,24 @@ function mergeGeminiConfig(paths: ProviderInstallConfig, mcpConfig: any): void { writeConfig(paths, settings); } +function mergeAgyConfig(paths: ProviderInstallConfig, mcpConfig: any): void { + const configDir = path.join(os.homedir(), '.gemini', 'config'); + fs.mkdirSync(configDir, { recursive: true }); + const mcpConfigFile = path.join(configDir, 'mcp_config.json'); + + let settings: any = {}; + if (fs.existsSync(mcpConfigFile)) { + try { + settings = JSON.parse(fs.readFileSync(mcpConfigFile, 'utf-8')); + } catch {} + } + + settings.mcpServers = settings.mcpServers || {}; + settings.mcpServers['apra-fleet'] = mcpConfig; + + fs.writeFileSync(mcpConfigFile, JSON.stringify(settings, null, 2) + '\n'); +} + function writeDefaultModel(paths: ProviderInstallConfig, standardModel: string): void { const settings = readConfig(paths); settings.defaultModel = standardModel; @@ -319,11 +357,11 @@ Usage: apra-fleet install --skill none Skip skill installation apra-fleet install --no-skill Same as --skill none apra-fleet install --force Stop a running server before installing - apra-fleet install --llm Target LLM provider: claude (default), gemini, codex, copilot + apra-fleet install --llm Target LLM provider: claude (default), gemini, codex, copilot, agy apra-fleet install --help Show this help Options: - --llm LLM provider to configure. Supported: claude, gemini, codex, copilot. + --llm LLM provider to configure. Supported: claude, gemini, codex, copilot, agy. Defaults to claude. Note: --llm gemini shows a warning about sequential dispatch — Gemini does not support background agents, so fleet operations run sequentially rather than in parallel. @@ -346,7 +384,7 @@ Options: } } - const supported: LlmProvider[] = ['claude', 'gemini', 'codex', 'copilot']; + const supported: LlmProvider[] = ['claude', 'gemini', 'codex', 'copilot', 'agy']; if (!supported.includes(llm)) { console.error(`Error: Unsupported LLM provider "${llm}". Supported: ${supported.join(', ')}`); process.exit(1); @@ -505,6 +543,8 @@ ${killHint} mergeCodexConfig(paths, mcpConfig); } else if (llm === 'copilot') { mergeCopilotConfig(paths, mcpConfig); + } else if (llm === 'agy') { + mergeAgyConfig(paths, mcpConfig); } // --- Step 6: Install fleet skill (optional) --- diff --git a/src/cli/uninstall.ts b/src/cli/uninstall.ts index 7c1d0362..69589c23 100644 --- a/src/cli/uninstall.ts +++ b/src/cli/uninstall.ts @@ -1,5 +1,6 @@ import fs from 'node:fs'; import path from 'node:path'; +import os from 'node:os'; import { execSync } from 'node:child_process'; import * as readlinePromises from 'node:readline/promises'; import { serverVersion } from '../version.js'; @@ -24,19 +25,37 @@ function run(cmd: string, opts?: Record): void { } function cleanupSettings(paths: ProviderInstallConfig, dryRun: boolean): boolean { + const providerKey = paths.name === 'Antigravity' ? 'agy' : (paths.name.toLowerCase() as LlmProvider); const settings = readConfig(paths); let changed = false; // 1. MCP Servers - if (settings.mcpServers?.['apra-fleet']) { - console.log(` - Removing MCP server 'apra-fleet' from settings`); - if (!dryRun) delete settings.mcpServers['apra-fleet']; - changed = true; - } - if (settings.mcp_servers?.['apra-fleet']) { - console.log(` - Removing MCP server 'apra-fleet' from settings (Codex format)`); - if (!dryRun) delete settings.mcp_servers['apra-fleet']; - changed = true; + if (providerKey === 'agy') { + const mcpConfigFile = path.join(os.homedir(), '.gemini', 'config', 'mcp_config.json'); + if (fs.existsSync(mcpConfigFile)) { + try { + const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigFile, 'utf-8')); + if (mcpConfig.mcpServers?.['apra-fleet']) { + console.log(` - Removing MCP server 'apra-fleet' from mcp_config.json`); + if (!dryRun) { + delete mcpConfig.mcpServers['apra-fleet']; + fs.writeFileSync(mcpConfigFile, JSON.stringify(mcpConfig, null, 2) + '\n'); + } + changed = true; + } + } catch {} + } + } else { + if (settings.mcpServers?.['apra-fleet']) { + console.log(` - Removing MCP server 'apra-fleet' from settings`); + if (!dryRun) delete settings.mcpServers['apra-fleet']; + changed = true; + } + if (settings.mcp_servers?.['apra-fleet']) { + console.log(` - Removing MCP server 'apra-fleet' from settings (Codex format)`); + if (!dryRun) delete settings.mcp_servers['apra-fleet']; + changed = true; + } } // 2. Permissions @@ -64,18 +83,50 @@ function cleanupSettings(paths: ProviderInstallConfig, dryRun: boolean): boolean } } - // 3. Hooks — Claude uses "PostToolUse", Gemini uses "AfterTool" - const hookEventNames = ['PostToolUse', 'AfterTool']; - for (const eventName of hookEventNames) { - if (settings.hooks?.[eventName]) { - const originalCount = settings.hooks[eventName].length; - const filtered = (settings.hooks[eventName] as any[]).filter(h => - !h.matcher?.includes('apra-fleet') - ); - if (filtered.length !== originalCount) { - console.log(` - Removing ${originalCount - filtered.length} fleet hooks (${eventName})`); - if (!dryRun) settings.hooks[eventName] = filtered; - changed = true; + // 3. Hooks + if (providerKey === 'agy') { + const hooksFile = path.join(os.homedir(), '.gemini', 'config', 'hooks.json'); + if (fs.existsSync(hooksFile)) { + try { + const hooksConfig = JSON.parse(fs.readFileSync(hooksFile, 'utf-8')); + const hooksObj = hooksConfig.hooks || {}; + let hooksChanged = false; + + const hookEventNames = ['PostToolUse', 'PreToolUse', 'UserPromptSubmit', 'Stop', 'PreCompact']; + for (const eventName of hookEventNames) { + if (hooksObj[eventName]) { + const originalCount = hooksObj[eventName].length; + const filtered = (hooksObj[eventName] as any[]).filter(h => + !h.matcher?.includes('apra-fleet') + ); + if (filtered.length !== originalCount) { + console.log(` - Removing ${originalCount - filtered.length} fleet hooks (${eventName}) from hooks.json`); + if (!dryRun) hooksObj[eventName] = filtered; + hooksChanged = true; + changed = true; + } + } + } + if (hooksChanged && !dryRun) { + hooksConfig.hooks = hooksObj; + fs.writeFileSync(hooksFile, JSON.stringify(hooksConfig, null, 2) + '\n'); + } + } catch {} + } + } else { + // Claude uses "PostToolUse", Gemini uses "AfterTool" + const hookEventNames = ['PostToolUse', 'AfterTool']; + for (const eventName of hookEventNames) { + if (settings.hooks?.[eventName]) { + const originalCount = settings.hooks[eventName].length; + const filtered = (settings.hooks[eventName] as any[]).filter(h => + !h.matcher?.includes('apra-fleet') + ); + if (filtered.length !== originalCount) { + console.log(` - Removing ${originalCount - filtered.length} fleet hooks (${eventName})`); + if (!dryRun) settings.hooks[eventName] = filtered; + changed = true; + } } } } @@ -88,7 +139,6 @@ function cleanupSettings(paths: ProviderInstallConfig, dryRun: boolean): boolean } // 5. Default Model - const providerKey = paths.name.toLowerCase() as LlmProvider; const standardModel = PROVIDER_STANDARD_MODELS[providerKey]; if (settings.defaultModel === standardModel) { console.log(` - Removing defaultModel '${standardModel}' (matches fleet standard)`); @@ -118,7 +168,7 @@ Usage: apra-fleet uninstall --help Show this help Options: - --llm Specific provider to clean up: claude, gemini, codex, copilot. + --llm Specific provider to clean up: claude, gemini, codex, copilot, agy. --skill Skills to remove: fleet, pm, or all (default). --dry-run Preview the uninstall process without modifying anything. --force Automatically stop the running server before uninstalling. @@ -189,7 +239,7 @@ Options: const recordedProviders = Object.keys(installConfig.providers) as LlmProvider[]; const isFallback = recordedProviders.length === 0; const providersToClean = targetLlm === 'all' - ? (recordedProviders.length > 0 ? recordedProviders : (['claude', 'gemini', 'codex', 'copilot'] as LlmProvider[])) + ? (recordedProviders.length > 0 ? recordedProviders : (['claude', 'gemini', 'codex', 'copilot', 'agy'] as LlmProvider[])) : [targetLlm]; if (isFallback && targetLlm === 'all') { diff --git a/src/providers/agy.ts b/src/providers/agy.ts new file mode 100644 index 00000000..e0005060 --- /dev/null +++ b/src/providers/agy.ts @@ -0,0 +1,148 @@ +import type { ProviderAdapter, PromptOptions, ParsedResponse } from './provider.js'; +import type { LlmProvider, SSHExecResult } from '../types.js'; +import type { PromptErrorCategory } from '../utils/prompt-errors.js'; +import { classifyPromptError } from '../utils/prompt-errors.js'; +import { escapeDoubleQuoted } from '../os/os-commands.js'; + +export class AgyProvider implements ProviderAdapter { + readonly name: LlmProvider = 'agy'; + readonly processName = 'agy'; + readonly authEnvVar = 'GEMINI_API_KEY'; + readonly credentialPath = '~/.gemini/antigravity-cli/settings.json'; + readonly instructionFileName = 'GEMINI.md'; + + cliCommand(args: string): string { + return `agy ${args}`; + } + + versionCommand(): string { + return 'agy --version 2>&1'; + } + + installCommand(os: 'linux' | 'macos' | 'windows'): string { + return 'npm install -g @google/antigravity-cli'; + } + + updateCommand(): string { + return 'agy update'; + } + + buildPromptCommand(opts: PromptOptions): string { + const { folder, promptFile, sessionId, resuming, unattended, inv } = opts; + const escapedFolder = escapeDoubleQuoted(folder); + let instruction = `Your task is described in ${promptFile} in the current directory. Read that file first, then execute the task.`; + if (inv) { + instruction = `[${inv}] ${instruction}`; + } + + let cmd = `cd "${escapedFolder}" && agy -p "${instruction}"`; + + if (resuming && sessionId) { + cmd += ` --conversation "${escapeDoubleQuoted(sessionId)}"`; + } + + if (unattended === 'dangerous') { + cmd += ' --dangerously-skip-permissions'; + } + + return cmd; + } + + skipPermissionsFlag(): string { + return '--dangerously-skip-permissions'; + } + + permissionModeAutoFlag(): string | null { + return null; + } + + parseResponse(result: SSHExecResult): ParsedResponse { + const raw = result.stdout.trim(); + return { + result: raw, + sessionId: undefined, + isError: result.code !== 0, + raw, + usage: undefined, + }; + } + + supportsResume(): boolean { + return true; + } + + supportsMaxTurns(): boolean { + return false; + } + + resumeFlag(sessionId?: string, resuming?: boolean): string { + if (!sessionId) return ''; + return resuming ? `--conversation "${escapeDoubleQuoted(sessionId)}"` : ''; + } + + modelTiers(): Record<'cheap' | 'standard' | 'premium', string> { + return { + cheap: 'gemini-3.5-flash-lite', + standard: 'gemini-3.5-flash', + premium: 'claude-sonnet-4.6', + }; + } + + modelForTier(tier: 'cheap' | 'mid' | 'premium'): string { + if (tier === 'cheap') return 'gemini-3.5-flash-lite'; + if (tier === 'premium') return 'claude-sonnet-4.6'; + return 'gemini-3.5-flash'; + } + + modelFlag(model: string): string { + return ''; + } + + classifyError(output: string): PromptErrorCategory { + return classifyPromptError(output); + } + + permissionConfigPaths(): string[] { + return ['.gemini/antigravity-cli/settings.json']; + } + + composePermissionConfig(_role: 'doer' | 'reviewer', allow: string[] = []): Array | string> { + return [{ permissions: { allow }, mcpServers: { 'apra-fleet': { disabled: true } }, skillOverrides: { pm: 'off', fleet: 'off' } }]; + } + + supportsOAuthCopy(): boolean { + return false; + } + + supportsApiKey(): boolean { + return true; + } + + oauthCredentialFiles(): Array<{ localPath: string; remotePath: string }> | null { + return null; + } + + oauthSettingsMerge(): Record | null { + return null; + } + + oauthEnvVarsToUnset(): string[] { + return []; + } + + authEnvVarForToken(token: string): string { + return 'GEMINI_API_KEY'; + } + + wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string): string { + return `${setupCmd}Write-Output "FLEET_PID:$pid"; ${filePath} ${argList}`; + } + + jsonOutputFlag(): string { + return ''; + } + + headlessInvocation(promptLiteral: string): string { + return `-p "${promptLiteral}"`; + } +} diff --git a/src/providers/gemini.ts b/src/providers/gemini.ts index dc7c072d..6c8bbde0 100644 --- a/src/providers/gemini.ts +++ b/src/providers/gemini.ts @@ -124,16 +124,16 @@ export class GeminiProvider implements ProviderAdapter { modelTiers(): Record<'cheap' | 'standard' | 'premium', string> { return { - cheap: 'gemini-3.1-flash-lite-preview', - standard: 'gemini-3-flash-preview', + cheap: 'gemini-3.5-flash-lite', + standard: 'gemini-3.5-flash', premium: 'gemini-3.1-pro-preview', }; } modelForTier(tier: 'cheap' | 'mid' | 'premium'): string { - if (tier === 'cheap') return 'gemini-3.1-flash-lite-preview'; + if (tier === 'cheap') return 'gemini-3.5-flash-lite'; if (tier === 'premium') return 'gemini-3.1-pro-preview'; - return 'gemini-3-flash-preview'; + return 'gemini-3.5-flash'; } modelFlag(model: string): string { diff --git a/src/providers/index.ts b/src/providers/index.ts index d0e48831..bf56df74 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -4,12 +4,14 @@ import { ClaudeProvider } from './claude.js'; import { GeminiProvider } from './gemini.js'; import { CodexProvider } from './codex.js'; import { CopilotProvider } from './copilot.js'; +import { AgyProvider } from './agy.js'; const providers: Record = { claude: new ClaudeProvider(), gemini: new GeminiProvider(), codex: new CodexProvider(), copilot: new CopilotProvider(), + agy: new AgyProvider(), }; export function getProvider(llmProvider?: LlmProvider | null): ProviderAdapter { @@ -28,3 +30,4 @@ export { ClaudeProvider } from './claude.js'; export { GeminiProvider } from './gemini.js'; export { CodexProvider } from './codex.js'; export { CopilotProvider } from './copilot.js'; +export { AgyProvider } from './agy.js'; diff --git a/src/services/stall/log-path-resolver.ts b/src/services/stall/log-path-resolver.ts index b50f5dec..30fba82e 100644 --- a/src/services/stall/log-path-resolver.ts +++ b/src/services/stall/log-path-resolver.ts @@ -42,5 +42,9 @@ export function resolveSessionLogPath( return join(home, '.gemini', 'tmp', projectName, 'chats', `${sessionId}.jsonl`); } + if (provider === 'agy' || provider === 'codex' || provider === 'copilot') { + throw new Error(`Unsupported log polling for provider: ${provider}`); + } + throw new Error(`Unknown LLM provider: ${provider}`); } diff --git a/src/tools/execute-prompt.ts b/src/tools/execute-prompt.ts index 90d937ba..a3116fb4 100644 --- a/src/tools/execute-prompt.ts +++ b/src/tools/execute-prompt.ts @@ -156,9 +156,16 @@ export async function executePrompt(input: ExecutePromptInput, extra?: any): Pro const authPrefix = buildAuthEnvPrefix(agent, getAgentOS(agent)); const tiers = provider.modelTiers(); - const resolvedModel = input.model - ? (tiers[input.model as keyof typeof tiers] ?? input.model) - : tiers.standard; + let resolvedModel = input.model || 'standard'; + if (resolvedModel === 'cheap') { + resolvedModel = agent.modelCheap || tiers.cheap; + } else if (resolvedModel === 'standard') { + resolvedModel = agent.modelStandard || tiers.standard; + } else if (resolvedModel === 'premium') { + resolvedModel = agent.modelPremium || tiers.premium; + } else { + resolvedModel = tiers[resolvedModel as keyof typeof tiers] ?? resolvedModel; + } const deprecationWarning = input.dangerously_skip_permissions ? '⚠️ DEPRECATION: dangerously_skip_permissions is deprecated and ignored. Use update_member(unattended="dangerous") instead.\n\n' diff --git a/src/tools/register-member.ts b/src/tools/register-member.ts index 400c0c21..8509da63 100644 --- a/src/tools/register-member.ts +++ b/src/tools/register-member.ts @@ -15,6 +15,7 @@ import { awsProvider } from '../services/cloud/aws.js'; import { collectOobPassword, collectOobApiKey } from '../services/auth-socket.js'; import { classifySshError } from '../utils/ssh-error-messages.js'; import { logLine } from '../utils/log-helpers.js'; +import { CURATED_CHEAP_MODELS, CURATED_STANDARD_MODELS, CURATED_PREMIUM_MODELS } from '../cli/config.js'; export const registerMemberSchema = z.object({ friendly_name: z.string() @@ -40,7 +41,10 @@ export const registerMemberSchema = z.object({ cloud_profile: z.string().optional().describe('AWS CLI profile name (e.g. "apra")'), cloud_idle_timeout_min: z.number().min(1, 'cloud_idle_timeout_min must be at least 1 minute').max(1440, 'cloud_idle_timeout_min must be at most 1440 minutes (24 hours)').optional().default(30).describe('Minutes of inactivity before auto-stop (default: 30)'), cloud_activity_command: z.string().min(1).optional().describe('Custom shell command for workload detection. Must output "busy" or "idle" on stdout. Checked after GPU, before process check. Useful for CPU-intensive tasks, downloads, or any non-GPU workload.'), - llm_provider: z.enum(['claude', 'gemini', 'codex', 'copilot']).optional().default('claude').describe('LLM provider for this member (default: "claude"). Determines which CLI is used for execute_prompt, provision_llm_auth, and update_llm_cli.'), + llm_provider: z.enum(['claude', 'gemini', 'codex', 'copilot', 'agy']).optional().default('claude').describe('LLM provider for this member (default: "claude"). Determines which CLI is used for execute_prompt, provision_llm_auth, and update_llm_cli.'), + model_cheap: z.enum(CURATED_CHEAP_MODELS).optional().describe('Custom cheap model choice from a curated list'), + model_standard: z.enum(CURATED_STANDARD_MODELS).optional().describe('Custom standard model choice from a curated list'), + model_premium: z.enum(CURATED_PREMIUM_MODELS).optional().describe('Custom premium model choice from a curated list'), unattended: z.union([z.literal(false), z.literal('auto'), z.literal('dangerous')]).optional().describe('Permission mode for unattended execution. false (default) = interactive prompts; "auto" = auto-approve safe operations; "dangerous" = skip all permission checks.'), }); @@ -173,6 +177,9 @@ export async function registerMember(input: RegisterMemberInput): Promise { if (resolvedIcon) updates.icon = resolvedIcon; if (input.friendly_name) updates.friendlyName = input.friendly_name; if (input.llm_provider !== undefined) updates.llmProvider = input.llm_provider; + if (input.model_cheap !== undefined) updates.modelCheap = input.model_cheap; + if (input.model_standard !== undefined) updates.modelStandard = input.model_standard; + if (input.model_premium !== undefined) updates.modelPremium = input.model_premium; if (input.unattended !== undefined) updates.unattended = input.unattended; if (input.host) updates.host = input.host; if (input.port) updates.port = input.port; @@ -172,6 +179,10 @@ export async function updateMember(input: UpdateMemberInput): Promise { if (updated.authType) { result += ` Auth: ${updated.authType}\n`; } + result += ` Provider: ${updated.llmProvider ?? 'claude'}\n`; + if (updated.modelCheap) result += ` Model Cheap: ${updated.modelCheap}\n`; + if (updated.modelStandard) result += ` Model Standard: ${updated.modelStandard}\n`; + if (updated.modelPremium) result += ` Model Premium: ${updated.modelPremium}\n`; if (warnings.length > 0) { result += '\n'; diff --git a/src/types.ts b/src/types.ts index 20de1fb5..3059abb7 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,7 +1,7 @@ export type { CloudConfig } from './services/cloud/types.js'; import type { CloudConfig } from './services/cloud/types.js'; -export type LlmProvider = 'claude' | 'gemini' | 'codex' | 'copilot'; +export type LlmProvider = 'claude' | 'gemini' | 'codex' | 'copilot' | 'agy'; export interface Agent { id: string; @@ -25,6 +25,9 @@ export interface Agent { vcsProvider?: 'github' | 'bitbucket' | 'azure-devops'; vcsTokenExpiresAt?: string; // ISO 8601 llmProvider?: LlmProvider; // default: 'claude' for backwards compat + modelCheap?: string; + modelStandard?: string; + modelPremium?: string; encryptedEnvVars?: Record; // envVarName -> encrypted value lastBranch?: string; tokenUsage?: { input: number; output: number }; diff --git a/tests/install-multi-provider.test.ts b/tests/install-multi-provider.test.ts index bada13ab..47d276c8 100644 --- a/tests/install-multi-provider.test.ts +++ b/tests/install-multi-provider.test.ts @@ -349,7 +349,7 @@ describe('runInstall multi-provider', () => { expect(parsed.defaultModel).toBe('claude-sonnet-4-6'); }); - it('writes defaultModel for Gemini (gemini-3-flash-preview) to settings.json', async () => { + it('writes defaultModel for Gemini (gemini-3.5-flash) to settings.json', async () => { await runInstall(['--llm', 'gemini']); const geminiSettings = path.join(mockHome, '.gemini', 'settings.json'); @@ -360,7 +360,7 @@ describe('runInstall multi-provider', () => { const defaultModelWrite = writes.find(c => c[1].toString().includes('"defaultModel"')); expect(defaultModelWrite).toBeDefined(); const parsed = JSON.parse(defaultModelWrite![1].toString()); - expect(parsed.defaultModel).toBe('gemini-3-flash-preview'); + expect(parsed.defaultModel).toBe('gemini-3.5-flash'); }); it('writes defaultModel for Codex (gpt-5.4) to config.toml', async () => { diff --git a/tests/log-path-resolver.test.ts b/tests/log-path-resolver.test.ts index 8dc04b7c..0d2d7ea9 100644 --- a/tests/log-path-resolver.test.ts +++ b/tests/log-path-resolver.test.ts @@ -59,6 +59,17 @@ describe('resolveSessionLogPath', () => { }).not.toThrow(); }); + it('throws error for agy (unsupported log polling)', () => { + expect(() => { + resolveSessionLogPath( + 'agy', + 'session-123', + '/tmp/project', + '/home/user' + ); + }).toThrow('Unsupported log polling for provider: agy'); + }); + it('throws error for unknown provider', () => { expect(() => { resolveSessionLogPath( diff --git a/tests/providers.test.ts b/tests/providers.test.ts index d29747b7..84b4ddf0 100644 --- a/tests/providers.test.ts +++ b/tests/providers.test.ts @@ -3,6 +3,7 @@ import { ClaudeProvider } from '../src/providers/claude.js'; import { GeminiProvider } from '../src/providers/gemini.js'; import { CodexProvider } from '../src/providers/codex.js'; import { CopilotProvider } from '../src/providers/copilot.js'; +import { AgyProvider } from '../src/providers/agy.js'; import { getProvider } from '../src/providers/index.js'; import { buildResumeFlag, buildSessionIdFlag } from '../src/providers/provider.js'; import type { SSHExecResult } from '../src/types.js'; @@ -400,15 +401,15 @@ describe('GeminiProvider', () => { }); it('maps model tiers', () => { - expect(p.modelForTier('cheap')).toBe('gemini-3.1-flash-lite-preview'); - expect(p.modelForTier('mid')).toBe('gemini-3-flash-preview'); + expect(p.modelForTier('cheap')).toBe('gemini-3.5-flash-lite'); + expect(p.modelForTier('mid')).toBe('gemini-3.5-flash'); expect(p.modelForTier('premium')).toBe('gemini-3.1-pro-preview'); }); it('modelTiers() returns cheap/standard/premium mapping', () => { const tiers = p.modelTiers(); - expect(tiers.cheap).toBe('gemini-3.1-flash-lite-preview'); - expect(tiers.standard).toBe('gemini-3-flash-preview'); + expect(tiers.cheap).toBe('gemini-3.5-flash-lite'); + expect(tiers.standard).toBe('gemini-3.5-flash'); expect(tiers.premium).toBe('gemini-3.1-pro-preview'); }); @@ -861,3 +862,59 @@ describe('backwards compatibility', () => { expect(cmd).toContain('--max-turns 50'); }); }); + +describe('AgyProvider', () => { + const p = new AgyProvider(); + + it('has correct metadata', () => { + expect(p.name).toBe('agy'); + expect(p.processName).toBe('agy'); + expect(p.authEnvVar).toBe('GEMINI_API_KEY'); + expect(p.credentialPath).toBe('~/.gemini/antigravity-cli/settings.json'); + expect(p.instructionFileName).toBe('GEMINI.md'); + }); + + it('builds cliCommand', () => { + expect(p.cliCommand('--version')).toBe('agy --version'); + }); + + it('builds versionCommand', () => { + expect(p.versionCommand()).toBe('agy --version 2>&1'); + }); + + it('builds installCommand', () => { + expect(p.installCommand('linux')).toBe('npm install -g @google/antigravity-cli'); + }); + + it('builds updateCommand', () => { + expect(p.updateCommand()).toBe('agy update'); + }); + + it('builds prompt command with defaults', () => { + const cmd = p.buildPromptCommand({ folder: '/home/user/project', promptFile: '.fleet-task.md' }); + expect(cmd).toContain('agy -p'); + expect(cmd).not.toContain('--model'); + expect(cmd).not.toContain('--conversation'); + expect(cmd).not.toContain('--dangerously-skip-permissions'); + }); + + it('builds prompt command with resume flag', () => { + const cmd = p.buildPromptCommand({ folder: '/home/user/project', promptFile: '.fleet-task.md', sessionId: 'sess-abc', resuming: true }); + expect(cmd).toContain('--conversation "sess-abc"'); + }); + + it('builds prompt command with unattended=dangerous', () => { + const cmd = p.buildPromptCommand({ folder: '/home/user/project', promptFile: '.fleet-task.md', unattended: 'dangerous' }); + expect(cmd).toContain('--dangerously-skip-permissions'); + }); + + it('modelFlag returns empty string', () => { + expect(p.modelFlag('gemini-3.5-flash')).toBe(''); + }); + + it('modelTiers and modelForTier return correct mappings', () => { + expect(p.modelForTier('cheap')).toBe('gemini-3.5-flash-lite'); + expect(p.modelForTier('mid')).toBe('gemini-3.5-flash'); + expect(p.modelForTier('premium')).toBe('claude-sonnet-4.6'); + }); +}); diff --git a/tests/security-hardening.test.ts b/tests/security-hardening.test.ts index 8b35cd95..2e9f37a7 100644 --- a/tests/security-hardening.test.ts +++ b/tests/security-hardening.test.ts @@ -188,6 +188,44 @@ describe('registerMemberSchema cloud config validation', () => { }); }); +describe('curated model tier validations', () => { + it('accepts valid/optional and rejects invalid model inputs for registration and update schemas', () => { + // cheap + expect(registerMemberSchema.shape.model_cheap.safeParse(undefined).success).toBe(true); + expect(registerMemberSchema.shape.model_cheap.safeParse('gpt-oss-120b').success).toBe(true); + expect(registerMemberSchema.shape.model_cheap.safeParse('gemini-3.5-flash-lite').success).toBe(true); + expect(registerMemberSchema.shape.model_cheap.safeParse('claude-opus-4.6').success).toBe(false); + + expect(updateMemberSchema.shape.model_cheap.safeParse(undefined).success).toBe(true); + expect(updateMemberSchema.shape.model_cheap.safeParse('gpt-oss-120b').success).toBe(true); + expect(updateMemberSchema.shape.model_cheap.safeParse('gemini-3.5-flash-lite').success).toBe(true); + expect(updateMemberSchema.shape.model_cheap.safeParse('claude-opus-4.6').success).toBe(false); + + // standard + expect(registerMemberSchema.shape.model_standard.safeParse(undefined).success).toBe(true); + expect(registerMemberSchema.shape.model_standard.safeParse('gemini-3.5-flash').success).toBe(true); + expect(registerMemberSchema.shape.model_standard.safeParse('claude-sonnet-4.6').success).toBe(true); + expect(registerMemberSchema.shape.model_standard.safeParse('claude-haiku-4-5').success).toBe(false); + + expect(updateMemberSchema.shape.model_standard.safeParse(undefined).success).toBe(true); + expect(updateMemberSchema.shape.model_standard.safeParse('gemini-3.5-flash').success).toBe(true); + expect(updateMemberSchema.shape.model_standard.safeParse('claude-sonnet-4.6').success).toBe(true); + expect(updateMemberSchema.shape.model_standard.safeParse('claude-haiku-4-5').success).toBe(false); + + // premium + expect(registerMemberSchema.shape.model_premium.safeParse(undefined).success).toBe(true); + expect(registerMemberSchema.shape.model_premium.safeParse('claude-opus-4.6').success).toBe(true); + expect(registerMemberSchema.shape.model_premium.safeParse('gemini-3.1-pro-preview').success).toBe(true); + expect(registerMemberSchema.shape.model_premium.safeParse('gemini-3.5-flash-lite').success).toBe(false); + + expect(updateMemberSchema.shape.model_premium.safeParse(undefined).success).toBe(true); + expect(updateMemberSchema.shape.model_premium.safeParse('claude-opus-4.6').success).toBe(true); + expect(updateMemberSchema.shape.model_premium.safeParse('gemini-3.1-pro-preview').success).toBe(true); + expect(updateMemberSchema.shape.model_premium.safeParse('gemini-3.5-flash-lite').success).toBe(false); + }); +}); + + // --- T1: Credential leakage audit --- // Mocks for lifecycle test diff --git a/tests/tool-provider.test.ts b/tests/tool-provider.test.ts index b65fdf74..3998c35c 100644 --- a/tests/tool-provider.test.ts +++ b/tests/tool-provider.test.ts @@ -88,6 +88,23 @@ describe('executePrompt — provider routing', () => { expect(cmd).toContain('gemini'); }); + it('routes Agy member through agy CLI and parses response', async () => { + const member = makeTestAgent({ friendlyName: 'agy-member', llmProvider: 'agy' }); + addAgent(member); + mockExecCommand.mockResolvedValue({ + stdout: 'agy response', + stderr: '', + code: 0, + }); + + const result = await executePrompt({ member_id: member.id, prompt: 'hi', resume: false, timeout_s: 5 }); + expect(result).toContain('agy response'); + + // calls[0] = writePromptFile, calls[1] = main prompt command + const cmd = mockExecCommand.mock.calls[1][0] as string; + expect(cmd).toContain('agy -p'); + }); + it('routes Codex member through codex CLI', async () => { const member = makeTestAgent({ friendlyName: 'codex-member', llmProvider: 'codex' }); addAgent(member); @@ -166,7 +183,7 @@ describe('provisionAuth — API key per provider', () => { restoreRegistry(); }); - const providerNames: LlmProvider[] = ['claude', 'gemini', 'codex', 'copilot']; + const providerNames: LlmProvider[] = ['claude', 'gemini', 'codex', 'copilot', 'agy']; for (const llmProvider of providerNames) { it(`provisions ${llmProvider} API key using correct env var`, async () => { @@ -279,6 +296,7 @@ describe('fleetProcessCheck — processName per provider', () => { { provider: 'gemini', processName: 'gemini' }, { provider: 'codex', processName: 'codex' }, { provider: 'copilot', processName: 'copilot' }, + { provider: 'agy', processName: 'agy' }, ]; for (const { provider, processName } of cases) { From d989dcd0a78463aef56378848d788baa868e2ac7 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 12:35:20 -0400 Subject: [PATCH 02/33] docs(agy): align provider docs to treat agy as primary and demote gemini --- README.md | 24 ++++---- docs/FAQ.md | 2 +- docs/architecture.md | 39 ++++++------- docs/features/stall-detector.md | 3 + docs/install.md | 15 +++-- docs/provider-guide.md | 17 +++--- docs/provider-matrix.md | 54 +++++++++--------- llms-full.txt | 97 +++++++++++++++++---------------- llms.txt | 2 +- skills/fleet/SKILL.md | 10 ++-- skills/fleet/onboarding.md | 7 ++- skills/fleet/troubleshooting.md | 12 ++-- src/cli/install.ts | 1 + 13 files changed, 151 insertions(+), 132 deletions(-) diff --git a/README.md b/README.md index 61303daf..f8fe4529 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,14 @@ ### One goal. A team of AI agents that plan, execute, and review each other's work, and run across every machine you own. Apra Fleet is an open-source **MCP server** that turns AI agents (Claude -Code, Gemini, Codex, Copilot) into a coordinated team instead of a lone +Code, Antigravity, Codex, Copilot, Gemini) into a coordinated team instead of a lone assistant. Any job that needs more than one agent -- software sprints, customer-support triage, cost and operations-efficiency analysis, infrastructure surveys -- becomes a fleet you direct in plain conversation. Need more horsepower? Fleet reaches across every machine on your network over SSH -- no dashboards, no orchestration YAML. -**The agents need not share a vendor.** A Claude agent and a Gemini agent can +**The agents need not share a vendor.** A Claude agent and an Antigravity agent can work the same sprint -- one writes, the other reviews -- so a different model, with different blind spots, checks every change. Cross-provider collaboration is a built-in quality mechanism, not an afterthought. @@ -64,11 +64,11 @@ curl -fsSL https://github.com/Apra-Labs/apra-fleet/releases/latest/download/apra Invoke-WebRequest -Uri https://github.com/Apra-Labs/apra-fleet/releases/latest/download/apra-fleet-installer-win-x64.exe -OutFile apra-fleet-installer.exe; .\apra-fleet-installer.exe install ``` -> Installing for **Gemini**, Codex, or Copilot instead of Claude? Add the +> Installing for **Antigravity**, Codex, Copilot, or Gemini instead of Claude? Add the > `--llm` flag -- see -> [Install for Gemini and other providers](docs/install.md#install-for-gemini-and-other-providers). +> [Install for other providers](docs/install.md#install-for-other-providers-antigravity-codex-copilot-gemini). -Then load it in your favorite LLM CLI (claude, gemini, ...) using `/mcp`. +Then load it in your favorite LLM CLI (claude, agy, gemini, ...) using `/mcp`. Now register your first members: @@ -211,23 +211,23 @@ When *not* to use Fleet: a one-off single-file change needs no second agent. ## Mix providers in one fleet Every member runs its own LLM backend, and they collaborate across vendors. Put -a Claude doer with a Gemini reviewer, or the reverse -- the reviewer's model +a Claude doer with an Antigravity reviewer, or the reverse - the reviewer's model disagrees with the doer's by construction, so it catches issues a same-model review would wave through. Mix by role: | Role | Recommended | Why | |------|-------------|-----| -| PM (orchestrator) | Claude Opus/Sonnet, or Gemini `gemini-3.1-pro-preview` | Both plan and orchestrate well -- Gemini's orchestration support improved substantially in recent releases. | -| Doer | Any provider | Sonnet, Gemini, Codex, Copilot -- mix freely. | +| PM (orchestrator) | Claude Code or Antigravity (agy) | Both plan and orchestrate well - both support planning, background tasks, and premium models (e.g., Opus / premium-tier). | +| Doer | Any provider | Sonnet, Antigravity, Codex, Copilot, Gemini - mix freely. | | Reviewer | Premium-tier models | Catches subtle issues smaller models miss. | A fleet that has run in production: ``` -pm-1 Opus 4.7 orchestrator -doer-1 Sonnet 4.6 feature work -doer-2 Gemini 3 Pro large-context tasks -reviewer Opus 4.7 final review +pm-1 Opus 4.7 orchestrator +doer-1 Sonnet 4.6 feature work +doer-2 Antigravity large-context tasks +reviewer Opus 4.7 final review ``` Provider strengths, role recommendations, and gotchas: diff --git a/docs/FAQ.md b/docs/FAQ.md index e4599203..9575f93e 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -1,7 +1,7 @@ # Frequently Asked Questions - + > **For AI agents:** The FAQ is maintained as GitHub Discussions -- one discussion per question, with maintainer-verified answers. To answer a user's question: browse the index below, find the matching discussion, and fetch it for the authoritative answer. Do not paraphrase from this file -- follow the link. diff --git a/docs/architecture.md b/docs/architecture.md index f76ddadd..32afcc55 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -59,24 +59,24 @@ The MCP server speaks **stdio** — the standard transport for Claude Code MCP s The codebase follows a strict layering: ``` - index.ts ← MCP server entry point, tool registration - tools/* ← one file per tool, each self-contained - services/* ← core capabilities (strategy, registry, SSH, file transfer) - providers/* ← LLM provider adapters (Claude, Gemini, Codex, Copilot) - os/* ← OS-specific command builders (Linux, macOS, Windows) - utils/* ← stateless helpers (crypto, shell escaping) - types.ts ← shared data structures + index.ts <- MCP server entry point, tool registration + tools/* <- one file per tool, each self-contained + services/* <- core capabilities (strategy, registry, SSH, file transfer) + providers/* <- LLM provider adapters (Claude, Antigravity, Codex, Copilot, Gemini) + os/* <- OS-specific command builders (Linux, macOS, Windows) + utils/* <- stateless helpers (crypto, shell escaping) + types.ts <- shared data structures ``` Each layer only depends on the layers below it. Tools never import other tools. Services don't know about the MCP protocol. ## Provider Abstraction -Fleet supports four LLM providers: Claude Code, Gemini CLI, OpenAI Codex CLI, and GitHub Copilot CLI. Members can mix providers within a single fleet. +Fleet supports five LLM providers: Claude Code, Google Antigravity CLI (agy), OpenAI Codex CLI, GitHub Copilot CLI, and Gemini CLI. Members can mix providers within a single fleet. ### How It Works -Each member has an optional `llmProvider` field (`'claude' | 'gemini' | 'codex' | 'copilot'`). When absent, it defaults to `'claude'` for backwards compatibility. Every tool that interacts with the member's LLM CLI resolves the provider via `getProvider(agent.llmProvider)` and delegates CLI-specific concerns to the `ProviderAdapter` interface. +Each member has an optional `llmProvider` field (`'claude' | 'agy' | 'codex' | 'copilot' | 'gemini'`). When absent, it defaults to `'claude'` for backwards compatibility. Every tool that interacts with the member's LLM CLI resolves the provider via `getProvider(agent.llmProvider)` and delegates CLI-specific concerns to the `ProviderAdapter` interface. ``` ┌──────────┐ getProvider() ┌─────────────────┐ @@ -99,12 +99,13 @@ The `OsCommands` layer sits below this: it handles OS-specific shell wrapping (P ``` src/providers/ - provider.ts — ProviderAdapter interface + shared types - claude.ts — ClaudeProvider - gemini.ts — GeminiProvider - codex.ts — CodexProvider (NDJSON parser) - copilot.ts — CopilotProvider - index.ts — getProvider() singleton factory + provider.ts - ProviderAdapter interface + shared types + claude.ts - ClaudeProvider + agy.ts - AgyProvider + codex.ts - CodexProvider (NDJSON parser) + copilot.ts - CopilotProvider + gemini.ts - GeminiProvider + index.ts - getProvider() singleton factory ``` ### Mix-and-Match Fleet @@ -124,10 +125,10 @@ All four members use the same `execute_prompt` tool call. The tool builds provid ### Key Differences Across Providers -- **`max_turns`** — Claude-only. Ignored for Gemini, Codex, and Copilot. -- **OAuth credential copy** — Claude-only. Non-Claude providers require an API key (`provision_llm_auth` with `api_key`). -- **JSON output format** — Codex emits NDJSON (one event per line). All others emit a single JSON object. Handled transparently by `provider.parseResponse()`. -- **Session resume** — Claude stores a server-side session ID. Others resume the most recent local session via a generic flag. +- **`max_turns`** - Claude-only. Ignored for Antigravity, Codex, Copilot, and Gemini. +- **OAuth credential copy** - Claude-only. Non-Claude providers require an API key (`provision_llm_auth` with `api_key`). +- **JSON output format** - Codex emits NDJSON (one event per line). All others emit a single JSON object. Handled transparently by `provider.parseResponse()`. +- **Session resume** - Claude, Antigravity, and Gemini support resuming specific session IDs. Codex and Copilot resume the most recent local session. See `docs/provider-matrix.md` for the full comparison table. diff --git a/docs/features/stall-detector.md b/docs/features/stall-detector.md index f770a15b..1d7f3808 100644 --- a/docs/features/stall-detector.md +++ b/docs/features/stall-detector.md @@ -97,6 +97,9 @@ function sessionLogDir(provider: string, workFolder: string): string { const encoded = workFolder.replace(/[\/\\:]/g, '-'); return join(home, '.claude', 'projects', encoded); } + if (provider === 'agy') { + throw new Error("Stall detection log polling not supported"); + } if (provider === 'gemini') { return join(home, '.gemini', 'tmp', basename(workFolder), 'chats'); } diff --git a/docs/install.md b/docs/install.md index 6cdda480..2fb24525 100644 --- a/docs/install.md +++ b/docs/install.md @@ -5,8 +5,8 @@ which skills are installed, uninstalling, and self-updating. ## Requirements -- An AI coding agent CLI on the machine where you run Fleet -- Claude Code, - Gemini, Codex, or Copilot. +- An AI coding agent CLI on the machine where you run Fleet - Claude Code, + Antigravity (agy), Codex, Copilot, or Gemini. - SSH access to any remote machines you want to register as members. The local machine needs nothing extra; remote members need only an SSH server. @@ -66,6 +66,8 @@ chmod +x apra-fleet-installer-linux-x64 && ./apra-fleet-installer-linux-x64 inst | `~/.claude/skills/fleet/` | Fleet skill (MCP tool docs for Claude) | | `~/.claude/skills/pm/` | PM orchestration skill | +For other providers, these are written to that provider's skill/config directories. For example, for Antigravity (`agy`), settings are written to `~/.gemini/antigravity-cli/settings.json`, and hooks / MCP configs are merged into `~/.gemini/config/hooks.json` and `~/.gemini/config/mcp_config.json`. + The install also registers the MCP server (`claude mcp add apra-fleet`) and configures a status bar icon showing fleet member activity. @@ -91,15 +93,16 @@ control exactly which skills are installed: | `install --skill none` | neither | | `install --no-skill` | neither (same as `--skill none`) | -## Install for Gemini and other providers +## Install for other providers (Antigravity, Codex, Copilot, Gemini) By default, `install` configures Apra Fleet for **Claude Code**. Use the `--llm` flag to install for a different provider instead: ```bash -apra-fleet install --llm gemini # Gemini CLI +apra-fleet install --llm agy # Google Antigravity CLI apra-fleet install --llm codex # OpenAI Codex CLI apra-fleet install --llm copilot # GitHub Copilot CLI +apra-fleet install --llm gemini # Gemini CLI apra-fleet install --llm claude # Claude Code (the default) ``` @@ -110,7 +113,7 @@ provider's config directory -- for example `~/.gemini/` for Gemini -- instead of `install` once per provider. `--llm` combines with `--skill`, e.g. `apra-fleet install --llm gemini --skill -pm`. Supported values: `claude` (default), `gemini`, `codex`, `copilot`. +pm`. Supported values: `claude` (default), `agy`, `codex`, `copilot`, `gemini`. After a non-Claude install, load the server by restarting that provider's CLI -- only Claude Code uses `/mcp`. @@ -139,7 +142,7 @@ apra-fleet uninstall | `--dry-run` | Preview what would be removed, without modifying anything | | `--force` | Automatically stop the running fleet server before uninstalling | | `--yes` | Skip the confirmation prompt | -| `--llm ` | Remove only a specific provider (`claude`, `gemini`, `codex`, `copilot`) | +| `--llm ` | Remove only a specific provider (`claude`, `agy`, `codex`, `copilot`, `gemini`) | | `--skill fleet\|pm\|all` | Remove only the specified skill directories (default: `all`) | Examples: diff --git a/docs/provider-guide.md b/docs/provider-guide.md index 8ccc7cc2..a15aa65a 100644 --- a/docs/provider-guide.md +++ b/docs/provider-guide.md @@ -4,26 +4,27 @@ # Choosing an LLM Provider -Fleet supports Claude, Gemini, Codex, and Copilot. Members can run different providers and mix them freely within a single fleet. +Fleet supports Claude, Antigravity (agy), Codex, Copilot, and Gemini. Members can run different providers and mix them freely within a single fleet. ## Provider strengths -- **Claude** -- Balanced coding and reasoning; fine-grained per-tool permissions via `settings.local.json`. -- **Gemini** -- 1M-token native context window; built-in Google Search for researching APIs and docs without an external tool. -- **Codex** -- Structured-output enforcement via `--output-schema`; native subagent parallelism for concurrent subtasks with less orchestration overhead. -- **Copilot** -- Multi-model marketplace (Claude + GPT families in one CLI); auto-compaction keeps sessions running indefinitely. +- **Claude** - Balanced coding and reasoning; fine-grained per-tool permissions via `settings.local.json`. +- **Antigravity** - High-performance Gemini-based agentic CLI; supports large context windows, background tasks, and native beads task tracking. +- **Codex** - Structured-output enforcement via `--output-schema`; native subagent parallelism for concurrent subtasks with less orchestration overhead. +- **Copilot** - Multi-model marketplace (Claude + GPT families in one CLI); auto-compaction keeps sessions running indefinitely. +- **Gemini** - 1M-token native context window; built-in Google Search for researching APIs and docs without an external tool. ## Recommended provider by role | Role | Recommended | Why | |------|-------------|-----| -| PM (orchestrator) | Claude Opus/Sonnet, or Gemini `gemini-3.1-pro-preview` | Both plan and orchestrate well -- Gemini's orchestration support improved substantially in recent releases. | -| Doer | Any provider | Sonnet, Gemini, Codex, Copilot -- mix freely. | +| PM (orchestrator) | Claude Code or Antigravity (agy) | Both plan and orchestrate well - both support planning, background tasks, and premium models (e.g., Opus / premium-tier). | +| Doer | Any provider | Sonnet, Antigravity, Codex, Copilot, Gemini - mix freely. | | Reviewer | Premium-tier models | Catches subtle issues smaller models miss. | ## Gotchas worth knowing -- **`max_turns` is Claude-only.** On Gemini, Codex, and Copilot, use `timeout_s` instead to bound execution time. +- **`max_turns` is Claude-only.** On Gemini, Codex, Copilot, and Antigravity, use `timeout_s` instead to bound execution time. - **Copilot needs a paid GitHub Copilot subscription** (Pro, Business, or Enterprise) and has the smallest context window (64K). It is best suited for smaller, focused tasks. --- diff --git a/docs/provider-matrix.md b/docs/provider-matrix.md index 4f6f9c86..8ac1e853 100644 --- a/docs/provider-matrix.md +++ b/docs/provider-matrix.md @@ -12,27 +12,27 @@ Reference tables for all LLM providers supported by Apra Fleet. Extracted from ` ## Strategic Comparison -| Feature | Claude Code | Gemini CLI | OpenAI Codex CLI | GitHub Copilot CLI | -|---------|-------------|------------|------------------|-------------------| -| **Install** | Native binary / `curl \| bash` | `npm i -g @google/gemini-cli` (Node 20+) | `npm i -g @openai/codex` / Homebrew / binary (Node 18+) | `npm i -g @github/copilot` / Homebrew / WinGet | -| **Headless prompt** | `claude -p "..."` | `gemini -p "..."` | `codex exec "..."` | `copilot -p "..."` | -| **Session resume** | `--resume ` | `-r` / `--resume` (loads most recent) | `codex exec resume` (positional) | `--continue` / `--resume` | -| **JSON output** | `--output-format json` | `--output-format json` (also `stream-json`) | `--json` (NDJSON -- one event per state change) | `--format json` | -| **Model selection** | `--model opus/sonnet/haiku` | `--model ` or `GEMINI_MODEL` env var | `--model` / `-m` | `--model ` or `/model` interactive | -| **Max turns** | `--max-turns N` | **Not available** | **Not available** | **Not available** (auto-compaction) | -| **Skip permissions** | `--dangerously-skip-permissions` | `--yolo` / `-y` | `--ask-for-approval never` + `--sandbox danger-full-access` | `--allow-all-tools` / `--yolo` | -| **Auth env var** | `ANTHROPIC_API_KEY` | `GEMINI_API_KEY` | `OPENAI_API_KEY` (or `CODEX_API_KEY` in exec mode) | `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / `GITHUB_TOKEN` | -| **OAuth / login** | `~/.claude/.credentials.json` (copyable) | Google OAuth (browser-based) | `codex login` (ChatGPT account or API key) | `gh auth login` or `/login` (device flow) | -| **Version check** | `claude --version` | `gemini --version` | `codex --version` | `copilot --version` | -| **Install cmd (Linux)** | `curl -fsSL https://claude.ai/install.sh \| bash` | `npm i -g @google/gemini-cli` | `npm i -g @openai/codex` | `curl -fsSL https://gh.io/copilot-install \| bash` | -| **Install cmd (macOS)** | `curl -fsSL https://claude.ai/install.sh \| bash` | `npm i -g @google/gemini-cli` | `brew install --cask codex` | `brew install --cask copilot` | -| **Install cmd (Windows)** | `irm https://claude.ai/install.ps1 \| iex` | `npm i -g @google/gemini-cli` | Binary from GitHub releases (experimental) | `winget install GitHub.CopilotCLI` | -| **Update command** | `claude update` | `npm update -g @google/gemini-cli` | `npm update -g @openai/codex` | `copilot update` | -| **Process name** | `claude` | `gemini` | `codex` | `copilot` | -| **Credential path** | `~/.claude/.credentials.json` | `~/.gemini/` | `~/.codex/` | `~/.config/gh/` or `~/.copilot/` | -| **Session storage** | Fleet-minted UUID; passed as `--session-id `; resumed with `--resume ` | Fleet-minted UUID; passed as `--session-id `; resumed with `--resume ` | Local (exec resume) | Local: `~/.copilot/session-state/` (SQLite) | -| **Agentic capabilities** | File edit, shell, MCP tools | File edit, shell, web search, MCP tools | File edit, shell, MCP tools, subagents | File edit, shell, MCP tools, custom agents | -| **Context window** | 200K (Sonnet) / 1M (Opus 4.7) | 1M tokens | 192K tokens | 64K tokens (auto-compaction at 95%) | +| Feature | Claude Code | Google Antigravity CLI (agy) | OpenAI Codex CLI | GitHub Copilot CLI | Gemini CLI | +|---------|-------------|------------------------------|------------------|-------------------|------------| +| **Install** | Native binary / `curl \| bash` | `npm install -g @google/antigravity-cli` | `npm i -g @openai/codex` / Homebrew / binary (Node 18+) | `npm i -g @github/copilot` / Homebrew / WinGet | `npm i -g @google/gemini-cli` (Node 20+) | +| **Headless prompt** | `claude -p "..."` | `agy -p "..."` | `codex exec "..."` | `copilot -p "..."` | `gemini -p "..."` | +| **Session resume** | `--resume ` | `--conversation ""` | `codex exec resume` (positional) | `--continue` / `--resume` | `-r` / `--resume` (loads most recent) | +| **JSON output** | `--output-format json` | **Not available** | `--json` (NDJSON -- one event per state change) | `--format json` | `--output-format json` (also `stream-json`) | +| **Model selection** | `--model opus/sonnet/haiku` | **Not available** (custom models configured in apra-fleet registry) | `--model` / `-m` | `--model ` or `/model` interactive | `--model ` or `GEMINI_MODEL` env var | +| **Max turns** | `--max-turns N` | **Not available** | **Not available** | **Not available** (auto-compaction) | **Not available** | +| **Skip permissions** | `--dangerously-skip-permissions` | `--dangerously-skip-permissions` | `--ask-for-approval never` + `--sandbox danger-full-access` | `--allow-all-tools` / `--yolo` | `--yolo` / `-y` | +| **Auth env var** | `ANTHROPIC_API_KEY` | `GEMINI_API_KEY` | `OPENAI_API_KEY` (or `CODEX_API_KEY` in exec mode) | `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / `GITHUB_TOKEN` | `GEMINI_API_KEY` | +| **OAuth / login** | `~/.claude/.credentials.json` (copyable) | Browser OAuth / settings.json | `codex login` (ChatGPT account or API key) | `gh auth login` or `/login` (device flow) | Google OAuth (browser-based) | +| **Version check** | `claude --version` | `agy --version 2>&1` | `codex --version` | `copilot --version` | `gemini --version` | +| **Install cmd (Linux)** | `curl -fsSL https://claude.ai/install.sh \| bash` | `npm install -g @google/antigravity-cli` | `npm i -g @openai/codex` | `curl -fsSL https://gh.io/copilot-install \| bash` | `npm i -g @google/gemini-cli` | +| **Install cmd (macOS)** | `curl -fsSL https://claude.ai/install.sh \| bash` | `npm install -g @google/antigravity-cli` | `brew install --cask codex` | `brew install --cask copilot` | `npm i -g @google/gemini-cli` | +| **Install cmd (Windows)** | `irm https://claude.ai/install.ps1 \| iex` | `npm install -g @google/antigravity-cli` | Binary from GitHub releases (experimental) | `winget install GitHub.CopilotCLI` | `npm i -g @google/gemini-cli` | +| **Update command** | `claude update` | `agy update` | `npm update -g @openai/codex` | `copilot update` | `npm update -g @google/gemini-cli` | +| **Process name** | `claude` | `agy` | `codex` | `copilot` | `gemini` | +| **Credential path** | `~/.claude/.credentials.json` | `~/.gemini/antigravity-cli/settings.json` | `~/.codex/` | `~/.config/gh/` or `~/.copilot/` | `~/.gemini/` | +| **Session storage** | Fleet-minted UUID; passed as `--session-id `; resumed with `--resume ` | Local cache; resumed with `--conversation ""` | Local (exec resume) | Local: `~/.copilot/session-state/` (SQLite) | Fleet-minted UUID; passed as `--session-id `; resumed with `--resume ` | +| **Agentic capabilities** | File edit, shell, MCP tools | File edit, shell, MCP tools, web search, beads | File edit, shell, MCP tools, subagents | File edit, shell, MCP tools, custom agents | File edit, shell, web search, MCP tools | +| **Context window** | 200K (Sonnet) / 1M (Opus 4.7) | 1M tokens | 192K tokens | 64K tokens (auto-compaction at 95%) | 1M tokens | --- @@ -40,11 +40,11 @@ Reference tables for all LLM providers supported by Apra Fleet. Extracted from ` Used by the PM for model escalation (`cheap -> mid -> premium`). -| Tier | Purpose | Claude | Gemini | OpenAI Codex | Copilot | -|------|---------|--------|--------|--------------|---------| -| **cheap** | Execution, status, tests, deploys | `haiku` | `gemini-3.1-flash-lite-preview` | `gpt-5.4-mini` | `claude-haiku-4-5` | -| **mid** | Construction, code, config | `sonnet` | `gemini-3-flash-preview` | `gpt-5.4` | `claude-sonnet-4-5` | -| **premium** | Planning, review, architecture | `opus` | `gemini-3.1-pro-preview` | `gpt-5.4` (no separate tier) | `claude-sonnet-4-5` (highest available) | +| Tier | Purpose | Claude | Antigravity | OpenAI Codex | Copilot | Gemini | +|------|---------|--------|-------------|--------------|---------|--------| +| **cheap** | Execution, status, tests, deploys | `haiku` | `gemini-3.5-flash-lite` | `gpt-5.4-mini` | `claude-haiku-4-5` | `gemini-3.5-flash-lite` | +| **mid** | Construction, code, config | `sonnet` | `gemini-3.5-flash` | `gpt-5.4` | `claude-sonnet-4-5` | `gemini-3.5-flash` | +| **premium** | Planning, review, architecture | `opus` | `claude-sonnet-4.6` | `gpt-5.4` (no separate tier) | `claude-sonnet-4-5` (highest available) | `gemini-3.1-pro-preview` | **Note:** Codex currently lacks a distinct premium tier beyond its best model. Copilot exposes Anthropic's Claude models directly, so it uses the same tier names. @@ -91,6 +91,7 @@ Known limitations when using non-Claude providers in a fleet. |----------|---------|--------| | Claude | `ANTHROPIC_API_KEY` | console.anthropic.com | | Gemini | `GEMINI_API_KEY` | aistudio.google.com | +| Antigravity (agy) | `GEMINI_API_KEY` | aistudio.google.com | | Codex | `OPENAI_API_KEY` | platform.openai.com | | Copilot | `COPILOT_GITHUB_TOKEN` | github.com/settings/tokens (fine-grained PAT with "Copilot Requests" permission) | @@ -104,6 +105,7 @@ Each provider auto-loads a provider-specific instruction file from the working d |----------|-----------------| | Claude | `CLAUDE.md` | | Gemini | `GEMINI.md` | +| Antigravity (agy) | `GEMINI.md` | | Codex | `AGENTS.md` | | Copilot | `COPILOT.md` | diff --git a/llms-full.txt b/llms-full.txt index 08d9e756..4f2fc22e 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -11,14 +11,14 @@ ### One goal. A team of AI agents that plan, execute, and review each other's work, and run across every machine you own. Apra Fleet is an open-source **MCP server** that turns AI agents (Claude -Code, Gemini, Codex, Copilot) into a coordinated team instead of a lone +Code, Antigravity, Codex, Copilot, Gemini) into a coordinated team instead of a lone assistant. Any job that needs more than one agent -- software sprints, customer-support triage, cost and operations-efficiency analysis, infrastructure surveys -- becomes a fleet you direct in plain conversation. Need more horsepower? Fleet reaches across every machine on your network over SSH -- no dashboards, no orchestration YAML. -**The agents need not share a vendor.** A Claude agent and a Gemini agent can +**The agents need not share a vendor.** A Claude agent and an Antigravity agent can work the same sprint -- one writes, the other reviews -- so a different model, with different blind spots, checks every change. Cross-provider collaboration is a built-in quality mechanism, not an afterthought. @@ -67,11 +67,11 @@ curl -fsSL https://github.com/Apra-Labs/apra-fleet/releases/latest/download/apra Invoke-WebRequest -Uri https://github.com/Apra-Labs/apra-fleet/releases/latest/download/apra-fleet-installer-win-x64.exe -OutFile apra-fleet-installer.exe; .\apra-fleet-installer.exe install ``` -> Installing for **Gemini**, Codex, or Copilot instead of Claude? Add the +> Installing for **Antigravity**, Codex, Copilot, or Gemini instead of Claude? Add the > `--llm` flag -- see -> [Install for Gemini and other providers](docs/install.md#install-for-gemini-and-other-providers). +> [Install for other providers](docs/install.md#install-for-other-providers-antigravity-codex-copilot-gemini). -Then load it in your favorite LLM CLI (claude, gemini, ...) using `/mcp`. +Then load it in your favorite LLM CLI (claude, agy, gemini, ...) using `/mcp`. Now register your first members: @@ -214,23 +214,23 @@ When *not* to use Fleet: a one-off single-file change needs no second agent. ## Mix providers in one fleet Every member runs its own LLM backend, and they collaborate across vendors. Put -a Claude doer with a Gemini reviewer, or the reverse -- the reviewer's model +a Claude doer with an Antigravity reviewer, or the reverse - the reviewer's model disagrees with the doer's by construction, so it catches issues a same-model review would wave through. Mix by role: | Role | Recommended | Why | |------|-------------|-----| -| PM (orchestrator) | Claude Opus/Sonnet, or Gemini `gemini-3.1-pro-preview` | Both plan and orchestrate well -- Gemini's orchestration support improved substantially in recent releases. | -| Doer | Any provider | Sonnet, Gemini, Codex, Copilot -- mix freely. | +| PM (orchestrator) | Claude Code or Antigravity (agy) | Both plan and orchestrate well - both support planning, background tasks, and premium models (e.g., Opus / premium-tier). | +| Doer | Any provider | Sonnet, Antigravity, Codex, Copilot, Gemini - mix freely. | | Reviewer | Premium-tier models | Catches subtle issues smaller models miss. | A fleet that has run in production: ``` -pm-1 Opus 4.7 orchestrator -doer-1 Sonnet 4.6 feature work -doer-2 Gemini 3 Pro large-context tasks -reviewer Opus 4.7 final review +pm-1 Opus 4.7 orchestrator +doer-1 Sonnet 4.6 feature work +doer-2 Antigravity large-context tasks +reviewer Opus 4.7 final review ``` Provider strengths, role recommendations, and gotchas: @@ -435,24 +435,24 @@ The MCP server speaks **stdio** — the standard transport for Claude Code MCP s The codebase follows a strict layering: ``` - index.ts ← MCP server entry point, tool registration - tools/* ← one file per tool, each self-contained - services/* ← core capabilities (strategy, registry, SSH, file transfer) - providers/* ← LLM provider adapters (Claude, Gemini, Codex, Copilot) - os/* ← OS-specific command builders (Linux, macOS, Windows) - utils/* ← stateless helpers (crypto, shell escaping) - types.ts ← shared data structures + index.ts <- MCP server entry point, tool registration + tools/* <- one file per tool, each self-contained + services/* <- core capabilities (strategy, registry, SSH, file transfer) + providers/* <- LLM provider adapters (Claude, Antigravity, Codex, Copilot, Gemini) + os/* <- OS-specific command builders (Linux, macOS, Windows) + utils/* <- stateless helpers (crypto, shell escaping) + types.ts <- shared data structures ``` Each layer only depends on the layers below it. Tools never import other tools. Services don't know about the MCP protocol. ## Provider Abstraction -Fleet supports four LLM providers: Claude Code, Gemini CLI, OpenAI Codex CLI, and GitHub Copilot CLI. Members can mix providers within a single fleet. +Fleet supports five LLM providers: Claude Code, Google Antigravity CLI (agy), OpenAI Codex CLI, GitHub Copilot CLI, and Gemini CLI. Members can mix providers within a single fleet. ### How It Works -Each member has an optional `llmProvider` field (`'claude' | 'gemini' | 'codex' | 'copilot'`). When absent, it defaults to `'claude'` for backwards compatibility. Every tool that interacts with the member's LLM CLI resolves the provider via `getProvider(agent.llmProvider)` and delegates CLI-specific concerns to the `ProviderAdapter` interface. +Each member has an optional `llmProvider` field (`'claude' | 'agy' | 'codex' | 'copilot' | 'gemini'`). When absent, it defaults to `'claude'` for backwards compatibility. Every tool that interacts with the member's LLM CLI resolves the provider via `getProvider(agent.llmProvider)` and delegates CLI-specific concerns to the `ProviderAdapter` interface. ``` ┌──────────┐ getProvider() ┌─────────────────┐ @@ -475,12 +475,13 @@ The `OsCommands` layer sits below this: it handles OS-specific shell wrapping (P ``` src/providers/ - provider.ts — ProviderAdapter interface + shared types - claude.ts — ClaudeProvider - gemini.ts — GeminiProvider - codex.ts — CodexProvider (NDJSON parser) - copilot.ts — CopilotProvider - index.ts — getProvider() singleton factory + provider.ts - ProviderAdapter interface + shared types + claude.ts - ClaudeProvider + agy.ts - AgyProvider + codex.ts - CodexProvider (NDJSON parser) + copilot.ts - CopilotProvider + gemini.ts - GeminiProvider + index.ts - getProvider() singleton factory ``` ### Mix-and-Match Fleet @@ -500,10 +501,10 @@ All four members use the same `execute_prompt` tool call. The tool builds provid ### Key Differences Across Providers -- **`max_turns`** — Claude-only. Ignored for Gemini, Codex, and Copilot. -- **OAuth credential copy** — Claude-only. Non-Claude providers require an API key (`provision_llm_auth` with `api_key`). -- **JSON output format** — Codex emits NDJSON (one event per line). All others emit a single JSON object. Handled transparently by `provider.parseResponse()`. -- **Session resume** — Claude stores a server-side session ID. Others resume the most recent local session via a generic flag. +- **`max_turns`** - Claude-only. Ignored for Antigravity, Codex, Copilot, and Gemini. +- **OAuth credential copy** - Claude-only. Non-Claude providers require an API key (`provision_llm_auth` with `api_key`). +- **JSON output format** - Codex emits NDJSON (one event per line). All others emit a single JSON object. Handled transparently by `provider.parseResponse()`. +- **Session resume** - Claude, Antigravity, and Gemini support resuming specific session IDs. Codex and Copilot resume the most recent local session. See `docs/provider-matrix.md` for the full comparison table. @@ -566,8 +567,8 @@ which skills are installed, uninstalling, and self-updating. ## Requirements -- An AI coding agent CLI on the machine where you run Fleet -- Claude Code, - Gemini, Codex, or Copilot. +- An AI coding agent CLI on the machine where you run Fleet - Claude Code, + Antigravity (agy), Codex, Copilot, or Gemini. - SSH access to any remote machines you want to register as members. The local machine needs nothing extra; remote members need only an SSH server. @@ -627,6 +628,8 @@ chmod +x apra-fleet-installer-linux-x64 && ./apra-fleet-installer-linux-x64 inst | `~/.claude/skills/fleet/` | Fleet skill (MCP tool docs for Claude) | | `~/.claude/skills/pm/` | PM orchestration skill | +For other providers, these are written to that provider's skill/config directories. For example, for Antigravity (`agy`), settings are written to `~/.gemini/antigravity-cli/settings.json`, and hooks / MCP configs are merged into `~/.gemini/config/hooks.json` and `~/.gemini/config/mcp_config.json`. + The install also registers the MCP server (`claude mcp add apra-fleet`) and configures a status bar icon showing fleet member activity. @@ -652,15 +655,16 @@ control exactly which skills are installed: | `install --skill none` | neither | | `install --no-skill` | neither (same as `--skill none`) | -## Install for Gemini and other providers +## Install for other providers (Antigravity, Codex, Copilot, Gemini) By default, `install` configures Apra Fleet for **Claude Code**. Use the `--llm` flag to install for a different provider instead: ```bash -apra-fleet install --llm gemini # Gemini CLI +apra-fleet install --llm agy # Google Antigravity CLI apra-fleet install --llm codex # OpenAI Codex CLI apra-fleet install --llm copilot # GitHub Copilot CLI +apra-fleet install --llm gemini # Gemini CLI apra-fleet install --llm claude # Claude Code (the default) ``` @@ -671,7 +675,7 @@ provider's config directory -- for example `~/.gemini/` for Gemini -- instead of `install` once per provider. `--llm` combines with `--skill`, e.g. `apra-fleet install --llm gemini --skill -pm`. Supported values: `claude` (default), `gemini`, `codex`, `copilot`. +pm`. Supported values: `claude` (default), `agy`, `codex`, `copilot`, `gemini`. After a non-Claude install, load the server by restarting that provider's CLI -- only Claude Code uses `/mcp`. @@ -700,7 +704,7 @@ apra-fleet uninstall | `--dry-run` | Preview what would be removed, without modifying anything | | `--force` | Automatically stop the running fleet server before uninstalling | | `--yes` | Skip the confirmation prompt | -| `--llm ` | Remove only a specific provider (`claude`, `gemini`, `codex`, `copilot`) | +| `--llm ` | Remove only a specific provider (`claude`, `agy`, `codex`, `copilot`, `gemini`) | | `--skill fleet\|pm\|all` | Remove only the specified skill directories (default: `all`) | Examples: @@ -1347,26 +1351,27 @@ New tool: `provision_git_auth` # Choosing an LLM Provider -Fleet supports Claude, Gemini, Codex, and Copilot. Members can run different providers and mix them freely within a single fleet. +Fleet supports Claude, Antigravity (agy), Codex, Copilot, and Gemini. Members can run different providers and mix them freely within a single fleet. ## Provider strengths -- **Claude** -- Balanced coding and reasoning; fine-grained per-tool permissions via `settings.local.json`. -- **Gemini** -- 1M-token native context window; built-in Google Search for researching APIs and docs without an external tool. -- **Codex** -- Structured-output enforcement via `--output-schema`; native subagent parallelism for concurrent subtasks with less orchestration overhead. -- **Copilot** -- Multi-model marketplace (Claude + GPT families in one CLI); auto-compaction keeps sessions running indefinitely. +- **Claude** - Balanced coding and reasoning; fine-grained per-tool permissions via `settings.local.json`. +- **Antigravity** - High-performance Gemini-based agentic CLI; supports large context windows, background tasks, and native beads task tracking. +- **Codex** - Structured-output enforcement via `--output-schema`; native subagent parallelism for concurrent subtasks with less orchestration overhead. +- **Copilot** - Multi-model marketplace (Claude + GPT families in one CLI); auto-compaction keeps sessions running indefinitely. +- **Gemini** - 1M-token native context window; built-in Google Search for researching APIs and docs without an external tool. ## Recommended provider by role | Role | Recommended | Why | |------|-------------|-----| -| PM (orchestrator) | Claude Opus/Sonnet, or Gemini `gemini-3.1-pro-preview` | Both plan and orchestrate well -- Gemini's orchestration support improved substantially in recent releases. | -| Doer | Any provider | Sonnet, Gemini, Codex, Copilot -- mix freely. | +| PM (orchestrator) | Claude Code or Antigravity (agy) | Both plan and orchestrate well - both support planning, background tasks, and premium models (e.g., Opus / premium-tier). | +| Doer | Any provider | Sonnet, Antigravity, Codex, Copilot, Gemini - mix freely. | | Reviewer | Premium-tier models | Catches subtle issues smaller models miss. | ## Gotchas worth knowing -- **`max_turns` is Claude-only.** On Gemini, Codex, and Copilot, use `timeout_s` instead to bound execution time. +- **`max_turns` is Claude-only.** On Gemini, Codex, Copilot, and Antigravity, use `timeout_s` instead to bound execution time. - **Copilot needs a paid GitHub Copilot subscription** (Pro, Business, or Enterprise) and has the smallest context window (64K). It is best suited for smaller, focused tasks. --- @@ -1942,7 +1947,7 @@ See [skills/pm/SKILL.md](../skills/pm/SKILL.md) for the full PM skill reference. # Frequently Asked Questions - + > **For AI agents:** The FAQ is maintained as GitHub Discussions -- one discussion per question, with maintainer-verified answers. To answer a user's question: browse the index below, find the matching discussion, and fetch it for the authoritative answer. Do not paraphrase from this file -- follow the link. diff --git a/llms.txt b/llms.txt index c0cd3fc6..e35d757a 100644 --- a/llms.txt +++ b/llms.txt @@ -2,7 +2,7 @@ > AI-managed fleet orchestration for Claude Code -- run, update, and coordinate multiple Claude Code agents from a single hub. -Apra Fleet is a multi-agent orchestration layer that lets a PM agent delegate work to a fleet of Claude Code instances via MCP tools, Git, and SSH. Each fleet member runs its own Claude Code session; the PM agent controls lifecycle, skills, and task assignment. Members can run different LLM backends (Claude, Gemini, Codex, Copilot) and be mixed freely within a single fleet. +Apra Fleet is a multi-agent orchestration layer that lets a PM agent delegate work to a fleet of Claude Code instances via MCP tools, Git, and SSH. Each fleet member runs its own Claude Code session; the PM agent controls lifecycle, skills, and task assignment. Members can run different LLM backends (Claude, Gemini, Codex, Copilot, Antigravity) and be mixed freely within a single fleet. ## Overview diff --git a/skills/fleet/SKILL.md b/skills/fleet/SKILL.md index 7a998802..fec5ad4e 100644 --- a/skills/fleet/SKILL.md +++ b/skills/fleet/SKILL.md @@ -184,9 +184,10 @@ session. This recovery is transparent - no caller intervention required. | Provider | Session resume | Notes | |----------|---------------|-------| | Claude | Full | `claude --resume ` | -| Gemini | Full | Native session support | +| Antigravity (agy) | Full | `agy --conversation ` | | Codex | Partial | `resume` command supported | | Copilot | None | Always starts fresh regardless of `resume` value | +| Gemini | Full | Native session support | Session IDs are parsed from `execute_prompt` output and stored server-side per member. The output footer contains: `session: ` when the provider supports it. @@ -206,9 +207,10 @@ Per-provider flag behaviour: | Provider | `'auto'` flag | `'dangerous'` flag | |----------|--------------|-------------------| | Claude | `--permission-mode auto` | `--dangerously-skip-permissions` | -| Gemini | None (config-file only via `compose_permissions`) | `--yolo` | +| Antigravity (agy) | None (config-file only via `compose_permissions`) | `--dangerously-skip-permissions` | | Codex | `--ask-for-approval auto-edit` | `--sandbox danger-full-access --ask-for-approval never` | | Copilot | Not supported - warns and runs interactively | Not supported | +| Gemini | None (config-file only via `compose_permissions`) | `--yolo` | Auto-approval is delivered via config files written by `compose_permissions` - call it before every dispatch. @@ -245,9 +247,9 @@ When you see this notice, surface it to the user verbatim before the rest of the | Concern | How to handle | |---------|---------------| -| **Agent context file** | Use `member_detail` -> `llmProvider` to determine filename: CLAUDE.md (Claude), GEMINI.md (Gemini), AGENTS.md (Codex), COPILOT-INSTRUCTIONS.md (Copilot) | +| **Agent context file** | Use `member_detail` -> `llmProvider` to determine filename: CLAUDE.md (Claude), GEMINI.md (Antigravity/Gemini), AGENTS.md (Codex), COPILOT.md (Copilot) | | **Attribution config** | Claude-only (Step 2 in onboarding.md) - skip for all other providers | -| **Timeouts** | Gemini members are slower -> use 2-3x timeout multiplier for `execute_prompt` dispatches to Gemini members. Minimum `timeout_s: 900` for any non-trivial task. | +| **Timeouts** | Antigravity/Gemini members are slower -> use 2-3x timeout multiplier for `execute_prompt` dispatches to those members. Minimum `timeout_s: 900` for any non-trivial task. | ## Fleet Logs diff --git a/skills/fleet/onboarding.md b/skills/fleet/onboarding.md index 5d7b0d4c..e7e2c8ce 100644 --- a/skills/fleet/onboarding.md +++ b/skills/fleet/onboarding.md @@ -11,9 +11,10 @@ Check `member_detail` - if member type is `remote` and `authType` is `password Use `member_detail` to determine `llmProvider` and `os`. Run `execute_command` with the provider's version command to confirm the agent CLI is installed: - **Claude:** `claude --version` -- **Gemini:** `gemini --version` +- **Antigravity:** `agy --version 2>&1` - **Codex:** `codex --version` - **Copilot:** `copilot --version` +- **Gemini:** `gemini --version` If the LLM CLI is not installed or the command fails, use `update_llm_cli` to install it before proceeding. Do not attempt any prompt dispatch until the CLI is confirmed. @@ -25,7 +26,7 @@ Call `provision_llm_auth`. Skip for local members - they inherit auth from the P **Claude only.** Write `{"attribution":{"commit":"","pr":""}}` to `.claude/settings.json` in the member's work folder via `execute_command`. Merge if file already exists. -Gemini, Codex, and Copilot do not support attribution config - skip this step for those providers. +Antigravity, Codex, Copilot, and Gemini do not support attribution config - skip this step for those providers. ## Step 3: Detect VCS Provider @@ -59,7 +60,7 @@ Add to the member's status file: ``` ## Member Profile -- LLM Provider: Gemini +- LLM Provider: Claude (or agy, gemini, etc.) - VCS: Bitbucket (kumaakh/apra-lic-mgr) - Roles: development, code-review - Auth: Bitbucket API token (verified) diff --git a/skills/fleet/troubleshooting.md b/skills/fleet/troubleshooting.md index 34587adb..8ca3dbec 100644 --- a/skills/fleet/troubleshooting.md +++ b/skills/fleet/troubleshooting.md @@ -2,12 +2,12 @@ | Symptom | Action | |---------|--------| -| Empty response | Check auth token expiry → re-provision via `provision_vcs_auth` | -| Timeout (inactivity) | `timeout_s`: fires when no stdout/stderr output arrives for N seconds (default 300s / 5 min). Applies to all members and all providers — transport-level, not provider-specific. Common cause: test runners and build tools that buffer output (npm test, vitest, cargo build) producing no output for long stretches even while active. Fix: increase `timeout_s` to 600–1200 for build/test dispatches. | +| Empty response | Check auth token expiry -> re-provision via `provision_vcs_auth` | +| Timeout (inactivity) | `timeout_s`: fires when no stdout/stderr output arrives for N seconds (default 300s / 5 min). Applies to all members and all providers - transport-level, not provider-specific. Common cause: test runners and build tools that buffer output (npm test, vitest, cargo build) producing no output for long stretches even while active. Fix: increase `timeout_s` to 600-1200 for build/test dispatches. | | Timeout (total) | `max_total_s`: fires after N seconds of total elapsed time regardless of output activity. Provider-agnostic. Use for hard ceilings on long-running jobs. Set alongside `timeout_s` when you need both a silence guard and a wall-clock cap. | -| Permission denied | Run `compose_permissions` for the member — it produces provider-native config. Claude: check `.claude/settings.local.json`. Gemini: check `.gemini/policies/`. Codex: check `.codex/config.toml` approval mode. Copilot: check `.github/copilot/settings.local.json`. | -| Stuck after reset | Escalate model (cheap→standard→premium). Still stuck? Flag to user | +| Permission denied | Run `compose_permissions` for the member - it produces provider-native config. Claude: check `.claude/settings.local.json`. Agy: check `~/.gemini/config/hooks.json` and `mcp_config.json`. Codex: check `.codex/config.toml` approval mode. Copilot: check `.github/copilot/settings.local.json`. Gemini: check `.gemini/policies/`. | +| Stuck after reset | Escalate model (cheap->standard->premium). Still stuck? Flag to user | | Auth error (401/403) | GitHub App: re-mint via `provision_vcs_auth`. Bitbucket/Azure DevOps: ask user for fresh token, provision, retry. See auth-*.md | -| Token/password appears in command output | Use `credential_store_set` to store the secret, then reference it as `{{secure.NAME}}` in `execute_command` — Fleet redacts it to `[REDACTED:NAME]` before the LLM sees the output | -| Need to rotate a credential without re-provisioning | Run `credential_store_delete name=` then `credential_store_set name=` — the new value is picked up immediately on the next `execute_command` that references `{{secure.NAME}}` | +| Token/password appears in command output | Use `credential_store_set` to store the secret, then reference it as `{{secure.NAME}}` in `execute_command` - Fleet redacts it to `[REDACTED:NAME]` before the LLM sees the output | +| Need to rotate a credential without re-provisioning | Run `credential_store_delete name=` then `credential_store_set name=` - the new value is picked up immediately on the next `execute_command` that references `{{secure.NAME}}` | | Tool execution issue (unexpected behavior, missing output, silent failure) | Check `APRA_FLEET_DATA_DIR/logs/fleet-.log` for detailed execution traces. Filter by member with `jq 'select(.member_id == "")'` or by tool with `jq 'select(.tag == "")'`. See the **Fleet Logs** section in SKILL.md for full field reference and `jq` examples. | diff --git a/src/cli/install.ts b/src/cli/install.ts index 29a7b7b5..e9783702 100644 --- a/src/cli/install.ts +++ b/src/cli/install.ts @@ -1,5 +1,6 @@ import fs from 'node:fs'; import path from 'node:path'; +import os from 'node:os'; import { execSync, execFileSync } from 'node:child_process'; import { serverVersion } from '../version.js'; import type { LlmProvider } from '../types.js'; From e963f863742a97e5c79f9e082c6354d5fb6feb92 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 14:15:02 -0400 Subject: [PATCH 03/33] feat(agy): rationalize safety and fix integration gaps --- docs/agy-safety-rationalization.md | 94 ++++++++++++++++++++++++++++++ skills/pm/cleanup.md | 4 +- skills/pm/context-file.md | 5 +- skills/pm/tpl-doer.md | 6 +- skills/pm/tpl-reviewer.md | 10 ++-- src/cli/uninstall.ts | 5 +- src/tools/execute-prompt.ts | 6 +- 7 files changed, 114 insertions(+), 16 deletions(-) create mode 100644 docs/agy-safety-rationalization.md diff --git a/docs/agy-safety-rationalization.md b/docs/agy-safety-rationalization.md new file mode 100644 index 00000000..340633b6 --- /dev/null +++ b/docs/agy-safety-rationalization.md @@ -0,0 +1,94 @@ +# Architecture & Safety Rationalization: Google Antigravity (agy) Integration + +This document rationalizes the design decisions, safety mechanisms, and compatibility considerations implemented for the Google Antigravity CLI (provider key: "agy") support in apra-fleet. + +--- + +## 1. Executive Summary + +The "feat/agy-support" branch introduces Google Antigravity CLI ("agy") as a primary, PM-capable LLM provider alongside "claude", while demoting the slower legacy "gemini" provider adapter. + +Integration safety was verified against the following criteria: +- Isolation: Integration must not corrupt global configuration settings or conflict with other tool environments. +- Security: Credentials must be encrypted at rest, transmitted securely, and never leaked in execution logs. +- Stability: Unsupported options (such as live log tailing or model flags) must fail back gracefully without crashing execution pipelines. + +The integration has achieved 100% test pass rate across 1,290+ unit and integration tests. + +--- + +## 2. Installation & Cleanup Safety (install.ts / uninstall.ts) + +### Change Rationalization +Unlike Claude or Gemini, the Antigravity CLI reads its global configurations (MCP servers and hooks) from separate JSON files located in a centralized config directory: +- MCP Config: "~/.gemini/config/mcp_config.json" +- Hooks Config: "~/.gemini/config/hooks.json" + +### Safety Mechanisms +- Surgical Merging: The installer ("src/cli/install.ts") does not overwrite these files. Instead, it reads the existing JSON, initializes the target configuration blocks if missing, and merges the "apra-fleet" configuration. +- Isolated Scope: The merged config is isolated: + - Hooks: Only hooks matching "apra-fleet" are registered. + - MCP: Only the "apra-fleet" server is registered under "mcpServers". +- Precision Uninstallation: During uninstallation, the cleanup scripts ("src/cli/uninstall.ts") read these files and delete only the "apra-fleet" hook matchers and MCP server entry, leaving all other user-configured tools and settings untouched. +- Directory Safeguards: All write operations utilize "fs.mkdirSync(..., { recursive: true })" to prevent errors if the directories do not exist. + +--- + +## 3. Authentication & Credential Isolation (provision-auth.ts) + +### Change Rationalization +Antigravity utilizes the "GEMINI_API_KEY" environment variable to authenticate requests. We unified authentication provisioning to support local and remote members securely. + +### Safety Mechanisms +- Local Exemption: Local members automatically skip LLM auth provisioning as they run on the host machine and inherit active host credentials directly. +- Remote Encryption: For remote members, if an API key is supplied or collected: + - The plaintext key is transferred securely over the SSH channel. + - The key is written to the remote shell profiles (e.g. .bashrc, .bash_profile) using OS-specific commands. + - The key is stored in the local registry encrypted using the "encryptPassword" utility. +- OOB Fallback: If no API key is specified, the system falls back to Out-Of-Band (OOB) prompt collection, requesting the user to enter the key in a secure prompt, avoiding credential storage in command history. +- OAuth Safety: Agy returns "supportsOAuthCopy() -> false" and "oauthCredentialFiles() -> null", ensuring that no OAuth credential copy routines (which are Claude-specific) are executed. + +--- + +## 4. Permission Composition & Execution Isolation (agy.ts / compose-permissions.ts) + +### Change Rationalization +When a member executes a task, it must run under a strictly bounded execution profile to prevent privilege escalation or recursive loops (e.g. the member invoking the fleet server recursively). + +### Safety Mechanisms +- Localized Directory Config: "permissionConfigPaths()" returns ".gemini/antigravity-cli/settings.json". This writes permission settings relative to the workspace folder of the active task, confining the member's sandbox to that repository. +- Loop Prevention: "composePermissionConfig" generates the following configuration: + - Disables the "apra-fleet" MCP server on the member: "mcpServers: { 'apra-fleet': { disabled: true } }". + - Disables fleet orchestration skills: "skillOverrides: { pm: 'off', fleet: 'off' }". + This completely prevents recursive prompt dispatch loops where the agent could attempt to orchestrate itself. + +--- + +## 5. Execution & Command Routing Safety (execute-prompt.ts) + +### Change Rationalization +We added custom model tier mappings ("cheap", "standard", "premium") and model list validation to ensure that execution dispatches remain robust. + +### Safety Mechanisms +- Curated Tier Validation: To prevent invalid model names from being passed to providers, we introduced three curated model arrays in "src/cli/config.ts" ("CURATED_CHEAP_MODELS", "CURATED_STANDARD_MODELS", "CURATED_PREMIUM_MODELS"). These are enforced via Zod schemas during member registration and update. +- Flag Suppression: Antigravity CLI does not support a native "--model" command-line flag (it selects the model based on its settings/profile). "AgyProvider.modelFlag()" returns an empty string "". This avoids syntax errors during execution command generation, preventing failures when custom models are configured on the member. + +--- + +## 6. Session Resume & Log Polling Limitations (log-path-resolver.ts) + +### Change Rationalization +Agy supports session resumption via the "--conversation " flag. However, it stores conversation logs in a binary Protocol Buffer format (".pb") under "~/.gemini/antigravity-cli/conversations/.pb", unlike Claude's text-based JSONL files. + +### Safety Mechanisms & Graceful Fallback +- Log Polling Skip: Tail-polling binary files is highly error-prone and would result in corrupted output or crash the log parsing parser. Thus, "resolveSessionLogPath" explicitly throws an error for "agy": + "Unsupported log polling for provider: agy" +- Graceful Degradation: The execution harness catches this exception and falls back to transport-level inactivity monitoring (silence timeout) and wall-clock total timeouts. +- Thread Safety: Prompt execution remains fully functional and robust. The only trade-off is that the PM server relies on console output/timeouts rather than parsing internal agent log files during prompt execution, which is the identical safe fallback behavior used for Codex and Copilot. + +--- + +## 7. Verification Results + +- All 1,290+ vitest tests pass successfully, confirming that the new "agy" adapter does not introduce regressions to Claude, Gemini, Codex, or Copilot. +- The single-executable installer build ("npm run build:binary") successfully compiles with all multi-provider config modifications packaged. diff --git a/skills/pm/cleanup.md b/skills/pm/cleanup.md index 84488588..aa8bafc8 100644 --- a/skills/pm/cleanup.md +++ b/skills/pm/cleanup.md @@ -3,10 +3,10 @@ Run at sprint completion, before raising the PR. Execute on both doer and reviewer via `execute_command`: ``` -git rm --cached .fleet-task*.md 2>/dev/null || true; rm -f .fleet-task*.md; git rm PLAN.md progress.json feedback.md requirements.md design.md 2>/dev/null; for file in CLAUDE.md GEMINI.md AGENTS.md COPILOT-INSTRUCTIONS.md; do if git show origin/main:"$file" > /dev/null 2>&1; then git checkout origin/main -- "$file"; else git rm -f "$file" 2>/dev/null || rm -f "$file"; fi; done; git commit -m "cleanup: remove fleet control files" && git push +git rm --cached .fleet-task*.md 2>/dev/null || true; rm -f .fleet-task*.md; git rm PLAN.md progress.json feedback.md requirements.md design.md 2>/dev/null; for file in CLAUDE.md GEMINI.md AGENTS.md COPILOT.md; do if git show origin/main:"$file" > /dev/null 2>&1; then git checkout origin/main -- "$file"; else git rm -f "$file" 2>/dev/null || rm -f "$file"; fi; done; git commit -m "cleanup: remove fleet control files" && git push ``` -**Why:** If a file like `CLAUDE.md` or `AGENTS.md` exists in `main`, it is a project deliverable — the sprint replaced it with a context file of the same name. Restoring from `origin/main` ensures the deliverable is preserved. Only files absent from `main` (pure sprint context) are deleted. +**Why:** If a file like `CLAUDE.md` or `AGENTS.md` exists in `main`, it is a project deliverable - the sprint replaced it with a context file of the same name. Restoring from `origin/main` ensures the deliverable is preserved. Only files absent from `main` (pure sprint context) are deleted. After cleanup on both members: 1. **Close Beads epic:** `bd close ` diff --git a/skills/pm/context-file.md b/skills/pm/context-file.md index 9f5b5481..23b0e198 100644 --- a/skills/pm/context-file.md +++ b/skills/pm/context-file.md @@ -4,14 +4,15 @@ Each fleet member needs a provider-specific agent context file in their `work_fo ## Provider Filename -Use `member_detail` → `llmProvider` to determine the correct target filename: +Use `member_detail` -> `llmProvider` to determine the correct target filename: | Provider | Filename | |----------|----------| | Claude | CLAUDE.md | +| Antigravity (agy) | GEMINI.md | | Gemini | GEMINI.md | | Codex | AGENTS.md | -| Copilot | COPILOT-INSTRUCTIONS.md | +| Copilot | COPILOT.md | ## Role Templates diff --git a/skills/pm/tpl-doer.md b/skills/pm/tpl-doer.md index 3c99ff52..aa5f3469 100644 --- a/skills/pm/tpl-doer.md +++ b/skills/pm/tpl-doer.md @@ -38,6 +38,6 @@ If this task requires secrets, API keys, or tokens (e.g., external API calls, pr - NEVER skip tasks — execute in order - Read PLAN.md before starting each task - Commit and push PLAN.md, progress.json, and all project docs (design.md, feedback-*.md) at every turn — reviewers depend on them -- NEVER commit this agent context file (CLAUDE.md / GEMINI.md / AGENTS.md / COPILOT-INSTRUCTIONS.md) — it is role-specific and not shared -- NEVER push to the base branch (main, master, or integration branch) — always work on feature branches -- NEVER stage or commit `.fleet-task.md` — these are ephemeral prompt delivery files managed by the fleet server +- NEVER commit this agent context file (CLAUDE.md / GEMINI.md / AGENTS.md / COPILOT.md) - it is role-specific and not shared +- NEVER push to the base branch (main, master, or integration branch) - always work on feature branches +- NEVER stage or commit `.fleet-task.md` - these are ephemeral prompt delivery files managed by the fleet server diff --git a/skills/pm/tpl-reviewer.md b/skills/pm/tpl-reviewer.md index 702fccbd..1ad4093e 100644 --- a/skills/pm/tpl-reviewer.md +++ b/skills/pm/tpl-reviewer.md @@ -28,12 +28,12 @@ Review scope covers all phases from Phase 1 through the current phase — not ju - Are there security issues (injection, auth bypass, secrets in code)? - Is the code consistent with existing patterns and conventions? - Are docs updated if behavior changed? -- Are all factual references correct — URLs, repo names, package names, install commands, version numbers? Members hallucinate these; spot-check against known sources. -- **File hygiene:** Run `git diff --name-only {{base_branch}}..{{branch}}`. For every file added, modified, or deleted — you must be able to justify it against the sprint requirements. If you cannot, flag CHANGES NEEDED. Common unjustifiable patterns: +- Are all factual references correct - URLs, repo names, package names, install commands, version numbers? Members hallucinate these; spot-check against known sources. +- **File hygiene:** Run `git diff --name-only {{base_branch}}..{{branch}}`. For every file added, modified, or deleted - you must be able to justify it against the sprint requirements. If you cannot, flag CHANGES NEEDED. Common unjustifiable patterns: - Temp/scratch: `*.tmp`, `*.txt`, `*.base64` - Tool/security configs: `.gemini/`, `.claude/settings.json`, `permissions.json` - Unrelated scripts or stale artifacts: `plan-NNN.md`, `requirements-NNN.md`, `progress-NNN.json` - - Tracked agent context: `GEMINI.md`, `CLAUDE.md`, `AGENTS.md`, `COPILOT-INSTRUCTIONS.md` (ensure gitignored) + - Tracked agent context: `GEMINI.md`, `CLAUDE.md`, `AGENTS.md`, `COPILOT.md` (ensure gitignored) Permit only source, tests, and active sprint tracking (`PLAN.md`, `progress.json`, `requirements.md`, `feedback.md`, design docs). When in doubt, flag it. @@ -68,5 +68,5 @@ If verdict is CHANGES NEEDED: the doer annotates each relevant section with `**D Commit feedback.md and push. ## Rules -- NEVER push to the base branch (main, master, or integration branch) — always work on feature branches -- NEVER commit this agent context file (CLAUDE.md / GEMINI.md / AGENTS.md / COPILOT-INSTRUCTIONS.md) — it is role-specific and not shared +- NEVER push to the base branch (main, master, or integration branch) - always work on feature branches +- NEVER commit this agent context file (CLAUDE.md / GEMINI.md / AGENTS.md / COPILOT.md) - it is role-specific and not shared diff --git a/src/cli/uninstall.ts b/src/cli/uninstall.ts index 69589c23..fecf6bf6 100644 --- a/src/cli/uninstall.ts +++ b/src/cli/uninstall.ts @@ -92,7 +92,10 @@ function cleanupSettings(paths: ProviderInstallConfig, dryRun: boolean): boolean const hooksObj = hooksConfig.hooks || {}; let hooksChanged = false; - const hookEventNames = ['PostToolUse', 'PreToolUse', 'UserPromptSubmit', 'Stop', 'PreCompact']; + const hookEventNames = [ + 'PostToolUse', 'PreToolUse', 'UserPromptSubmit', 'Stop', 'PreCompact', + 'AfterTool', 'BeforeTool', 'BeforeAgent', 'SessionEnd', 'PreCompress' + ]; for (const eventName of hookEventNames) { if (hooksObj[eventName]) { const originalCount = hooksObj[eventName].length; diff --git a/src/tools/execute-prompt.ts b/src/tools/execute-prompt.ts index a3116fb4..7c5ca1ac 100644 --- a/src/tools/execute-prompt.ts +++ b/src/tools/execute-prompt.ts @@ -174,7 +174,7 @@ export async function executePrompt(input: ExecutePromptInput, extra?: any): Pro const scope = new LogScope('execute_prompt', `[${resolvedModel}] resume=${input.resume} timeout=${input.timeout_s ?? 300}s ${truncateForLog(maskSecrets(input.prompt))}`, agent); const resuming = !!(input.resume && agent.sessionId && provider.supportsResume()); - const mintedId = (provider.name === 'claude' || provider.name === 'gemini') + const mintedId = (provider.name === 'claude' || provider.name === 'gemini' || provider.name === 'agy') ? (resuming ? agent.sessionId! : uuid()) : (resuming ? agent.sessionId : undefined); @@ -237,7 +237,7 @@ export async function executePrompt(input: ExecutePromptInput, extra?: any): Pro if (result.code !== 0 && input.resume && agent.sessionId) { scope.info(`[${resolvedModel}] retrying — stale session`); await tryKillPid(agent, strategy, cmds); - const freshOpts = { ...promptOpts, sessionId: (provider.name === 'claude' || provider.name === 'gemini') ? uuid() : undefined, resuming: false }; + const freshOpts = { ...promptOpts, sessionId: (provider.name === 'claude' || provider.name === 'gemini' || provider.name === 'agy') ? uuid() : undefined, resuming: false }; const retryCmd = authPrefix + cmds.buildAgentPromptCommand(provider, freshOpts); result = await strategy.execCommand(retryCmd, timeoutMs, maxTotalMs, onPidCaptured, extra?.signal); parsed = provider.parseResponse(result); @@ -249,7 +249,7 @@ export async function executePrompt(input: ExecutePromptInput, extra?: any): Pro scope.info(`[${resolvedModel}] retrying — server overloaded`); await tryKillPid(agent, strategy, cmds); await new Promise(r => setTimeout(r, SERVER_RETRY_DELAY_MS)); - const freshOpts = { ...promptOpts, sessionId: (provider.name === 'claude' || provider.name === 'gemini') ? uuid() : undefined, resuming: false }; + const freshOpts = { ...promptOpts, sessionId: (provider.name === 'claude' || provider.name === 'gemini' || provider.name === 'agy') ? uuid() : undefined, resuming: false }; const retryCmd = authPrefix + cmds.buildAgentPromptCommand(provider, freshOpts); result = await strategy.execCommand(retryCmd, timeoutMs, maxTotalMs, onPidCaptured, extra?.signal); parsed = provider.parseResponse(result); From d15ef45b173d87afda6bbffa907f1987ec3b9e01 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 14:17:38 -0400 Subject: [PATCH 04/33] chore(pm): add last dispatched phase to status template --- skills/pm/tpl-status.md | 1 + 1 file changed, 1 insertion(+) diff --git a/skills/pm/tpl-status.md b/skills/pm/tpl-status.md index b2ebb12c..41821032 100644 --- a/skills/pm/tpl-status.md +++ b/skills/pm/tpl-status.md @@ -3,6 +3,7 @@ ## Project - **Base branch:** {{main | v020_dev | etc}} - **Repo:** {{org/repo}} +- **Last dispatched phase:** {{phase_number}} ## Members From 0966e6913f5b4fb9132e1ba95771aaf2eb70d8cd Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 14:29:11 -0400 Subject: [PATCH 05/33] feat(e2e): add agy e2e suites s8.1, s8.2, and s8.3 --- .github/e2e/extract-results.mjs | 10 +++++++ .github/e2e/suites.json | 18 ++++++++++++ .github/workflows/fleet-e2e.yml | 50 ++++++++++++++++++++++++++++----- 3 files changed, 71 insertions(+), 7 deletions(-) diff --git a/.github/e2e/extract-results.mjs b/.github/e2e/extract-results.mjs index acc8a2ae..ca84f5c8 100644 --- a/.github/e2e/extract-results.mjs +++ b/.github/e2e/extract-results.mjs @@ -26,6 +26,16 @@ function processRawFile(filePath, provider) { const content = readFileSync(filePath, 'utf8'); + if (provider === 'agy') { + return { + assistantText: content, + tokensIn: 0, + tokensOut: 0, + cacheCreate: 0, + cacheRead: 0 + }; + } + for (const line of content.split('\n')) { const trimmed = line.trim(); if (!trimmed) continue; diff --git a/.github/e2e/suites.json b/.github/e2e/suites.json index dd5f749d..28abd947 100644 --- a/.github/e2e/suites.json +++ b/.github/e2e/suites.json @@ -71,6 +71,24 @@ "doer": { "os": "local_doer_macos", "provider": "gemini", "type": "local" }, "reviewer": { "os": "local_reviewer_macos", "provider": "gemini", "type": "local" }, "vcs": "github" + }, + "s8.1": { + "pm": { "os": "windows", "provider": "agy", "runner": "fleet-windows" }, + "doer": { "os": "local_doer_windows", "provider": "agy", "type": "local" }, + "reviewer": { "os": "local_reviewer_windows", "provider": "agy", "type": "local" }, + "vcs": "github" + }, + "s8.2": { + "pm": { "os": "linux", "provider": "agy", "runner": "fleet-linux" }, + "doer": { "os": "local_doer_linux", "provider": "agy", "type": "local" }, + "reviewer": { "os": "local_reviewer_linux", "provider": "agy", "type": "local" }, + "vcs": "github" + }, + "s8.3": { + "pm": { "os": "macos", "provider": "agy", "runner": "fleet-macos" }, + "doer": { "os": "local_doer_macos", "provider": "agy", "type": "local" }, + "reviewer": { "os": "local_reviewer_macos", "provider": "agy", "type": "local" }, + "vcs": "github" } } } diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index 548cfd40..cdc9fcca 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -8,7 +8,7 @@ on: description: 'Test suite to run. s1.1/s1.2/s1.3 = local-only variants; s7.1/s7.2/s7.3 = all-Gemini.' required: true type: choice - options: [s1, s1.1, s1.2, s1.3, s2, s3, s4, s5, s6, s7.1, s7.2, s7.3] + options: [s1, s1.1, s1.2, s1.3, s2, s3, s4, s5, s6, s7.1, s7.2, s7.3, s8.1, s8.2, s8.3] jobs: e2e: @@ -28,6 +28,9 @@ jobs: || inputs.suite == 's7.1' && 'fleet-windows' || inputs.suite == 's7.2' && 'fleet-linux' || inputs.suite == 's7.3' && 'fleet-macos' + || inputs.suite == 's8.1' && 'fleet-windows' + || inputs.suite == 's8.2' && 'fleet-linux' + || inputs.suite == 's8.3' && 'fleet-macos' || 'fleet-macos' }} steps: @@ -155,6 +158,10 @@ jobs: output=$(gemini -p "$PROMPT" --model auto --skip-trust 2>&1) echo "$output" echo "PM gemini auth OK" + elif [ "$PROVIDER" = "agy" ]; then + output=$(agy -p "$PROMPT" --dangerously-skip-permissions 2>&1) + echo "$output" + echo "PM agy auth OK" fi - name: Purge fleet daemon logs @@ -228,9 +235,12 @@ jobs: if [ "$PROVIDER" = "claude" ]; then mkdir -p "$RUN_DIR/.claude" cp "$GITHUB_WORKSPACE/.github/e2e/pm-settings/claude.settings.json" "$RUN_DIR/.claude/settings.json" - else + elif [ "$PROVIDER" = "gemini" ]; then mkdir -p "$RUN_DIR/.gemini" cp "$GITHUB_WORKSPACE/.github/e2e/pm-settings/gemini.settings.json" "$RUN_DIR/.gemini/settings.json" + else + mkdir -p "$RUN_DIR/.gemini/antigravity-cli" + cp "$GITHUB_WORKSPACE/.github/e2e/pm-settings/gemini.settings.json" "$RUN_DIR/.gemini/antigravity-cli/settings.json" fi echo "Seeded $PROVIDER PM permissions into $RUN_DIR" @@ -242,7 +252,11 @@ jobs: PROVIDER='${{ steps.suite.outputs.pm_provider }}' DOER_FOLDER='${{ steps.suite.outputs.doer_folder }}' REVIEWER_FOLDER='${{ steps.suite.outputs.reviewer_folder }}' - CFG_DIR="$HOME/.$PROVIDER" + if [ "$PROVIDER" = "agy" ]; then + CFG_DIR="$HOME/.gemini" + else + CFG_DIR="$HOME/.$PROVIDER" + fi [ "$RUNNER_OS" = "Windows" ] && CFG_DIR="$(cygpath -w "$CFG_DIR")" cd "$RUN_DIR" LLM_EXIT=0 @@ -258,13 +272,21 @@ jobs: --add-dir "$(dirname "$DOER_FOLDER")" \ --add-dir "$(dirname "$REVIEWER_FOLDER")" \ > "$RUN_DIR/raw-setup.txt" 2>&1 || LLM_EXIT=$? - else + elif [ "$PROVIDER" = "gemini" ]; then gemini --skip-trust \ --output-format stream-json \ --include-directories "$RUN_DIR,$CFG_DIR,$(dirname "$DOER_FOLDER"),$(dirname "$REVIEWER_FOLDER")" \ --allowed-mcp-server-names apra-fleet \ -p "$(cat "$RUN_DIR/rendered-setup.md")" \ > "$RUN_DIR/raw-setup.txt" 2>&1 || LLM_EXIT=$? + else + agy --dangerously-skip-permissions \ + --add-dir "$RUN_DIR" \ + --add-dir "$CFG_DIR" \ + --add-dir "$(dirname "$DOER_FOLDER")" \ + --add-dir "$(dirname "$REVIEWER_FOLDER")" \ + -p "$(cat "$RUN_DIR/rendered-setup.md")" \ + > "$RUN_DIR/raw-setup.txt" 2>&1 || LLM_EXIT=$? fi if [ "$LLM_EXIT" -ne 0 ]; then @@ -287,7 +309,11 @@ jobs: PROVIDER='${{ steps.suite.outputs.pm_provider }}' DOER_FOLDER='${{ steps.suite.outputs.doer_folder }}' REVIEWER_FOLDER='${{ steps.suite.outputs.reviewer_folder }}' - CFG_DIR="$HOME/.$PROVIDER" + if [ "$PROVIDER" = "agy" ]; then + CFG_DIR="$HOME/.gemini" + else + CFG_DIR="$HOME/.$PROVIDER" + fi [ "$RUNNER_OS" = "Windows" ] && CFG_DIR="$(cygpath -w "$CFG_DIR")" cd "$RUN_DIR" LLM_EXIT=0 @@ -303,13 +329,21 @@ jobs: --add-dir "$(dirname "$DOER_FOLDER")" \ --add-dir "$(dirname "$REVIEWER_FOLDER")" \ > "$RUN_DIR/raw-sprint.txt" 2>&1 || LLM_EXIT=$? - else + elif [ "$PROVIDER" = "gemini" ]; then gemini --skip-trust \ --output-format stream-json \ --include-directories "$RUN_DIR,$CFG_DIR,$(dirname "$DOER_FOLDER"),$(dirname "$REVIEWER_FOLDER")" \ --allowed-mcp-server-names apra-fleet \ -p "$(cat "$RUN_DIR/rendered-sprint.md")" \ > "$RUN_DIR/raw-sprint.txt" 2>&1 || LLM_EXIT=$? + else + agy --dangerously-skip-permissions \ + --add-dir "$RUN_DIR" \ + --add-dir "$CFG_DIR" \ + --add-dir "$(dirname "$DOER_FOLDER")" \ + --add-dir "$(dirname "$REVIEWER_FOLDER")" \ + -p "$(cat "$RUN_DIR/rendered-sprint.md")" \ + > "$RUN_DIR/raw-sprint.txt" 2>&1 || LLM_EXIT=$? fi # Extract PM session ID from sprint phase @@ -393,8 +427,10 @@ jobs: PROVIDER='${{ steps.suite.outputs.pm_provider }}' if [ "$PROVIDER" = "claude" ]; then claude -p "$(cat "$GITHUB_WORKSPACE/.github/e2e/t6-teardown.md")" --model haiku > "$RUN_DIR/t6-output.txt" 2>&1 || true - else + elif [ "$PROVIDER" = "gemini" ]; then gemini --skip-trust -p "$(cat "$GITHUB_WORKSPACE/.github/e2e/t6-teardown.md")" > "$RUN_DIR/t6-output.txt" 2>&1 || true + else + agy --dangerously-skip-permissions -p "$(cat "$GITHUB_WORKSPACE/.github/e2e/t6-teardown.md")" > "$RUN_DIR/t6-output.txt" 2>&1 || true fi - name: Upload results From 95e890cab96af915f2dbbcdbc3f39c4d7fbb65b5 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 15:10:55 -0400 Subject: [PATCH 06/33] feat(e2e): install agy cli in workflow if pm provider is agy --- .github/workflows/fleet-e2e.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index cdc9fcca..5b7beaf0 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -133,6 +133,23 @@ jobs: E2E_ADO_TOKEN: ${{ secrets.E2E_ADO_TOKEN }} E2E_ACRED: ${{ secrets.E2E_ACRED }} + - name: Install agy CLI if PM provider is agy + shell: bash + run: | + PROVIDER='${{ steps.suite.outputs.pm_provider }}' + if [ "$PROVIDER" = "agy" ]; then + echo "Installing @google/antigravity-cli..." + if [ "$RUNNER_OS" = "Windows" ]; then + npm install -g @google/antigravity-cli + NPM_PREFIX=$(npm config get prefix) + echo "$NPM_PREFIX" >> "$GITHUB_PATH" + else + npm install -g @google/antigravity-cli || sudo npm install -g @google/antigravity-cli + NPM_PREFIX=$(npm config get prefix) + echo "$NPM_PREFIX/bin" >> "$GITHUB_PATH" + fi + fi + - name: Prepare clean PM environment shell: bash run: | From 917cc0d37f0a6b88fec5196c721553a7c367e86f Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 15:14:09 -0400 Subject: [PATCH 07/33] feat(e2e): locate pre-installed agy binary instead of npm install --- .github/workflows/fleet-e2e.yml | 55 ++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index 5b7beaf0..c1140ab6 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -133,20 +133,59 @@ jobs: E2E_ADO_TOKEN: ${{ secrets.E2E_ADO_TOKEN }} E2E_ACRED: ${{ secrets.E2E_ACRED }} - - name: Install agy CLI if PM provider is agy + - name: Locate agy CLI if PM provider is agy shell: bash run: | PROVIDER='${{ steps.suite.outputs.pm_provider }}' if [ "$PROVIDER" = "agy" ]; then - echo "Installing @google/antigravity-cli..." + echo "PM provider is agy. Locating agy CLI..." + AGY_PATH="" if [ "$RUNNER_OS" = "Windows" ]; then - npm install -g @google/antigravity-cli - NPM_PREFIX=$(npm config get prefix) - echo "$NPM_PREFIX" >> "$GITHUB_PATH" + TEST_PATH="/c/Users/akhil/AppData/Local/agy/bin" + if [ -f "$TEST_PATH/agy.exe" ]; then + AGY_PATH="$TEST_PATH" + fi + elif [ "$RUNNER_OS" = "Linux" ]; then + TEST_PATH="$HOME/.local/share/agy/bin" + if [ -f "$TEST_PATH/agy" ]; then + AGY_PATH="$TEST_PATH" + fi + elif [ "$RUNNER_OS" = "macOS" ]; then + TEST_PATH="$HOME/Library/Application Support/agy/bin" + if [ -f "$TEST_PATH/agy" ]; then + AGY_PATH="$TEST_PATH" + fi + fi + + if [ -n "$AGY_PATH" ]; then + echo "Found agy CLI at: $AGY_PATH" + if [ "$RUNNER_OS" = "Windows" ]; then + WIN_PATH="C:\\Users\\akhil\\AppData\\Local\\agy\\bin" + echo "$WIN_PATH" >> "$GITHUB_PATH" + else + echo "$AGY_PATH" >> "$GITHUB_PATH" + fi else - npm install -g @google/antigravity-cli || sudo npm install -g @google/antigravity-cli - NPM_PREFIX=$(npm config get prefix) - echo "$NPM_PREFIX/bin" >> "$GITHUB_PATH" + echo "agy CLI not found in standard paths. Searching in $HOME..." + if [ "$RUNNER_OS" = "Windows" ]; then + FOUND=$(find /c/Users/akhil -maxdepth 5 -name "agy.exe" 2>/dev/null | head -n 1) + else + FOUND=$(find "$HOME" -maxdepth 5 -name "agy" 2>/dev/null | head -n 1) + fi + + if [ -n "$FOUND" ]; then + AGY_DIR=$(dirname "$FOUND") + echo "Found agy CLI at: $FOUND" + if [ "$RUNNER_OS" = "Windows" ]; then + WIN_DIR=$(cygpath -w "$AGY_DIR") + echo "$WIN_DIR" >> "$GITHUB_PATH" + else + echo "$AGY_DIR" >> "$GITHUB_PATH" + fi + else + echo "::error::agy CLI could not be found on this runner!" + exit 1; + fi fi fi From 1dfb05e0669d40600581251dc53e113bf5d48268 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 15:18:46 -0400 Subject: [PATCH 08/33] feat(e2e): robustly resolve agy CLI paths and add debug logs --- .github/workflows/fleet-e2e.yml | 54 ++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index c1140ab6..a8a688d9 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -139,11 +139,42 @@ jobs: PROVIDER='${{ steps.suite.outputs.pm_provider }}' if [ "$PROVIDER" = "agy" ]; then echo "PM provider is agy. Locating agy CLI..." + echo "whoami: $(whoami)" + echo "USERPROFILE: $USERPROFILE" + echo "LOCALAPPDATA: $LOCALAPPDATA" + echo "HOME: $HOME" + + # Check if agy is already in PATH + if command -v agy &>/dev/null; then + echo "agy CLI is already in PATH: $(command -v agy)" + exit 0 + fi + AGY_PATH="" if [ "$RUNNER_OS" = "Windows" ]; then - TEST_PATH="/c/Users/akhil/AppData/Local/agy/bin" - if [ -f "$TEST_PATH/agy.exe" ]; then - AGY_PATH="$TEST_PATH" + # Try LOCALAPPDATA + if [ -n "$LOCALAPPDATA" ]; then + TEST_PATH=$(cygpath -u "$LOCALAPPDATA/agy/bin") + echo "Checking: $TEST_PATH" + if [ -f "$TEST_PATH/agy.exe" ]; then + AGY_PATH="$TEST_PATH" + fi + fi + # Try USERPROFILE + if [ -z "$AGY_PATH" ] && [ -n "$USERPROFILE" ]; then + TEST_PATH=$(cygpath -u "$USERPROFILE/AppData/Local/agy/bin") + echo "Checking: $TEST_PATH" + if [ -f "$TEST_PATH/agy.exe" ]; then + AGY_PATH="$TEST_PATH" + fi + fi + # Fallback hardcoded path + if [ -z "$AGY_PATH" ]; then + TEST_PATH="/c/Users/akhil/AppData/Local/agy/bin" + echo "Checking fallback: $TEST_PATH" + if [ -f "$TEST_PATH/agy.exe" ]; then + AGY_PATH="$TEST_PATH" + fi fi elif [ "$RUNNER_OS" = "Linux" ]; then TEST_PATH="$HOME/.local/share/agy/bin" @@ -160,17 +191,26 @@ jobs: if [ -n "$AGY_PATH" ]; then echo "Found agy CLI at: $AGY_PATH" if [ "$RUNNER_OS" = "Windows" ]; then - WIN_PATH="C:\\Users\\akhil\\AppData\\Local\\agy\\bin" + WIN_PATH=$(cygpath -w "$AGY_PATH") echo "$WIN_PATH" >> "$GITHUB_PATH" else echo "$AGY_PATH" >> "$GITHUB_PATH" fi else - echo "agy CLI not found in standard paths. Searching in $HOME..." + echo "agy CLI not found in standard paths. Searching..." + SEARCH_DIR="" if [ "$RUNNER_OS" = "Windows" ]; then - FOUND=$(find /c/Users/akhil -maxdepth 5 -name "agy.exe" 2>/dev/null | head -n 1) + if [ -n "$USERPROFILE" ]; then + SEARCH_DIR=$(cygpath -u "$USERPROFILE") + else + SEARCH_DIR="/c/Users/akhil" + fi + echo "Searching in $SEARCH_DIR..." + FOUND=$(find "$SEARCH_DIR" -maxdepth 5 -name "agy.exe" 2>/dev/null | head -n 1) else - FOUND=$(find "$HOME" -maxdepth 5 -name "agy" 2>/dev/null | head -n 1) + SEARCH_DIR="$HOME" + echo "Searching in $SEARCH_DIR..." + FOUND=$(find "$SEARCH_DIR" -maxdepth 5 -name "agy" 2>/dev/null | head -n 1) fi if [ -n "$FOUND" ]; then From e4e7925fd898b901b05d8f5fc45fa52f84c8e4b5 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 15:22:18 -0400 Subject: [PATCH 09/33] feat(e2e): add AppData/Local file listing for agy debug --- .github/workflows/fleet-e2e.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index a8a688d9..b4326edd 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -152,6 +152,13 @@ jobs: AGY_PATH="" if [ "$RUNNER_OS" = "Windows" ]; then + echo "Listing AppData/Local content matching agy:" + ls -la "/c/Users/akhil/AppData/Local" | grep -i agy || true + if [ -d "/c/Users/akhil/AppData/Local/agy" ]; then + echo "Listing AppData/Local/agy recursively:" + find "/c/Users/akhil/AppData/Local/agy" 2>/dev/null || true + fi + # Try LOCALAPPDATA if [ -n "$LOCALAPPDATA" ]; then TEST_PATH=$(cygpath -u "$LOCALAPPDATA/agy/bin") From a6479010df055e63df2b42ce81d1f73cf28016a2 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 15:26:21 -0400 Subject: [PATCH 10/33] feat(e2e): debug gemini CLI location on runner --- .github/workflows/fleet-e2e.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index b4326edd..58578876 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -139,6 +139,8 @@ jobs: PROVIDER='${{ steps.suite.outputs.pm_provider }}' if [ "$PROVIDER" = "agy" ]; then echo "PM provider is agy. Locating agy CLI..." + echo "which gemini: $(which gemini || echo 'not found')" + echo "where gemini: $(where gemini 2>/dev/null || echo 'not found')" echo "whoami: $(whoami)" echo "USERPROFILE: $USERPROFILE" echo "LOCALAPPDATA: $LOCALAPPDATA" From 0d16fc6f218b666b97f552cf539947dc949a58fb Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 15:33:05 -0400 Subject: [PATCH 11/33] feat(e2e): add local download fallback for agy CLI on Windows runner --- .github/workflows/fleet-e2e.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index 58578876..1cf29a56 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -152,6 +152,20 @@ jobs: exit 0 fi + # Fallback: Attempt to download from local host machine HTTP server if missing + if [ "$RUNNER_OS" = "Windows" ]; then + if ! command -v agy &>/dev/null && [ ! -f "/c/Users/akhil/AppData/Local/agy/bin/agy.exe" ]; then + echo "Attempting to download agy.exe from local host machine..." + mkdir -p "/c/Users/akhil/AppData/Local/agy/bin" + if curl -fsSL --connect-timeout 5 http://192.168.1.158:8888/agy.exe -o "/c/Users/akhil/AppData/Local/agy/bin/agy.exe"; then + echo "Successfully downloaded agy.exe from http://192.168.1.158:8888/agy.exe" + chmod +x "/c/Users/akhil/AppData/Local/agy/bin/agy.exe" + else + echo "Failed to download agy.exe from local host server" + fi + fi + fi + AGY_PATH="" if [ "$RUNNER_OS" = "Windows" ]; then echo "Listing AppData/Local content matching agy:" From 333e86899a558f02197e1e04599629ba031e2759 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 19:02:42 -0400 Subject: [PATCH 12/33] feat: support agy LLM provider OAuth and simplify CLI location in workflow --- .github/workflows/fleet-e2e.yml | 114 ++------------------------------ src/cli/install.ts | 3 +- src/providers/agy.ts | 7 +- 3 files changed, 13 insertions(+), 111 deletions(-) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index 1cf29a56..2844279e 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -138,117 +138,15 @@ jobs: run: | PROVIDER='${{ steps.suite.outputs.pm_provider }}' if [ "$PROVIDER" = "agy" ]; then - echo "PM provider is agy. Locating agy CLI..." - echo "which gemini: $(which gemini || echo 'not found')" - echo "where gemini: $(where gemini 2>/dev/null || echo 'not found')" - echo "whoami: $(whoami)" - echo "USERPROFILE: $USERPROFILE" - echo "LOCALAPPDATA: $LOCALAPPDATA" - echo "HOME: $HOME" - - # Check if agy is already in PATH - if command -v agy &>/dev/null; then - echo "agy CLI is already in PATH: $(command -v agy)" - exit 0 - fi - - # Fallback: Attempt to download from local host machine HTTP server if missing - if [ "$RUNNER_OS" = "Windows" ]; then - if ! command -v agy &>/dev/null && [ ! -f "/c/Users/akhil/AppData/Local/agy/bin/agy.exe" ]; then - echo "Attempting to download agy.exe from local host machine..." - mkdir -p "/c/Users/akhil/AppData/Local/agy/bin" - if curl -fsSL --connect-timeout 5 http://192.168.1.158:8888/agy.exe -o "/c/Users/akhil/AppData/Local/agy/bin/agy.exe"; then - echo "Successfully downloaded agy.exe from http://192.168.1.158:8888/agy.exe" - chmod +x "/c/Users/akhil/AppData/Local/agy/bin/agy.exe" - else - echo "Failed to download agy.exe from local host server" - fi - fi - fi - - AGY_PATH="" + echo "Adding standard agy CLI installation paths to PATH..." if [ "$RUNNER_OS" = "Windows" ]; then - echo "Listing AppData/Local content matching agy:" - ls -la "/c/Users/akhil/AppData/Local" | grep -i agy || true - if [ -d "/c/Users/akhil/AppData/Local/agy" ]; then - echo "Listing AppData/Local/agy recursively:" - find "/c/Users/akhil/AppData/Local/agy" 2>/dev/null || true - fi - - # Try LOCALAPPDATA - if [ -n "$LOCALAPPDATA" ]; then - TEST_PATH=$(cygpath -u "$LOCALAPPDATA/agy/bin") - echo "Checking: $TEST_PATH" - if [ -f "$TEST_PATH/agy.exe" ]; then - AGY_PATH="$TEST_PATH" - fi - fi - # Try USERPROFILE - if [ -z "$AGY_PATH" ] && [ -n "$USERPROFILE" ]; then - TEST_PATH=$(cygpath -u "$USERPROFILE/AppData/Local/agy/bin") - echo "Checking: $TEST_PATH" - if [ -f "$TEST_PATH/agy.exe" ]; then - AGY_PATH="$TEST_PATH" - fi - fi - # Fallback hardcoded path - if [ -z "$AGY_PATH" ]; then - TEST_PATH="/c/Users/akhil/AppData/Local/agy/bin" - echo "Checking fallback: $TEST_PATH" - if [ -f "$TEST_PATH/agy.exe" ]; then - AGY_PATH="$TEST_PATH" - fi - fi + AGY_DIR="$LOCALAPPDATA/agy/bin" + [ -z "$LOCALAPPDATA" ] && AGY_DIR="$USERPROFILE/AppData/Local/agy/bin" + echo "$(cygpath -w "$AGY_DIR")" >> "$GITHUB_PATH" elif [ "$RUNNER_OS" = "Linux" ]; then - TEST_PATH="$HOME/.local/share/agy/bin" - if [ -f "$TEST_PATH/agy" ]; then - AGY_PATH="$TEST_PATH" - fi + echo "$HOME/.local/share/agy/bin" >> "$GITHUB_PATH" elif [ "$RUNNER_OS" = "macOS" ]; then - TEST_PATH="$HOME/Library/Application Support/agy/bin" - if [ -f "$TEST_PATH/agy" ]; then - AGY_PATH="$TEST_PATH" - fi - fi - - if [ -n "$AGY_PATH" ]; then - echo "Found agy CLI at: $AGY_PATH" - if [ "$RUNNER_OS" = "Windows" ]; then - WIN_PATH=$(cygpath -w "$AGY_PATH") - echo "$WIN_PATH" >> "$GITHUB_PATH" - else - echo "$AGY_PATH" >> "$GITHUB_PATH" - fi - else - echo "agy CLI not found in standard paths. Searching..." - SEARCH_DIR="" - if [ "$RUNNER_OS" = "Windows" ]; then - if [ -n "$USERPROFILE" ]; then - SEARCH_DIR=$(cygpath -u "$USERPROFILE") - else - SEARCH_DIR="/c/Users/akhil" - fi - echo "Searching in $SEARCH_DIR..." - FOUND=$(find "$SEARCH_DIR" -maxdepth 5 -name "agy.exe" 2>/dev/null | head -n 1) - else - SEARCH_DIR="$HOME" - echo "Searching in $SEARCH_DIR..." - FOUND=$(find "$SEARCH_DIR" -maxdepth 5 -name "agy" 2>/dev/null | head -n 1) - fi - - if [ -n "$FOUND" ]; then - AGY_DIR=$(dirname "$FOUND") - echo "Found agy CLI at: $FOUND" - if [ "$RUNNER_OS" = "Windows" ]; then - WIN_DIR=$(cygpath -w "$AGY_DIR") - echo "$WIN_DIR" >> "$GITHUB_PATH" - else - echo "$AGY_DIR" >> "$GITHUB_PATH" - fi - else - echo "::error::agy CLI could not be found on this runner!" - exit 1; - fi + echo "$HOME/Library/Application Support/agy/bin" >> "$GITHUB_PATH" fi fi diff --git a/src/cli/install.ts b/src/cli/install.ts index e9783702..aa3395cb 100644 --- a/src/cli/install.ts +++ b/src/cli/install.ts @@ -622,8 +622,9 @@ ${killHint} beadsVersion = 'not available'; } + const clientName = llm === 'claude' ? 'Claude Code' : paths.name; const instructions = llm === 'claude' ? 'Run /mcp in Claude Code to load the server.' : `Restart ${paths.name} to load the server.`; - const forceNote = force ? '\nRestart Claude Code to reload the MCP server.' : ''; + const forceNote = force ? `\nRestart ${clientName} to reload the MCP server.` : ''; console.log(` Apra Fleet ${serverVersion} installed successfully for ${paths.name}. Binary: ${BIN_DIR} diff --git a/src/providers/agy.ts b/src/providers/agy.ts index e0005060..6d95e27f 100644 --- a/src/providers/agy.ts +++ b/src/providers/agy.ts @@ -119,7 +119,10 @@ export class AgyProvider implements ProviderAdapter { } oauthCredentialFiles(): Array<{ localPath: string; remotePath: string }> | null { - return null; + return [ + { localPath: '~/.gemini/oauth_creds.json', remotePath: '~/.gemini/oauth_creds.json' }, + { localPath: '~/.gemini/google_accounts.json', remotePath: '~/.gemini/google_accounts.json' }, + ]; } oauthSettingsMerge(): Record | null { @@ -127,7 +130,7 @@ export class AgyProvider implements ProviderAdapter { } oauthEnvVarsToUnset(): string[] { - return []; + return ['GEMINI_API_KEY']; } authEnvVarForToken(token: string): string { From b867282300f27aebd1bb96707e8a5833df3e3c90 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 22:42:51 -0400 Subject: [PATCH 13/33] Set agy print timeout to 45m in E2E workflow --- .github/workflows/fleet-e2e.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index 2844279e..d9628180 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -298,6 +298,7 @@ jobs: > "$RUN_DIR/raw-setup.txt" 2>&1 || LLM_EXIT=$? else agy --dangerously-skip-permissions \ + --print-timeout 45m \ --add-dir "$RUN_DIR" \ --add-dir "$CFG_DIR" \ --add-dir "$(dirname "$DOER_FOLDER")" \ @@ -355,6 +356,7 @@ jobs: > "$RUN_DIR/raw-sprint.txt" 2>&1 || LLM_EXIT=$? else agy --dangerously-skip-permissions \ + --print-timeout 45m \ --add-dir "$RUN_DIR" \ --add-dir "$CFG_DIR" \ --add-dir "$(dirname "$DOER_FOLDER")" \ @@ -447,7 +449,7 @@ jobs: elif [ "$PROVIDER" = "gemini" ]; then gemini --skip-trust -p "$(cat "$GITHUB_WORKSPACE/.github/e2e/t6-teardown.md")" > "$RUN_DIR/t6-output.txt" 2>&1 || true else - agy --dangerously-skip-permissions -p "$(cat "$GITHUB_WORKSPACE/.github/e2e/t6-teardown.md")" > "$RUN_DIR/t6-output.txt" 2>&1 || true + agy --dangerously-skip-permissions --print-timeout 45m -p "$(cat "$GITHUB_WORKSPACE/.github/e2e/t6-teardown.md")" > "$RUN_DIR/t6-output.txt" 2>&1 || true fi - name: Upload results From 8d22caff3ed6b967e182a8c4d0f993b64af8a3fe Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Thu, 21 May 2026 23:42:27 -0400 Subject: [PATCH 14/33] feat(agy): support agy CLI as provider, decouple to AGY.md, add print-mode keep-alive rules and E2E timeout settings --- .github/e2e/setup-script.md | 3 +++ .github/e2e/sprint-script.md | 3 +++ AGY.md | 25 +++++++++++++++++++++++++ skills/pm/cleanup.md | 2 +- skills/pm/context-file.md | 2 +- skills/pm/tpl-doer.md | 2 +- skills/pm/tpl-reviewer.md | 4 ++-- src/providers/agy.ts | 2 +- tests/providers.test.ts | 2 +- 9 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 AGY.md diff --git a/.github/e2e/setup-script.md b/.github/e2e/setup-script.md index 5a4d1daf..adf9c80b 100644 --- a/.github/e2e/setup-script.md +++ b/.github/e2e/setup-script.md @@ -6,6 +6,9 @@ PM: {{PM_OS}} / {{PM_PROVIDER}} | VCS: {{VCS}} | Toy: {{TOY_PROJECT_URL}} Do all work yourself in this conversation -- no sub-agents. If a step fails, move on to the next one. +> [!IMPORTANT] +> If you are running in non-interactive print mode (e.g. through a CI runner or using the `-p` / `--print` flags) and are waiting for a background task or moving to the next checkpoint, you MUST execute a tool call (such as a status check command or reading progress files) in every intermediate turn. Do not return a text-only response until the entire script/phase/task is fully completed. + ## Checkpoints When you finish a step, print one line, exactly like this, as plain text (no code block, no backticks): diff --git a/.github/e2e/sprint-script.md b/.github/e2e/sprint-script.md index c7e6fbc2..c16994d1 100644 --- a/.github/e2e/sprint-script.md +++ b/.github/e2e/sprint-script.md @@ -9,6 +9,9 @@ PM: {{PM_OS}} / {{PM_PROVIDER}} | VCS: {{VCS}} | Toy: {{TOY_PROJECT_URL}} - **doer** (name: `doer`, provider: {{DOER_PROVIDER}}) - **reviewer** (name: `reviewer`, provider: {{REVIEWER_PROVIDER}}) +> [!IMPORTANT] +> If you are running in non-interactive print mode (e.g. through a CI runner or using the `-p` / `--print` flags) and are waiting for a background task or moving to the next checkpoint, you MUST execute a tool call (such as a status check command or reading progress files) in every intermediate turn. Do not return a text-only response until the entire script/phase/task is fully completed. + ## Checkpoints When you finish a step, print one line, exactly like this, as plain text (no code block, no backticks): diff --git a/AGY.md b/AGY.md new file mode 100644 index 00000000..23f18612 --- /dev/null +++ b/AGY.md @@ -0,0 +1,25 @@ +# Apra Fleet — Antigravity (agy) Context + +Read `README.md` in this repo for the full tool reference, installation, member registration, multi-provider setup, git authentication, PM skill commands, and troubleshooting. + +## Dev commands + +```bash +npm install && npm run build # Build from source +npm test # Unit tests (vitest) +npm run build:binary # Build single-executable binary +node dist/index.js install # Dev-mode install +``` + +## Conventions + +- Branch naming: `feat/`, `fix/`, `chore/` +- Commit style: `(): ` - e.g. `fix(ssh): handle key rotation timeout` +- Never push to `main` directly; open a PR +- See [Architecture](docs/architecture.md) for internal structure +- ASCII only: never write non-ASCII characters to any file. Use `-` for dashes, `->` for arrows, `[OK]` for checkmarks, etc. + +## Non-Interactive Print Mode Keep-Alive Rule +If you are running in non-interactive print mode (such as via `agy -p` / `--print`) and are waiting for a background task or checkpoint: +- **YOU MUST CALL A TOOL ON EVERY TURN** (e.g. `run_command` checking status, reading a log file, or scheduling a timer/check). +- **NEVER** return a text-only response (a turn with no tool calls) until the entire task/sprint/phase is fully completed. A text-only response will cause the CLI manager to exit immediately, aborting the run. diff --git a/skills/pm/cleanup.md b/skills/pm/cleanup.md index aa8bafc8..e1ba776f 100644 --- a/skills/pm/cleanup.md +++ b/skills/pm/cleanup.md @@ -3,7 +3,7 @@ Run at sprint completion, before raising the PR. Execute on both doer and reviewer via `execute_command`: ``` -git rm --cached .fleet-task*.md 2>/dev/null || true; rm -f .fleet-task*.md; git rm PLAN.md progress.json feedback.md requirements.md design.md 2>/dev/null; for file in CLAUDE.md GEMINI.md AGENTS.md COPILOT.md; do if git show origin/main:"$file" > /dev/null 2>&1; then git checkout origin/main -- "$file"; else git rm -f "$file" 2>/dev/null || rm -f "$file"; fi; done; git commit -m "cleanup: remove fleet control files" && git push +git rm --cached .fleet-task*.md 2>/dev/null || true; rm -f .fleet-task*.md; git rm PLAN.md progress.json feedback.md requirements.md design.md 2>/dev/null; for file in CLAUDE.md GEMINI.md AGENTS.md COPILOT.md AGY.md; do if git show origin/main:"$file" > /dev/null 2>&1; then git checkout origin/main -- "$file"; else git rm -f "$file" 2>/dev/null || rm -f "$file"; fi; done; git commit -m "cleanup: remove fleet control files" && git push ``` **Why:** If a file like `CLAUDE.md` or `AGENTS.md` exists in `main`, it is a project deliverable - the sprint replaced it with a context file of the same name. Restoring from `origin/main` ensures the deliverable is preserved. Only files absent from `main` (pure sprint context) are deleted. diff --git a/skills/pm/context-file.md b/skills/pm/context-file.md index 23b0e198..df5b24e3 100644 --- a/skills/pm/context-file.md +++ b/skills/pm/context-file.md @@ -9,7 +9,7 @@ Use `member_detail` -> `llmProvider` to determine the correct target filename: | Provider | Filename | |----------|----------| | Claude | CLAUDE.md | -| Antigravity (agy) | GEMINI.md | +| Antigravity (agy) | AGY.md | | Gemini | GEMINI.md | | Codex | AGENTS.md | | Copilot | COPILOT.md | diff --git a/skills/pm/tpl-doer.md b/skills/pm/tpl-doer.md index aa5f3469..9d4405f3 100644 --- a/skills/pm/tpl-doer.md +++ b/skills/pm/tpl-doer.md @@ -38,6 +38,6 @@ If this task requires secrets, API keys, or tokens (e.g., external API calls, pr - NEVER skip tasks — execute in order - Read PLAN.md before starting each task - Commit and push PLAN.md, progress.json, and all project docs (design.md, feedback-*.md) at every turn — reviewers depend on them -- NEVER commit this agent context file (CLAUDE.md / GEMINI.md / AGENTS.md / COPILOT.md) - it is role-specific and not shared +- NEVER commit this agent context file (CLAUDE.md / GEMINI.md / AGENTS.md / COPILOT.md / AGY.md) - it is role-specific and not shared - NEVER push to the base branch (main, master, or integration branch) - always work on feature branches - NEVER stage or commit `.fleet-task.md` - these are ephemeral prompt delivery files managed by the fleet server diff --git a/skills/pm/tpl-reviewer.md b/skills/pm/tpl-reviewer.md index 1ad4093e..8fee611e 100644 --- a/skills/pm/tpl-reviewer.md +++ b/skills/pm/tpl-reviewer.md @@ -33,7 +33,7 @@ Review scope covers all phases from Phase 1 through the current phase — not ju - Temp/scratch: `*.tmp`, `*.txt`, `*.base64` - Tool/security configs: `.gemini/`, `.claude/settings.json`, `permissions.json` - Unrelated scripts or stale artifacts: `plan-NNN.md`, `requirements-NNN.md`, `progress-NNN.json` - - Tracked agent context: `GEMINI.md`, `CLAUDE.md`, `AGENTS.md`, `COPILOT.md` (ensure gitignored) + - Tracked agent context: `GEMINI.md`, `CLAUDE.md`, `AGENTS.md`, `COPILOT.md`, `AGY.md` (ensure gitignored) Permit only source, tests, and active sprint tracking (`PLAN.md`, `progress.json`, `requirements.md`, `feedback.md`, design docs). When in doubt, flag it. @@ -69,4 +69,4 @@ Commit feedback.md and push. ## Rules - NEVER push to the base branch (main, master, or integration branch) - always work on feature branches -- NEVER commit this agent context file (CLAUDE.md / GEMINI.md / AGENTS.md / COPILOT.md) - it is role-specific and not shared +- NEVER commit this agent context file (CLAUDE.md / GEMINI.md / AGENTS.md / COPILOT.md / AGY.md) - it is role-specific and not shared diff --git a/src/providers/agy.ts b/src/providers/agy.ts index 6d95e27f..fe33e667 100644 --- a/src/providers/agy.ts +++ b/src/providers/agy.ts @@ -9,7 +9,7 @@ export class AgyProvider implements ProviderAdapter { readonly processName = 'agy'; readonly authEnvVar = 'GEMINI_API_KEY'; readonly credentialPath = '~/.gemini/antigravity-cli/settings.json'; - readonly instructionFileName = 'GEMINI.md'; + readonly instructionFileName = 'AGY.md'; cliCommand(args: string): string { return `agy ${args}`; diff --git a/tests/providers.test.ts b/tests/providers.test.ts index 84b4ddf0..3ae0a83a 100644 --- a/tests/providers.test.ts +++ b/tests/providers.test.ts @@ -871,7 +871,7 @@ describe('AgyProvider', () => { expect(p.processName).toBe('agy'); expect(p.authEnvVar).toBe('GEMINI_API_KEY'); expect(p.credentialPath).toBe('~/.gemini/antigravity-cli/settings.json'); - expect(p.instructionFileName).toBe('GEMINI.md'); + expect(p.instructionFileName).toBe('AGY.md'); }); it('builds cliCommand', () => { From 1a174f2a41fa37a56c2c12a57224eaf8e6ba8107 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 09:08:33 -0400 Subject: [PATCH 15/33] fix(pm): enforce local linter checks in doer and reviewer templates --- skills/pm/tpl-doer.md | 14 +++++++------- skills/pm/tpl-reviewer.md | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/skills/pm/tpl-doer.md b/skills/pm/tpl-doer.md index 9d4405f3..9a415882 100644 --- a/skills/pm/tpl-doer.md +++ b/skills/pm/tpl-doer.md @@ -1,4 +1,4 @@ -# {{PROJECT_NAME}} — Plan Execution +# {{PROJECT_NAME}} - Plan Execution ## Context Recovery Before starting any work: `git log --oneline -10` @@ -16,11 +16,11 @@ On each invocation: ## Verify Checkpoints Tasks with type "verify" are checkpoints. When you reach one: -1. Run the project build step (e.g. `npm run build`, `tsc`, `cargo build`) first, then run the full test suite (unit, integration, e2e). Both must pass. +1. Run the project build step (e.g. `npm run build`, `tsc`, `cargo build`) and linter check (e.g. `npm run lint`, `eslint`, `cargo clippy` if configured) first, then run the full test suite (unit, integration, e2e). All of them must pass. 2. Confirm all prior tasks in the group work correctly 3. Update progress.json with test results and issues found -4. `git push origin {{branch}}` — code must be on origin before PM reviews -5. STOP — do not continue. Report status so the PM can review. +4. `git push origin {{branch}}` - code must be on origin before PM reviews +5. STOP - do not continue. Report status so the PM can review. ## Branch Hygiene - Before creating a branch: `git fetch origin && git checkout origin/{{base_branch}}` @@ -32,12 +32,12 @@ If this task requires secrets, API keys, or tokens (e.g., external API calls, pr ## Rules - ONE task at a time, then commit, then continue -- After every commit: run fast/unit tests. If they fail, fix before moving to the next task. +- After every commit: run fast/unit tests and linter checks. If they fail, fix before moving to the next task. - Always update progress.json after each task - Blocker? Set status to "blocked" with notes, then STOP -- NEVER skip tasks — execute in order +- NEVER skip tasks - execute in order - Read PLAN.md before starting each task -- Commit and push PLAN.md, progress.json, and all project docs (design.md, feedback-*.md) at every turn — reviewers depend on them +- Commit and push PLAN.md, progress.json, and all project docs (design.md, feedback-*.md) at every turn - reviewers depend on them - NEVER commit this agent context file (CLAUDE.md / GEMINI.md / AGENTS.md / COPILOT.md / AGY.md) - it is role-specific and not shared - NEVER push to the base branch (main, master, or integration branch) - always work on feature branches - NEVER stage or commit `.fleet-task.md` - these are ephemeral prompt delivery files managed by the fleet server diff --git a/skills/pm/tpl-reviewer.md b/skills/pm/tpl-reviewer.md index 8fee611e..bddafc25 100644 --- a/skills/pm/tpl-reviewer.md +++ b/skills/pm/tpl-reviewer.md @@ -1,4 +1,4 @@ -# {{PROJECT_NAME}} — Code Review +# {{PROJECT_NAME}} - Code Review ## Context Recovery Before starting any review: `git log --oneline {{base_branch}}..{{branch}}` @@ -6,17 +6,17 @@ Before starting any review: `git log --oneline {{base_branch}}..{{branch}}` ## Review Model You are reviewing work tracked in PLAN.md and progress.json. -Review scope covers all phases from Phase 1 through the current phase — not just the latest diff. Code written in earlier phases may have regressed or been invalidated by later changes. +Review scope covers all phases from Phase 1 through the current phase - not just the latest diff. Code written in earlier phases may have regressed or been invalidated by later changes. ## On each review 1. Run `git log --oneline -- feedback.md` then `git show ` on prior versions to understand previous findings and how the doer addressed them. Incorporate the doer's responses into your review notes so the full picture is captured in the new write-up. -2. Read progress.json — identify which tasks are marked completed since last review -3. Read PLAN.md, requirements.md, and any design docs in the work folder — verify code aligns with requirements intent, not just plan mechanics +2. Read progress.json - identify which tasks are marked completed since last review +3. Read PLAN.md, requirements.md, and any design docs in the work folder - verify code aligns with requirements intent, not just plan mechanics 4. `git diff` the relevant commits against the base branch 5. Check each completed task against its "done" criteria in PLAN.md -6. Run the project build step first, then run ALL tests (unit, integration, e2e). Both must pass — if either fails, CHANGES NEEDED. -7. Verify CI passes for the latest push — if CI is red, CHANGES NEEDED regardless of code quality +6. Run the project build step and linter check first, then run ALL tests (unit, integration, e2e). All of them must pass - if any fail, CHANGES NEEDED. +7. Verify CI passes for the latest push - if CI is red, CHANGES NEEDED regardless of code quality 8. Check for regressions in previously approved phases ## What to check @@ -42,7 +42,7 @@ Review scope covers all phases from Phase 1 through the current phase — not ju Overwrite feedback.md with this structure: ``` -# {{sprint_name}} — Code Review +# {{sprint_name}} - Code Review **Reviewer:** {{member_name}} **Date:** YYYY-MM-DD HH:MM:SS+TZ @@ -63,7 +63,7 @@ Overwrite feedback.md with this structure: ``` -If verdict is CHANGES NEEDED: the doer annotates each relevant section with `**Doer:** fixed in commit ` before requesting re-review. +If verdict is CHANGES NEEDED: the doer annotates each relevant section with **Doer:** fixed in commit - before requesting re-review. Commit feedback.md and push. From c42f6fe82b49180e0264c5cf7e9cfe3216897dc2 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 12:46:47 -0400 Subject: [PATCH 16/33] fix(e2e): fix s8.1 agy suite output capture and checkpoint parsing - Add --output-format stream-json to agy PM invocations (setup + sprint) so raw-setup.txt and raw-sprint.txt are populated instead of empty. Previously agy wrote nothing to stdout when output was redirected to a file because no JSON streaming flag was set. - Fix Seed PM permissions to write agy settings to ~/.gemini/antigravity-cli/ (home dir) instead of \/.gemini/antigravity-cli/. agy resolves settings from the home directory, not the working directory, so the seeded allow-list was silently ignored on every s8.x run. - Replace extract-results.mjs agy stub (which hardcoded tokens=0 and returned the raw file as opaque text) with proper JSONL parsing matching the gemini schema. agy emits the same stream-json event types (type:message, type:result with stats), so CHECKPOINTs and token telemetry are now correctly extracted. - Fix session ID grep to also match type:init (emitted by gemini/agy) in addition to type:system (emitted by claude), so session IDs are captured for all providers. - Improve error-diagnosis greps in both phases: fall back to tail -1 when no type:result/type:final line exists (happens for agy in plain-text fallback mode), and pipe jq through || echo so non-JSON last lines are still surfaced in the log. --- .github/e2e/extract-results.mjs | 36 ++++++++++++++++++++++++++------- .github/workflows/fleet-e2e.yml | 34 ++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/.github/e2e/extract-results.mjs b/.github/e2e/extract-results.mjs index ca84f5c8..627f2e1f 100644 --- a/.github/e2e/extract-results.mjs +++ b/.github/e2e/extract-results.mjs @@ -27,13 +27,35 @@ function processRawFile(filePath, provider) { const content = readFileSync(filePath, 'utf8'); if (provider === 'agy') { - return { - assistantText: content, - tokensIn: 0, - tokensOut: 0, - cacheCreate: 0, - cacheRead: 0 - }; + // agy emits the same JSONL stream-json schema as gemini CLI; parse identically. + // Falls back gracefully if the file is plain text (no JSON lines parsed -> empty text). + for (const line of content.split('\n')) { + const trimmed = line.trim(); + if (!trimmed) continue; + let obj; + try { obj = JSON.parse(trimmed); } catch { continue; } + + // Token telemetry: agy uses the same result.stats shape as gemini + if (obj.type === 'result' && obj.stats) { + const s = obj.stats; + tokensIn += (s.input ?? 0); + tokensOut += (s.output_tokens ?? 0); + cacheRead += (s.cached ?? 0); + // cacheCreate stays 0: agy does not report cache writes (same as gemini) + } + + // Text extraction: result block, assistant message blocks, or delta messages + if (obj.type === 'result' && obj.result) { + assistantText += '\n' + obj.result; + } else if (obj.type === 'assistant') { + for (const block of obj.message?.content ?? []) { + if (block?.type === 'text' && block.text) assistantText += '\n' + block.text; + } + } else if (obj.type === 'message' && obj.role === 'assistant' && typeof obj.content === 'string') { + assistantText += obj.content; + } + } + return { assistantText, tokensIn, tokensOut, cacheCreate, cacheRead }; } for (const line of content.split('\n')) { diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index d9628180..b108e25c 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -256,10 +256,11 @@ jobs: mkdir -p "$RUN_DIR/.gemini" cp "$GITHUB_WORKSPACE/.github/e2e/pm-settings/gemini.settings.json" "$RUN_DIR/.gemini/settings.json" else - mkdir -p "$RUN_DIR/.gemini/antigravity-cli" - cp "$GITHUB_WORKSPACE/.github/e2e/pm-settings/gemini.settings.json" "$RUN_DIR/.gemini/antigravity-cli/settings.json" + # agy reads settings from ~/.gemini/antigravity-cli/ (home dir), not from RUN_DIR + mkdir -p "$HOME/.gemini/antigravity-cli" + cp "$GITHUB_WORKSPACE/.github/e2e/pm-settings/gemini.settings.json" "$HOME/.gemini/antigravity-cli/settings.json" fi - echo "Seeded $PROVIDER PM permissions into $RUN_DIR" + echo "Seeded $PROVIDER PM permissions into home dir" - name: Run fleet e2e - setup phase (${{ steps.suite.outputs.pm_provider }}) id: e2e_setup @@ -298,6 +299,7 @@ jobs: > "$RUN_DIR/raw-setup.txt" 2>&1 || LLM_EXIT=$? else agy --dangerously-skip-permissions \ + --output-format stream-json \ --print-timeout 45m \ --add-dir "$RUN_DIR" \ --add-dir "$CFG_DIR" \ @@ -310,11 +312,16 @@ jobs: if [ "$LLM_EXIT" -ne 0 ]; then echo "::error::LLM CLI ($PROVIDER) setup phase exited with code $LLM_EXIT" RAW="$RUN_DIR/raw-setup.txt" - R=$(grep '"type":"result"' "$RAW" | tail -1) + R=$(grep '"type":"result"\|"type":"final"' "$RAW" | tail -1) + if [ -z "$R" ]; then + # agy (and other providers in plain-text mode) may not emit a result line + R=$(tail -1 "$RAW" 2>/dev/null) + fi if [ -n "$R" ]; then - echo "$R" | jq -r '"reason : " + (.subtype // "?") + (if .is_error then " (is_error)" else "" end), "errors : " + ((.errors // []) | join("; ") | if . == "" then "(none)" else . end), "message: " + ((.result // "(no final text)") | gsub("\n";" ") | .[0:300])' + echo "$R" | jq -r '"reason : " + (.subtype // "?") + (if .is_error then " (is_error)" else "" end), "errors : " + ((.errors // []) | join("; ") | if . == "" then "(none)" else . end), "message: " + ((.result // "(no final text)") | gsub("\n";" ") | .[0:300])' 2>/dev/null \ + || echo "last line: $R" else - echo "(no result line -- LLM produced no final output)" + echo "(no output -- LLM produced no output at all)" fi exit "$LLM_EXIT" fi @@ -356,6 +363,7 @@ jobs: > "$RUN_DIR/raw-sprint.txt" 2>&1 || LLM_EXIT=$? else agy --dangerously-skip-permissions \ + --output-format stream-json \ --print-timeout 45m \ --add-dir "$RUN_DIR" \ --add-dir "$CFG_DIR" \ @@ -366,7 +374,8 @@ jobs: fi # Extract PM session ID from sprint phase - SESSION_ID=$(grep -m1 '"type":"system"' "$RUN_DIR/raw-sprint.txt" \ + # gemini/agy emit type:init; claude emits type:system + SESSION_ID=$(grep -m1 '"type":"system"\|"type":"init"' "$RUN_DIR/raw-sprint.txt" \ | jq -r '.session_id // ""' 2>/dev/null || true) echo "session_id=$SESSION_ID" >> "$GITHUB_OUTPUT" @@ -383,11 +392,16 @@ jobs: if [ "$LLM_EXIT" -ne 0 ]; then echo "::error::LLM CLI ($PROVIDER) sprint phase exited with code $LLM_EXIT" RAW="$RUN_DIR/raw-sprint.txt" - R=$(grep '"type":"result"' "$RAW" | tail -1) + R=$(grep '"type":"result"\|"type":"final"' "$RAW" | tail -1) + if [ -z "$R" ]; then + # agy (and other providers in plain-text mode) may not emit a result line + R=$(tail -1 "$RAW" 2>/dev/null) + fi if [ -n "$R" ]; then - echo "$R" | jq -r '"reason : " + (.subtype // "?") + (if .is_error then " (is_error)" else "" end), "errors : " + ((.errors // []) | join("; ") | if . == "" then "(none)" else . end), "message: " + ((.result // "(no final text)") | gsub("\n";" ") | .[0:300])' + echo "$R" | jq -r '"reason : " + (.subtype // "?") + (if .is_error then " (is_error)" else "" end), "errors : " + ((.errors // []) | join("; ") | if . == "" then "(none)" else . end), "message: " + ((.result // "(no final text)") | gsub("\n";" ") | .[0:300])' 2>/dev/null \ + || echo "last line: $R" else - echo "(no result line -- LLM produced no final output)" + echo "(no output -- LLM produced no output at all)" fi exit "$LLM_EXIT" fi From 4479fbb861d9edc8d60c6d9d10c958c46bc76bab Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 14:17:51 -0400 Subject: [PATCH 17/33] Revert "fix(e2e): fix s8.1 agy suite output capture and checkpoint parsing" This reverts commit c42f6fe82b49180e0264c5cf7e9cfe3216897dc2. --- .github/e2e/extract-results.mjs | 36 +++++++-------------------------- .github/workflows/fleet-e2e.yml | 34 +++++++++---------------------- 2 files changed, 17 insertions(+), 53 deletions(-) diff --git a/.github/e2e/extract-results.mjs b/.github/e2e/extract-results.mjs index 627f2e1f..ca84f5c8 100644 --- a/.github/e2e/extract-results.mjs +++ b/.github/e2e/extract-results.mjs @@ -27,35 +27,13 @@ function processRawFile(filePath, provider) { const content = readFileSync(filePath, 'utf8'); if (provider === 'agy') { - // agy emits the same JSONL stream-json schema as gemini CLI; parse identically. - // Falls back gracefully if the file is plain text (no JSON lines parsed -> empty text). - for (const line of content.split('\n')) { - const trimmed = line.trim(); - if (!trimmed) continue; - let obj; - try { obj = JSON.parse(trimmed); } catch { continue; } - - // Token telemetry: agy uses the same result.stats shape as gemini - if (obj.type === 'result' && obj.stats) { - const s = obj.stats; - tokensIn += (s.input ?? 0); - tokensOut += (s.output_tokens ?? 0); - cacheRead += (s.cached ?? 0); - // cacheCreate stays 0: agy does not report cache writes (same as gemini) - } - - // Text extraction: result block, assistant message blocks, or delta messages - if (obj.type === 'result' && obj.result) { - assistantText += '\n' + obj.result; - } else if (obj.type === 'assistant') { - for (const block of obj.message?.content ?? []) { - if (block?.type === 'text' && block.text) assistantText += '\n' + block.text; - } - } else if (obj.type === 'message' && obj.role === 'assistant' && typeof obj.content === 'string') { - assistantText += obj.content; - } - } - return { assistantText, tokensIn, tokensOut, cacheCreate, cacheRead }; + return { + assistantText: content, + tokensIn: 0, + tokensOut: 0, + cacheCreate: 0, + cacheRead: 0 + }; } for (const line of content.split('\n')) { diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index b108e25c..d9628180 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -256,11 +256,10 @@ jobs: mkdir -p "$RUN_DIR/.gemini" cp "$GITHUB_WORKSPACE/.github/e2e/pm-settings/gemini.settings.json" "$RUN_DIR/.gemini/settings.json" else - # agy reads settings from ~/.gemini/antigravity-cli/ (home dir), not from RUN_DIR - mkdir -p "$HOME/.gemini/antigravity-cli" - cp "$GITHUB_WORKSPACE/.github/e2e/pm-settings/gemini.settings.json" "$HOME/.gemini/antigravity-cli/settings.json" + mkdir -p "$RUN_DIR/.gemini/antigravity-cli" + cp "$GITHUB_WORKSPACE/.github/e2e/pm-settings/gemini.settings.json" "$RUN_DIR/.gemini/antigravity-cli/settings.json" fi - echo "Seeded $PROVIDER PM permissions into home dir" + echo "Seeded $PROVIDER PM permissions into $RUN_DIR" - name: Run fleet e2e - setup phase (${{ steps.suite.outputs.pm_provider }}) id: e2e_setup @@ -299,7 +298,6 @@ jobs: > "$RUN_DIR/raw-setup.txt" 2>&1 || LLM_EXIT=$? else agy --dangerously-skip-permissions \ - --output-format stream-json \ --print-timeout 45m \ --add-dir "$RUN_DIR" \ --add-dir "$CFG_DIR" \ @@ -312,16 +310,11 @@ jobs: if [ "$LLM_EXIT" -ne 0 ]; then echo "::error::LLM CLI ($PROVIDER) setup phase exited with code $LLM_EXIT" RAW="$RUN_DIR/raw-setup.txt" - R=$(grep '"type":"result"\|"type":"final"' "$RAW" | tail -1) - if [ -z "$R" ]; then - # agy (and other providers in plain-text mode) may not emit a result line - R=$(tail -1 "$RAW" 2>/dev/null) - fi + R=$(grep '"type":"result"' "$RAW" | tail -1) if [ -n "$R" ]; then - echo "$R" | jq -r '"reason : " + (.subtype // "?") + (if .is_error then " (is_error)" else "" end), "errors : " + ((.errors // []) | join("; ") | if . == "" then "(none)" else . end), "message: " + ((.result // "(no final text)") | gsub("\n";" ") | .[0:300])' 2>/dev/null \ - || echo "last line: $R" + echo "$R" | jq -r '"reason : " + (.subtype // "?") + (if .is_error then " (is_error)" else "" end), "errors : " + ((.errors // []) | join("; ") | if . == "" then "(none)" else . end), "message: " + ((.result // "(no final text)") | gsub("\n";" ") | .[0:300])' else - echo "(no output -- LLM produced no output at all)" + echo "(no result line -- LLM produced no final output)" fi exit "$LLM_EXIT" fi @@ -363,7 +356,6 @@ jobs: > "$RUN_DIR/raw-sprint.txt" 2>&1 || LLM_EXIT=$? else agy --dangerously-skip-permissions \ - --output-format stream-json \ --print-timeout 45m \ --add-dir "$RUN_DIR" \ --add-dir "$CFG_DIR" \ @@ -374,8 +366,7 @@ jobs: fi # Extract PM session ID from sprint phase - # gemini/agy emit type:init; claude emits type:system - SESSION_ID=$(grep -m1 '"type":"system"\|"type":"init"' "$RUN_DIR/raw-sprint.txt" \ + SESSION_ID=$(grep -m1 '"type":"system"' "$RUN_DIR/raw-sprint.txt" \ | jq -r '.session_id // ""' 2>/dev/null || true) echo "session_id=$SESSION_ID" >> "$GITHUB_OUTPUT" @@ -392,16 +383,11 @@ jobs: if [ "$LLM_EXIT" -ne 0 ]; then echo "::error::LLM CLI ($PROVIDER) sprint phase exited with code $LLM_EXIT" RAW="$RUN_DIR/raw-sprint.txt" - R=$(grep '"type":"result"\|"type":"final"' "$RAW" | tail -1) - if [ -z "$R" ]; then - # agy (and other providers in plain-text mode) may not emit a result line - R=$(tail -1 "$RAW" 2>/dev/null) - fi + R=$(grep '"type":"result"' "$RAW" | tail -1) if [ -n "$R" ]; then - echo "$R" | jq -r '"reason : " + (.subtype // "?") + (if .is_error then " (is_error)" else "" end), "errors : " + ((.errors // []) | join("; ") | if . == "" then "(none)" else . end), "message: " + ((.result // "(no final text)") | gsub("\n";" ") | .[0:300])' 2>/dev/null \ - || echo "last line: $R" + echo "$R" | jq -r '"reason : " + (.subtype // "?") + (if .is_error then " (is_error)" else "" end), "errors : " + ((.errors // []) | join("; ") | if . == "" then "(none)" else . end), "message: " + ((.result // "(no final text)") | gsub("\n";" ") | .[0:300])' else - echo "(no output -- LLM produced no output at all)" + echo "(no result line -- LLM produced no final output)" fi exit "$LLM_EXIT" fi From 1f382c275e39c4e30ee4f1a70fdc7bc240c47ab5 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 16:40:40 -0400 Subject: [PATCH 18/33] fix(agy): capture prompt output and resolve wsl hook path --- hooks/hooks-config.json | 2 +- src/os/linux.ts | 4 +-- src/os/windows.ts | 2 +- src/providers/agy.ts | 72 +++++++++++++++++++++++++++++++++++++---- src/services/ssh.ts | 8 +++-- 5 files changed, 75 insertions(+), 13 deletions(-) diff --git a/hooks/hooks-config.json b/hooks/hooks-config.json index 91b6b813..a961f4cb 100644 --- a/hooks/hooks-config.json +++ b/hooks/hooks-config.json @@ -6,7 +6,7 @@ "hooks": [ { "type": "command", - "command": "bash ~/.apra-fleet/hooks/post-register-member.sh" + "command": "node -e \"const fs = require('fs'), path = require('path'), cp = require('child_process'); let home = process.env.USERPROFILE || process.env.HOME || ''; if (process.platform === 'linux' && home.startsWith('/home/')) { const user = process.env.USER || home.split('/').pop(); const wslHome = '/mnt/c/Users/' + user; if (fs.existsSync(wslHome)) home = wslHome; } const p = path.join(home, '.apra-fleet', 'hooks', 'post-register-member.sh'); if (fs.existsSync(p)) { cp.execSync('bash \\\"' + p + '\\\"', { stdio: 'inherit' }); }\"" } ] } diff --git a/src/os/linux.ts b/src/os/linux.ts index 667e87f9..533b1e29 100644 --- a/src/os/linux.ts +++ b/src/os/linux.ts @@ -1,9 +1,9 @@ -import { execSync } from 'node:child_process'; +import { execSync } from 'node:child_process'; import type { OsCommands, ProviderAdapter, PromptOptions } from './os-commands.js'; import { escapeDoubleQuoted, escapeGrepPattern, sanitizeSessionId } from './os-commands.js'; import { escapeShellArg } from '../utils/shell-escape.js'; -const CLI_PATH = 'export PATH="$HOME/.local/bin:$PATH" && '; +const CLI_PATH = 'export PATH="$HOME/.local/bin:$PATH" && unset ANTIGRAVITY_SOURCE_METADATA GEMINI_SOURCE_METADATA CLAUDE_SOURCE_METADATA COPILOT_SOURCE_METADATA CODEX_SOURCE_METADATA && '; /** * Wrap a bash command string with PID capture. diff --git a/src/os/windows.ts b/src/os/windows.ts index 68ab8ee7..e1c94183 100644 --- a/src/os/windows.ts +++ b/src/os/windows.ts @@ -5,7 +5,7 @@ import type { OsCommands, ProviderAdapter, PromptOptions } from './os-commands.j import { escapeWindowsArg, sanitizeSessionId } from './os-commands.js'; import { escapeBatchMetachars } from '../utils/shell-escape.js'; -const CLI_PATH = '$env:Path = "$env:USERPROFILE\\.local\\bin;$env:Path"; '; +const CLI_PATH = '$env:Path = "$env:USERPROFILE\\.local\\bin;$env:Path"; \'ANTIGRAVITY_SOURCE_METADATA\',\'GEMINI_SOURCE_METADATA\',\'CLAUDE_SOURCE_METADATA\',\'COPILOT_SOURCE_METADATA\',\'CODEX_SOURCE_METADATA\' | ForEach-Object { Remove-Item "env:$_" -ErrorAction SilentlyContinue }; '; /** * Wrap PowerShell setup commands and a CLI invocation with PID capture. diff --git a/src/providers/agy.ts b/src/providers/agy.ts index fe33e667..44c2b2e6 100644 --- a/src/providers/agy.ts +++ b/src/providers/agy.ts @@ -3,6 +3,9 @@ import type { LlmProvider, SSHExecResult } from '../types.js'; import type { PromptErrorCategory } from '../utils/prompt-errors.js'; import { classifyPromptError } from '../utils/prompt-errors.js'; import { escapeDoubleQuoted } from '../os/os-commands.js'; +import { stripAnsi } from '../utils/ansi.js'; + +const NODE_TRANSCRIPT_SCRIPT = `const fs = require(\`fs\`); const path = require(\`path\`); try { const home = process.env.USERPROFILE || process.env.HOME || \`\`; const cachePath = path.join(home, \`.gemini\`, \`antigravity-cli\`, \`cache\`, \`last_conversations.json\`); if (!fs.existsSync(cachePath)) { console.log(\`FLEET_TRANSCRIPT_MISSING:NO_CACHE\`); process.exit(0); } const cache = JSON.parse(fs.readFileSync(cachePath, \`utf8\`)); const folder = process.argv[1]; if (!folder) { console.log(\`FLEET_TRANSCRIPT_MISSING:NO_FOLDER_ARG\`); process.exit(0); } const norm = p => path.resolve(p).toLowerCase().split(path.sep).join(\`/\`); const target = norm(folder); let found = \`\`; for (const k of Object.keys(cache)) { if (norm(k) === target) { found = cache[k]; break; } } if (found) { const transPath = path.join(home, \`.gemini\`, \`antigravity-cli\`, \`brain\`, found, \`.system_generated\`, \`logs\`, \`transcript.jsonl\`); if (fs.existsSync(transPath)) { console.log(\`FLEET_TRANSCRIPT_START\`); console.log(fs.readFileSync(transPath, \`utf8\`)); console.log(\`FLEET_TRANSCRIPT_END\`); } else { console.log(\`FLEET_TRANSCRIPT_MISSING:\` + found); } } else { console.log(\`FLEET_TRANSCRIPT_MISSING:NO_SESSION_IN_CACHE\`); } } catch (e) { console.log(\`FLEET_TRANSCRIPT_ERROR:\` + e.message); }`; export class AgyProvider implements ProviderAdapter { readonly name: LlmProvider = 'agy'; @@ -28,7 +31,7 @@ export class AgyProvider implements ProviderAdapter { } buildPromptCommand(opts: PromptOptions): string { - const { folder, promptFile, sessionId, resuming, unattended, inv } = opts; + const { folder, promptFile, sessionId, unattended, inv } = opts; const escapedFolder = escapeDoubleQuoted(folder); let instruction = `Your task is described in ${promptFile} in the current directory. Read that file first, then execute the task.`; if (inv) { @@ -37,7 +40,8 @@ export class AgyProvider implements ProviderAdapter { let cmd = `cd "${escapedFolder}" && agy -p "${instruction}"`; - if (resuming && sessionId) { + // Always pass --conversation so fleet knows where the transcript will be written. + if (sessionId) { cmd += ` --conversation "${escapeDoubleQuoted(sessionId)}"`; } @@ -45,6 +49,10 @@ export class AgyProvider implements ProviderAdapter { cmd += ' --dangerously-skip-permissions'; } + // After agy exits, read its transcript from disk (primary output channel -- + // agy writes its response to CONOUT$, not stdout, so file I/O is required). + cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' "$PWD"`; + return cmd; } @@ -57,9 +65,51 @@ export class AgyProvider implements ProviderAdapter { } parseResponse(result: SSHExecResult): ParsedResponse { - const raw = result.stdout.trim(); + const raw = result.stdout; + + // Primary path: extract response from the transcript JSONL that agy writes after + // completing its task. This is more reliable than PTY/ANSI capture because agy + // writes its LLM response to CONOUT$ (not stdout), but always writes a transcript file. + const startMarker = 'FLEET_TRANSCRIPT_START'; + const endMarker = 'FLEET_TRANSCRIPT_END'; + const startIdx = raw.indexOf(startMarker); + const endIdx = raw.indexOf(endMarker); + + if (startIdx !== -1 && endIdx !== -1) { + const section = raw.substring(startIdx + startMarker.length, endIdx); + const lines = section.split('\n').map(l => l.trim()).filter(Boolean); + let lastResponse = ''; + for (const line of lines) { + try { + const entry = JSON.parse(line) as { type?: string; status?: string; content?: string }; + if ( + entry.type === 'PLANNER_RESPONSE' && + entry.status === 'DONE' && + typeof entry.content === 'string' && + entry.content.trim() + ) { + lastResponse = entry.content.trim(); + } + } catch { /* skip malformed JSON lines */ } + } + if (lastResponse) { + return { + result: lastResponse, + sessionId: undefined, + isError: result.code !== 0, + raw, + usage: undefined, + }; + } + } + + // Fallback: ANSI-strip stdout (covers cases where transcript is missing or incomplete) + const stripped = stripAnsi(raw) + .replace(/^FLEET_PID:\d+\r?\n/m, '') + .replace(/\r/g, '') + .trim(); return { - result: raw, + result: stripped, sessionId: undefined, isError: result.code !== 0, raw, @@ -77,7 +127,10 @@ export class AgyProvider implements ProviderAdapter { resumeFlag(sessionId?: string, resuming?: boolean): string { if (!sessionId) return ''; - return resuming ? `--conversation "${escapeDoubleQuoted(sessionId)}"` : ''; + // Always pass --conversation so fleet knows where to read the transcript. + // When resuming=true this continues an existing session; otherwise starts fresh + // with a pre-minted UUID that fleet uses to locate the transcript after exit. + return `--conversation "${escapeDoubleQuoted(sessionId)}"`; } modelTiers(): Record<'cheap' | 'standard' | 'premium', string> { @@ -138,7 +191,14 @@ export class AgyProvider implements ProviderAdapter { } wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string): string { - return `${setupCmd}Write-Output "FLEET_PID:$pid"; ${filePath} ${argList}`; + let cmd = `${setupCmd}Write-Output "FLEET_PID:$pid"; ${filePath} ${argList}`; + + // After agy exits, read its conversation transcript (primary output channel -- + // agy writes LLM responses to CONOUT$, not stdout; the transcript file is the + // reliable way to capture the response text). + cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' "$((Get-Location).Path)"`; + + return cmd; } jsonOutputFlag(): string { diff --git a/src/services/ssh.ts b/src/services/ssh.ts index 876e9967..f2a76472 100644 --- a/src/services/ssh.ts +++ b/src/services/ssh.ts @@ -206,6 +206,7 @@ export async function execCommand( stdoutSpillStream.write(chunk); } }); + stream.stderr.on('data', (data: Buffer) => { resetInactivityTimer(); stderrLen += data.length; @@ -220,18 +221,19 @@ export async function execCommand( stderrSpillStream.write(data); } }); + stream.on('close', (code: number) => { clearStoredPid(agent.id); if (stdoutSpillStream) stdoutSpillStream.end(); if (stderrSpillStream) stderrSpillStream.end(); if (stdoutSpillPath) { - stdout = `[OUTPUT TRUNCATED — full stdout saved to ${stdoutSpillPath}]\n${stdout}`; + stdout = `[OUTPUT TRUNCATED -- full stdout saved to ${stdoutSpillPath}]\n${stdout}`; } if (stderrSpillPath) { - stderr = `[OUTPUT TRUNCATED — full stderr saved to ${stderrSpillPath}]\n${stderr}`; + stderr = `[OUTPUT TRUNCATED -- full stderr saved to ${stderrSpillPath}]\n${stderr}`; } if (warning) { - stderr = `⚠️ ${warning}\n${stderr}`; + stderr = `Warning: ${warning}\n${stderr}`; } settle(() => resolve({ stdout, stderr, code: code ?? 0 })); }); From 68abee832c48207427c7ba7fc8974e6e41b740d2 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 16:51:36 -0400 Subject: [PATCH 19/33] fix(agy): add missing ansi utility file --- src/utils/ansi.ts | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 src/utils/ansi.ts diff --git a/src/utils/ansi.ts b/src/utils/ansi.ts new file mode 100644 index 00000000..d656c505 --- /dev/null +++ b/src/utils/ansi.ts @@ -0,0 +1,19 @@ +/** + * Strip ANSI escape sequences and non-printable control characters from a string. + * Used to clean PTY-captured output from agy.exe which writes via Windows Console API. + */ +export function stripAnsi(raw: string): string { + return raw + // CSI sequences: ESC [ ... (colors, cursor movement, etc.) + .replace(/\x1B\[[\x30-\x3F]*[\x20-\x2F]*[\x40-\x7E]/g, '') + // OSC sequences: ESC ] ... ST or BEL + .replace(/\x1B\][^\x07\x1B]*(?:\x07|\x1B\\)/g, '') + // DCS / PM / APC / SOS sequences: ESC [P X ^ _] ... ST + .replace(/\x1B[P X^_][^\x1B]*\x1B\\/g, '') + // Other two-char ESC sequences: ESC + .replace(/\x1B[^\[]/g, '') + // Remaining lone ESC + .replace(/\x1B/g, '') + // Non-printable control chars (keep \n and \t) + .replace(/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]/g, ''); +} From 0d30098f03f70f5252f5e7c89233ac1d90ef0d21 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 19:37:49 -0400 Subject: [PATCH 20/33] fix(agy): look up transcript by UUID, not folder path -- bypasses scratch dir switch --- .github/e2e/extract-results.mjs | 32 +++++++++++++++++++++++++++++++- src/os/windows.ts | 2 +- src/providers/agy.ts | 27 +++++++++++++++++++-------- src/providers/claude.ts | 2 +- src/providers/codex.ts | 2 +- src/providers/copilot.ts | 2 +- src/providers/gemini.ts | 2 +- src/providers/provider.ts | 2 +- 8 files changed, 56 insertions(+), 15 deletions(-) diff --git a/.github/e2e/extract-results.mjs b/.github/e2e/extract-results.mjs index ca84f5c8..3a9d881d 100644 --- a/.github/e2e/extract-results.mjs +++ b/.github/e2e/extract-results.mjs @@ -27,12 +27,42 @@ function processRawFile(filePath, provider) { const content = readFileSync(filePath, 'utf8'); if (provider === 'agy') { + // The raw file contains the stdout of the agy invocation. After agy exits, + // fleet appends the transcript JSONL wrapped in FLEET_TRANSCRIPT_START/END markers. + // We extract text from PLANNER_RESPONSE entries in the JSONL so that CHECKPOINT lines + // embedded in the agent's responses can be detected. + const startMarker = 'FLEET_TRANSCRIPT_START'; + const endMarker = 'FLEET_TRANSCRIPT_END'; + const startIdx = content.indexOf(startMarker); + const endIdx = content.indexOf(endMarker); + if (startIdx !== -1 && endIdx !== -1) { + const section = content.substring(startIdx + startMarker.length, endIdx); + let extracted = ''; + for (const line of section.split('\n')) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + const entry = JSON.parse(trimmed); + if (entry.type === 'PLANNER_RESPONSE' && entry.status === 'DONE' && typeof entry.content === 'string' && entry.content.trim()) { + extracted += '\n' + entry.content.trim(); + } + } catch { /* skip malformed lines */ } + } + return { + assistantText: extracted || content, + tokensIn: 0, + tokensOut: 0, + cacheCreate: 0, + cacheRead: 0, + }; + } + // No markers: treat raw content as plain text (fallback for empty or unexpected output) return { assistantText: content, tokensIn: 0, tokensOut: 0, cacheCreate: 0, - cacheRead: 0 + cacheRead: 0, }; } diff --git a/src/os/windows.ts b/src/os/windows.ts index e1c94183..97229e33 100644 --- a/src/os/windows.ts +++ b/src/os/windows.ts @@ -130,7 +130,7 @@ export class WindowsCommands implements OsCommands { argList += ` ${provider.modelFlag(escapeWindowsArg(model))}`; } - return provider.wrapWindowsPrompt(setupCmd, filePath, argList); + return provider.wrapWindowsPrompt(setupCmd, filePath, argList, sessionId); } // --- Filesystem --- diff --git a/src/providers/agy.ts b/src/providers/agy.ts index 44c2b2e6..a64c50fd 100644 --- a/src/providers/agy.ts +++ b/src/providers/agy.ts @@ -5,7 +5,12 @@ import { classifyPromptError } from '../utils/prompt-errors.js'; import { escapeDoubleQuoted } from '../os/os-commands.js'; import { stripAnsi } from '../utils/ansi.js'; -const NODE_TRANSCRIPT_SCRIPT = `const fs = require(\`fs\`); const path = require(\`path\`); try { const home = process.env.USERPROFILE || process.env.HOME || \`\`; const cachePath = path.join(home, \`.gemini\`, \`antigravity-cli\`, \`cache\`, \`last_conversations.json\`); if (!fs.existsSync(cachePath)) { console.log(\`FLEET_TRANSCRIPT_MISSING:NO_CACHE\`); process.exit(0); } const cache = JSON.parse(fs.readFileSync(cachePath, \`utf8\`)); const folder = process.argv[1]; if (!folder) { console.log(\`FLEET_TRANSCRIPT_MISSING:NO_FOLDER_ARG\`); process.exit(0); } const norm = p => path.resolve(p).toLowerCase().split(path.sep).join(\`/\`); const target = norm(folder); let found = \`\`; for (const k of Object.keys(cache)) { if (norm(k) === target) { found = cache[k]; break; } } if (found) { const transPath = path.join(home, \`.gemini\`, \`antigravity-cli\`, \`brain\`, found, \`.system_generated\`, \`logs\`, \`transcript.jsonl\`); if (fs.existsSync(transPath)) { console.log(\`FLEET_TRANSCRIPT_START\`); console.log(fs.readFileSync(transPath, \`utf8\`)); console.log(\`FLEET_TRANSCRIPT_END\`); } else { console.log(\`FLEET_TRANSCRIPT_MISSING:\` + found); } } else { console.log(\`FLEET_TRANSCRIPT_MISSING:NO_SESSION_IN_CACHE\`); } } catch (e) { console.log(\`FLEET_TRANSCRIPT_ERROR:\` + e.message); }`; +// NODE_TRANSCRIPT_SCRIPT_BY_UUID: accepts a conversation UUID as argv[1] and reads +// the transcript directly from brain//.system_generated/logs/transcript.jsonl. +// This is robust against agy switching its working directory (e.g. to scratch) because +// we look up the transcript by the UUID we minted and passed via --conversation, not by +// folder path via last_conversations.json. +const NODE_TRANSCRIPT_SCRIPT = `const fs = require(\`fs\`); const path = require(\`path\`); try { const home = process.env.USERPROFILE || process.env.HOME || \`\`; const convId = process.argv[1]; if (!convId) { console.log(\`FLEET_TRANSCRIPT_MISSING:NO_CONV_ID\`); process.exit(0); } const transPath = path.join(home, \`.gemini\`, \`antigravity-cli\`, \`brain\`, convId, \`.system_generated\`, \`logs\`, \`transcript.jsonl\`); if (fs.existsSync(transPath)) { console.log(\`FLEET_TRANSCRIPT_START\`); console.log(fs.readFileSync(transPath, \`utf8\`)); console.log(\`FLEET_TRANSCRIPT_END\`); } else { console.log(\`FLEET_TRANSCRIPT_MISSING:\` + convId); } } catch (e) { console.log(\`FLEET_TRANSCRIPT_ERROR:\` + e.message); }`; export class AgyProvider implements ProviderAdapter { readonly name: LlmProvider = 'agy'; @@ -49,9 +54,12 @@ export class AgyProvider implements ProviderAdapter { cmd += ' --dangerously-skip-permissions'; } - // After agy exits, read its transcript from disk (primary output channel -- - // agy writes its response to CONOUT$, not stdout, so file I/O is required). - cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' "$PWD"`; + // After agy exits, read its transcript from disk by conversation UUID (primary output + // channel -- agy writes its response to CONOUT$, not stdout, so file I/O is required). + // We pass the UUID we minted via --conversation so the lookup is robust even if agy + // switches its working directory (e.g. to scratch) on launch. + const convArg = sessionId ? `"${escapeDoubleQuoted(sessionId)}"` : '""'; + cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' ${convArg}`; return cmd; } @@ -190,13 +198,16 @@ export class AgyProvider implements ProviderAdapter { return 'GEMINI_API_KEY'; } - wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string): string { + wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string, sessionId?: string): string { let cmd = `${setupCmd}Write-Output "FLEET_PID:$pid"; ${filePath} ${argList}`; - // After agy exits, read its conversation transcript (primary output channel -- + // After agy exits, read its conversation transcript by UUID (primary output channel -- // agy writes LLM responses to CONOUT$, not stdout; the transcript file is the - // reliable way to capture the response text). - cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' "$((Get-Location).Path)"`; + // reliable way to capture the response text). We look up the transcript directly + // by the conversation UUID we passed via --conversation, bypassing last_conversations.json + // which would fail if agy switches its working directory (e.g. to scratch) on launch. + const convArg = sessionId ? `"${sessionId}"` : '""'; + cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' ${convArg}`; return cmd; } diff --git a/src/providers/claude.ts b/src/providers/claude.ts index dbb122f9..9847c352 100644 --- a/src/providers/claude.ts +++ b/src/providers/claude.ts @@ -187,7 +187,7 @@ export class ClaudeProvider implements ProviderAdapter { - wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string): string { + wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string, _sessionId?: string): string { // Native claude.exe (2.1.113+) does not inherit stdout via ProcessStartInfo. // Direct shell execution ensures stdout is captured through the PowerShell pipe. // $pid is the shell PID - killing it also kills claude as a direct child. diff --git a/src/providers/codex.ts b/src/providers/codex.ts index db4ab663..9c26df91 100644 --- a/src/providers/codex.ts +++ b/src/providers/codex.ts @@ -186,7 +186,7 @@ export class CodexProvider implements ProviderAdapter { - wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string): string { + wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string, _sessionId?: string): string { // Codex on Windows is typically an npm-based .cmd script. // Use direct shell execution to ensure resolution, while emitting PID immediately. return `${setupCmd}Write-Output "FLEET_PID:$pid"; ${filePath} ${argList}`; diff --git a/src/providers/copilot.ts b/src/providers/copilot.ts index 5e56704c..1c55e63c 100644 --- a/src/providers/copilot.ts +++ b/src/providers/copilot.ts @@ -182,7 +182,7 @@ export class CopilotProvider implements ProviderAdapter { - wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string): string { + wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string, _sessionId?: string): string { // For native binaries on Windows, shell-based wrapping ensures reliable output redirection // while still allowing for tree-based process termination via the shell PID. return `${setupCmd}Write-Output "FLEET_PID:$pid"; ${filePath} ${argList}`; diff --git a/src/providers/gemini.ts b/src/providers/gemini.ts index 6c8bbde0..7429e7cc 100644 --- a/src/providers/gemini.ts +++ b/src/providers/gemini.ts @@ -204,7 +204,7 @@ export class GeminiProvider implements ProviderAdapter { - wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string): string { + wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string, _sessionId?: string): string { // Gemini on Windows needs direct shell execution to resolve .cmd script wrappers reliably. // We emit the current shell PID immediately to satisfy fleet's lifecycle tracking. return `${setupCmd}Write-Output "FLEET_PID:$pid"; ${filePath} ${argList}`; diff --git a/src/providers/provider.ts b/src/providers/provider.ts index 2916a87a..464e514f 100644 --- a/src/providers/provider.ts +++ b/src/providers/provider.ts @@ -104,7 +104,7 @@ export interface ProviderAdapter { // Windows / PowerShell prompt building helpers /** On Windows, wrap the command for execution (e.g. via .NET ProcessStartInfo or direct shell). */ - wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string): string; + wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string, sessionId?: string): string; /** JSON output flag for the CLI (e.g. --output-format json, --json, --format json) */ jsonOutputFlag(): string; From 48945aa26ff4dcd05218ae1544f965570499d9af Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 20:07:10 -0400 Subject: [PATCH 21/33] fix(e2e): extract PM agy transcript after exit -- reads from brain/ by dir key --- .github/workflows/fleet-e2e.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index d9628180..a23033b9 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -305,6 +305,22 @@ jobs: --add-dir "$(dirname "$REVIEWER_FOLDER")" \ -p "$(cat "$RUN_DIR/rendered-setup.md")" \ > "$RUN_DIR/raw-setup.txt" 2>&1 || LLM_EXIT=$? + # agy writes to CONOUT$ (Windows console), not stdout -- read its transcript + # from disk via last_conversations.json using the run directory as the lookup key. + RUN_DIR_WIN="$(cygpath -w "$RUN_DIR" 2>/dev/null || echo "$RUN_DIR")" + node -e " + const fs = require('fs'), path = require('path'); + const home = process.env.USERPROFILE || process.env.HOME || ''; + const cache = JSON.parse(fs.readFileSync(path.join(home, '.gemini', 'antigravity-cli', 'cache', 'last_conversations.json'), 'utf8')); + const norm = p => path.resolve(p).toLowerCase().split(path.sep).join('/'); + const target = norm(process.argv[1]); + let convId = ''; + for (const k of Object.keys(cache)) { if (norm(k) === target) { convId = cache[k]; break; } } + if (!convId) { console.log('FLEET_TRANSCRIPT_MISSING:NO_CONV_FOR_DIR'); process.exit(0); } + const tp = path.join(home, '.gemini', 'antigravity-cli', 'brain', convId, '.system_generated', 'logs', 'transcript.jsonl'); + if (fs.existsSync(tp)) { process.stdout.write('FLEET_TRANSCRIPT_START\n'); process.stdout.write(fs.readFileSync(tp, 'utf8')); process.stdout.write('\nFLEET_TRANSCRIPT_END\n'); } + else { console.log('FLEET_TRANSCRIPT_MISSING:' + convId); } + " "$RUN_DIR_WIN" >> "$RUN_DIR/raw-setup.txt" 2>&1 || true fi if [ "$LLM_EXIT" -ne 0 ]; then @@ -363,6 +379,22 @@ jobs: --add-dir "$(dirname "$REVIEWER_FOLDER")" \ -p "$(cat "$RUN_DIR/rendered-sprint.md")" \ > "$RUN_DIR/raw-sprint.txt" 2>&1 || LLM_EXIT=$? + # agy writes to CONOUT$ (Windows console), not stdout -- read its transcript + # from disk via last_conversations.json using the run directory as the lookup key. + RUN_DIR_WIN="$(cygpath -w "$RUN_DIR" 2>/dev/null || echo "$RUN_DIR")" + node -e " + const fs = require('fs'), path = require('path'); + const home = process.env.USERPROFILE || process.env.HOME || ''; + const cache = JSON.parse(fs.readFileSync(path.join(home, '.gemini', 'antigravity-cli', 'cache', 'last_conversations.json'), 'utf8')); + const norm = p => path.resolve(p).toLowerCase().split(path.sep).join('/'); + const target = norm(process.argv[1]); + let convId = ''; + for (const k of Object.keys(cache)) { if (norm(k) === target) { convId = cache[k]; break; } } + if (!convId) { console.log('FLEET_TRANSCRIPT_MISSING:NO_CONV_FOR_DIR'); process.exit(0); } + const tp = path.join(home, '.gemini', 'antigravity-cli', 'brain', convId, '.system_generated', 'logs', 'transcript.jsonl'); + if (fs.existsSync(tp)) { process.stdout.write('FLEET_TRANSCRIPT_START\n'); process.stdout.write(fs.readFileSync(tp, 'utf8')); process.stdout.write('\nFLEET_TRANSCRIPT_END\n'); } + else { console.log('FLEET_TRANSCRIPT_MISSING:' + convId); } + " "$RUN_DIR_WIN" >> "$RUN_DIR/raw-sprint.txt" 2>&1 || true fi # Extract PM session ID from sprint phase From 3ed23db05d045c5ffb0f6cafe437cbdcb07576a9 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 20:15:13 -0400 Subject: [PATCH 22/33] fix(e2e): require tool call after each CHECKPOINT to prevent agy print-mode early exit --- .github/e2e/setup-script.md | 6 ++++-- .github/e2e/sprint-script.md | 10 +++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/e2e/setup-script.md b/.github/e2e/setup-script.md index adf9c80b..75f1c078 100644 --- a/.github/e2e/setup-script.md +++ b/.github/e2e/setup-script.md @@ -7,7 +7,7 @@ PM: {{PM_OS}} / {{PM_PROVIDER}} | VCS: {{VCS}} | Toy: {{TOY_PROJECT_URL}} Do all work yourself in this conversation -- no sub-agents. If a step fails, move on to the next one. > [!IMPORTANT] -> If you are running in non-interactive print mode (e.g. through a CI runner or using the `-p` / `--print` flags) and are waiting for a background task or moving to the next checkpoint, you MUST execute a tool call (such as a status check command or reading progress files) in every intermediate turn. Do not return a text-only response until the entire script/phase/task is fully completed. +> If you are running in non-interactive print mode (e.g. through a CI runner or using the `-p` / `--print` flags): after printing each CHECKPOINT line, you MUST immediately execute a tool call (e.g. `fleet_status`) and then continue to the next task without stopping. Never return a text-only response until you have printed ALL checkpoints including the final `-done` one. ## Checkpoints @@ -66,7 +66,7 @@ grep -q 'HOME/bin' ~/.profile 2>/dev/null || echo 'export PATH=$HOME/bin:$PATH' CHECKPOINT: {"id":"T1","status":"PASS","notes":"..."} ---- +After printing that line, immediately call `fleet_status` and then start T2 without stopping. ## T2: Basic Execution @@ -78,6 +78,8 @@ Write any scratch files into the run directory (the current working directory), CHECKPOINT: {"id":"T2","status":"PASS","notes":"..."} +After printing that line, immediately call `fleet_status` and then print T2-done without stopping. + ### Done Print this only after T1 and T2 are done: diff --git a/.github/e2e/sprint-script.md b/.github/e2e/sprint-script.md index c16994d1..416fba8f 100644 --- a/.github/e2e/sprint-script.md +++ b/.github/e2e/sprint-script.md @@ -10,7 +10,7 @@ PM: {{PM_OS}} / {{PM_PROVIDER}} | VCS: {{VCS}} | Toy: {{TOY_PROJECT_URL}} - **reviewer** (name: `reviewer`, provider: {{REVIEWER_PROVIDER}}) > [!IMPORTANT] -> If you are running in non-interactive print mode (e.g. through a CI runner or using the `-p` / `--print` flags) and are waiting for a background task or moving to the next checkpoint, you MUST execute a tool call (such as a status check command or reading progress files) in every intermediate turn. Do not return a text-only response until the entire script/phase/task is fully completed. +> If you are running in non-interactive print mode (e.g. through a CI runner or using the `-p` / `--print` flags): after printing each CHECKPOINT line, you MUST immediately execute a tool call (e.g. `fleet_status`) and then continue to the next task without stopping. Never return a text-only response until you have printed ALL checkpoints including `T3-done`. ## Checkpoints @@ -35,12 +35,16 @@ On the doer: clone {{TOY_PROJECT_URL}} into its work folder if needed, then `git CHECKPOINT: {"id":"T3-repo-setup","status":"PASS","notes":"..."} +After printing that line, immediately call `fleet_status` and then start T3.2 without stopping. + ### T3.2 Pick the work Run `bd ready` on the doer. Pick 3 P1 issues. Write `requirements.md` for them into the current working directory. CHECKPOINT: {"id":"T3-discover","status":"PASS","notes":"..."} +After printing that line, immediately call `fleet_status` and then start T3.3 without stopping. + ### T3.3 Run the sprint Activate the pm skill, then run: @@ -58,12 +62,16 @@ The pm skill runs the doer/reviewer loop. After `/pm start doer`, keep driving t CHECKPOINT: {"id":"T3-sprint","status":"PASS","notes":"..."} +After printing that line, immediately call `fleet_status` and then start T3.4 without stopping. + ### T3.4 Check the result Confirm a branch with prefix `{{BRANCH_PREFIX}}` exists on origin and a PR was raised. CHECKPOINT: {"id":"T3-pr-verified","status":"PASS","notes":"..."} +After printing that line, immediately call `fleet_status` and then print T3-done without stopping. + ### Done Print this only after the four steps above are done: From 3ba0e07069a6d32bc75d37ed19921a4f2a26cc32 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 20:48:24 -0400 Subject: [PATCH 23/33] fix(e2e): resume agy with --continue loop until terminal checkpoint found --- .github/workflows/fleet-e2e.yml | 59 ++++++++++++++++----------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index a23033b9..84b908e7 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -305,22 +305,21 @@ jobs: --add-dir "$(dirname "$REVIEWER_FOLDER")" \ -p "$(cat "$RUN_DIR/rendered-setup.md")" \ > "$RUN_DIR/raw-setup.txt" 2>&1 || LLM_EXIT=$? - # agy writes to CONOUT$ (Windows console), not stdout -- read its transcript - # from disk via last_conversations.json using the run directory as the lookup key. + # agy writes to CONOUT$ -- read transcript from disk after exit RUN_DIR_WIN="$(cygpath -w "$RUN_DIR" 2>/dev/null || echo "$RUN_DIR")" - node -e " - const fs = require('fs'), path = require('path'); - const home = process.env.USERPROFILE || process.env.HOME || ''; - const cache = JSON.parse(fs.readFileSync(path.join(home, '.gemini', 'antigravity-cli', 'cache', 'last_conversations.json'), 'utf8')); - const norm = p => path.resolve(p).toLowerCase().split(path.sep).join('/'); - const target = norm(process.argv[1]); - let convId = ''; - for (const k of Object.keys(cache)) { if (norm(k) === target) { convId = cache[k]; break; } } - if (!convId) { console.log('FLEET_TRANSCRIPT_MISSING:NO_CONV_FOR_DIR'); process.exit(0); } - const tp = path.join(home, '.gemini', 'antigravity-cli', 'brain', convId, '.system_generated', 'logs', 'transcript.jsonl'); - if (fs.existsSync(tp)) { process.stdout.write('FLEET_TRANSCRIPT_START\n'); process.stdout.write(fs.readFileSync(tp, 'utf8')); process.stdout.write('\nFLEET_TRANSCRIPT_END\n'); } - else { console.log('FLEET_TRANSCRIPT_MISSING:' + convId); } - " "$RUN_DIR_WIN" >> "$RUN_DIR/raw-setup.txt" 2>&1 || true + AGY_TRANSCRIPT_SCRIPT="const fs=require('fs'),path=require('path');const home=process.env.USERPROFILE||process.env.HOME||'';const cache=JSON.parse(fs.readFileSync(path.join(home,'.gemini','antigravity-cli','cache','last_conversations.json'),'utf8'));const norm=p=>path.resolve(p).toLowerCase().split(path.sep).join('/');const target=norm(process.argv[1]);let id='';for(const k of Object.keys(cache)){if(norm(k)===target){id=cache[k];break;}}if(!id){process.stdout.write('FLEET_TRANSCRIPT_MISSING:NO_CONV\n');process.exit(0);}const tp=path.join(home,'.gemini','antigravity-cli','brain',id,'.system_generated','logs','transcript.jsonl');if(fs.existsSync(tp)){process.stdout.write('FLEET_TRANSCRIPT_START\n');process.stdout.write(fs.readFileSync(tp,'utf8'));process.stdout.write('\nFLEET_TRANSCRIPT_END\n');}else{process.stdout.write('FLEET_TRANSCRIPT_MISSING:'+id+'\n');}" + node -e "$AGY_TRANSCRIPT_SCRIPT" "$RUN_DIR_WIN" >> "$RUN_DIR/raw-setup.txt" 2>&1 || true + # Resume loop: agy -p stops after each text response; loop until T2-done appears + for i in 1 2 3 4; do + if grep -q 'T2-done' "$RUN_DIR/raw-setup.txt" 2>/dev/null; then break; fi + echo "[e2e] setup resume attempt $i -- T2-done not yet found" + agy --dangerously-skip-permissions \ + --print-timeout 45m \ + --continue \ + -p "Continue from where you left off. Complete all remaining tasks (T2, T2-done) without stopping. After each CHECKPOINT, call fleet_status then immediately start the next task." \ + > /dev/null 2>&1 || true + node -e "$AGY_TRANSCRIPT_SCRIPT" "$RUN_DIR_WIN" >> "$RUN_DIR/raw-setup.txt" 2>&1 || true + done fi if [ "$LLM_EXIT" -ne 0 ]; then @@ -379,24 +378,24 @@ jobs: --add-dir "$(dirname "$REVIEWER_FOLDER")" \ -p "$(cat "$RUN_DIR/rendered-sprint.md")" \ > "$RUN_DIR/raw-sprint.txt" 2>&1 || LLM_EXIT=$? - # agy writes to CONOUT$ (Windows console), not stdout -- read its transcript - # from disk via last_conversations.json using the run directory as the lookup key. + # agy writes to CONOUT$ -- read transcript from disk after exit RUN_DIR_WIN="$(cygpath -w "$RUN_DIR" 2>/dev/null || echo "$RUN_DIR")" - node -e " - const fs = require('fs'), path = require('path'); - const home = process.env.USERPROFILE || process.env.HOME || ''; - const cache = JSON.parse(fs.readFileSync(path.join(home, '.gemini', 'antigravity-cli', 'cache', 'last_conversations.json'), 'utf8')); - const norm = p => path.resolve(p).toLowerCase().split(path.sep).join('/'); - const target = norm(process.argv[1]); - let convId = ''; - for (const k of Object.keys(cache)) { if (norm(k) === target) { convId = cache[k]; break; } } - if (!convId) { console.log('FLEET_TRANSCRIPT_MISSING:NO_CONV_FOR_DIR'); process.exit(0); } - const tp = path.join(home, '.gemini', 'antigravity-cli', 'brain', convId, '.system_generated', 'logs', 'transcript.jsonl'); - if (fs.existsSync(tp)) { process.stdout.write('FLEET_TRANSCRIPT_START\n'); process.stdout.write(fs.readFileSync(tp, 'utf8')); process.stdout.write('\nFLEET_TRANSCRIPT_END\n'); } - else { console.log('FLEET_TRANSCRIPT_MISSING:' + convId); } - " "$RUN_DIR_WIN" >> "$RUN_DIR/raw-sprint.txt" 2>&1 || true + AGY_TRANSCRIPT_SCRIPT="const fs=require('fs'),path=require('path');const home=process.env.USERPROFILE||process.env.HOME||'';const cache=JSON.parse(fs.readFileSync(path.join(home,'.gemini','antigravity-cli','cache','last_conversations.json'),'utf8'));const norm=p=>path.resolve(p).toLowerCase().split(path.sep).join('/');const target=norm(process.argv[1]);let id='';for(const k of Object.keys(cache)){if(norm(k)===target){id=cache[k];break;}}if(!id){process.stdout.write('FLEET_TRANSCRIPT_MISSING:NO_CONV\n');process.exit(0);}const tp=path.join(home,'.gemini','antigravity-cli','brain',id,'.system_generated','logs','transcript.jsonl');if(fs.existsSync(tp)){process.stdout.write('FLEET_TRANSCRIPT_START\n');process.stdout.write(fs.readFileSync(tp,'utf8'));process.stdout.write('\nFLEET_TRANSCRIPT_END\n');}else{process.stdout.write('FLEET_TRANSCRIPT_MISSING:'+id+'\n');}" + node -e "$AGY_TRANSCRIPT_SCRIPT" "$RUN_DIR_WIN" >> "$RUN_DIR/raw-sprint.txt" 2>&1 || true + # Resume loop: agy -p stops after each text response; loop until T3-done appears + for i in 1 2 3 4 5 6; do + if grep -q 'T3-done' "$RUN_DIR/raw-sprint.txt" 2>/dev/null; then break; fi + echo "[e2e] sprint resume attempt $i -- T3-done not yet found" + agy --dangerously-skip-permissions \ + --print-timeout 45m \ + --continue \ + -p "Continue from where you left off. Complete all remaining tasks without stopping. After each CHECKPOINT, call fleet_status then immediately start the next task. Do not stop until T3-done is printed." \ + > /dev/null 2>&1 || true + node -e "$AGY_TRANSCRIPT_SCRIPT" "$RUN_DIR_WIN" >> "$RUN_DIR/raw-sprint.txt" 2>&1 || true + done fi + # Extract PM session ID from sprint phase SESSION_ID=$(grep -m1 '"type":"system"' "$RUN_DIR/raw-sprint.txt" \ | jq -r '.session_id // ""' 2>/dev/null || true) From 656ffcf6fa2b310a715a29e0b086d2fe7c73fbfe Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 21:26:33 -0400 Subject: [PATCH 24/33] fix(agy): use folder-based transcript fallback and only pass --conversation when resuming --- .github/e2e/sprint-script.md | 9 ++++++- src/providers/agy.ts | 52 +++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/.github/e2e/sprint-script.md b/.github/e2e/sprint-script.md index 416fba8f..2a32d824 100644 --- a/.github/e2e/sprint-script.md +++ b/.github/e2e/sprint-script.md @@ -58,7 +58,14 @@ Activate the pm skill, then run: Branch prefix: `{{BRANCH_PREFIX}}`. -The pm skill runs the doer/reviewer loop. After `/pm start doer`, keep driving that loop yourself: when the doer reaches review, dispatch the reviewer; when the reviewer asks for changes, dispatch the doer again. Repeat until the reviewer approves, then run `/pm cleanup fleet-e2e-toy`. Do not stop until the sprint is approved. +The pm skill runs the doer/reviewer loop. Drive it yourself: +1. Dispatch the doer with `execute_prompt`. Wait for its response. +2. Read the doer's response (it will be in the `execute_prompt` result). If it says VERIFY or requests review, dispatch the reviewer. +3. Read the reviewer's response. If the reviewer requests changes, dispatch the doer again. Repeat until the reviewer explicitly approves. +4. A reviewer approval means the reviewer's response contains words like "approved", "LGTM", or "no changes needed". If `execute_prompt` returns empty or an error, re-dispatch. +5. Once approved, run `/pm cleanup fleet-e2e-toy`. + +Do NOT print T3-sprint PASS until you have confirmed a reviewer approval response (not just dispatched -- you must read the response). CHECKPOINT: {"id":"T3-sprint","status":"PASS","notes":"..."} diff --git a/src/providers/agy.ts b/src/providers/agy.ts index a64c50fd..0c1dbca7 100644 --- a/src/providers/agy.ts +++ b/src/providers/agy.ts @@ -5,12 +5,13 @@ import { classifyPromptError } from '../utils/prompt-errors.js'; import { escapeDoubleQuoted } from '../os/os-commands.js'; import { stripAnsi } from '../utils/ansi.js'; -// NODE_TRANSCRIPT_SCRIPT_BY_UUID: accepts a conversation UUID as argv[1] and reads -// the transcript directly from brain//.system_generated/logs/transcript.jsonl. -// This is robust against agy switching its working directory (e.g. to scratch) because -// we look up the transcript by the UUID we minted and passed via --conversation, not by -// folder path via last_conversations.json. -const NODE_TRANSCRIPT_SCRIPT = `const fs = require(\`fs\`); const path = require(\`path\`); try { const home = process.env.USERPROFILE || process.env.HOME || \`\`; const convId = process.argv[1]; if (!convId) { console.log(\`FLEET_TRANSCRIPT_MISSING:NO_CONV_ID\`); process.exit(0); } const transPath = path.join(home, \`.gemini\`, \`antigravity-cli\`, \`brain\`, convId, \`.system_generated\`, \`logs\`, \`transcript.jsonl\`); if (fs.existsSync(transPath)) { console.log(\`FLEET_TRANSCRIPT_START\`); console.log(fs.readFileSync(transPath, \`utf8\`)); console.log(\`FLEET_TRANSCRIPT_END\`); } else { console.log(\`FLEET_TRANSCRIPT_MISSING:\` + convId); } } catch (e) { console.log(\`FLEET_TRANSCRIPT_ERROR:\` + e.message); }`; +// NODE_TRANSCRIPT_SCRIPT: tries two strategies to locate the agy transcript. +// 1. Direct UUID lookup: brain//...transcript.jsonl (when agy honors --conversation) +// 2. Folder-based lookup: last_conversations.json[workFolder] (when agy ignores --conversation +// and registers under its work folder, which happens for local members in a git repo) +// argv[1] = conversation UUID that fleet minted and passed via --conversation +// argv[2] = work folder path (Windows absolute path) for the fallback lookup +const NODE_TRANSCRIPT_SCRIPT = `const fs=require(\`fs\`),path=require(\`path\`);try{const home=process.env.USERPROFILE||process.env.HOME||\`\`;const convId=process.argv[1];const workDir=process.argv[2]||'';function readTranscript(id){const tp=path.join(home,\`.gemini\`,\`antigravity-cli\`,\`brain\`,id,\`.system_generated\`,\`logs\`,\`transcript.jsonl\`);if(fs.existsSync(tp)){console.log(\`FLEET_TRANSCRIPT_START\`);console.log(fs.readFileSync(tp,\`utf8\`));console.log(\`FLEET_TRANSCRIPT_END\`);return true;}return false;}if(convId&&readTranscript(convId)){process.exit(0);}const cachePath=path.join(home,\`.gemini\`,\`antigravity-cli\`,\`cache\`,\`last_conversations.json\`);if(workDir&&fs.existsSync(cachePath)){const cache=JSON.parse(fs.readFileSync(cachePath,\`utf8\`));const norm=p=>path.resolve(p).toLowerCase().split(path.sep).join(\`/\`);const target=norm(workDir);for(const k of Object.keys(cache)){if(norm(k)===target){if(readTranscript(cache[k])){process.exit(0);}break;}}console.log(\`FLEET_TRANSCRIPT_MISSING:NOT_IN_CACHE:\`+target);}else{console.log(\`FLEET_TRANSCRIPT_MISSING:\`+(convId||\`NO_ID\`));}}catch(e){console.log(\`FLEET_TRANSCRIPT_ERROR:\`+e.message);}`; export class AgyProvider implements ProviderAdapter { readonly name: LlmProvider = 'agy'; @@ -36,7 +37,7 @@ export class AgyProvider implements ProviderAdapter { } buildPromptCommand(opts: PromptOptions): string { - const { folder, promptFile, sessionId, unattended, inv } = opts; + const { folder, promptFile, sessionId, resuming, unattended, inv } = opts; const escapedFolder = escapeDoubleQuoted(folder); let instruction = `Your task is described in ${promptFile} in the current directory. Read that file first, then execute the task.`; if (inv) { @@ -45,8 +46,9 @@ export class AgyProvider implements ProviderAdapter { let cmd = `cd "${escapedFolder}" && agy -p "${instruction}"`; - // Always pass --conversation so fleet knows where the transcript will be written. - if (sessionId) { + // Only pass --conversation when resuming an existing session. For fresh sessions, + // agy ignores the UUID we pass and creates its own -- use folder lookup instead. + if (sessionId && resuming) { cmd += ` --conversation "${escapeDoubleQuoted(sessionId)}"`; } @@ -54,12 +56,13 @@ export class AgyProvider implements ProviderAdapter { cmd += ' --dangerously-skip-permissions'; } - // After agy exits, read its transcript from disk by conversation UUID (primary output - // channel -- agy writes its response to CONOUT$, not stdout, so file I/O is required). - // We pass the UUID we minted via --conversation so the lookup is robust even if agy - // switches its working directory (e.g. to scratch) on launch. + // After agy exits, read its transcript from disk (primary output channel -- + // agy writes its response to CONOUT$, not stdout, so file I/O is required). + // Pass both the UUID (argv[1]) and the work folder (argv[2]) so the script can + // try UUID lookup first, then fall back to folder-based lookup in last_conversations.json. const convArg = sessionId ? `"${escapeDoubleQuoted(sessionId)}"` : '""'; - cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' ${convArg}`; + const folderArg = `"${escapeDoubleQuoted(folder)}"`; + cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' ${convArg} ${folderArg}`; return cmd; } @@ -134,10 +137,10 @@ export class AgyProvider implements ProviderAdapter { } resumeFlag(sessionId?: string, resuming?: boolean): string { - if (!sessionId) return ''; - // Always pass --conversation so fleet knows where to read the transcript. - // When resuming=true this continues an existing session; otherwise starts fresh - // with a pre-minted UUID that fleet uses to locate the transcript after exit. + if (!sessionId || !resuming) return ''; + // Only pass --conversation when resuming an existing session (agy uses it to + // reload conversation history). For fresh sessions, agy ignores any UUID we + // pass and creates its own -- transcript is found via folder lookup instead. return `--conversation "${escapeDoubleQuoted(sessionId)}"`; } @@ -201,13 +204,14 @@ export class AgyProvider implements ProviderAdapter { wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string, sessionId?: string): string { let cmd = `${setupCmd}Write-Output "FLEET_PID:$pid"; ${filePath} ${argList}`; - // After agy exits, read its conversation transcript by UUID (primary output channel -- - // agy writes LLM responses to CONOUT$, not stdout; the transcript file is the - // reliable way to capture the response text). We look up the transcript directly - // by the conversation UUID we passed via --conversation, bypassing last_conversations.json - // which would fail if agy switches its working directory (e.g. to scratch) on launch. + // After agy exits, read its conversation transcript (primary output channel -- + // agy writes LLM responses to CONOUT$, not stdout). Try UUID lookup first, + // then fall back to folder-based lookup via last_conversations.json. + // Extract work folder from argList: it appears after --add-dir or in setupCmd's cd. + // Since wrapWindowsPrompt doesn't receive folder directly, pass empty string for argv[2] + // so the script falls back gracefully (UUID lookup still works when agy honors --conversation). const convArg = sessionId ? `"${sessionId}"` : '""'; - cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' ${convArg}`; + cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' ${convArg} ""`; return cmd; } From d1a5dab477c5365f6f873e5185940899e23d83de Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 21:41:58 -0400 Subject: [PATCH 25/33] fix(e2e): skip pm plan if project already active to prevent duplicate planning on resume --- .github/e2e/sprint-script.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/e2e/sprint-script.md b/.github/e2e/sprint-script.md index 2a32d824..3347f29b 100644 --- a/.github/e2e/sprint-script.md +++ b/.github/e2e/sprint-script.md @@ -47,7 +47,11 @@ After printing that line, immediately call `fleet_status` and then start T3.3 wi ### T3.3 Run the sprint -Activate the pm skill, then run: +Activate the pm skill. + +Before running any `/pm` commands, read `projects.md` in the current directory: +- If `fleet-e2e-toy` already appears in the table with Phase 1 or later and status "active", **skip `/pm init`, `/pm pair`, and `/pm plan`** -- the plan is already done. Go directly to `/pm start doer`. +- If `fleet-e2e-toy` is not present, run the full sequence: ``` /pm init fleet-e2e-toy From b7eed8e2ca8289ec80708eb4d0ad4d2077011710 Mon Sep 17 00:00:00 2001 From: Azure Pipeline Date: Fri, 22 May 2026 21:53:10 -0400 Subject: [PATCH 26/33] fix(e2e): file-based checkpoints for agy -- PM writes to checkpoints.json via tool call, never exits on text --- .github/e2e/extract-results.mjs | 20 +++++++++++- .github/e2e/sprint-script.md | 58 +++++++++++++++++++-------------- .github/workflows/fleet-e2e.yml | 14 ++++---- 3 files changed, 60 insertions(+), 32 deletions(-) diff --git a/.github/e2e/extract-results.mjs b/.github/e2e/extract-results.mjs index 3a9d881d..8747a53c 100644 --- a/.github/e2e/extract-results.mjs +++ b/.github/e2e/extract-results.mjs @@ -155,7 +155,7 @@ function sumMemberLogs(role) { telemetry.push({ role: 'doer', ...sumMemberLogs('doer') }); telemetry.push({ role: 'reviewer', ...sumMemberLogs('reviewer') }); -// Extract checkpoints: one JSON object per "CHECKPOINT:" line +// Extract checkpoints: one JSON object per "CHECKPOINT:" line (text-based, legacy) let checkpoints = []; const regex = /CHECKPOINT:\s*(\{[\s\S]*?\})/g; let match; @@ -170,6 +170,24 @@ while ((match = regex.exec(allAssistantText)) !== null) { } catch {} } +// Also read file-based checkpoints written by the PM via Add-Content (agy-specific approach). +// These are more reliable -- the PM writes them as tool calls (no agy exit risk). +// File-based entries take precedence over text-based ones. +const checkpointFile = join(runDir, 'checkpoints.json'); +if (existsSync(checkpointFile)) { + for (const line of readFileSync(checkpointFile, 'utf8').split('\n')) { + if (!line.trim()) continue; + try { + const cp = JSON.parse(line.trim()); + if (cp && cp.id) { + const existing = checkpoints.findIndex(c => c.id === cp.id); + if (existing >= 0) checkpoints[existing] = cp; + else checkpoints.push(cp); + } + } catch {} + } +} + // A phase passes only if its terminal checkpoint was emitted. const TERMINALS = { setup: 'T2-done', sprint: 'T3-done' }; const requiredTerminals = []; diff --git a/.github/e2e/sprint-script.md b/.github/e2e/sprint-script.md index 3347f29b..e9d0c023 100644 --- a/.github/e2e/sprint-script.md +++ b/.github/e2e/sprint-script.md @@ -10,18 +10,21 @@ PM: {{PM_OS}} / {{PM_PROVIDER}} | VCS: {{VCS}} | Toy: {{TOY_PROJECT_URL}} - **reviewer** (name: `reviewer`, provider: {{REVIEWER_PROVIDER}}) > [!IMPORTANT] -> If you are running in non-interactive print mode (e.g. through a CI runner or using the `-p` / `--print` flags): after printing each CHECKPOINT line, you MUST immediately execute a tool call (e.g. `fleet_status`) and then continue to the next task without stopping. Never return a text-only response until you have printed ALL checkpoints including `T3-done`. +> Do NOT print CHECKPOINT lines as plain text -- that causes agy to exit. Instead, record each checkpoint by running a command (see below). After recording, immediately continue to the next task without any text-only response. ## Checkpoints -When you finish a step, print one line, exactly like this, as plain text (no code block, no backticks): +Record each checkpoint by running this PowerShell command (replace id, status, notes): - CHECKPOINT: {"id":"T3-repo-setup","status":"PASS","notes":"one short note"} +```powershell +Add-Content -Path checkpoints.json -Value '{"id":"T3-repo-setup","status":"PASS","notes":"one short note"}' +``` -- One line per step. One JSON object, not an array. Print it once. -- If a step fails, print it with `"status":"FAIL"` and move on to the next step. +- One JSON object per line appended to `checkpoints.json` in the current working directory. +- If a step fails, write `"status":"FAIL"` and continue to the next step. - The steps are: `T3-repo-setup`, `T3-discover`, `T3-sprint`, `T3-pr-verified`, `T3-done`. -- Print `T3-done` last, only after the other four. If `T3-done` is missing, the phase failed. +- After writing each checkpoint, immediately continue to the next task -- no pausing, no text summary. +- Write `T3-done` last. If it is missing from `checkpoints.json` after the session, the phase failed. --- @@ -33,17 +36,21 @@ Run a full sprint on the toy repo using the pm skill. Do all of it yourself in t On the doer: clone {{TOY_PROJECT_URL}} into its work folder if needed, then `git fetch origin && git checkout main && git pull`. Provision {{VCS}} auth. -CHECKPOINT: {"id":"T3-repo-setup","status":"PASS","notes":"..."} - -After printing that line, immediately call `fleet_status` and then start T3.2 without stopping. +Record checkpoint: +```powershell +Add-Content -Path checkpoints.json -Value '{"id":"T3-repo-setup","status":"PASS","notes":"...your note..."}' +``` +Then immediately continue to T3.2. ### T3.2 Pick the work Run `bd ready` on the doer. Pick 3 P1 issues. Write `requirements.md` for them into the current working directory. -CHECKPOINT: {"id":"T3-discover","status":"PASS","notes":"..."} - -After printing that line, immediately call `fleet_status` and then start T3.3 without stopping. +Record checkpoint: +```powershell +Add-Content -Path checkpoints.json -Value '{"id":"T3-discover","status":"PASS","notes":"...your note..."}' +``` +Then immediately continue to T3.3. ### T3.3 Run the sprint @@ -69,25 +76,26 @@ The pm skill runs the doer/reviewer loop. Drive it yourself: 4. A reviewer approval means the reviewer's response contains words like "approved", "LGTM", or "no changes needed". If `execute_prompt` returns empty or an error, re-dispatch. 5. Once approved, run `/pm cleanup fleet-e2e-toy`. -Do NOT print T3-sprint PASS until you have confirmed a reviewer approval response (not just dispatched -- you must read the response). +Do NOT record T3-sprint PASS until you have confirmed a reviewer approval response in the execute_prompt result (not just dispatched -- you must read the response). -CHECKPOINT: {"id":"T3-sprint","status":"PASS","notes":"..."} - -After printing that line, immediately call `fleet_status` and then start T3.4 without stopping. +Record checkpoint: +```powershell +Add-Content -Path checkpoints.json -Value '{"id":"T3-sprint","status":"PASS","notes":"...your note..."}' +``` +Then immediately continue to T3.4. ### T3.4 Check the result Confirm a branch with prefix `{{BRANCH_PREFIX}}` exists on origin and a PR was raised. -CHECKPOINT: {"id":"T3-pr-verified","status":"PASS","notes":"..."} - -After printing that line, immediately call `fleet_status` and then print T3-done without stopping. - -### Done - -Print this only after the four steps above are done: - -CHECKPOINT: {"id":"T3-done","status":"PASS","notes":"sprint phase finished"} +Record checkpoint: +```powershell +Add-Content -Path checkpoints.json -Value '{"id":"T3-pr-verified","status":"PASS","notes":"...your note..."}' +``` +Then record T3-done: +```powershell +Add-Content -Path checkpoints.json -Value '{"id":"T3-done","status":"PASS","notes":"sprint phase finished"}' +``` --- diff --git a/.github/workflows/fleet-e2e.yml b/.github/workflows/fleet-e2e.yml index 84b908e7..fd7a8f7b 100644 --- a/.github/workflows/fleet-e2e.yml +++ b/.github/workflows/fleet-e2e.yml @@ -382,14 +382,16 @@ jobs: RUN_DIR_WIN="$(cygpath -w "$RUN_DIR" 2>/dev/null || echo "$RUN_DIR")" AGY_TRANSCRIPT_SCRIPT="const fs=require('fs'),path=require('path');const home=process.env.USERPROFILE||process.env.HOME||'';const cache=JSON.parse(fs.readFileSync(path.join(home,'.gemini','antigravity-cli','cache','last_conversations.json'),'utf8'));const norm=p=>path.resolve(p).toLowerCase().split(path.sep).join('/');const target=norm(process.argv[1]);let id='';for(const k of Object.keys(cache)){if(norm(k)===target){id=cache[k];break;}}if(!id){process.stdout.write('FLEET_TRANSCRIPT_MISSING:NO_CONV\n');process.exit(0);}const tp=path.join(home,'.gemini','antigravity-cli','brain',id,'.system_generated','logs','transcript.jsonl');if(fs.existsSync(tp)){process.stdout.write('FLEET_TRANSCRIPT_START\n');process.stdout.write(fs.readFileSync(tp,'utf8'));process.stdout.write('\nFLEET_TRANSCRIPT_END\n');}else{process.stdout.write('FLEET_TRANSCRIPT_MISSING:'+id+'\n');}" node -e "$AGY_TRANSCRIPT_SCRIPT" "$RUN_DIR_WIN" >> "$RUN_DIR/raw-sprint.txt" 2>&1 || true - # Resume loop: agy -p stops after each text response; loop until T3-done appears - for i in 1 2 3 4 5 6; do - if grep -q 'T3-done' "$RUN_DIR/raw-sprint.txt" 2>/dev/null; then break; fi - echo "[e2e] sprint resume attempt $i -- T3-done not yet found" + # Safety resume loop: if the PM exited before writing T3-done to checkpoints.json, + # resume it with -c (last conversation). Check checkpoints.json (file-based, written + # by PM via tool call) rather than scanning text output. + for i in 1 2 3; do + if grep -q 'T3-done' "$RUN_DIR/checkpoints.json" 2>/dev/null; then break; fi + echo "[e2e] sprint resume attempt $i -- T3-done not in checkpoints.json" agy --dangerously-skip-permissions \ --print-timeout 45m \ - --continue \ - -p "Continue from where you left off. Complete all remaining tasks without stopping. After each CHECKPOINT, call fleet_status then immediately start the next task. Do not stop until T3-done is printed." \ + -c \ + -p "Continue the sprint. Read projects.md and checkpoints.json to see current state. Do NOT re-run /pm init or /pm plan if the project is already active. Continue with /pm start doer or drive the reviewer loop as needed." \ > /dev/null 2>&1 || true node -e "$AGY_TRANSCRIPT_SCRIPT" "$RUN_DIR_WIN" >> "$RUN_DIR/raw-sprint.txt" 2>&1 || true done From 0253abbb48637709b85ca96000ce38036b0f1430 Mon Sep 17 00:00:00 2001 From: Akhil Kumar Date: Sat, 23 May 2026 00:27:54 -0400 Subject: [PATCH 27/33] fix(agy): address review feedback on feat/agy-support Bundles 10 fixes from the agy-support code review (severity-ordered): - B1 (blocker): escape sessionId in Windows prompt wrapper (agy.ts:213) - B2 (blocker): use ANTIGRAVITY_API_KEY instead of shared GEMINI_API_KEY (auth.ts:11) - H1 (high): list AGY.md as distinct context file in SKILL.md (skills/fleet/SKILL.md:250) - H2 (high): preserve user's custom defaultModel on reinstall (install.ts:284-287) - M2 (medium): warn when transcript parsing falls back to raw output (agy.ts:89-118) - M3 (medium): deduplicate requiredPerms entries (install.ts:220-230) - M4 (medium): write agy hooks.json with 0o600 permissions (install.ts:209) - L2 (low): mention agy in llms.txt subtitle - L3 (low): add agy setup note covering OAuth + ANTIGRAVITY_API_KEY (docs/install.md) - L4 (low): label agy-safety-rationalization.md as internal Test results: 1298 passed, 6 skipped, 0 failures. Typecheck clean. Deferred to follow-up beads issues: H3, H4, M1, M5, M6, N1. --- docs/agy-safety-rationalization.md | 4 +++- docs/install.md | 11 +++++++++++ docs/provider-matrix.md | 4 ++-- llms.txt | 2 +- skills/fleet/SKILL.md | 2 +- src/cli/auth.ts | 2 +- src/cli/install.ts | 10 +++++----- src/providers/agy.ts | 9 +++++---- tests/providers.test.ts | 2 +- 9 files changed, 30 insertions(+), 16 deletions(-) diff --git a/docs/agy-safety-rationalization.md b/docs/agy-safety-rationalization.md index 340633b6..b4b6fdfc 100644 --- a/docs/agy-safety-rationalization.md +++ b/docs/agy-safety-rationalization.md @@ -1,5 +1,7 @@ # Architecture & Safety Rationalization: Google Antigravity (agy) Integration +> **Internal document** -- architectural rationale for contributors and reviewers, not user-facing guidance. For user setup instructions see [docs/install.md](install.md). + This document rationalizes the design decisions, safety mechanisms, and compatibility considerations implemented for the Google Antigravity CLI (provider key: "agy") support in apra-fleet. --- @@ -37,7 +39,7 @@ Unlike Claude or Gemini, the Antigravity CLI reads its global configurations (MC ## 3. Authentication & Credential Isolation (provision-auth.ts) ### Change Rationalization -Antigravity utilizes the "GEMINI_API_KEY" environment variable to authenticate requests. We unified authentication provisioning to support local and remote members securely. +Antigravity utilizes the "ANTIGRAVITY_API_KEY" environment variable to authenticate requests. We unified authentication provisioning to support local and remote members securely. ### Safety Mechanisms - Local Exemption: Local members automatically skip LLM auth provisioning as they run on the host machine and inherit active host credentials directly. diff --git a/docs/install.md b/docs/install.md index 2fb24525..2e5716e4 100644 --- a/docs/install.md +++ b/docs/install.md @@ -127,6 +127,17 @@ fan-out. Gemini works well as a doer or reviewer, and as an orchestrator for serial workflows; for heavily parallel orchestration, Claude dispatches in parallel. This is a property of the Gemini CLI, not a Fleet limitation. +### Agy note + +`apra-fleet install --llm agy` configures Fleet for the Google Antigravity CLI. +Agy uses Google OAuth by default -- a browser-based login flow is required per +machine, so `provision_llm_auth` does **not** work for remote agy members today. + +For headless or remote members, set `ANTIGRAVITY_API_KEY` (obtain from +[Google AI Studio](https://aistudio.google.com)) in the environment before +invoking fleet commands. The agy CLI checks env vars before falling back to +OAuth. + ## Uninstall The built-in uninstall command surgically removes MCP registration, diff --git a/docs/provider-matrix.md b/docs/provider-matrix.md index 8ac1e853..670ba16f 100644 --- a/docs/provider-matrix.md +++ b/docs/provider-matrix.md @@ -21,7 +21,7 @@ Reference tables for all LLM providers supported by Apra Fleet. Extracted from ` | **Model selection** | `--model opus/sonnet/haiku` | **Not available** (custom models configured in apra-fleet registry) | `--model` / `-m` | `--model ` or `/model` interactive | `--model ` or `GEMINI_MODEL` env var | | **Max turns** | `--max-turns N` | **Not available** | **Not available** | **Not available** (auto-compaction) | **Not available** | | **Skip permissions** | `--dangerously-skip-permissions` | `--dangerously-skip-permissions` | `--ask-for-approval never` + `--sandbox danger-full-access` | `--allow-all-tools` / `--yolo` | `--yolo` / `-y` | -| **Auth env var** | `ANTHROPIC_API_KEY` | `GEMINI_API_KEY` | `OPENAI_API_KEY` (or `CODEX_API_KEY` in exec mode) | `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / `GITHUB_TOKEN` | `GEMINI_API_KEY` | +| **Auth env var** | `ANTHROPIC_API_KEY` | `GEMINI_API_KEY` | `OPENAI_API_KEY` (or `CODEX_API_KEY` in exec mode) | `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / `GITHUB_TOKEN` | `ANTIGRAVITY_API_KEY` | | **OAuth / login** | `~/.claude/.credentials.json` (copyable) | Browser OAuth / settings.json | `codex login` (ChatGPT account or API key) | `gh auth login` or `/login` (device flow) | Google OAuth (browser-based) | | **Version check** | `claude --version` | `agy --version 2>&1` | `codex --version` | `copilot --version` | `gemini --version` | | **Install cmd (Linux)** | `curl -fsSL https://claude.ai/install.sh \| bash` | `npm install -g @google/antigravity-cli` | `npm i -g @openai/codex` | `curl -fsSL https://gh.io/copilot-install \| bash` | `npm i -g @google/gemini-cli` | @@ -91,7 +91,7 @@ Known limitations when using non-Claude providers in a fleet. |----------|---------|--------| | Claude | `ANTHROPIC_API_KEY` | console.anthropic.com | | Gemini | `GEMINI_API_KEY` | aistudio.google.com | -| Antigravity (agy) | `GEMINI_API_KEY` | aistudio.google.com | +| Antigravity (agy) | `ANTIGRAVITY_API_KEY` | aistudio.google.com | | Codex | `OPENAI_API_KEY` | platform.openai.com | | Copilot | `COPILOT_GITHUB_TOKEN` | github.com/settings/tokens (fine-grained PAT with "Copilot Requests" permission) | diff --git a/llms.txt b/llms.txt index e35d757a..65db84c3 100644 --- a/llms.txt +++ b/llms.txt @@ -1,6 +1,6 @@ # Apra Fleet -> AI-managed fleet orchestration for Claude Code -- run, update, and coordinate multiple Claude Code agents from a single hub. +> AI-managed fleet orchestration for Claude Code, Gemini, and Antigravity (agy) -- run, update, and coordinate multiple LLM agents from a single hub. Apra Fleet is a multi-agent orchestration layer that lets a PM agent delegate work to a fleet of Claude Code instances via MCP tools, Git, and SSH. Each fleet member runs its own Claude Code session; the PM agent controls lifecycle, skills, and task assignment. Members can run different LLM backends (Claude, Gemini, Codex, Copilot, Antigravity) and be mixed freely within a single fleet. diff --git a/skills/fleet/SKILL.md b/skills/fleet/SKILL.md index fec5ad4e..b168f3ea 100644 --- a/skills/fleet/SKILL.md +++ b/skills/fleet/SKILL.md @@ -247,7 +247,7 @@ When you see this notice, surface it to the user verbatim before the rest of the | Concern | How to handle | |---------|---------------| -| **Agent context file** | Use `member_detail` -> `llmProvider` to determine filename: CLAUDE.md (Claude), GEMINI.md (Antigravity/Gemini), AGENTS.md (Codex), COPILOT.md (Copilot) | +| **Agent context file** | Use `member_detail` -> `llmProvider` to determine filename: CLAUDE.md (Claude), AGY.md (Antigravity), GEMINI.md (Gemini), AGENTS.md (Codex), COPILOT.md (Copilot) | | **Attribution config** | Claude-only (Step 2 in onboarding.md) - skip for all other providers | | **Timeouts** | Antigravity/Gemini members are slower -> use 2-3x timeout multiplier for `execute_prompt` dispatches to those members. Minimum `timeout_s: 900` for any non-trivial task. | diff --git a/src/cli/auth.ts b/src/cli/auth.ts index c22823d7..633860c1 100644 --- a/src/cli/auth.ts +++ b/src/cli/auth.ts @@ -9,7 +9,7 @@ const PROVIDER_AUTH_ENV: Record = { gemini: 'GEMINI_API_KEY', codex: 'OPENAI_API_KEY', copilot: 'COPILOT_GITHUB_TOKEN', - agy: 'GEMINI_API_KEY', + agy: 'ANTIGRAVITY_API_KEY', }; export async function runAuth(args: string[]): Promise { diff --git a/src/cli/install.ts b/src/cli/install.ts index aa3395cb..96736cd3 100644 --- a/src/cli/install.ts +++ b/src/cli/install.ts @@ -206,7 +206,7 @@ function mergeHooksConfig(paths: ProviderInstallConfig, hooksConfig: any, provid } if (isAgy) { - fs.writeFileSync(settingsFile, JSON.stringify(settings, null, 2) + '\n'); + fs.writeFileSync(settingsFile, JSON.stringify(settings, null, 2) + '\n', { mode: 0o600 }); } else { writeConfig(paths, settings); } @@ -222,8 +222,6 @@ function mergePermissions(paths: ProviderInstallConfig): void { 'activate_skill(*)', 'tracker_*', 'Agent(*)', - 'activate_skill(*)', - 'tracker_*', `Read(${paths.skillsDir.replace(/\\/g, '/')}/**)`, `Read(${paths.fleetSkillsDir.replace(/\\/g, '/')}/**)`, `Read(${path.join(paths.configDir, 'skills').replace(/\\/g, '/')}/**)`, @@ -283,8 +281,10 @@ function mergeAgyConfig(paths: ProviderInstallConfig, mcpConfig: any): void { function writeDefaultModel(paths: ProviderInstallConfig, standardModel: string): void { const settings = readConfig(paths); - settings.defaultModel = standardModel; - writeConfig(paths, settings); + if (!settings.defaultModel) { + settings.defaultModel = standardModel; + writeConfig(paths, settings); + } } function mergeCopilotConfig(paths: ProviderInstallConfig, mcpConfig: any): void { diff --git a/src/providers/agy.ts b/src/providers/agy.ts index 0c1dbca7..42a82a8f 100644 --- a/src/providers/agy.ts +++ b/src/providers/agy.ts @@ -16,7 +16,7 @@ const NODE_TRANSCRIPT_SCRIPT = `const fs=require(\`fs\`),path=require(\`path\`); export class AgyProvider implements ProviderAdapter { readonly name: LlmProvider = 'agy'; readonly processName = 'agy'; - readonly authEnvVar = 'GEMINI_API_KEY'; + readonly authEnvVar = 'ANTIGRAVITY_API_KEY'; readonly credentialPath = '~/.gemini/antigravity-cli/settings.json'; readonly instructionFileName = 'AGY.md'; @@ -115,6 +115,7 @@ export class AgyProvider implements ProviderAdapter { } // Fallback: ANSI-strip stdout (covers cases where transcript is missing or incomplete) + console.error('[agy] warning: transcript markers not found -- falling back to raw ANSI-stripped output'); const stripped = stripAnsi(raw) .replace(/^FLEET_PID:\d+\r?\n/m, '') .replace(/\r/g, '') @@ -194,11 +195,11 @@ export class AgyProvider implements ProviderAdapter { } oauthEnvVarsToUnset(): string[] { - return ['GEMINI_API_KEY']; + return ['ANTIGRAVITY_API_KEY']; } authEnvVarForToken(token: string): string { - return 'GEMINI_API_KEY'; + return 'ANTIGRAVITY_API_KEY'; } wrapWindowsPrompt(setupCmd: string, filePath: string, argList: string, sessionId?: string): string { @@ -210,7 +211,7 @@ export class AgyProvider implements ProviderAdapter { // Extract work folder from argList: it appears after --add-dir or in setupCmd's cd. // Since wrapWindowsPrompt doesn't receive folder directly, pass empty string for argv[2] // so the script falls back gracefully (UUID lookup still works when agy honors --conversation). - const convArg = sessionId ? `"${sessionId}"` : '""'; + const convArg = sessionId ? `"${escapeDoubleQuoted(sessionId)}"` : '""'; cmd += `; node -e '${NODE_TRANSCRIPT_SCRIPT}' ${convArg} ""`; return cmd; diff --git a/tests/providers.test.ts b/tests/providers.test.ts index 3ae0a83a..864dabc3 100644 --- a/tests/providers.test.ts +++ b/tests/providers.test.ts @@ -869,7 +869,7 @@ describe('AgyProvider', () => { it('has correct metadata', () => { expect(p.name).toBe('agy'); expect(p.processName).toBe('agy'); - expect(p.authEnvVar).toBe('GEMINI_API_KEY'); + expect(p.authEnvVar).toBe('ANTIGRAVITY_API_KEY'); expect(p.credentialPath).toBe('~/.gemini/antigravity-cli/settings.json'); expect(p.instructionFileName).toBe('AGY.md'); }); From 29fa4e4f6ee4f143f58b442efe5592620f9b9beb Mon Sep 17 00:00:00 2001 From: Akhil Kumar Date: Tue, 26 May 2026 21:21:52 -0400 Subject: [PATCH 28/33] fix(agy): use shell-safe empty string in transcript reader (55c) NODE_TRANSCRIPT_SCRIPT is embedded in a shell command as `node -e '