Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
8b7a53d
feat(agy): add support for antigravity cli and curated model validation
May 21, 2026
d989dcd
docs(agy): align provider docs to treat agy as primary and demote gemini
May 21, 2026
e963f86
feat(agy): rationalize safety and fix integration gaps
May 21, 2026
d15ef45
chore(pm): add last dispatched phase to status template
May 21, 2026
0966e69
feat(e2e): add agy e2e suites s8.1, s8.2, and s8.3
May 21, 2026
95e890c
feat(e2e): install agy cli in workflow if pm provider is agy
May 21, 2026
917cc0d
feat(e2e): locate pre-installed agy binary instead of npm install
May 21, 2026
1dfb05e
feat(e2e): robustly resolve agy CLI paths and add debug logs
May 21, 2026
e4e7925
feat(e2e): add AppData/Local file listing for agy debug
May 21, 2026
a647901
feat(e2e): debug gemini CLI location on runner
May 21, 2026
0d16fc6
feat(e2e): add local download fallback for agy CLI on Windows runner
May 21, 2026
333e868
feat: support agy LLM provider OAuth and simplify CLI location in wor…
May 21, 2026
b867282
Set agy print timeout to 45m in E2E workflow
May 22, 2026
8d22caf
feat(agy): support agy CLI as provider, decouple to AGY.md, add print…
May 22, 2026
1a174f2
fix(pm): enforce local linter checks in doer and reviewer templates
May 22, 2026
c42f6fe
fix(e2e): fix s8.1 agy suite output capture and checkpoint parsing
May 22, 2026
4479fbb
Revert "fix(e2e): fix s8.1 agy suite output capture and checkpoint pa…
May 22, 2026
1f382c2
fix(agy): capture prompt output and resolve wsl hook path
May 22, 2026
68abee8
fix(agy): add missing ansi utility file
May 22, 2026
0d30098
fix(agy): look up transcript by UUID, not folder path -- bypasses scr…
May 22, 2026
48945aa
fix(e2e): extract PM agy transcript after exit -- reads from brain/ b…
May 23, 2026
3ed23db
fix(e2e): require tool call after each CHECKPOINT to prevent agy prin…
May 23, 2026
3ba0e07
fix(e2e): resume agy with --continue loop until terminal checkpoint f…
May 23, 2026
656ffcf
fix(agy): use folder-based transcript fallback and only pass --conver…
May 23, 2026
d1a5dab
fix(e2e): skip pm plan if project already active to prevent duplicate…
May 23, 2026
b7eed8e
fix(e2e): file-based checkpoints for agy -- PM writes to checkpoints.…
May 23, 2026
0253abb
fix(agy): address review feedback on feat/agy-support
May 23, 2026
29fa4e4
fix(agy): use shell-safe empty string in transcript reader (55c)
May 27, 2026
13280d1
fix(agy): block global apra-fleet MCP + skills in local member worksp…
May 27, 2026
e8adfe4
feat(agy): honor model tier via per-workspace settings.json (1st)
May 27, 2026
174ae6a
refactor(agy): ship transcript-reader + settings-merge as .js files (…
May 27, 2026
e99fb1c
fix(agy): wire agy-settings-merge into Windows wrapPrompt path
May 27, 2026
e8a497b
Merge e99fb1c271062554ed2d7c134c574336bda6d332 into 2273ca5de1a3c161a…
kumaakh May 27, 2026
17c3471
chore: regenerate llms-full.txt
github-actions[bot] May 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 59 additions & 1 deletion .github/e2e/extract-results.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,46 @@ function processRawFile(filePath, provider) {

const content = readFileSync(filePath, 'utf8');

if (provider === 'agy') {
// The raw file contains the stdout of the agy invocation. After agy exits,
// fleet appends the transcript JSONL wrapped in FLEET_TRANSCRIPT_START/END markers.
// We extract text from PLANNER_RESPONSE entries in the JSONL so that CHECKPOINT lines
// embedded in the agent's responses can be detected.
const startMarker = 'FLEET_TRANSCRIPT_START';
const endMarker = 'FLEET_TRANSCRIPT_END';
const startIdx = content.indexOf(startMarker);
const endIdx = content.indexOf(endMarker);
if (startIdx !== -1 && endIdx !== -1) {
const section = content.substring(startIdx + startMarker.length, endIdx);
let extracted = '';
for (const line of section.split('\n')) {
const trimmed = line.trim();
if (!trimmed) continue;
try {
const entry = JSON.parse(trimmed);
if (entry.type === 'PLANNER_RESPONSE' && entry.status === 'DONE' && typeof entry.content === 'string' && entry.content.trim()) {
extracted += '\n' + entry.content.trim();
}
} catch { /* skip malformed lines */ }
}
return {
assistantText: extracted || content,
tokensIn: 0,
tokensOut: 0,
cacheCreate: 0,
cacheRead: 0,
};
}
// No markers: treat raw content as plain text (fallback for empty or unexpected output)
return {
assistantText: content,
tokensIn: 0,
tokensOut: 0,
cacheCreate: 0,
cacheRead: 0,
};
}

for (const line of content.split('\n')) {
const trimmed = line.trim();
if (!trimmed) continue;
Expand Down Expand Up @@ -115,7 +155,7 @@ function sumMemberLogs(role) {
telemetry.push({ role: 'doer', ...sumMemberLogs('doer') });
telemetry.push({ role: 'reviewer', ...sumMemberLogs('reviewer') });

// Extract checkpoints: one JSON object per "CHECKPOINT:" line
// Extract checkpoints: one JSON object per "CHECKPOINT:" line (text-based, legacy)
let checkpoints = [];
const regex = /CHECKPOINT:\s*(\{[\s\S]*?\})/g;
let match;
Expand All @@ -130,6 +170,24 @@ while ((match = regex.exec(allAssistantText)) !== null) {
} catch {}
}

// Merge in file-based checkpoints: the PM appends one JSON object per line to
// checkpoints.json in the run directory (via Add-Content; agy-specific approach).
// Because the PM writes them through tool calls rather than plain-text output,
// they are the more reliable source, so an entry from the file replaces any
// text-based checkpoint that carries the same id.
const checkpointFile = join(runDir, 'checkpoints.json');
if (existsSync(checkpointFile)) {
  const fileLines = readFileSync(checkpointFile, 'utf8').split('\n');
  fileLines.forEach((rawLine) => {
    const text = rawLine.trim();
    if (text === '') return;
    try {
      const record = JSON.parse(text);
      // Ignore null values and entries without a truthy id.
      if (!record || !record.id) return;
      const slot = checkpoints.findIndex((known) => known.id === record.id);
      if (slot === -1) {
        checkpoints.push(record);
      } else {
        checkpoints[slot] = record;
      }
    } catch {
      // Best effort: a malformed line is skipped, the rest are still merged.
    }
  });
}

// A phase passes only if its terminal checkpoint was emitted.
const TERMINALS = { setup: 'T2-done', sprint: 'T3-done' };
const requiredTerminals = [];
Expand Down
7 changes: 6 additions & 1 deletion .github/e2e/setup-script.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ PM: {{PM_OS}} / {{PM_PROVIDER}} | VCS: {{VCS}} | Toy: {{TOY_PROJECT_URL}}

Do all work yourself in this conversation -- no sub-agents. If a step fails, move on to the next one.

> [!IMPORTANT]
> If you are running in non-interactive print mode (e.g. through a CI runner or using the `-p` / `--print` flags): after printing each CHECKPOINT line, you MUST immediately execute a tool call (e.g. `fleet_status`) and then continue to the next task without stopping. Never return a text-only response until you have printed ALL checkpoints including the final `-done` one.

## Checkpoints

When you finish a step, print one line, exactly like this, as plain text (no code block, no backticks):
Expand Down Expand Up @@ -63,7 +66,7 @@ grep -q 'HOME/bin' ~/.profile 2>/dev/null || echo 'export PATH=$HOME/bin:$PATH'

CHECKPOINT: {"id":"T1","status":"PASS","notes":"..."}

---
After printing that line, immediately call `fleet_status` and then start T2 without stopping.

## T2: Basic Execution

Expand All @@ -75,6 +78,8 @@ Write any scratch files into the run directory (the current working directory),

CHECKPOINT: {"id":"T2","status":"PASS","notes":"..."}

After printing that line, immediately call `fleet_status` and then print T2-done without stopping.

### Done

Print this only after T1 and T2 are done:
Expand Down
64 changes: 47 additions & 17 deletions .github/e2e/sprint-script.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,22 @@ PM: {{PM_OS}} / {{PM_PROVIDER}} | VCS: {{VCS}} | Toy: {{TOY_PROJECT_URL}}
- **doer** (name: `doer`, provider: {{DOER_PROVIDER}})
- **reviewer** (name: `reviewer`, provider: {{REVIEWER_PROVIDER}})

> [!IMPORTANT]
> Do NOT print CHECKPOINT lines as plain text -- that causes agy to exit. Instead, record each checkpoint by running a command (see below). After recording, immediately continue to the next task without any text-only response.

## Checkpoints

When you finish a step, print one line, exactly like this, as plain text (no code block, no backticks):
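Record each checkpoint by running this PowerShell command (replace id, status, notes); on a Linux or macOS PM where PowerShell is not available, append the same JSON line with `echo '<json>' >> checkpoints.json` instead: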

CHECKPOINT: {"id":"T3-repo-setup","status":"PASS","notes":"one short note"}
```powershell
Add-Content -Path checkpoints.json -Value '{"id":"T3-repo-setup","status":"PASS","notes":"one short note"}'
```

- One line per step. One JSON object, not an array. Print it once.
- If a step fails, print it with `"status":"FAIL"` and move on to the next step.
- One JSON object per line appended to `checkpoints.json` in the current working directory.
- If a step fails, write `"status":"FAIL"` and continue to the next step.
- The steps are: `T3-repo-setup`, `T3-discover`, `T3-sprint`, `T3-pr-verified`, `T3-done`.
- Print `T3-done` last, only after the other four. If `T3-done` is missing, the phase failed.
- After writing each checkpoint, immediately continue to the next task -- no pausing, no text summary.
- Write `T3-done` last. If it is missing from `checkpoints.json` after the session, the phase failed.

---

Expand All @@ -30,17 +36,29 @@ Run a full sprint on the toy repo using the pm skill. Do all of it yourself in t

On the doer: clone {{TOY_PROJECT_URL}} into its work folder if needed, then `git fetch origin && git checkout main && git pull`. Provision {{VCS}} auth.

CHECKPOINT: {"id":"T3-repo-setup","status":"PASS","notes":"..."}
Record checkpoint:
```powershell
Add-Content -Path checkpoints.json -Value '{"id":"T3-repo-setup","status":"PASS","notes":"...your note..."}'
```
Then immediately continue to T3.2.

### T3.2 Pick the work

Run `bd ready` on the doer. Pick 3 P1 issues. Write `requirements.md` for them into the current working directory.

CHECKPOINT: {"id":"T3-discover","status":"PASS","notes":"..."}
Record checkpoint:
```powershell
Add-Content -Path checkpoints.json -Value '{"id":"T3-discover","status":"PASS","notes":"...your note..."}'
```
Then immediately continue to T3.3.

### T3.3 Run the sprint

Activate the pm skill, then run:
Activate the pm skill.

Before running any `/pm` commands, read `projects.md` in the current directory:
- If `fleet-e2e-toy` already appears in the table with Phase 1 or later and status "active", **skip `/pm init`, `/pm pair`, and `/pm plan`** -- the plan is already done. Go directly to `/pm start doer`.
- If `fleet-e2e-toy` is not present, run the full sequence:

```
/pm init fleet-e2e-toy
Expand All @@ -51,21 +69,33 @@ Activate the pm skill, then run:

Branch prefix: `{{BRANCH_PREFIX}}`.

The pm skill runs the doer/reviewer loop. After `/pm start doer`, keep driving that loop yourself: when the doer reaches review, dispatch the reviewer; when the reviewer asks for changes, dispatch the doer again. Repeat until the reviewer approves, then run `/pm cleanup fleet-e2e-toy`. Do not stop until the sprint is approved.
The pm skill runs the doer/reviewer loop. Drive it yourself:
1. Dispatch the doer with `execute_prompt`. Wait for its response.
2. Read the doer's response (it will be in the `execute_prompt` result). If it says VERIFY or requests review, dispatch the reviewer.
3. Read the reviewer's response. If the reviewer requests changes, dispatch the doer again. Repeat until the reviewer explicitly approves.
4. A reviewer approval means the reviewer's response contains words like "approved", "LGTM", or "no changes needed". If `execute_prompt` returns empty or an error, re-dispatch.
5. Once approved, run `/pm cleanup fleet-e2e-toy`.

Do NOT record T3-sprint PASS until you have confirmed a reviewer approval response in the execute_prompt result (not just dispatched -- you must read the response).

CHECKPOINT: {"id":"T3-sprint","status":"PASS","notes":"..."}
Record checkpoint:
```powershell
Add-Content -Path checkpoints.json -Value '{"id":"T3-sprint","status":"PASS","notes":"...your note..."}'
```
Then immediately continue to T3.4.

### T3.4 Check the result

Confirm a branch with prefix `{{BRANCH_PREFIX}}` exists on origin and a PR was raised.

CHECKPOINT: {"id":"T3-pr-verified","status":"PASS","notes":"..."}

### Done

Print this only after the four steps above are done:

CHECKPOINT: {"id":"T3-done","status":"PASS","notes":"sprint phase finished"}
Record checkpoint:
```powershell
Add-Content -Path checkpoints.json -Value '{"id":"T3-pr-verified","status":"PASS","notes":"...your note..."}'
```
Then record T3-done:
```powershell
Add-Content -Path checkpoints.json -Value '{"id":"T3-done","status":"PASS","notes":"sprint phase finished"}'
```

---

Expand Down
18 changes: 18 additions & 0 deletions .github/e2e/suites.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,24 @@
"doer": { "os": "local_doer_macos", "provider": "gemini", "type": "local" },
"reviewer": { "os": "local_reviewer_macos", "provider": "gemini", "type": "local" },
"vcs": "github"
},
"s8.1": {
"pm": { "os": "windows", "provider": "agy", "runner": "fleet-windows" },
"doer": { "os": "local_doer_windows", "provider": "agy", "type": "local" },
"reviewer": { "os": "local_reviewer_windows", "provider": "agy", "type": "local" },
"vcs": "github"
},
"s8.2": {
"pm": { "os": "linux", "provider": "agy", "runner": "fleet-linux" },
"doer": { "os": "local_doer_linux", "provider": "agy", "type": "local" },
"reviewer": { "os": "local_reviewer_linux", "provider": "agy", "type": "local" },
"vcs": "github"
},
"s8.3": {
"pm": { "os": "macos", "provider": "agy", "runner": "fleet-macos" },
"doer": { "os": "local_doer_macos", "provider": "agy", "type": "local" },
"reviewer": { "os": "local_reviewer_macos", "provider": "agy", "type": "local" },
"vcs": "github"
}
}
}
Loading
Loading