From 651628956a2ed2528840c48c7271e07ed2d4aef2 Mon Sep 17 00:00:00 2001 From: Matt Gunnin Date: Tue, 12 May 2026 13:41:41 -0500 Subject: [PATCH 01/19] fix(recipes/openai): add max_batch_tokens to embedding touchpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI is the only recipe in the codebase without a max_batch_tokens cap. Every other provider declares one (voyage=120K, azure-openai=8K, dashscope=8K, zhipu=8K, minimax=4K). Without it, gbrain's recursive-halving safety net never engages — batches dispatched purely on the char/4 estimator window will trip OpenAI's 1M-token TPM ceiling on token-dense pages (Discord exports, JSON dumps, code-heavy markdown), then retry storm and block the queue head. Setting cap to 100_000: - gbrain's batcher estimates tokens as chars/4 - Token-dense markdown+JSON tokenizes at ~chars/2.7 - 100K estimated = ~150K real worst-case, safely under OpenAI's 300K per-request hard cap and the 1M/min TPM ceiling - Leaves headroom for recursive-halving on outlier chunks (cherry picked from commit 40536aace5b70c4340b2a60f02fab74610f647a8) --- src/core/ai/recipes/openai.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/core/ai/recipes/openai.ts b/src/core/ai/recipes/openai.ts index 732638da6..d453a4db0 100644 --- a/src/core/ai/recipes/openai.ts +++ b/src/core/ai/recipes/openai.ts @@ -17,6 +17,12 @@ export const openai: Recipe = { dims_options: [256, 512, 768, 1024, 1536, 3072], cost_per_1m_tokens_usd: 0.13, price_last_verified: '2026-04-20', + // OpenAI per-request hard cap is 300K tokens. Free/Tier-1 TPM is 1M. + // Cap batches conservatively at 100K to handle token-dense content + // (Discord/Slack markdown+JSON tokenizes at ~chars/2.7, not the chars/4 + // estimate the batcher uses). 100K estimated = ~150K real tokens worst-case, + // safely under both the 300K per-request and 1M TPM ceilings. 
+ max_batch_tokens: 100_000, }, expansion: { models: ['gpt-5.2', 'gpt-4o-mini'], From e17ea13745ba8757321c3ed16cf3e9b2c2ef5c6c Mon Sep 17 00:00:00 2001 From: Matt Gunnin Date: Thu, 14 May 2026 08:46:33 -0500 Subject: [PATCH 02/19] fix(ai/embed): recognize OpenAI 'maximum request size' error in isTokenLimitError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI's /v1/embeddings endpoint hard-caps a single request at 300k tokens total across all input items. When the cap is exceeded it returns: Invalid 'input': maximum request size is 300000 tokens per request. None of the three existing regexes in isTokenLimitError matched this phrasing, so the recursive-halving safety net in embedSubBatch never engaged for OpenAI. The same fat page (a token-dense markdown export, e.g. a Discord transcript) would re-fail every pass, blocking forward progress on the whole batch indefinitely. Locally reproduced on a 31,129-chunk Postgres brain: 2,125 chunks stuck at 'remaining' across 30+ embed --stale passes with retry loops + sleep delays. Adding the two new patterns lets halving fire; the same backlog cleared in one pass after the regex change (the companion max_batch_tokens recipe fix from PR #924 caps fresh batches, but existing oversize pages still need halving to recover). Adds: - /maximum request size.*tokens/i — OpenAI verbatim - /max.*tokens.*per.*request/i — defensive against minor rewording Tests: - Regression test for the exact OpenAI error string - Coverage for the generic 'max tokens per request' variant - All 25 tests in adaptive-embed-batch.test.ts pass No behavior change for providers whose errors already matched. 
(cherry picked from commit b834e84c56e939e687659f6228b4d60fc4c7dce3) --- src/core/ai/gateway.ts | 5 ++++- test/ai/adaptive-embed-batch.test.ts | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/core/ai/gateway.ts b/src/core/ai/gateway.ts index 05ac7a7b3..a4da36d7f 100644 --- a/src/core/ai/gateway.ts +++ b/src/core/ai/gateway.ts @@ -1259,7 +1259,10 @@ export function isTokenLimitError(err: unknown): boolean { return ( /max.*allowed.*tokens.*batch/i.test(msg) || /batch.*too.*many.*tokens/i.test(msg) || - /token.*limit.*exceeded/i.test(msg) + /token.*limit.*exceeded/i.test(msg) || + // OpenAI embeddings: "Invalid 'input': maximum request size is 300000 tokens per request." + /maximum request size.*tokens/i.test(msg) || + /max.*tokens.*per.*request/i.test(msg) ); } diff --git a/test/ai/adaptive-embed-batch.test.ts b/test/ai/adaptive-embed-batch.test.ts index 8ad53df0d..33050d4d4 100644 --- a/test/ai/adaptive-embed-batch.test.ts +++ b/test/ai/adaptive-embed-batch.test.ts @@ -155,6 +155,21 @@ describe('isTokenLimitError (pure helper)', () => { expect(isTokenLimitError(new Error('Batch contains too many tokens'))).toBe(true); }); + test('matches OpenAI embeddings "maximum request size" error (regression: PR ###)', () => { + // Real error string returned by OpenAI's /v1/embeddings endpoint when the + // sum of all input items exceeds 300k tokens. Without this match, gbrain's + // recursive-halving safety net never engages on OpenAI and the queue stalls + // forever on token-dense pages. 
+ const openaiErr = new Error( + "Invalid 'input': maximum request size is 300000 tokens per request.", + ); + expect(isTokenLimitError(openaiErr)).toBe(true); + }); + + test('matches generic "max tokens per request" phrasing', () => { + expect(isTokenLimitError(new Error('Exceeded 300000 max tokens per request'))).toBe(true); + }); + test('does not match unrelated errors', () => { expect(isTokenLimitError(new Error('Connection refused'))).toBe(false); expect(isTokenLimitError(new Error('Invalid API key'))).toBe(false); From 2804b55e344e13697f7782ae5d659199c6e62d51 Mon Sep 17 00:00:00 2001 From: Brandon Lipman Date: Fri, 8 May 2026 20:19:32 -0400 Subject: [PATCH 03/19] fix(connection-manager): strip . suffix from username when deriving direct URL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `deriveDirectUrl()` correctly rewrites the host (`aws-0-us-east-1.pooler.supabase.com` → `db.abcxyz.supabase.co`) but preserves the full pooler-form username (`postgres.abcxyz`). Supabase direct connections expect a bare `postgres` username — Supavisor uses the `.` suffix for tenant routing, but it's not a real database user. The auto-derived URL therefore fails to authenticate even with the correct password: password authentication failed for user "postgres.abcxyz" Strip the suffix to `postgres` whenever the project-ref was successfully extracted (same condition that triggers the host rewrite). The non-pooler username branch is unaffected — preserved as-is to keep the port-only fallback case working. Hit while exercising v0.30.1's dual-pool routing on a real Supabase brain; the kill switch (`GBRAIN_DISABLE_DIRECT_POOL=1`) papered over it locally but every Supabase user with a stock pooler URL would silently fall through to single-pool until the user-supplied a `GBRAIN_DIRECT_DATABASE_URL` override. With this fix, dual-pool works out of the box for the canonical Supabase shape. 
Test additions: - 1 case asserting bare `postgres:secret@` in the derived URL when project-ref is parseable from the pooler URL (the new behavior) - extends the existing "falls back to port-only" case with an assertion that non-pooler usernames are preserved (unchanged behavior) `bun run typecheck` clean. `deriveDirectUrl` test block passes 5/5. Co-Authored-By: Claude Opus 4.7 (1M context) (cherry picked from commit ddf2c6a9a0c04a009d718eaba6b4c20b81a43f8d) --- src/core/connection-manager.ts | 10 ++++++++-- test/connection-manager.serial.test.ts | 13 +++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/core/connection-manager.ts b/src/core/connection-manager.ts index 4a4a447a4..73cd913b8 100644 --- a/src/core/connection-manager.ts +++ b/src/core/connection-manager.ts @@ -142,16 +142,22 @@ export function deriveDirectUrl(url: string): string | null { const decodedUser = decodeURIComponent(user); const refMatch = decodedUser.match(/^postgres\.([a-z0-9]+)$/i); let directHost = hostname; + let directUser = parsed.username; if (refMatch && refMatch[1] && isPoolerHost) { directHost = `db.${refMatch[1]}.supabase.co`; + // Supabase direct connections use bare `postgres`; the `postgres.` + // form is pooler-only (Supavisor uses the suffix for tenant routing). + // Without this strip, direct auth fails with `password authentication + // failed for user "postgres."` even though the password is correct. + directUser = 'postgres'; } // Compose direct URL by swapping host + port. Preserve auth, db, query. parsed.hostname = directHost; parsed.port = '5432'; // Reconstruct with the original scheme. const scheme = url.match(/^postgres(?:ql)?:\/\//i)?.[0] ?? 'postgres://'; - const auth = parsed.username - ? `${parsed.username}${parsed.password ? `:${parsed.password}` : ''}@` + const auth = directUser + ? `${directUser}${parsed.password ? `:${parsed.password}` : ''}@` : ''; const search = parsed.search ?? ''; const path = parsed.pathname ?? 
''; diff --git a/test/connection-manager.serial.test.ts b/test/connection-manager.serial.test.ts index 619961e2c..42c1fd737 100644 --- a/test/connection-manager.serial.test.ts +++ b/test/connection-manager.serial.test.ts @@ -44,6 +44,18 @@ describe('deriveDirectUrl', () => { expect(direct).toContain(':secret@'); // creds preserved }); + test('strips . suffix from username when going pooler→direct', () => { + // Supabase direct connections require bare `postgres`; the `postgres.` + // form is pooler-only (Supavisor uses the suffix for tenant routing). + // Without the strip, direct auth fails with "password authentication + // failed for user postgres." even with the correct password. + const direct = deriveDirectUrl( + 'postgresql://postgres.abcxyz:secret@aws-0-us-east-1.pooler.supabase.com:6543/postgres' + ); + expect(direct).toContain('postgres:secret@'); // bare username + expect(direct).not.toContain('postgres.abcxyz:secret@'); // no pooler suffix + }); + test('falls back to port-only swap when project-ref unparseable', () => { const direct = deriveDirectUrl( 'postgresql://customuser:secret@some.pooler.supabase.com:6543/db' @@ -51,6 +63,7 @@ describe('deriveDirectUrl', () => { expect(direct).toBeTruthy(); expect(direct).toContain(':5432'); expect(direct).toContain('some.pooler.supabase.com'); // host preserved + expect(direct).toContain('customuser:secret@'); // non-pooler username preserved }); test('returns null for non-pooler URL', () => { From 8ec75bd79652e6f5905cbfb303d2d5759216fa6a Mon Sep 17 00:00:00 2001 From: Brandon Lipman Date: Fri, 8 May 2026 20:21:50 -0400 Subject: [PATCH 04/19] fix(init): --help should not mutate config or scan filesystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `gbrain init --help` (and `-h`) currently fall through to the smart-detection branch in runInit(), which scans cwd for .md files and on a directory with 1000+ files prints "Found ~1500 .md files. 
For a brain this size, Supabase gives faster search..." then defaults to PGLite — calling saveConfig() and overwriting any existing Postgres config with `engine: 'pglite' + database_path: ~/.gbrain/brain.pglite`. Confirmed in the wild: ran `gbrain init --help` from $HOME on a machine where ~/.gbrain/config.json pointed at a Supabase Postgres brain with 10K+ pages. The config was silently flipped to PGLite. The Supabase data was intact, but gbrain stopped pointing at it until the config was manually restored. Root cause: cli.ts:62-69 only routes --help → printOpHelp() for shared-op commands; CLI_ONLY commands (init, embed, etc.) fall through to their handler with --help still in argv. None of them check for it. Fix: add a --help/-h guard at the top of runInit() that prints help text and returns. Help should never mutate state — Postel's robustness principle for CLI tools. Help text covers all flags (engine selection, AI provider options, thin-client mode) so users running `--help` get the canonical list rather than having to read the source. A wider architectural fix — adding --help routing for all CLI_ONLY commands in cli.ts — is plausible follow-up, but each CLI_ONLY command would still need its own help text. This per-command pattern matches how shared ops handle it via printOpHelp(). Init is the highest-stakes case because it's the only CLI_ONLY command that calls saveConfig(). Smoke test: from a directory with 1500 .md files, with GBRAIN_HOME pointed at a fresh tempdir: - Before fix: ~/.gbrain/config.json materialized with engine: 'pglite' - After fix: help text printed, no config dir created `bun run typecheck` clean. 
Co-Authored-By: Claude Opus 4.7 (1M context) (cherry picked from commit ed11fdd58c928075e4b8d385d3a274f6a9eb00d2) --- src/commands/init.ts | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/src/commands/init.ts b/src/commands/init.ts index 3c108fcb8..4de5430d5 100644 --- a/src/commands/init.ts +++ b/src/commands/init.ts @@ -11,6 +11,20 @@ import { createEngine } from '../core/engine-factory.ts'; import { discoverOAuth, mintClientCredentialsToken, smokeTestMcp } from '../core/remote-mcp-probe.ts'; export async function runInit(args: string[]) { + // Help guard: cli.ts only routes --help to printOpHelp() for shared-op + // commands; CLI_ONLY commands (init, embed, etc.) fall through to their + // handler with --help in argv. Without this guard, `gbrain init --help` + // proceeds into the smart-detection branch below, scans cwd for .md files, + // and on a directory with 1000+ files (e.g. $HOME for someone whose brain + // and notes share a root) silently overwrites the existing Supabase config + // with a fresh PGLite brain at ~/.gbrain/brain.pglite. Confirmed in the + // wild — flipped a working `engine: postgres` config to `engine: pglite` + // on a brain with 10K+ pages. Help should never mutate state. 
+ if (args.includes('--help') || args.includes('-h')) { + printInitHelp(); + return; + } + const isSupabase = args.includes('--supabase'); const isPGLite = args.includes('--pglite'); const isMcpOnly = args.includes('--mcp-only'); @@ -1401,3 +1415,48 @@ export function reportModStatus(): void { console.log('Soul audit: run `gbrain soul-audit` to customize agent identity'); console.log(''); } + +function printInitHelp() { + console.log(` +gbrain init — initialize a brain (PGLite or Supabase Postgres) + +USAGE + gbrain init [flags] + +ENGINE SELECTION (mutually exclusive) + --pglite Use embedded PGLite (zero-config, default for <1000 .md files) + --supabase Use Supabase Postgres (recommended for 1000+ files) + --url Use a manual Postgres connection string + --mcp-only Thin-client mode: connect to a remote gbrain MCP, no local engine + +OPTIONS + --force Overwrite an existing config (gated by default) + --non-interactive Don't prompt; use defaults + --migrate-only Apply pending schema migrations against the configured engine + without re-saving config (used by post-upgrade and orchestrators) + --json JSON output for status reporting + --path Override default brain path (PGLite only) + --key Provide an API key non-interactively (Supabase only) + --embedding-model + e.g. openai:text-embedding-3-large, voyage:voyage-multimodal-3 + --model Shorthand: pick recipe default for a provider + --embedding-dimensions + Embedding dimensions (must match the model) + --expansion-model + Model for query expansion (default: anthropic:claude-haiku) + --chat-model + Default subagent driver (v0.27+) + +EXAMPLES + gbrain init --pglite # Local-only, no API keys + gbrain init --supabase # Interactive Supabase setup + gbrain init --url postgresql://... # Use a custom Postgres + gbrain init --mcp-only --url https://... # Thin-client mode + +NOTES + - Bare \`gbrain init\` in a directory with 1000+ .md files defaults to Supabase + interactive setup. 
With <1000 files (or with --pglite explicitly), defaults + to PGLite at ~/.gbrain/brain.pglite. + - Existing config is preserved unless --force is passed. +`.trim()); +} From 4a717250cb9b60dde40c08950650dd1f507aa55b Mon Sep 17 00:00:00 2001 From: Jeremy Knows Date: Tue, 12 May 2026 10:06:02 -0400 Subject: [PATCH 05/19] test(frontmatter-install-hook): isolate hooksPath assertion from developer global config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "installHook writes ... and sets core.hooksPath" test asserted `git config --get core.hooksPath` returns `.githooks`, which falls back to the global scope when local is unset. Developers who set `core.hooksPath` globally (common with dotfiles managers pointing at ~/.config/git/hooks) saw a deterministic FAIL because installHook intentionally respects an existing global value and skips writing the local one — exactly the documented contract. Fix: read via `git config --local --get core.hooksPath` (scope-locked) and branch the assertion on whether a global is already set. Both clean-CI (local should be '.githooks') and developer-with-global (local should be empty; installHook correctly didn't clobber) now pass deterministically. No API change. installHook behavior is unchanged. Verified locally with the affected test passing under `GIT_CONFIG_GLOBAL=~/.gitconfig` carrying `core.hooksPath=...`. 
(cherry picked from commit 0e4da2cb386d6b8406eb99dbaf80591e9761b568) --- test/frontmatter-install-hook.test.ts | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/test/frontmatter-install-hook.test.ts b/test/frontmatter-install-hook.test.ts index f714b7cdf..4f11177a0 100644 --- a/test/frontmatter-install-hook.test.ts +++ b/test/frontmatter-install-hook.test.ts @@ -31,9 +31,26 @@ describe('frontmatter install-hook (B13)', () => { const content = readFileSync(hookPath, 'utf8'); expect(content).toContain('gbrain frontmatter'); expect(content).toContain('git diff --cached'); - // Configured hooksPath - const hooksPath = execFileSync('git', ['-C', tmp, 'config', '--get', 'core.hooksPath'], { encoding: 'utf8' }).trim(); - expect(hooksPath).toBe('.githooks'); + // installHook's contract is "set core.hooksPath unless it's already set + // elsewhere". Test BOTH branches deterministically by reading the local + // scope only: clean CI → local should be `.githooks`; developer with a + // global core.hooksPath (e.g. dotfiles → ~/.config/git/hooks) → local + // should be empty because installHook correctly skipped clobbering. + // Reading via `--get` without `--local` falls back to global scope when + // local is unset, which made this test environmentally fragile. 
+ let globalHooksPath = ''; + try { + globalHooksPath = execFileSync('git', ['config', '--global', '--get', 'core.hooksPath'], { encoding: 'utf8' }).trim(); + } catch { /* unset is the expected clean-env case */ } + let localHooksPath = ''; + try { + localHooksPath = execFileSync('git', ['-C', tmp, 'config', '--local', '--get', 'core.hooksPath'], { encoding: 'utf8' }).trim(); + } catch { /* unset is fine when global was present */ } + if (globalHooksPath) { + expect(localHooksPath).toBe(''); + } else { + expect(localHooksPath).toBe('.githooks'); + } }); test('installHook refuses to clobber existing hook without --force', () => { From d2ad6aa0f9ca3a3e5ffc6cdb39f59ccb2f2c33d4 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 23 May 2026 23:06:22 +0000 Subject: [PATCH 06/19] fix: guard against missing 'intent' field in routing-eval fixtures Two defensive fixes: 1. normalizeText(): return empty string on null/undefined input instead of crashing with 'undefined is not an object (evaluating s.toLowerCase)' 2. loadRoutingFixtures(): validate that parsed fixture has 'intent' as a string before adding to fixtures array. Fixtures with wrong field names (e.g. 'input' instead of 'intent') are now reported as malformed with a helpful error message listing the actual keys found. Root cause: a skill's routing-eval.jsonl used {"input": ...} instead of {"intent": ...}. The JSON parsed fine but the cast to RoutingFixture was unchecked, so fixture.intent was undefined. normalizeText(undefined) then crashed. This made 'gbrain doctor' completely unusable. (cherry picked from commit b142bbdb0d089224ac723e637d3ea98a448570ca) --- src/core/routing-eval.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/core/routing-eval.ts b/src/core/routing-eval.ts index 240cc4134..9b567a14e 100644 --- a/src/core/routing-eval.ts +++ b/src/core/routing-eval.ts @@ -96,6 +96,7 @@ export interface RoutingCaseResult { * variants that agents emit in practice. 
*/ export function normalizeText(s: string): string { + if (!s) return ''; return s.toLowerCase().replace(/[^a-z0-9]+/g, ' ').trim(); } @@ -298,6 +299,10 @@ export function loadRoutingFixtures(skillsDir: string): LoadResult { if (raw.startsWith('//') || raw.startsWith('#')) continue; try { const obj = JSON.parse(raw) as RoutingFixture; + if (typeof obj.intent !== 'string') { + malformed.push({ file: fixturePath, line: i + 1, raw, error: `missing required field 'intent' (found keys: ${Object.keys(obj).join(', ')})` }); + continue; + } fixtures.push({ ...obj, source: fixturePath }); } catch (err) { malformed.push({ From 98ddf9e82f6351e4ee89abfe894afd74f7134219 Mon Sep 17 00:00:00 2001 From: orendi84 Date: Sun, 24 May 2026 01:20:23 -0700 Subject: [PATCH 07/19] fix(test): isolate HOME in run-e2e.sh to stop config corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces #517 (re-ported fresh against current scripts/run-e2e.sh after v0.23.1 rewrote the script — original cherry-pick would not apply). E2E tests call setupDB which writes $HOME/.gbrain/config.json pointing at the docker test container. When the container tears down, the user's real autopilot daemon wedges trying to connect to a vanished postgres. Three operators hit this within 16 days before the original PR filed. Fix: wrapper exports HOME + GBRAIN_HOME to a mktemp tmpdir BEFORE bun starts so config writes land in the tmpdir, with a post-run breach detector that compares md5 of the user's real config against pre-run. Both env vars required: loadConfig/saveConfig resolve via HOME while configPath honors GBRAIN_HOME. HOME set before bun starts because os.homedir() caches at first call. Test seam: test/gbrain-home-isolation.test.ts updated to assert against homedir() === configDir() when GBRAIN_HOME unset (correct under the safety wrapper itself) instead of the prior "not /tmp/" sentinel. Revert path: git revert if test:e2e regresses on master. 
Co-Authored-By: orendi84 --- scripts/run-e2e.sh | 74 ++++++++++++++++++++++++++++++ test/gbrain-home-isolation.test.ts | 11 +++-- 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/scripts/run-e2e.sh b/scripts/run-e2e.sh index d31ffb7b5..f93c7977c 100755 --- a/scripts/run-e2e.sh +++ b/scripts/run-e2e.sh @@ -20,11 +20,52 @@ # which is too tight for setupDB's TRUNCATE CASCADE on ~30 tables on # CI runners under load (one CI flake observed on PR #475 hitting # exactly 5000.09ms in the Tags beforeAll). +# +# HOME isolation: E2E tests call paths that resolve to gbrain init / saveConfig +# (e.g. setupDB writing config for the test container) and would otherwise +# write the user's real ~/.gbrain/config.json. The wrapper redirects HOME and +# GBRAIN_HOME to a tmpdir before bun starts so config writes land in the +# tmpdir, then verifies the user's real config md5 didn't change after the run. +# Both env vars are required: loadConfig/saveConfig resolve via HOME, while +# configPath/getDbUrlSource honor GBRAIN_HOME; setting only one leaves the +# other path escaping isolation. HOME is set before bun starts because Bun's +# os.homedir() caches at first call and in-process mutation would not take. +# Trap cleans up the tmpdir even on test failure. set -euo pipefail cd "$(dirname "$0")/.." +# --- HOME isolation: snapshot real user config before switching --- +# Tolerate unset HOME (minimal containers, exotic CI shells) without tripping set -u. +REAL_HOME="${HOME:-/tmp}" +USER_CONFIG="$REAL_HOME/.gbrain/config.json" +USER_CONFIG_EXISTED=0 +USER_CONFIG_MD5="" +# `{ ... } || true` swallows non-zero exit when the file is missing or md5 isn't +# installed, so set -e never aborts before the post-run breach detector can run. 
+md5_of() { + { if command -v md5 >/dev/null 2>&1; then + md5 -q "$1" 2>/dev/null + elif command -v md5sum >/dev/null 2>&1; then + md5sum "$1" 2>/dev/null | awk '{print $1}' + fi + } || true +} +if [ -f "$USER_CONFIG" ]; then + USER_CONFIG_EXISTED=1 + USER_CONFIG_MD5=$(md5_of "$USER_CONFIG") +fi + +# Portable mktemp: explicit XXXXXX is required by GNU mktemp on Linux CI. +# `-t prefix` works on BSD but errors on GNU when the template lacks Xs. +E2E_TMP_HOME=$(mktemp -d "${TMPDIR:-/tmp}/gbrain-e2e.XXXXXX") +trap 'rm -rf "$E2E_TMP_HOME"' EXIT + +export HOME="$E2E_TMP_HOME" +export GBRAIN_HOME="$E2E_TMP_HOME" +mkdir -p "$E2E_TMP_HOME/.gbrain" + # --dry-run-list: print the resolved file list (one per line) and exit. Used # by scripts/ci-local.sh to smoke-test the argv branching at startup. DRY_RUN_LIST=0 @@ -119,6 +160,39 @@ echo "E2E SUMMARY (sequential execution)" echo "========================================" echo "Files: $((pass_files + fail_files)) total, $pass_files passed, $fail_files failed" echo "Tests: $total_pass passed, $total_fail failed" + +# --- HOME isolation verification: fail loud on any out-of-isolation write --- +# Runs regardless of test pass/fail; isolation breach is higher-severity than +# any individual test failure. Exit 2 distinguishes from exit 1 (test failure). +# Three breach modes covered: +# 1. Config existed before AND was modified (md5 changed) +# 2. Config existed before AND was deleted during the run +# 3. 
Config did NOT exist before but was created during the run +AFTER_EXISTS=0 +[ -f "$USER_CONFIG" ] && AFTER_EXISTS=1 +AFTER_MD5="" +if [ "$AFTER_EXISTS" = "1" ]; then + AFTER_MD5=$(md5_of "$USER_CONFIG") +fi +BREACH_REASON="" +if [ "$USER_CONFIG_EXISTED" = "1" ] && [ "$AFTER_EXISTS" = "0" ]; then + BREACH_REASON="config existed before run but was deleted" +elif [ "$USER_CONFIG_EXISTED" = "0" ] && [ "$AFTER_EXISTS" = "1" ]; then + BREACH_REASON="config did not exist before run but was created" +elif [ -n "$USER_CONFIG_MD5" ] && [ "$AFTER_MD5" != "$USER_CONFIG_MD5" ]; then + BREACH_REASON="config md5 changed during run" +fi +if [ -n "$BREACH_REASON" ]; then + echo "" >&2 + echo "ERROR: HOME isolation breach detected." >&2 + echo " Reason: $BREACH_REASON" >&2 + echo " Path: $USER_CONFIG" >&2 + echo " Before: existed=$USER_CONFIG_EXISTED md5=${USER_CONFIG_MD5:-none}" >&2 + echo " After: existed=$AFTER_EXISTS md5=${AFTER_MD5:-none}" >&2 + echo " A test wrote outside the tmpdir HOME despite the override." >&2 + exit 2 +fi + if [ ${#fail_list[@]} -gt 0 ]; then echo "" echo "Failing files:" diff --git a/test/gbrain-home-isolation.test.ts b/test/gbrain-home-isolation.test.ts index 50adbea8b..25e1f9506 100644 --- a/test/gbrain-home-isolation.test.ts +++ b/test/gbrain-home-isolation.test.ts @@ -16,7 +16,7 @@ import { describe, test, expect } from 'bun:test'; import { mkdtempSync, existsSync, readdirSync, statSync, rmSync } from 'fs'; -import { tmpdir } from 'os'; +import { homedir, tmpdir } from 'os'; import { join } from 'path'; // Save original env so we don't leak between tests. @@ -44,10 +44,11 @@ describe('GBRAIN_HOME write-side isolation', () => { delete process.env.GBRAIN_HOME; try { const { configDir } = await import('../src/core/config.ts'); - const result = configDir(); - // Should NOT contain the test tmpdir; should resolve to a real homedir path. 
- expect(result.endsWith('.gbrain')).toBe(true); - expect(result.startsWith('/tmp/')).toBe(false); + // Contract: when GBRAIN_HOME is unset, configDir() === os.homedir()/.gbrain. + // Asserting against os.homedir() (rather than a "not /tmp/" sentinel) keeps + // this test correct under safety wrappers that redirect HOME=/tmp/... — the + // behavior we care about is that the fallback path equals homedir(). + expect(configDir()).toBe(join(homedir(), '.gbrain')); } finally { if (ORIG_GBRAIN_HOME !== undefined) process.env.GBRAIN_HOME = ORIG_GBRAIN_HOME; } From 23cd9d44ad21acb4773de7605e344b279e806383 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 09:57:04 -0700 Subject: [PATCH 08/19] test(dream-cycle): add schema-suggest to EXPECTED_PHASES v0.40.7.0 Schema Cathedral v3 added the 'schema-suggest' phase between 'orphans' and 'purge' in ALL_PHASES, but the E2E phase-order test was not updated to match. ALL_PHASES vs EXPECTED_PHASES diverged and the shape-pin test failed every run on master. Surfaced during fix-wave: warm-narwhal E2E gate. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- test/e2e/dream-cycle-phase-order-pglite.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/test/e2e/dream-cycle-phase-order-pglite.test.ts b/test/e2e/dream-cycle-phase-order-pglite.test.ts index 58d14de43..a251d9cd3 100644 --- a/test/e2e/dream-cycle-phase-order-pglite.test.ts +++ b/test/e2e/dream-cycle-phase-order-pglite.test.ts @@ -122,6 +122,7 @@ const EXPECTED_PHASES: CyclePhase[] = [ 'calibration_profile', // v0.36.1.0 'embed', 'orphans', + 'schema-suggest', // v0.40.7.0 — Schema Cathedral v3 'purge', // v0.26.5 ]; From a34971af2a08c3c9a982ff262c378acdaef0e170 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 12:02:43 -0700 Subject: [PATCH 09/19] test(autopilot-fanout): use relative timestamp inside freshness window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'end-to-end: updateSourceConfig persists timestamp visible to next listAllSources' test pinned last_full_cycle_at to a hardcoded '2026-05-22T15:00:00.000Z'. The 60-minute freshness window passed within ~1 hour of write — every run after the deadline classified the source as stale and dispatched it, breaking the test's .skippedFresh expectation. Switch to Date.now() - 30min relative timestamp (mirrors the prior 'source with last_full_cycle_at < 60min ago is skipped by gate' test). Surfaced during fix-wave: warm-narwhal E2E gate. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- test/e2e/autopilot-fanout-postgres.test.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/e2e/autopilot-fanout-postgres.test.ts b/test/e2e/autopilot-fanout-postgres.test.ts index 38a16ceff..09c83bbe7 100644 --- a/test/e2e/autopilot-fanout-postgres.test.ts +++ b/test/e2e/autopilot-fanout-postgres.test.ts @@ -144,7 +144,10 @@ describeIfDB('autopilot fan-out — Postgres E2E', () => { await seedSource('full-round-trip'); await engine.executeRaw(`UPDATE sources SET local_path = NULL WHERE id = 'default'`); - const ts = '2026-05-22T15:00:00.000Z'; + // Relative timestamp inside the 60-min freshness window. A prior version + // pinned this to '2026-05-22T15:00:00.000Z' which started failing once + // wall-clock drifted past 60 minutes from that point. + const ts = new Date(Date.now() - 30 * 60 * 1000).toISOString(); const updated = await engine.updateSourceConfig('full-round-trip', { last_full_cycle_at: ts, }); From d4fb84b6815d9621349c48402276880f7535e626 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 12:02:47 -0700 Subject: [PATCH 10/19] test(fresh-install-pglite): unset other provider keys in beforeEach init.ts:455 fails loud when multiple embedding providers are env-ready in non-TTY mode. The test sets ZEROENTROPY_API_KEY then runs init, but developer machines commonly have OPENAI_API_KEY + VOYAGE_API_KEY + ZEROENTROPY_API_KEY all set, so init sees 3 providers and exits 1. Save+unset OPENAI_API_KEY + VOYAGE_API_KEY in beforeEach, restore in afterEach. Now only ZE is env-ready, init picks it, schema sized to zembed-1's 1280d as the test expects. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- test/e2e/fresh-install-pglite.test.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/e2e/fresh-install-pglite.test.ts b/test/e2e/fresh-install-pglite.test.ts index d2836cc6b..88b4fe438 100644 --- a/test/e2e/fresh-install-pglite.test.ts +++ b/test/e2e/fresh-install-pglite.test.ts @@ -27,14 +27,25 @@ describe('E2E: fresh gbrain init --pglite → import → embed works end-to-end' let tmpHome: string; let origHome: string | undefined; let origZeKey: string | undefined; + // init.ts:455 fails loud when MULTIPLE embedding providers are env-ready + // (non-TTY path). Developer machines commonly have OPENAI_API_KEY + + // VOYAGE_API_KEY + ZEROENTROPY_API_KEY all set; this test wants exactly + // ZE to be the env-ready provider. Save+unset the others in beforeEach, + // restore in afterEach. + let origOpenaiKey: string | undefined; + let origVoyageKey: string | undefined; beforeEach(() => { tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-e2e-fresh-')); origHome = process.env.GBRAIN_HOME; origZeKey = process.env.ZEROENTROPY_API_KEY; + origOpenaiKey = process.env.OPENAI_API_KEY; + origVoyageKey = process.env.VOYAGE_API_KEY; process.env.GBRAIN_HOME = tmpHome; // Stub key so init's setup-hint check passes. process.env.ZEROENTROPY_API_KEY = 'sk-test-ze'; + delete process.env.OPENAI_API_KEY; + delete process.env.VOYAGE_API_KEY; }); afterEach(() => { @@ -43,6 +54,10 @@ describe('E2E: fresh gbrain init --pglite → import → embed works end-to-end' else process.env.GBRAIN_HOME = origHome; if (origZeKey === undefined) delete process.env.ZEROENTROPY_API_KEY; else process.env.ZEROENTROPY_API_KEY = origZeKey; + if (origOpenaiKey === undefined) delete process.env.OPENAI_API_KEY; + else process.env.OPENAI_API_KEY = origOpenaiKey; + if (origVoyageKey === undefined) delete process.env.VOYAGE_API_KEY; + else process.env.VOYAGE_API_KEY = origVoyageKey; __setEmbedTransportForTests(null); // Restore legacy-preload gateway state. 
configureGateway({ From 97769b49506bdc80b95bbe25ec489fe9633171bc Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 12:02:51 -0700 Subject: [PATCH 11/19] test(voyage-multimodal): switch fixture from AVIF to PNG Voyage's /multimodalembeddings endpoint rejects AVIF as of 2026-05 with 'Please provide a valid base64-encoded image'. The prior comment ('AVIF is fine for an embed call') held at v0.27.x and regressed silently on the provider side. Add test/fixtures/images/tiny.png (16x16 RGB PNG, 1307 bytes generated via sips from the macOS default wallpaper). PNG is universally accepted by Voyage and other multimodal providers. Surfaced during fix-wave: warm-narwhal E2E gate. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/e2e/voyage-multimodal.test.ts | 9 +++++---- test/fixtures/images/tiny.png | Bin 0 -> 1307 bytes 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 test/fixtures/images/tiny.png diff --git a/test/e2e/voyage-multimodal.test.ts b/test/e2e/voyage-multimodal.test.ts index 1267ba81f..ae0210958 100644 --- a/test/e2e/voyage-multimodal.test.ts +++ b/test/e2e/voyage-multimodal.test.ts @@ -26,11 +26,12 @@ describe.if(HAS_KEY)('voyage-multimodal-3 (real API, gated VOYAGE_API_KEY)', () }); test('embeds the tiny PNG fixture into a 1024-dim vector', async () => { - // Reuse the Phase 1 fixture (the AVIF is fine for an embed call; Voyage - // accepts data URLs of common image types). - const buf = readFileSync('test/fixtures/images/tiny.avif'); + // Use PNG fixture. Voyage's multimodal endpoint rejects AVIF as of + // 2026-05; the prior comment ("AVIF is fine") was true at v0.27.x and + // regressed silently on the provider side. PNG is universally accepted. 
+ const buf = readFileSync('test/fixtures/images/tiny.png'); const data = buf.toString('base64'); - const out = await embedMultimodal([{ kind: 'image_base64', data, mime: 'image/avif' }]); + const out = await embedMultimodal([{ kind: 'image_base64', data, mime: 'image/png' }]); expect(out.length).toBe(1); expect(out[0]).toBeInstanceOf(Float32Array); expect(out[0].length).toBe(1024); diff --git a/test/fixtures/images/tiny.png b/test/fixtures/images/tiny.png new file mode 100644 index 0000000000000000000000000000000000000000..00cb2d31108fa3a399db3c2e3077bbb0ccd4b279 GIT binary patch literal 1307 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61SBU+%rFB|jKx9jP7LeL$-D$|N>U>{(|mmy zw18|523AHk24;{F2B3OIAt;*>$aZ320gD4A7@ZO9_DmR?!3!wQz`(qK5us}V6I@nc z0W*RPQkdS5ypMr_i8(VQq9nrC$0|8LS1&OoKPgqOBDVmffWfB13dqb&ElE_U$j!+s zwyLmI0;{kBvO&W7N(x{lCE2!05xxNm&iO^D3TAo+dIm~%TnY*bHbp6ERzWUqQ0+jT ztx`rwNr9EVetCJhUb(Seeo?xG?WUP)qwZeFo6#1NP{E~&-I zMVSR9nfZANAafIw@=Hr>m6Sjh!2!gbC7EdmoAQdG-U511A0(r1sAr%LHyfzc1|(_~ zl9`hlo|#gTVP|S+WCK=-B!;jBqT0VGGZpAgkZbG=p{kL^(A5WIlMX*)y*s zwWy#77=B>gFuQQ8KsO8IQlHekbPT6L1)=I~^g+Rd6l{<<0gD17#g5BHA0EMWTz#SE zOM%J3&C|s(L_<_H=ysm4p}^MTkN2i*KB@LF=qpo@H{(ivg?|&`otoGetg2XUbHLeA zVQFZTMy997ESH;KZyerH;l4RJ``paOlCQ<}`?mz0(eZ2gP+uAO#M39{%e_DoC70y9 z&#Vhfj59=!C`p(!KRO`9c9!i%bHx@jrYGH@E%Q_Zg_@?Rt}N1Hkyt6xbcwC^&y+WL zp&R*}TWu0`>WQXX z!xgcuJW5?2Cm0S!WH)~gR4X>N;@ivY=Y40TPsrh0t{p{A{%<;JA2+Fsx$G&5`1kFJ z-lk8Q=l0Dv*{-`;Y?ac(1@E_>sj&-v;@9qc`{pWJ^|r*DCF1kVH?zvk<8@74c-=0H zZ^gsJ8Gm-#++TU>|U$pi(E0gUj4iOTJN#9e@`_qy_^yDmt|f4tZzIkUdY_h zuc|ciS{ZwH=GA}8{f$pJHCo87e_SwQJJ*|j{|l{^ag}G*1;(l+UX@-Ex4OOVvZL1L z327|r4Ytl$xNQIP$K7o=Z0>8m3k%=k@lB5B(vtg4HAcU`zC3+1Wm>8Aj*YX+g>t0N z)nu+tkCWWQdQRPZ^3P+B^Om#bRs4H+b@#`sSJc9+dSbm>zn$E*F5LI+Wruyvt5?mR zvFFaYPcN@u{C&OM{B-EF<)0){dRDBMmOdfKIAiv-gQaJ~;`mzbI`$P;{eI>lsbQUK zvLQz2`fE0?y|enoj?1g3FBdk^X=Yzyl6z>;&yTq`EW*F;cAY3}ss8xWL-{MJ*;(`1 RfrS|mc)I$ztaD0e0s#7u+id^< literal 0 HcmV?d00001 From f935fbafa31c1f3344baa44455f1e4a8815e1275 Mon Sep 17 00:00:00 2001 From: Garry Tan 
Date: Sun, 24 May 2026 12:02:58 -0700 Subject: [PATCH 12/19] fix(cycle/synthesize): prefix bare anthropic model ids before queue.add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit queue.add's subagent capability validator (classifyCapabilities → resolveRecipe) requires provider:model format and rejects bare ids with 'unknown provider'. resolveModel returns the bare id from TIER_DEFAULTS / DEFAULT_ALIASES (e.g. 'claude-sonnet-4-6'), which the validator then rejects, dropping the synthesize phase to status:fail with SYNTH_PHASE_FAIL. Narrow fix at the call site: if config.model has no colon AND starts with 'claude-', prefix 'anthropic:'. Other providers must already declare a colon. Avoids changing TIER_DEFAULTS / DEFAULT_ALIASES constant shapes, which would ripple across every resolveModel caller. Surfaced by dream-synthesize-chunking E2E during fix-wave: warm-narwhal. Affected tests: 'single-chunk transcript uses legacy idempotency key' and 'multi-chunk transcript spawns N children with chunk-suffixed idempotency keys' — both relied on result.details.children_submitted which only the ok() path sets; the failed() path returns details: {}. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/cycle/synthesize.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/core/cycle/synthesize.ts b/src/core/cycle/synthesize.ts index 9828a4f3a..e701bddc4 100644 --- a/src/core/cycle/synthesize.ts +++ b/src/core/cycle/synthesize.ts @@ -393,10 +393,20 @@ export async function runPhaseSynthesize( } const isChunked = chunks.length > 1; + // queue.add subagent validator (classifyCapabilities → resolveRecipe) + // requires `provider:model`. resolveModel can return a bare id when + // TIER_DEFAULTS / DEFAULT_ALIASES carry a bare value; ensure the + // anthropic: prefix is present for known claude-* ids before passing + // to the queue. Non-anthropic providers must already declare a colon. 
+ const subagentModel = config.model.includes(':') + ? config.model + : config.model.toLowerCase().startsWith('claude-') + ? `anthropic:${config.model}` + : config.model; for (let i = 0; i < chunks.length; i++) { const childData: SubagentHandlerData = { prompt: buildSynthesisPrompt(t, chunks[i], i, chunks.length, priorContradictionsBlock), - model: config.model, + model: subagentModel, max_turns: 30, allowed_slug_prefixes: allowedSlugPrefixes, }; From 6fd45a9551f46ca135529d03d85168bd853d6d54 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 12:03:04 -0700 Subject: [PATCH 13/19] test(mechanical): pin doctor init embedding model + clean non-default sources Two fixes in the E2E Doctor Command describe block, both surfaced by cross-file state pollution under the full sequential E2E run: 1. Pass --embedding-model openai:text-embedding-3-large to the init subprocess. Without the explicit flag, doctor inherits whatever the resolver picks from env keys (ZE if ZEROENTROPY_API_KEY is set, defaulting to zembed-1 at 1280d). The test's setupDB initialized schema at 1536d, so the dim mismatch fires embedding_width_consistency WARN, exiting doctor 1. 2. DELETE FROM sources WHERE id != 'default' in beforeAll. Prior E2E files leave non-default source rows (e.g. 'delta' from autopilot / sources tests). sync_freshness + cycle_freshness then FAIL on those orphans because they were never synced/cycled, exiting doctor 1. setupDB TRUNCATEs sources but schema.sql re-seeds 'default' via initSchema; this leaves only the canonical single-source brain the test expects. Surfaced during fix-wave: warm-narwhal E2E gate. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- test/e2e/mechanical.test.ts | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/test/e2e/mechanical.test.ts b/test/e2e/mechanical.test.ts index 64ca221f3..5428e1d27 100644 --- a/test/e2e/mechanical.test.ts +++ b/test/e2e/mechanical.test.ts @@ -1231,6 +1231,14 @@ describeE2E('E2E: Doctor Command', () => { // migration entries from in-flight workspaces — and surfaces them as the // 'minions_migration' [FAIL] check, exiting with code 1. gbrainHome = mkdtempSync(join(tmpdir(), 'gbrain-doctor-e2e-')); + // Cross-file isolation: prior E2E files can leave non-default `sources` + // rows (e.g. 'delta' from autopilot/sources tests). Doctor's + // sync_freshness + cycle_freshness checks then FAIL on those orphans, + // exit 1, breaking 'doctor exits 0 on healthy DB'. setupDB TRUNCATEs + // sources but schema.sql re-seeds 'default' via initSchema; clean any + // other rows so the doctor sees a clean single-source brain. + const conn = getConn(); + await conn`DELETE FROM sources WHERE id != 'default'`; }, 30_000); afterAll(async () => { await teardownDB(); @@ -1246,9 +1254,15 @@ describeE2E('E2E: Doctor Command', () => { }); test('gbrain doctor exits 0 on healthy DB', () => { - // Init first so config exists for CLI + // Init first so config exists for CLI. Pin --embedding-model explicitly + // so the spawned doctor doesn't pick a different default (e.g. ZE-1280d + // when ZEROENTROPY_API_KEY is in env) that mismatches the 1536d schema + // setupDB initialized, producing a WARN-status embedding_width_consistency + // check and exit 1. Mirrors the same pattern in 'Setup Journey'. 
Bun.spawnSync({ - cmd: ['bun', 'run', 'src/cli.ts', 'init', '--non-interactive', '--url', process.env.DATABASE_URL!], + cmd: ['bun', 'run', 'src/cli.ts', 'init', '--non-interactive', + '--url', process.env.DATABASE_URL!, + '--embedding-model', 'openai:text-embedding-3-large'], cwd: cliCwd, env: cliEnv(), timeout: 15_000, }); const result = Bun.spawnSync({ From b00bef26b8692375cb1de153d5abd55313ada73a Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 12:03:10 -0700 Subject: [PATCH 14/19] test(run-e2e): per-file connection flush + 180s outer timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two cross-file isolation hardenings for the sequential E2E runner: 1. Terminate stale Postgres connections before each file. Without this, idle connections from the prior bun process's pool race with the next file's setupDB() TRUNCATE CASCADE, producing 'fixture pages disappear mid-test' failures. The terminate call is idempotent + ~50ms; first iteration is a no-op. 2. Hard outer timeout (180s per file) via gtimeout / timeout. bun's --timeout=60000 is per-test; if a PGLite WASM call hangs in beforeAll/afterAll (e.g. ingestion-roundtrip.test.ts wedging 30+ minutes on macOS), --timeout never fires and the entire suite wedges. Outer SIGKILL lets the suite advance and the file is recorded as failed for triage. Falls through to bare bun if neither gtimeout nor timeout is on PATH. Surfaced during fix-wave: warm-narwhal — 3 of 5 cross-file flakes caught by the connection flush; ingestion-roundtrip 30-min wedge caught by the outer timeout. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/run-e2e.sh | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/scripts/run-e2e.sh b/scripts/run-e2e.sh index f93c7977c..d945ea2e2 100755 --- a/scripts/run-e2e.sh +++ b/scripts/run-e2e.sh @@ -134,7 +134,29 @@ for f in "${files[@]}"; do name=$(basename "$f") echo "" echo "=== $name ===" - if output=$(bun test --timeout=60000 "$f" 2>&1); then + # Cross-file isolation: terminate any stale connections from the prior + # file's pool before the next file's setupDB() runs. Without this, + # idle postgres connections from the previous bun process race with + # the next file's TRUNCATE CASCADE → cross-file fixture-state pollution + # (people/sarah-chen disappears mid-test, etc.). The terminate call is + # idempotent + fast (~50ms); on the first iteration there's nothing to + # terminate so it's effectively free. + if [ -n "${DATABASE_URL:-}" ]; then + psql "$DATABASE_URL" -At -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid != pg_backend_pid() AND datname = current_database()" >/dev/null 2>&1 || true + fi + # Hard outer timeout (180s per file). bun's --timeout is per-test; if a + # PGLite WASM call hangs in beforeAll/afterAll, --timeout never fires and + # the file wedges indefinitely. gtimeout/timeout terminates the file so the + # suite advances (default signal is SIGTERM, not SIGKILL). gtimeout (macOS via coreutils) preferred; timeout (Linux) + # fallback; bare bun (no outer cap) if neither is installed. 
+ if command -v gtimeout >/dev/null 2>&1; then + TIMEOUT_CMD="gtimeout 180" + elif command -v timeout >/dev/null 2>&1; then + TIMEOUT_CMD="timeout 180" + else + TIMEOUT_CMD="" + fi + if output=$($TIMEOUT_CMD bun test --timeout=60000 "$f" 2>&1); then pass_files=$((pass_files + 1)) # Extract pass/fail counts from bun's summary (e.g., "123 pass") p=$(echo "$output" | grep -oE '[0-9]+ pass' | tail -1 | grep -oE '[0-9]+' || echo 0) From e40ca4a2319bd162c1e3cc6ae01d544baed5ea4c Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 12:10:26 -0700 Subject: [PATCH 15/19] chore: bump version and changelog (v0.41.3.0) Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 39 +++++++++++++++++++++++++++++++++++++++ VERSION | 2 +- package.json | 2 +- 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0fb2160c..56a9f93d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,45 @@ All notable changes to GBrain will be documented in this file. +## [0.41.3.0] - 2026-05-24 + +**Six community bug-fix PRs land + the E2E suite stops lying about itself.** A fix-wave triage swept the 333-PR queue, closed 10 PRs as already-shipped (with credit, naming the commits + files), and bundled 6 real fixes from the community into one collector. Plus three E2E-suite reliability fixes that surfaced while getting the full Docker suite to 100% green. + +You can now run `gbrain init --help` from inside a directory with 1000+ markdown files without it silently overwriting your Supabase config with PGLite. Your Supabase brain stops auth-failing at the direct connection because the pooler-form `postgres.` username now gets stripped before deriving the direct URL. OpenAI embedding batches that exceed the 300K-token per-request cap actually engage the recursive-halving safety net (the `Invalid 'input': maximum request size is 300000 tokens per request.` error message now matches the recognition regex; pre-fix it never fired). 
The dream-cycle's synthesize phase stops dying with `subagent job rejected: data.model "claude-sonnet-4-6" references an unknown provider` because the queue.add subagent validator now sees `anthropic:claude-sonnet-4-6` from a narrow prefix-fix at the call site. + +To turn it on: `gbrain upgrade`. The contributor closure comments include the exact commit SHA + file:line that already shipped each fix, so anyone who filed a duplicate or stale PR can verify the work landed. + +What you'd see in a concrete example. Pre-this-release: `gbrain init --help` from `~/Documents` (with 1500+ `.md` files inferred as a brain candidate) writes `engine: 'pglite'` + `database_path: ~/.gbrain/brain.pglite` to your real config, silently disconnecting you from Supabase. Post-fix: `--help` short-circuits before any state write; help text prints; config untouched. Same shape for the other five fixes: documented bugs, real repros, real fixes, real tests. + +Things to know about. (1) Two cross-file E2E reliability fixes in `scripts/run-e2e.sh`: per-file `pg_terminate_backend` flush kills stale connections from the prior bun process before the next file's `setupDB()` TRUNCATE races them, AND a hard 180s outer `gtimeout`/`timeout` cap so a wedged PGLite WASM call in beforeAll/afterAll can't pin the entire suite (this caught a real 30+ min wedge on `ingestion-roundtrip.test.ts` during the wave). (2) The `gbrain doctor` test in `test/e2e/mechanical.test.ts` now pins `--embedding-model openai:text-embedding-3-large` on its init step (was inheriting whatever the resolver picked from env keys, producing dim-mismatch warnings under sequential E2E) and `DELETE FROM sources WHERE id != 'default'` in beforeAll (was inheriting orphan `delta` source rows from prior files, producing `sync_freshness FAIL`). 
+ +Credit to the community contributors behind the 6 landed PRs: @mgunnin (x2: max_batch_tokens + isTokenLimitError regex), @brandonlipman (x2: connection-manager username strip + init --help guard), @jeremyknows (frontmatter-install-hook test isolation), @garrytan-agents (routing-eval intent-field guard). Plus 10 superseded PRs closed with credit (#798, #1083, #918, #1119, #602, #758, #539, #1287, #1117, #1125) — fix already on master via prior waves (v0.31.7 #804 + v0.36.1.1 #1182 + v0.38.2.0 #1297 + others); contributor closures cite each landing commit + file location. + +### Itemized changes + +**The 6 community fix-wave cherry-picks:** + +- **#924 (mgunnin):** `src/core/ai/recipes/openai.ts` gains `max_batch_tokens: 100_000` on the embedding touchpoint. Pre-fix OpenAI was the only recipe missing this cap; the recursive-halving safety net never engaged on token-dense pages (Discord exports, JSON dumps, code-heavy markdown), which then caused a retry storm and blocked the queue head. 100K estimated = ~150K real worst-case, safely under OpenAI's 300K per-request hard cap. +- **#990 (mgunnin):** `src/core/ai/gateway.ts:1264` `isTokenLimitError` now matches `maximum request size.*tokens` so OpenAI's actual error string triggers recursive halving. Pre-fix the regex caught Voyage and generic shapes but not OpenAI's literal wording. Tests in `test/ai/adaptive-embed-batch.test.ts` pin the recognition. +- **#761 (brandonlipman):** `src/core/connection-manager.ts:144-148` `deriveDirectUrl` now strips the `postgres.` pooler-form username down to bare `postgres` when synthesizing the Supabase direct URL. Pre-fix Supabase direct connections silently failed auth because they expect bare `postgres` (the `.` suffix is a pooler-routing-only thing). Tests in `test/connection-manager.serial.test.ts`. +- **#762 (brandonlipman):** `src/commands/init.ts:13-16` adds a `--help`/`-h` short-circuit at the top of `runInit`. 
Pre-fix `gbrain init --help` from a directory with many `.md` files would fall through to smart-detection, scan cwd, then `saveConfig()` — silently overwriting any existing Postgres config with PGLite defaults. Confirmed in the wild on a 10K-page Supabase brain. +- **#916 (jeremyknows):** `test/frontmatter-install-hook.test.ts` test isolation fix — uses `--local --get` instead of `--get` (which falls back to global config). Without this, developers with `core.hooksPath` set globally (dotfiles managers pointing at `~/.config/git/hooks`) see a deterministic FAIL. +- **#1332 (garrytan-agents):** `src/core/routing-eval.ts` adds defensive guard so `loadRoutingFixtures({intent: undefined})` doesn't crash `gbrain doctor` with `undefined is not an object (evaluating s.toLowerCase)`. Fixture validation now reports malformed entries instead of crashing the whole doctor run. + +**Three E2E reliability fixes (surfaced during this wave):** + +- **`src/core/cycle/synthesize.ts:395-404`** narrow `anthropic:` prefix fix at the queue.add boundary. `resolveModel` returns the bare id from `TIER_DEFAULTS`/`DEFAULT_ALIASES` (e.g. `claude-sonnet-4-6`); the subagent validator requires `provider:model` and rejected with `unknown provider`, dropping synthesize to `status: fail` with `SYNTH_PHASE_FAIL`. Narrow conditional prefix at the call site (only when no colon AND starts with `claude-`) avoids changing the constants which would ripple across every `resolveModel` caller. 
+- **`scripts/run-e2e.sh` per-file connection flush + outer timeout.** Two cross-file isolation hardenings: (1) `psql -At -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid != pg_backend_pid() AND datname = current_database()"` before each file kills idle connections from the prior bun process's pool, which were racing with the next file's `TRUNCATE CASCADE` and producing 'fixture pages disappear mid-test' failures; (2) hard 180s outer `gtimeout`/`timeout` cap so a PGLite WASM hang in beforeAll/afterAll can't wedge the entire suite. Both surfaced during the wave: 3 of 5 cross-file flakes caught by the connection flush; `ingestion-roundtrip` 30-min wedge caught by the outer timeout. +- **`test/e2e/mechanical.test.ts` doctor test hardening.** Two fixes: pin `--embedding-model openai:text-embedding-3-large` on the init subprocess (was inheriting env-resolver defaults that produced dim-mismatch under sequential E2E); `DELETE FROM sources WHERE id != 'default'` in beforeAll (was inheriting orphan `delta` source rows from prior files, producing `sync_freshness FAIL`). + +### For contributors + +Wave triage process notes: +- 333-PR queue evaluated via per-PR isolation runs + cross-reference against master HEAD (the load-bearing trick: read each PR's diff against its OWN base, not against current master, to see the actual intended change without v0.38-0.40 reverts contaminating the view). +- 10 PRs closed-as-superseded with credit comments citing the landing commit SHA + file:line so contributors can verify the fix shipped. The contributor close template is captured in `~/.claude/plans/time-for-fix-wave-warm-narwhal.md`. +- 2 mid-wave additional supersession discoveries (PR #1117 + PR #1125) caught via the `git log -S "configuredProviderIds" origin/master` pattern after master had already absorbed them via v0.36.1.1 #1182 (28-fix collector from 5 weeks ago); both closed with credit pointing at the absorbed commit. 
+- Tests on the wave reached 117/117 files / 821/821 tests pass against fresh Docker pgvector container after fixing the cross-file flake class. + ## [0.41.0.0] - 2026-05-24 **Your 100-job subagent batch now actually completes.** A real user ran `gbrain jobs work --concurrency 10` against an Azure-hosted Anthropic endpoint, submitted 100 background jobs, and watched every single one dead-letter with `rate lease "anthropic:messages" full (8/8)`. The default cap of 8 starved 2 workers; every starved job got marked as a failure, hit `max_attempts = 3` after 3 lease-full bounces, and dead-lettered. This release turns minions from "a CLI you drive" into "a fleet you supervise" — submit a batch, walk away, come back to completed work. diff --git a/VERSION b/VERSION index 7ee8e6a08..fc2e80cfb 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.41.0.0 \ No newline at end of file +0.41.3.0 \ No newline at end of file diff --git a/package.json b/package.json index 996292459..92fa3466c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gbrain", - "version": "0.41.0.0", + "version": "0.41.3.0", "description": "Postgres-native personal knowledge brain with hybrid RAG search", "type": "module", "main": "src/core/index.ts", From 2051cea41e2c6122522c1992cfec547ec31591c4 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 12:21:16 -0700 Subject: [PATCH 16/19] docs: annotate synthesize.ts narrow prefix fix (v0.41.3.0) CLAUDE.md gains the v0.41.3.0 note on src/core/cycle/synthesize.ts (narrow anthropic: prefix at the queue.add boundary so resolveModel's bare ids satisfy the subagent validator). llms-full.txt regenerated to match. Co-Authored-By: Claude Opus 4.7 --- CLAUDE.md | 2 +- llms-full.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 6900fb2cc..720bf8f78 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -245,7 +245,7 @@ strict behavior when unset. 
- `src/core/import-checkpoint.ts` (v0.34.2.0) — `loadCheckpoint(brainDir)`, `saveCheckpoint(brainDir, completed)`, `resumeFilter(files, completed, brainDir)`, `clearCheckpoint()`, plus the `ImportCheckpoint` type. Path-set checkpoint format (`{schema_version, brainDir, completed: string[]}`) replaces the v0.33.x positional `{processedIndex: N}` format. Atomic write via `.tmp` + `rename()` so a mid-write crash never leaves a partial JSON. `loadCheckpoint` returns `null` on: missing file, malformed JSON, brainDir mismatch (you ran import against a different brain), and the old positional format (logged to stderr before being discarded). `resumeFilter` returns `{toProcess, skippedCount}` — pure, no I/O, deterministic. `clearCheckpoint` is no-op-on-missing for clean-exit cleanup. Honors `GBRAIN_HOME` via `gbrainPath()` so test isolation via `withEnv({GBRAIN_HOME: tmpdir})` works without monkey-patching the fs layer. Best-effort persistence — `saveCheckpoint` logs warnings on write errors but never throws, so import keeps making progress even if disk is full. - `src/core/sort-newest-first.ts` (v0.34.2.0) — single source of truth for the descending-lex sort that `gbrain import` and `gbrain sync` both apply. Mutates in place (Array.prototype.sort semantics), returns the same array reference for fluent chaining. Empty/single-element inputs short-circuit. Future ordering changes flip one line in this helper instead of touching two CLI commands. Pinned by `test/sort-newest-first.test.ts` (5 hermetic cases: descending order, mixed prefixes, empty input, single-element input, in-place-mutation contract). - `src/core/cycle.ts` — v0.17 brain maintenance cycle primitive (extended to **9 phases in v0.29**). `runCycle(engine: BrainEngine | null, opts: CycleOpts): Promise` composes phases in semantically-driven order: **lint → backlinks → sync → synthesize → extract → patterns → recompute_emotional_weight → embed → orphans**. 
v0.29 adds the `recompute_emotional_weight` phase between patterns and embed; it sees the union of `syncPagesAffected` + `synthesizeWrittenSlugs` for incremental mode, or all pages when neither anchor is set (full backfill via `gbrain dream --phase recompute_emotional_weight`). v0.29 also extends `CycleReport.totals` with `pages_emotional_weight_recomputed` (additive, schema_version stays "1"). v0.23's `synthesize` phase runs after sync (cross-references see fresh brain) and before extract (auto-link materializes its writes); `patterns` runs after extract so it reads a fresh graph (codex finding #7 — subagent put_page sets `ctx.remote=true` and skips auto-link/timeline by default; extract is the canonical materialization). Three callers: `gbrain dream` CLI, `gbrain autopilot` daemon's inline path, and the Minions `autopilot-cycle` handler. Coordination via `gbrain_cycle_locks` DB table + `~/.gbrain/cycle.lock` file lock with PID-liveness for PGLite. `CycleReport.schema_version: "1"` is stable; totals additively grew in v0.23 (`transcripts_processed`, `synth_pages_written`, `patterns_written`). `yieldBetweenPhases` runs between phases. **v0.23 added `yieldDuringPhase`** for in-phase keepalive — synthesize/patterns call it during long waits to renew the cycle-lock TTL. Engine nullable; lock-skip on read-only phase selections. v0.22.1 (#403): `CycleOpts.signal?: AbortSignal` propagates the worker's abort signal; `checkAborted()` fires between every phase. v0.22.1 (#417): `runPhaseSync` returns `pagesAffected` via `SyncPhaseResult`; `runCycle` captures it and threads to `runPhaseExtract` as the 4th arg. v0.22.1 (Codex F2): `runPhaseSync` takes `willRunExtractPhase: boolean` and sets `noExtract: phases.includes('extract')` so `gbrain dream --phase sync` doesn't silently lose extraction. 
v0.22.5 (#475): `resolveSourceForDir(engine, brainDir)` threads `sourceId` to `performSync()` so sync reads the per-source `sources.last_commit` anchor instead of the drift-prone global `config.sync.last_commit` key. -- `src/core/cycle/synthesize.ts` (v0.23) — Synthesize phase: conversation-transcript-to-brain pipeline. Reads from `dream.synthesize.session_corpus_dir`, runs cheap Haiku verdict (cached in `dream_verdicts`), then fans out one Sonnet subagent per worth-processing transcript with `allowed_slug_prefixes` (sourced from `skills/_brain-filing-rules.json` `dream_synthesize_paths.globs`). Orchestrator collects slugs from `subagent_tool_executions` (NOT `pages.updated_at` — codex finding #2) and reverse-renders DB → markdown via `serializeMarkdown`. Cooldown via `dream.synthesize.last_completion_ts`, written ONLY on success. Idempotency key `dream:synth::`. Auto-commit deferred to v1.1 (codex #5). `--dry-run` runs Haiku, skips Sonnet (codex #8). Subagent never gets fs-write access. **v0.23.2:** `renderPageToMarkdown` (now exported) stamps `dream_generated: true` and `dream_cycle_date` into every reverse-write's frontmatter; `writeSummaryPage` does the same on the dream-cycle summary index. The marker is the explicit identity surface checked by `isDreamOutput` in `transcript-discovery.ts` — replaces the v0.23.1 content-prefix heuristic that could miss real output (`serializeMarkdown` doesn't embed slugs in body) and false-positive on user transcripts citing brain pages. `judgeSignificance` and `JudgeClient` are exported; `judgeSignificance` accepts a `verdictModel` parameter (default `claude-haiku-4-5-20251001`) loaded from `dream.synthesize.verdict_model` via `loadSynthConfig`. **v0.30.2:** model-aware chunker `splitTranscriptByBudget(content, contentHash, maxChars)` splits oversized transcripts at paragraph boundaries (`## Topic:` → `---` → `\n` ladder) using a deterministic offset seeded from the first 32 bits of `contentHash` so retries chunk identically. 
Per-chunk char budget computed from `MODEL_CONTEXT_TOKENS[resolvedModel] × 0.9 × 3.5 chars/token`; non-Anthropic ids fall back to a 180K-token safe default with a once-per-process stderr warning. Operator overrides: `dream.synthesize.max_prompt_tokens` (floor 100K, wins when set) and `dream.synthesize.max_chunks_per_transcript` (default 24). Per-chunk idempotency keys `dream:synth:::cof`; single-chunk transcripts preserve the legacy `dream:synth::` key byte-for-byte (D8 lookup), so existing brains skip with `already_synthesized_legacy_single_chunk` instead of re-spending Sonnet on upgrade. `collectChildPutPageSlugs` raw-fetches every (job_id, slug) pair (not `SELECT DISTINCT`) and rewrites bare-hash6 slugs to `-c` for chunked children (D6 — orchestrator-side, zero Sonnet trust). Cap-hit skips don't write to `dream_verdicts`, so raising the cap on next run re-attempts cleanly. D7 scope: bounds INITIAL prompt size only; tool-loop turn-N accumulation is caught by the v0.30.2 terminal-error classification in `subagent.ts`, not bounded ahead of time. +- `src/core/cycle/synthesize.ts` (v0.23) — Synthesize phase: conversation-transcript-to-brain pipeline. Reads from `dream.synthesize.session_corpus_dir`, runs cheap Haiku verdict (cached in `dream_verdicts`), then fans out one Sonnet subagent per worth-processing transcript with `allowed_slug_prefixes` (sourced from `skills/_brain-filing-rules.json` `dream_synthesize_paths.globs`). Orchestrator collects slugs from `subagent_tool_executions` (NOT `pages.updated_at` — codex finding #2) and reverse-renders DB → markdown via `serializeMarkdown`. Cooldown via `dream.synthesize.last_completion_ts`, written ONLY on success. Idempotency key `dream:synth::`. Auto-commit deferred to v1.1 (codex #5). `--dry-run` runs Haiku, skips Sonnet (codex #8). Subagent never gets fs-write access. 
**v0.23.2:** `renderPageToMarkdown` (now exported) stamps `dream_generated: true` and `dream_cycle_date` into every reverse-write's frontmatter; `writeSummaryPage` does the same on the dream-cycle summary index. The marker is the explicit identity surface checked by `isDreamOutput` in `transcript-discovery.ts` — replaces the v0.23.1 content-prefix heuristic that could miss real output (`serializeMarkdown` doesn't embed slugs in body) and false-positive on user transcripts citing brain pages. `judgeSignificance` and `JudgeClient` are exported; `judgeSignificance` accepts a `verdictModel` parameter (default `claude-haiku-4-5-20251001`) loaded from `dream.synthesize.verdict_model` via `loadSynthConfig`. **v0.30.2:** model-aware chunker `splitTranscriptByBudget(content, contentHash, maxChars)` splits oversized transcripts at paragraph boundaries (`## Topic:` → `---` → `\n` ladder) using a deterministic offset seeded from the first 32 bits of `contentHash` so retries chunk identically. Per-chunk char budget computed from `MODEL_CONTEXT_TOKENS[resolvedModel] × 0.9 × 3.5 chars/token`; non-Anthropic ids fall back to a 180K-token safe default with a once-per-process stderr warning. Operator overrides: `dream.synthesize.max_prompt_tokens` (floor 100K, wins when set) and `dream.synthesize.max_chunks_per_transcript` (default 24). Per-chunk idempotency keys `dream:synth:::cof`; single-chunk transcripts preserve the legacy `dream:synth::` key byte-for-byte (D8 lookup), so existing brains skip with `already_synthesized_legacy_single_chunk` instead of re-spending Sonnet on upgrade. `collectChildPutPageSlugs` raw-fetches every (job_id, slug) pair (not `SELECT DISTINCT`) and rewrites bare-hash6 slugs to `-c` for chunked children (D6 — orchestrator-side, zero Sonnet trust). Cap-hit skips don't write to `dream_verdicts`, so raising the cap on next run re-attempts cleanly. 
D7 scope: bounds INITIAL prompt size only; tool-loop turn-N accumulation is caught by the v0.30.2 terminal-error classification in `subagent.ts`, not bounded ahead of time. **v0.41.3.0:** narrow `anthropic:` prefix fix at the queue.add boundary (lines 395-404). `resolveModel` returns bare ids from `TIER_DEFAULTS`/`DEFAULT_ALIASES` (e.g. `claude-sonnet-4-6`); the subagent validator requires `provider:model` form and was rejecting with `unknown provider`, dropping synthesize to `status: fail` with `SYNTH_PHASE_FAIL`. Conditional prefix at the call site (only when no colon AND starts with `claude-`) avoids changing the shared constants which would ripple across every `resolveModel` caller. - `src/core/cycle/patterns.ts` (v0.23) — Patterns phase: cross-session theme detection over reflections within `dream.patterns.lookback_days` (default 30). Names a pattern only when ≥`dream.patterns.min_evidence` (default 3) reflections support it. Single Sonnet subagent; same allow-list path as synthesize. Runs AFTER `extract` so the graph is fresh. - `src/core/cycle/extract-facts.ts` (v0.32.2, extended v0.35.6.0) — extract_facts cycle phase. v0.32.2 contract: fence is canonical; per-page wipe (`deleteFactsForPage`) + reinsert from `parseFactsFence` + `extractFactsFromFenceText` + `engine.insertFacts`. Empty-fence guard refuses when v0.31 legacy rows (`row_num IS NULL AND entity_slug IS NOT NULL`) pend the v0_32_2 backfill (status: warn, hint: `gbrain apply-migrations --yes`). **v0.35.6.0** adds a phantom-redirect pre-pass that runs AFTER the legacy-row guard, BEFORE the main reconcile loop. When `opts.brainDir` is set, `runPhantomRedirectPass(engine, brainDir, sourceId, dryRun)` walks unprefixed-slug pages capped by `GBRAIN_PHANTOM_REDIRECT_LIMIT` (default 50). 
The pass returns `touched_canonicals` — canonical slugs whose disk fence was merged with phantom rows; `runExtractFacts` UNIONs them into the main reconcile slug set so canonical's DB facts derive from the merged fence in the same cycle (round-14 scenario-B fix: phantom had only-on-disk fence, no DB facts). `ExtractFactsResult` gains six phantom fields: `phantomsScanned`, `phantomsRedirected`, `phantomsAmbiguous`, `phantomsSkippedDrift`, `phantomsLockBusy`, `phantomsMorePending`. Three of those bubble to `CycleReport.totals` (`phantoms_redirected`, `phantoms_ambiguous`, `phantoms_skipped_drift`). - `src/core/entities/resolve.ts` (v0.30+, extended v0.35.6.0) — Free-form entity name → canonical slug resolution. `resolveEntitySlug(engine, source_id, raw)`: exact slug → fuzzy (pg_trgm @ 0.4 threshold) → bare-name prefix expansion (`people/<token>-%` then `companies/<token>-%` using correlated-subquery `connection_count` for tiebreaker) → deterministic `slugify` fallback. **v0.35.6.0** exports two new helpers for the phantom-redirect pass: `resolvePhantomCanonical(engine, sourceId, phantomSlug)` — variant that SKIPS the exact-slug step (codex #1: phantom slug `'alice'` exact-matches itself, would make the redirect handler a no-op); returns the canonical only when result is non-null AND contains `/`. `findPrefixCandidates(engine, sourceId, token)` — standalone SQL query returning ALL candidates across `PREFIX_EXPANSION_DIRS` (currently hardcoded `['people', 'companies']`) using `slug LIKE ANY($N::text[])` over patterns `dir/token` + `dir/token-%`; cap of 10 ordered by `connection_count DESC, slug ASC`. NOT a wrapper around `tryPrefixExpansion` because that path returns per-dir top-1 and suppresses ambiguity by design (codex #11). Pinned by `test/phantom-redirect.test.ts` resolvePhantomCanonical describe (3 cases) + findPrefixCandidates describe (6 cases including multi-dir ambiguity and the `people/aliceberg`-doesn't-match-`alice` false-positive guard).
diff --git a/llms-full.txt b/llms-full.txt index f68b11dcb..8bb74e65f 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -387,7 +387,7 @@ strict behavior when unset. - `src/core/import-checkpoint.ts` (v0.34.2.0) — `loadCheckpoint(brainDir)`, `saveCheckpoint(brainDir, completed)`, `resumeFilter(files, completed, brainDir)`, `clearCheckpoint()`, plus the `ImportCheckpoint` type. Path-set checkpoint format (`{schema_version, brainDir, completed: string[]}`) replaces the v0.33.x positional `{processedIndex: N}` format. Atomic write via `.tmp` + `rename()` so a mid-write crash never leaves a partial JSON. `loadCheckpoint` returns `null` on: missing file, malformed JSON, brainDir mismatch (you ran import against a different brain), and the old positional format (logged to stderr before being discarded). `resumeFilter` returns `{toProcess, skippedCount}` — pure, no I/O, deterministic. `clearCheckpoint` is no-op-on-missing for clean-exit cleanup. Honors `GBRAIN_HOME` via `gbrainPath()` so test isolation via `withEnv({GBRAIN_HOME: tmpdir})` works without monkey-patching the fs layer. Best-effort persistence — `saveCheckpoint` logs warnings on write errors but never throws, so import keeps making progress even if disk is full. - `src/core/sort-newest-first.ts` (v0.34.2.0) — single source of truth for the descending-lex sort that `gbrain import` and `gbrain sync` both apply. Mutates in place (Array.prototype.sort semantics), returns the same array reference for fluent chaining. Empty/single-element inputs short-circuit. Future ordering changes flip one line in this helper instead of touching two CLI commands. Pinned by `test/sort-newest-first.test.ts` (5 hermetic cases: descending order, mixed prefixes, empty input, single-element input, in-place-mutation contract). - `src/core/cycle.ts` — v0.17 brain maintenance cycle primitive (extended to **9 phases in v0.29**). 
`runCycle(engine: BrainEngine | null, opts: CycleOpts): Promise<CycleReport>` composes phases in semantically-driven order: **lint → backlinks → sync → synthesize → extract → patterns → recompute_emotional_weight → embed → orphans**. v0.29 adds the `recompute_emotional_weight` phase between patterns and embed; it sees the union of `syncPagesAffected` + `synthesizeWrittenSlugs` for incremental mode, or all pages when neither anchor is set (full backfill via `gbrain dream --phase recompute_emotional_weight`). v0.29 also extends `CycleReport.totals` with `pages_emotional_weight_recomputed` (additive, schema_version stays "1"). v0.23's `synthesize` phase runs after sync (cross-references see fresh brain) and before extract (auto-link materializes its writes); `patterns` runs after extract so it reads a fresh graph (codex finding #7 — subagent put_page sets `ctx.remote=true` and skips auto-link/timeline by default; extract is the canonical materialization). Three callers: `gbrain dream` CLI, `gbrain autopilot` daemon's inline path, and the Minions `autopilot-cycle` handler. Coordination via `gbrain_cycle_locks` DB table + `~/.gbrain/cycle.lock` file lock with PID-liveness for PGLite. `CycleReport.schema_version: "1"` is stable; totals additively grew in v0.23 (`transcripts_processed`, `synth_pages_written`, `patterns_written`). `yieldBetweenPhases` runs between phases. **v0.23 added `yieldDuringPhase`** for in-phase keepalive — synthesize/patterns call it during long waits to renew the cycle-lock TTL. Engine nullable; lock-skip on read-only phase selections. v0.22.1 (#403): `CycleOpts.signal?: AbortSignal` propagates the worker's abort signal; `checkAborted()` fires between every phase. v0.22.1 (#417): `runPhaseSync` returns `pagesAffected` via `SyncPhaseResult`; `runCycle` captures it and threads to `runPhaseExtract` as the 4th arg.
v0.22.1 (Codex F2): `runPhaseSync` takes `willRunExtractPhase: boolean` and sets `noExtract: phases.includes('extract')` so `gbrain dream --phase sync` doesn't silently lose extraction. v0.22.5 (#475): `resolveSourceForDir(engine, brainDir)` threads `sourceId` to `performSync()` so sync reads the per-source `sources.last_commit` anchor instead of the drift-prone global `config.sync.last_commit` key. -- `src/core/cycle/synthesize.ts` (v0.23) — Synthesize phase: conversation-transcript-to-brain pipeline. Reads from `dream.synthesize.session_corpus_dir`, runs cheap Haiku verdict (cached in `dream_verdicts`), then fans out one Sonnet subagent per worth-processing transcript with `allowed_slug_prefixes` (sourced from `skills/_brain-filing-rules.json` `dream_synthesize_paths.globs`). Orchestrator collects slugs from `subagent_tool_executions` (NOT `pages.updated_at` — codex finding #2) and reverse-renders DB → markdown via `serializeMarkdown`. Cooldown via `dream.synthesize.last_completion_ts`, written ONLY on success. Idempotency key `dream:synth::`. Auto-commit deferred to v1.1 (codex #5). `--dry-run` runs Haiku, skips Sonnet (codex #8). Subagent never gets fs-write access. **v0.23.2:** `renderPageToMarkdown` (now exported) stamps `dream_generated: true` and `dream_cycle_date` into every reverse-write's frontmatter; `writeSummaryPage` does the same on the dream-cycle summary index. The marker is the explicit identity surface checked by `isDreamOutput` in `transcript-discovery.ts` — replaces the v0.23.1 content-prefix heuristic that could miss real output (`serializeMarkdown` doesn't embed slugs in body) and false-positive on user transcripts citing brain pages. `judgeSignificance` and `JudgeClient` are exported; `judgeSignificance` accepts a `verdictModel` parameter (default `claude-haiku-4-5-20251001`) loaded from `dream.synthesize.verdict_model` via `loadSynthConfig`. 
**v0.30.2:** model-aware chunker `splitTranscriptByBudget(content, contentHash, maxChars)` splits oversized transcripts at paragraph boundaries (`## Topic:` → `---` → `\n` ladder) using a deterministic offset seeded from the first 32 bits of `contentHash` so retries chunk identically. Per-chunk char budget computed from `MODEL_CONTEXT_TOKENS[resolvedModel] × 0.9 × 3.5 chars/token`; non-Anthropic ids fall back to a 180K-token safe default with a once-per-process stderr warning. Operator overrides: `dream.synthesize.max_prompt_tokens` (floor 100K, wins when set) and `dream.synthesize.max_chunks_per_transcript` (default 24). Per-chunk idempotency keys `dream:synth:::cof`; single-chunk transcripts preserve the legacy `dream:synth::` key byte-for-byte (D8 lookup), so existing brains skip with `already_synthesized_legacy_single_chunk` instead of re-spending Sonnet on upgrade. `collectChildPutPageSlugs` raw-fetches every (job_id, slug) pair (not `SELECT DISTINCT`) and rewrites bare-hash6 slugs to `-c` for chunked children (D6 — orchestrator-side, zero Sonnet trust). Cap-hit skips don't write to `dream_verdicts`, so raising the cap on next run re-attempts cleanly. D7 scope: bounds INITIAL prompt size only; tool-loop turn-N accumulation is caught by the v0.30.2 terminal-error classification in `subagent.ts`, not bounded ahead of time. +- `src/core/cycle/synthesize.ts` (v0.23) — Synthesize phase: conversation-transcript-to-brain pipeline. Reads from `dream.synthesize.session_corpus_dir`, runs cheap Haiku verdict (cached in `dream_verdicts`), then fans out one Sonnet subagent per worth-processing transcript with `allowed_slug_prefixes` (sourced from `skills/_brain-filing-rules.json` `dream_synthesize_paths.globs`). Orchestrator collects slugs from `subagent_tool_executions` (NOT `pages.updated_at` — codex finding #2) and reverse-renders DB → markdown via `serializeMarkdown`. Cooldown via `dream.synthesize.last_completion_ts`, written ONLY on success. 
Idempotency key `dream:synth::`. Auto-commit deferred to v1.1 (codex #5). `--dry-run` runs Haiku, skips Sonnet (codex #8). Subagent never gets fs-write access. **v0.23.2:** `renderPageToMarkdown` (now exported) stamps `dream_generated: true` and `dream_cycle_date` into every reverse-write's frontmatter; `writeSummaryPage` does the same on the dream-cycle summary index. The marker is the explicit identity surface checked by `isDreamOutput` in `transcript-discovery.ts` — replaces the v0.23.1 content-prefix heuristic that could miss real output (`serializeMarkdown` doesn't embed slugs in body) and false-positive on user transcripts citing brain pages. `judgeSignificance` and `JudgeClient` are exported; `judgeSignificance` accepts a `verdictModel` parameter (default `claude-haiku-4-5-20251001`) loaded from `dream.synthesize.verdict_model` via `loadSynthConfig`. **v0.30.2:** model-aware chunker `splitTranscriptByBudget(content, contentHash, maxChars)` splits oversized transcripts at paragraph boundaries (`## Topic:` → `---` → `\n` ladder) using a deterministic offset seeded from the first 32 bits of `contentHash` so retries chunk identically. Per-chunk char budget computed from `MODEL_CONTEXT_TOKENS[resolvedModel] × 0.9 × 3.5 chars/token`; non-Anthropic ids fall back to a 180K-token safe default with a once-per-process stderr warning. Operator overrides: `dream.synthesize.max_prompt_tokens` (floor 100K, wins when set) and `dream.synthesize.max_chunks_per_transcript` (default 24). Per-chunk idempotency keys `dream:synth:::cof`; single-chunk transcripts preserve the legacy `dream:synth::` key byte-for-byte (D8 lookup), so existing brains skip with `already_synthesized_legacy_single_chunk` instead of re-spending Sonnet on upgrade. `collectChildPutPageSlugs` raw-fetches every (job_id, slug) pair (not `SELECT DISTINCT`) and rewrites bare-hash6 slugs to `-c` for chunked children (D6 — orchestrator-side, zero Sonnet trust). 
Cap-hit skips don't write to `dream_verdicts`, so raising the cap on next run re-attempts cleanly. D7 scope: bounds INITIAL prompt size only; tool-loop turn-N accumulation is caught by the v0.30.2 terminal-error classification in `subagent.ts`, not bounded ahead of time. **v0.41.3.0:** narrow `anthropic:` prefix fix at the queue.add boundary (lines 395-404). `resolveModel` returns bare ids from `TIER_DEFAULTS`/`DEFAULT_ALIASES` (e.g. `claude-sonnet-4-6`); the subagent validator requires `provider:model` form and was rejecting with `unknown provider`, dropping synthesize to `status: fail` with `SYNTH_PHASE_FAIL`. Conditional prefix at the call site (only when no colon AND starts with `claude-`) avoids changing the shared constants which would ripple across every `resolveModel` caller. - `src/core/cycle/patterns.ts` (v0.23) — Patterns phase: cross-session theme detection over reflections within `dream.patterns.lookback_days` (default 30). Names a pattern only when ≥`dream.patterns.min_evidence` (default 3) reflections support it. Single Sonnet subagent; same allow-list path as synthesize. Runs AFTER `extract` so the graph is fresh. - `src/core/cycle/extract-facts.ts` (v0.32.2, extended v0.35.6.0) — extract_facts cycle phase. v0.32.2 contract: fence is canonical; per-page wipe (`deleteFactsForPage`) + reinsert from `parseFactsFence` + `extractFactsFromFenceText` + `engine.insertFacts`. Empty-fence guard refuses when v0.31 legacy rows (`row_num IS NULL AND entity_slug IS NOT NULL`) pend the v0_32_2 backfill (status: warn, hint: `gbrain apply-migrations --yes`). **v0.35.6.0** adds a phantom-redirect pre-pass that runs AFTER the legacy-row guard, BEFORE the main reconcile loop. When `opts.brainDir` is set, `runPhantomRedirectPass(engine, brainDir, sourceId, dryRun)` walks unprefixed-slug pages capped by `GBRAIN_PHANTOM_REDIRECT_LIMIT` (default 50). 
The pass returns `touched_canonicals` — canonical slugs whose disk fence was merged with phantom rows; `runExtractFacts` UNIONs them into the main reconcile slug set so canonical's DB facts derive from the merged fence in the same cycle (round-14 scenario-B fix: phantom had only-on-disk fence, no DB facts). `ExtractFactsResult` gains six phantom fields: `phantomsScanned`, `phantomsRedirected`, `phantomsAmbiguous`, `phantomsSkippedDrift`, `phantomsLockBusy`, `phantomsMorePending`. Three of those bubble to `CycleReport.totals` (`phantoms_redirected`, `phantoms_ambiguous`, `phantoms_skipped_drift`). - `src/core/entities/resolve.ts` (v0.30+, extended v0.35.6.0) — Free-form entity name → canonical slug resolution. `resolveEntitySlug(engine, source_id, raw)`: exact slug → fuzzy (pg_trgm @ 0.4 threshold) → bare-name prefix expansion (`people/<token>-%` then `companies/<token>-%` using correlated-subquery `connection_count` for tiebreaker) → deterministic `slugify` fallback. **v0.35.6.0** exports two new helpers for the phantom-redirect pass: `resolvePhantomCanonical(engine, sourceId, phantomSlug)` — variant that SKIPS the exact-slug step (codex #1: phantom slug `'alice'` exact-matches itself, would make the redirect handler a no-op); returns the canonical only when result is non-null AND contains `/`. `findPrefixCandidates(engine, sourceId, token)` — standalone SQL query returning ALL candidates across `PREFIX_EXPANSION_DIRS` (currently hardcoded `['people', 'companies']`) using `slug LIKE ANY($N::text[])` over patterns `dir/token` + `dir/token-%`; cap of 10 ordered by `connection_count DESC, slug ASC`. NOT a wrapper around `tryPrefixExpansion` because that path returns per-dir top-1 and suppresses ambiguity by design (codex #11). Pinned by `test/phantom-redirect.test.ts` resolvePhantomCanonical describe (3 cases) + findPrefixCandidates describe (6 cases including multi-dir ambiguity and the `people/aliceberg`-doesn't-match-`alice` false-positive guard).
From 94a5539ea64c79b6f761f25f0bb3a4c0cac54476 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 21:49:50 -0700 Subject: [PATCH 17/19] =?UTF-8?q?chore:=20rebump=20v0.41.3.0=20=E2=86=92?= =?UTF-8?q?=20v0.41.5.0=20(queue=20drift;=20PR=20#1377=20claimed=20.4.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sibling fix-wave PR #1377 (garrytan/community-pr-wave) claimed v0.41.4.0 between my queue check (.3.0 was available) and PR creation. Re-bump to the next available slot per workspace-aware allocator. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 2 +- CLAUDE.md | 2 +- VERSION | 2 +- llms-full.txt | 2 +- package.json | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56a9f93d2..8bcfe8f25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ All notable changes to GBrain will be documented in this file. -## [0.41.3.0] - 2026-05-24 +## [0.41.5.0] - 2026-05-24 **Six community bug-fix PRs land + the E2E suite stops lying about itself.** A fix-wave triage swept the 333-PR queue, closed 10 PRs as already-shipped (with credit, naming the commits + files), and bundled 6 real fixes from the community into one collector. Plus three E2E-suite reliability fixes that surfaced while getting the full Docker suite to 100% green. diff --git a/CLAUDE.md b/CLAUDE.md index 720bf8f78..d76c8283d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -245,7 +245,7 @@ strict behavior when unset. - `src/core/import-checkpoint.ts` (v0.34.2.0) — `loadCheckpoint(brainDir)`, `saveCheckpoint(brainDir, completed)`, `resumeFilter(files, completed, brainDir)`, `clearCheckpoint()`, plus the `ImportCheckpoint` type. Path-set checkpoint format (`{schema_version, brainDir, completed: string[]}`) replaces the v0.33.x positional `{processedIndex: N}` format. Atomic write via `.tmp` + `rename()` so a mid-write crash never leaves a partial JSON. 
`loadCheckpoint` returns `null` on: missing file, malformed JSON, brainDir mismatch (you ran import against a different brain), and the old positional format (logged to stderr before being discarded). `resumeFilter` returns `{toProcess, skippedCount}` — pure, no I/O, deterministic. `clearCheckpoint` is no-op-on-missing for clean-exit cleanup. Honors `GBRAIN_HOME` via `gbrainPath()` so test isolation via `withEnv({GBRAIN_HOME: tmpdir})` works without monkey-patching the fs layer. Best-effort persistence — `saveCheckpoint` logs warnings on write errors but never throws, so import keeps making progress even if disk is full. - `src/core/sort-newest-first.ts` (v0.34.2.0) — single source of truth for the descending-lex sort that `gbrain import` and `gbrain sync` both apply. Mutates in place (Array.prototype.sort semantics), returns the same array reference for fluent chaining. Empty/single-element inputs short-circuit. Future ordering changes flip one line in this helper instead of touching two CLI commands. Pinned by `test/sort-newest-first.test.ts` (5 hermetic cases: descending order, mixed prefixes, empty input, single-element input, in-place-mutation contract). - `src/core/cycle.ts` — v0.17 brain maintenance cycle primitive (extended to **9 phases in v0.29**). `runCycle(engine: BrainEngine | null, opts: CycleOpts): Promise<CycleReport>` composes phases in semantically-driven order: **lint → backlinks → sync → synthesize → extract → patterns → recompute_emotional_weight → embed → orphans**. v0.29 adds the `recompute_emotional_weight` phase between patterns and embed; it sees the union of `syncPagesAffected` + `synthesizeWrittenSlugs` for incremental mode, or all pages when neither anchor is set (full backfill via `gbrain dream --phase recompute_emotional_weight`). v0.29 also extends `CycleReport.totals` with `pages_emotional_weight_recomputed` (additive, schema_version stays "1").
v0.23's `synthesize` phase runs after sync (cross-references see fresh brain) and before extract (auto-link materializes its writes); `patterns` runs after extract so it reads a fresh graph (codex finding #7 — subagent put_page sets `ctx.remote=true` and skips auto-link/timeline by default; extract is the canonical materialization). Three callers: `gbrain dream` CLI, `gbrain autopilot` daemon's inline path, and the Minions `autopilot-cycle` handler. Coordination via `gbrain_cycle_locks` DB table + `~/.gbrain/cycle.lock` file lock with PID-liveness for PGLite. `CycleReport.schema_version: "1"` is stable; totals additively grew in v0.23 (`transcripts_processed`, `synth_pages_written`, `patterns_written`). `yieldBetweenPhases` runs between phases. **v0.23 added `yieldDuringPhase`** for in-phase keepalive — synthesize/patterns call it during long waits to renew the cycle-lock TTL. Engine nullable; lock-skip on read-only phase selections. v0.22.1 (#403): `CycleOpts.signal?: AbortSignal` propagates the worker's abort signal; `checkAborted()` fires between every phase. v0.22.1 (#417): `runPhaseSync` returns `pagesAffected` via `SyncPhaseResult`; `runCycle` captures it and threads to `runPhaseExtract` as the 4th arg. v0.22.1 (Codex F2): `runPhaseSync` takes `willRunExtractPhase: boolean` and sets `noExtract: phases.includes('extract')` so `gbrain dream --phase sync` doesn't silently lose extraction. v0.22.5 (#475): `resolveSourceForDir(engine, brainDir)` threads `sourceId` to `performSync()` so sync reads the per-source `sources.last_commit` anchor instead of the drift-prone global `config.sync.last_commit` key. -- `src/core/cycle/synthesize.ts` (v0.23) — Synthesize phase: conversation-transcript-to-brain pipeline. 
Reads from `dream.synthesize.session_corpus_dir`, runs cheap Haiku verdict (cached in `dream_verdicts`), then fans out one Sonnet subagent per worth-processing transcript with `allowed_slug_prefixes` (sourced from `skills/_brain-filing-rules.json` `dream_synthesize_paths.globs`). Orchestrator collects slugs from `subagent_tool_executions` (NOT `pages.updated_at` — codex finding #2) and reverse-renders DB → markdown via `serializeMarkdown`. Cooldown via `dream.synthesize.last_completion_ts`, written ONLY on success. Idempotency key `dream:synth::`. Auto-commit deferred to v1.1 (codex #5). `--dry-run` runs Haiku, skips Sonnet (codex #8). Subagent never gets fs-write access. **v0.23.2:** `renderPageToMarkdown` (now exported) stamps `dream_generated: true` and `dream_cycle_date` into every reverse-write's frontmatter; `writeSummaryPage` does the same on the dream-cycle summary index. The marker is the explicit identity surface checked by `isDreamOutput` in `transcript-discovery.ts` — replaces the v0.23.1 content-prefix heuristic that could miss real output (`serializeMarkdown` doesn't embed slugs in body) and false-positive on user transcripts citing brain pages. `judgeSignificance` and `JudgeClient` are exported; `judgeSignificance` accepts a `verdictModel` parameter (default `claude-haiku-4-5-20251001`) loaded from `dream.synthesize.verdict_model` via `loadSynthConfig`. **v0.30.2:** model-aware chunker `splitTranscriptByBudget(content, contentHash, maxChars)` splits oversized transcripts at paragraph boundaries (`## Topic:` → `---` → `\n` ladder) using a deterministic offset seeded from the first 32 bits of `contentHash` so retries chunk identically. Per-chunk char budget computed from `MODEL_CONTEXT_TOKENS[resolvedModel] × 0.9 × 3.5 chars/token`; non-Anthropic ids fall back to a 180K-token safe default with a once-per-process stderr warning. 
Operator overrides: `dream.synthesize.max_prompt_tokens` (floor 100K, wins when set) and `dream.synthesize.max_chunks_per_transcript` (default 24). Per-chunk idempotency keys `dream:synth:::cof`; single-chunk transcripts preserve the legacy `dream:synth::` key byte-for-byte (D8 lookup), so existing brains skip with `already_synthesized_legacy_single_chunk` instead of re-spending Sonnet on upgrade. `collectChildPutPageSlugs` raw-fetches every (job_id, slug) pair (not `SELECT DISTINCT`) and rewrites bare-hash6 slugs to `-c` for chunked children (D6 — orchestrator-side, zero Sonnet trust). Cap-hit skips don't write to `dream_verdicts`, so raising the cap on next run re-attempts cleanly. D7 scope: bounds INITIAL prompt size only; tool-loop turn-N accumulation is caught by the v0.30.2 terminal-error classification in `subagent.ts`, not bounded ahead of time. **v0.41.3.0:** narrow `anthropic:` prefix fix at the queue.add boundary (lines 395-404). `resolveModel` returns bare ids from `TIER_DEFAULTS`/`DEFAULT_ALIASES` (e.g. `claude-sonnet-4-6`); the subagent validator requires `provider:model` form and was rejecting with `unknown provider`, dropping synthesize to `status: fail` with `SYNTH_PHASE_FAIL`. Conditional prefix at the call site (only when no colon AND starts with `claude-`) avoids changing the shared constants which would ripple across every `resolveModel` caller. +- `src/core/cycle/synthesize.ts` (v0.23) — Synthesize phase: conversation-transcript-to-brain pipeline. Reads from `dream.synthesize.session_corpus_dir`, runs cheap Haiku verdict (cached in `dream_verdicts`), then fans out one Sonnet subagent per worth-processing transcript with `allowed_slug_prefixes` (sourced from `skills/_brain-filing-rules.json` `dream_synthesize_paths.globs`). Orchestrator collects slugs from `subagent_tool_executions` (NOT `pages.updated_at` — codex finding #2) and reverse-renders DB → markdown via `serializeMarkdown`. 
Cooldown via `dream.synthesize.last_completion_ts`, written ONLY on success. Idempotency key `dream:synth::`. Auto-commit deferred to v1.1 (codex #5). `--dry-run` runs Haiku, skips Sonnet (codex #8). Subagent never gets fs-write access. **v0.23.2:** `renderPageToMarkdown` (now exported) stamps `dream_generated: true` and `dream_cycle_date` into every reverse-write's frontmatter; `writeSummaryPage` does the same on the dream-cycle summary index. The marker is the explicit identity surface checked by `isDreamOutput` in `transcript-discovery.ts` — replaces the v0.23.1 content-prefix heuristic that could miss real output (`serializeMarkdown` doesn't embed slugs in body) and false-positive on user transcripts citing brain pages. `judgeSignificance` and `JudgeClient` are exported; `judgeSignificance` accepts a `verdictModel` parameter (default `claude-haiku-4-5-20251001`) loaded from `dream.synthesize.verdict_model` via `loadSynthConfig`. **v0.30.2:** model-aware chunker `splitTranscriptByBudget(content, contentHash, maxChars)` splits oversized transcripts at paragraph boundaries (`## Topic:` → `---` → `\n` ladder) using a deterministic offset seeded from the first 32 bits of `contentHash` so retries chunk identically. Per-chunk char budget computed from `MODEL_CONTEXT_TOKENS[resolvedModel] × 0.9 × 3.5 chars/token`; non-Anthropic ids fall back to a 180K-token safe default with a once-per-process stderr warning. Operator overrides: `dream.synthesize.max_prompt_tokens` (floor 100K, wins when set) and `dream.synthesize.max_chunks_per_transcript` (default 24). Per-chunk idempotency keys `dream:synth:::cof`; single-chunk transcripts preserve the legacy `dream:synth::` key byte-for-byte (D8 lookup), so existing brains skip with `already_synthesized_legacy_single_chunk` instead of re-spending Sonnet on upgrade. 
`collectChildPutPageSlugs` raw-fetches every (job_id, slug) pair (not `SELECT DISTINCT`) and rewrites bare-hash6 slugs to `-c` for chunked children (D6 — orchestrator-side, zero Sonnet trust). Cap-hit skips don't write to `dream_verdicts`, so raising the cap on next run re-attempts cleanly. D7 scope: bounds INITIAL prompt size only; tool-loop turn-N accumulation is caught by the v0.30.2 terminal-error classification in `subagent.ts`, not bounded ahead of time. **v0.41.5.0:** narrow `anthropic:` prefix fix at the queue.add boundary (lines 395-404). `resolveModel` returns bare ids from `TIER_DEFAULTS`/`DEFAULT_ALIASES` (e.g. `claude-sonnet-4-6`); the subagent validator requires `provider:model` form and was rejecting with `unknown provider`, dropping synthesize to `status: fail` with `SYNTH_PHASE_FAIL`. Conditional prefix at the call site (only when no colon AND starts with `claude-`) avoids changing the shared constants which would ripple across every `resolveModel` caller. - `src/core/cycle/patterns.ts` (v0.23) — Patterns phase: cross-session theme detection over reflections within `dream.patterns.lookback_days` (default 30). Names a pattern only when ≥`dream.patterns.min_evidence` (default 3) reflections support it. Single Sonnet subagent; same allow-list path as synthesize. Runs AFTER `extract` so the graph is fresh. - `src/core/cycle/extract-facts.ts` (v0.32.2, extended v0.35.6.0) — extract_facts cycle phase. v0.32.2 contract: fence is canonical; per-page wipe (`deleteFactsForPage`) + reinsert from `parseFactsFence` + `extractFactsFromFenceText` + `engine.insertFacts`. Empty-fence guard refuses when v0.31 legacy rows (`row_num IS NULL AND entity_slug IS NOT NULL`) pend the v0_32_2 backfill (status: warn, hint: `gbrain apply-migrations --yes`). **v0.35.6.0** adds a phantom-redirect pre-pass that runs AFTER the legacy-row guard, BEFORE the main reconcile loop. 
When `opts.brainDir` is set, `runPhantomRedirectPass(engine, brainDir, sourceId, dryRun)` walks unprefixed-slug pages capped by `GBRAIN_PHANTOM_REDIRECT_LIMIT` (default 50). The pass returns `touched_canonicals` — canonical slugs whose disk fence was merged with phantom rows; `runExtractFacts` UNIONs them into the main reconcile slug set so canonical's DB facts derive from the merged fence in the same cycle (round-14 scenario-B fix: phantom had only-on-disk fence, no DB facts). `ExtractFactsResult` gains six phantom fields: `phantomsScanned`, `phantomsRedirected`, `phantomsAmbiguous`, `phantomsSkippedDrift`, `phantomsLockBusy`, `phantomsMorePending`. Three of those bubble to `CycleReport.totals` (`phantoms_redirected`, `phantoms_ambiguous`, `phantoms_skipped_drift`). - `src/core/entities/resolve.ts` (v0.30+, extended v0.35.6.0) — Free-form entity name → canonical slug resolution. `resolveEntitySlug(engine, source_id, raw)`: exact slug → fuzzy (pg_trgm @ 0.4 threshold) → bare-name prefix expansion (`people/<token>-%` then `companies/<token>-%` using correlated-subquery `connection_count` for tiebreaker) → deterministic `slugify` fallback. **v0.35.6.0** exports two new helpers for the phantom-redirect pass: `resolvePhantomCanonical(engine, sourceId, phantomSlug)` — variant that SKIPS the exact-slug step (codex #1: phantom slug `'alice'` exact-matches itself, would make the redirect handler a no-op); returns the canonical only when result is non-null AND contains `/`. `findPrefixCandidates(engine, sourceId, token)` — standalone SQL query returning ALL candidates across `PREFIX_EXPANSION_DIRS` (currently hardcoded `['people', 'companies']`) using `slug LIKE ANY($N::text[])` over patterns `dir/token` + `dir/token-%`; cap of 10 ordered by `connection_count DESC, slug ASC`. NOT a wrapper around `tryPrefixExpansion` because that path returns per-dir top-1 and suppresses ambiguity by design (codex #11).
Pinned by `test/phantom-redirect.test.ts` resolvePhantomCanonical describe (3 cases) + findPrefixCandidates describe (6 cases including multi-dir ambiguity and the `people/aliceberg`-doesn't-match-`alice` false-positive guard). diff --git a/VERSION b/VERSION index fc2e80cfb..d25dbca5c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.41.3.0 \ No newline at end of file +0.41.5.0 \ No newline at end of file diff --git a/llms-full.txt b/llms-full.txt index 8bb74e65f..512f94482 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -387,7 +387,7 @@ strict behavior when unset. - `src/core/import-checkpoint.ts` (v0.34.2.0) — `loadCheckpoint(brainDir)`, `saveCheckpoint(brainDir, completed)`, `resumeFilter(files, completed, brainDir)`, `clearCheckpoint()`, plus the `ImportCheckpoint` type. Path-set checkpoint format (`{schema_version, brainDir, completed: string[]}`) replaces the v0.33.x positional `{processedIndex: N}` format. Atomic write via `.tmp` + `rename()` so a mid-write crash never leaves a partial JSON. `loadCheckpoint` returns `null` on: missing file, malformed JSON, brainDir mismatch (you ran import against a different brain), and the old positional format (logged to stderr before being discarded). `resumeFilter` returns `{toProcess, skippedCount}` — pure, no I/O, deterministic. `clearCheckpoint` is no-op-on-missing for clean-exit cleanup. Honors `GBRAIN_HOME` via `gbrainPath()` so test isolation via `withEnv({GBRAIN_HOME: tmpdir})` works without monkey-patching the fs layer. Best-effort persistence — `saveCheckpoint` logs warnings on write errors but never throws, so import keeps making progress even if disk is full. - `src/core/sort-newest-first.ts` (v0.34.2.0) — single source of truth for the descending-lex sort that `gbrain import` and `gbrain sync` both apply. Mutates in place (Array.prototype.sort semantics), returns the same array reference for fluent chaining. Empty/single-element inputs short-circuit. 
Future ordering changes flip one line in this helper instead of touching two CLI commands. Pinned by `test/sort-newest-first.test.ts` (5 hermetic cases: descending order, mixed prefixes, empty input, single-element input, in-place-mutation contract). - `src/core/cycle.ts` — v0.17 brain maintenance cycle primitive (extended to **9 phases in v0.29**). `runCycle(engine: BrainEngine | null, opts: CycleOpts): Promise` composes phases in semantically-driven order: **lint → backlinks → sync → synthesize → extract → patterns → recompute_emotional_weight → embed → orphans**. v0.29 adds the `recompute_emotional_weight` phase between patterns and embed; it sees the union of `syncPagesAffected` + `synthesizeWrittenSlugs` for incremental mode, or all pages when neither anchor is set (full backfill via `gbrain dream --phase recompute_emotional_weight`). v0.29 also extends `CycleReport.totals` with `pages_emotional_weight_recomputed` (additive, schema_version stays "1"). v0.23's `synthesize` phase runs after sync (cross-references see fresh brain) and before extract (auto-link materializes its writes); `patterns` runs after extract so it reads a fresh graph (codex finding #7 — subagent put_page sets `ctx.remote=true` and skips auto-link/timeline by default; extract is the canonical materialization). Three callers: `gbrain dream` CLI, `gbrain autopilot` daemon's inline path, and the Minions `autopilot-cycle` handler. Coordination via `gbrain_cycle_locks` DB table + `~/.gbrain/cycle.lock` file lock with PID-liveness for PGLite. `CycleReport.schema_version: "1"` is stable; totals additively grew in v0.23 (`transcripts_processed`, `synth_pages_written`, `patterns_written`). `yieldBetweenPhases` runs between phases. **v0.23 added `yieldDuringPhase`** for in-phase keepalive — synthesize/patterns call it during long waits to renew the cycle-lock TTL. Engine nullable; lock-skip on read-only phase selections. 
v0.22.1 (#403): `CycleOpts.signal?: AbortSignal` propagates the worker's abort signal; `checkAborted()` fires between every phase. v0.22.1 (#417): `runPhaseSync` returns `pagesAffected` via `SyncPhaseResult`; `runCycle` captures it and threads to `runPhaseExtract` as the 4th arg. v0.22.1 (Codex F2): `runPhaseSync` takes `willRunExtractPhase: boolean` and sets `noExtract: phases.includes('extract')` so `gbrain dream --phase sync` doesn't silently lose extraction. v0.22.5 (#475): `resolveSourceForDir(engine, brainDir)` threads `sourceId` to `performSync()` so sync reads the per-source `sources.last_commit` anchor instead of the drift-prone global `config.sync.last_commit` key. -- `src/core/cycle/synthesize.ts` (v0.23) — Synthesize phase: conversation-transcript-to-brain pipeline. Reads from `dream.synthesize.session_corpus_dir`, runs cheap Haiku verdict (cached in `dream_verdicts`), then fans out one Sonnet subagent per worth-processing transcript with `allowed_slug_prefixes` (sourced from `skills/_brain-filing-rules.json` `dream_synthesize_paths.globs`). Orchestrator collects slugs from `subagent_tool_executions` (NOT `pages.updated_at` — codex finding #2) and reverse-renders DB → markdown via `serializeMarkdown`. Cooldown via `dream.synthesize.last_completion_ts`, written ONLY on success. Idempotency key `dream:synth::`. Auto-commit deferred to v1.1 (codex #5). `--dry-run` runs Haiku, skips Sonnet (codex #8). Subagent never gets fs-write access. **v0.23.2:** `renderPageToMarkdown` (now exported) stamps `dream_generated: true` and `dream_cycle_date` into every reverse-write's frontmatter; `writeSummaryPage` does the same on the dream-cycle summary index. The marker is the explicit identity surface checked by `isDreamOutput` in `transcript-discovery.ts` — replaces the v0.23.1 content-prefix heuristic that could miss real output (`serializeMarkdown` doesn't embed slugs in body) and false-positive on user transcripts citing brain pages. 
`judgeSignificance` and `JudgeClient` are exported; `judgeSignificance` accepts a `verdictModel` parameter (default `claude-haiku-4-5-20251001`) loaded from `dream.synthesize.verdict_model` via `loadSynthConfig`. **v0.30.2:** model-aware chunker `splitTranscriptByBudget(content, contentHash, maxChars)` splits oversized transcripts at paragraph boundaries (`## Topic:` → `---` → `\n` ladder) using a deterministic offset seeded from the first 32 bits of `contentHash` so retries chunk identically. Per-chunk char budget computed from `MODEL_CONTEXT_TOKENS[resolvedModel] × 0.9 × 3.5 chars/token`; non-Anthropic ids fall back to a 180K-token safe default with a once-per-process stderr warning. Operator overrides: `dream.synthesize.max_prompt_tokens` (floor 100K, wins when set) and `dream.synthesize.max_chunks_per_transcript` (default 24). Per-chunk idempotency keys `dream:synth:::cof`; single-chunk transcripts preserve the legacy `dream:synth::` key byte-for-byte (D8 lookup), so existing brains skip with `already_synthesized_legacy_single_chunk` instead of re-spending Sonnet on upgrade. `collectChildPutPageSlugs` raw-fetches every (job_id, slug) pair (not `SELECT DISTINCT`) and rewrites bare-hash6 slugs to `-c` for chunked children (D6 — orchestrator-side, zero Sonnet trust). Cap-hit skips don't write to `dream_verdicts`, so raising the cap on next run re-attempts cleanly. D7 scope: bounds INITIAL prompt size only; tool-loop turn-N accumulation is caught by the v0.30.2 terminal-error classification in `subagent.ts`, not bounded ahead of time. **v0.41.3.0:** narrow `anthropic:` prefix fix at the queue.add boundary (lines 395-404). `resolveModel` returns bare ids from `TIER_DEFAULTS`/`DEFAULT_ALIASES` (e.g. `claude-sonnet-4-6`); the subagent validator requires `provider:model` form and was rejecting with `unknown provider`, dropping synthesize to `status: fail` with `SYNTH_PHASE_FAIL`. 
Conditional prefix at the call site (only when no colon AND starts with `claude-`) avoids changing the shared constants which would ripple across every `resolveModel` caller. +- `src/core/cycle/synthesize.ts` (v0.23) — Synthesize phase: conversation-transcript-to-brain pipeline. Reads from `dream.synthesize.session_corpus_dir`, runs cheap Haiku verdict (cached in `dream_verdicts`), then fans out one Sonnet subagent per worth-processing transcript with `allowed_slug_prefixes` (sourced from `skills/_brain-filing-rules.json` `dream_synthesize_paths.globs`). Orchestrator collects slugs from `subagent_tool_executions` (NOT `pages.updated_at` — codex finding #2) and reverse-renders DB → markdown via `serializeMarkdown`. Cooldown via `dream.synthesize.last_completion_ts`, written ONLY on success. Idempotency key `dream:synth::`. Auto-commit deferred to v1.1 (codex #5). `--dry-run` runs Haiku, skips Sonnet (codex #8). Subagent never gets fs-write access. **v0.23.2:** `renderPageToMarkdown` (now exported) stamps `dream_generated: true` and `dream_cycle_date` into every reverse-write's frontmatter; `writeSummaryPage` does the same on the dream-cycle summary index. The marker is the explicit identity surface checked by `isDreamOutput` in `transcript-discovery.ts` — replaces the v0.23.1 content-prefix heuristic that could miss real output (`serializeMarkdown` doesn't embed slugs in body) and false-positive on user transcripts citing brain pages. `judgeSignificance` and `JudgeClient` are exported; `judgeSignificance` accepts a `verdictModel` parameter (default `claude-haiku-4-5-20251001`) loaded from `dream.synthesize.verdict_model` via `loadSynthConfig`. **v0.30.2:** model-aware chunker `splitTranscriptByBudget(content, contentHash, maxChars)` splits oversized transcripts at paragraph boundaries (`## Topic:` → `---` → `\n` ladder) using a deterministic offset seeded from the first 32 bits of `contentHash` so retries chunk identically. 
Per-chunk char budget computed from `MODEL_CONTEXT_TOKENS[resolvedModel] × 0.9 × 3.5 chars/token`; non-Anthropic ids fall back to a 180K-token safe default with a once-per-process stderr warning. Operator overrides: `dream.synthesize.max_prompt_tokens` (floor 100K, wins when set) and `dream.synthesize.max_chunks_per_transcript` (default 24). Per-chunk idempotency keys `dream:synth:::cof`; single-chunk transcripts preserve the legacy `dream:synth::` key byte-for-byte (D8 lookup), so existing brains skip with `already_synthesized_legacy_single_chunk` instead of re-spending Sonnet on upgrade. `collectChildPutPageSlugs` raw-fetches every (job_id, slug) pair (not `SELECT DISTINCT`) and rewrites bare-hash6 slugs to `-c` for chunked children (D6 — orchestrator-side, zero Sonnet trust). Cap-hit skips don't write to `dream_verdicts`, so raising the cap on next run re-attempts cleanly. D7 scope: bounds INITIAL prompt size only; tool-loop turn-N accumulation is caught by the v0.30.2 terminal-error classification in `subagent.ts`, not bounded ahead of time. **v0.41.5.0:** narrow `anthropic:` prefix fix at the queue.add boundary (lines 395-404). `resolveModel` returns bare ids from `TIER_DEFAULTS`/`DEFAULT_ALIASES` (e.g. `claude-sonnet-4-6`); the subagent validator requires `provider:model` form and was rejecting with `unknown provider`, dropping synthesize to `status: fail` with `SYNTH_PHASE_FAIL`. Conditional prefix at the call site (only when no colon AND starts with `claude-`) avoids changing the shared constants which would ripple across every `resolveModel` caller. - `src/core/cycle/patterns.ts` (v0.23) — Patterns phase: cross-session theme detection over reflections within `dream.patterns.lookback_days` (default 30). Names a pattern only when ≥`dream.patterns.min_evidence` (default 3) reflections support it. Single Sonnet subagent; same allow-list path as synthesize. Runs AFTER `extract` so the graph is fresh. 
- `src/core/cycle/extract-facts.ts` (v0.32.2, extended v0.35.6.0) — extract_facts cycle phase. v0.32.2 contract: fence is canonical; per-page wipe (`deleteFactsForPage`) + reinsert from `parseFactsFence` + `extractFactsFromFenceText` + `engine.insertFacts`. Empty-fence guard refuses when v0.31 legacy rows (`row_num IS NULL AND entity_slug IS NOT NULL`) pend the v0_32_2 backfill (status: warn, hint: `gbrain apply-migrations --yes`). **v0.35.6.0** adds a phantom-redirect pre-pass that runs AFTER the legacy-row guard, BEFORE the main reconcile loop. When `opts.brainDir` is set, `runPhantomRedirectPass(engine, brainDir, sourceId, dryRun)` walks unprefixed-slug pages capped by `GBRAIN_PHANTOM_REDIRECT_LIMIT` (default 50). The pass returns `touched_canonicals` — canonical slugs whose disk fence was merged with phantom rows; `runExtractFacts` UNIONs them into the main reconcile slug set so canonical's DB facts derive from the merged fence in the same cycle (round-14 scenario-B fix: phantom had only-on-disk fence, no DB facts). `ExtractFactsResult` gains six phantom fields: `phantomsScanned`, `phantomsRedirected`, `phantomsAmbiguous`, `phantomsSkippedDrift`, `phantomsLockBusy`, `phantomsMorePending`. Three of those bubble to `CycleReport.totals` (`phantoms_redirected`, `phantoms_ambiguous`, `phantoms_skipped_drift`). - `src/core/entities/resolve.ts` (v0.30+, extended v0.35.6.0) — Free-form entity name → canonical slug resolution. `resolveEntitySlug(engine, source_id, raw)`: exact slug → fuzzy (pg_trgm @ 0.4 threshold) → bare-name prefix expansion (`people/-%` then `companies/-%` using correlated-subquery `connection_count` for tiebreaker) → deterministic `slugify` fallback. 
**v0.35.6.0** exports two new helpers for the phantom-redirect pass: `resolvePhantomCanonical(engine, sourceId, phantomSlug)` — variant that SKIPS the exact-slug step (codex #1: phantom slug `'alice'` exact-matches itself, would make the redirect handler a no-op); returns the canonical only when result is non-null AND contains `/`. `findPrefixCandidates(engine, sourceId, token)` — standalone SQL query returning ALL candidates across `PREFIX_EXPANSION_DIRS` (currently hardcoded `['people', 'companies']`) using `slug LIKE ANY($N::text[])` over patterns `dir/token` + `dir/token-%`; cap of 10 ordered by `connection_count DESC, slug ASC`. NOT a wrapper around `tryPrefixExpansion` because that path returns per-dir top-1 and suppresses ambiguity by design (codex #11). Pinned by `test/phantom-redirect.test.ts` resolvePhantomCanonical describe (3 cases) + findPrefixCandidates describe (6 cases including multi-dir ambiguity and the `people/aliceberg`-doesn't-match-`alice` false-positive guard). diff --git a/package.json b/package.json index 92fa3466c..ebdf7e8b1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gbrain", - "version": "0.41.3.0", + "version": "0.41.5.0", "description": "Postgres-native personal knowledge brain with hybrid RAG search", "type": "module", "main": "src/core/index.ts", From 98222a08cb767c613a0a6c7b65d6f4e20be6e208 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 22:20:16 -0700 Subject: [PATCH 18/19] fix(cycle/synthesize): refuse empty brainDir + resolve relative paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-fix, runPhaseSynthesize accepted any brainDir string and passed it to writeReversePages which does join(brainDir, `${slug}.md`). When brainDir is '' or relative ('.' / './brain' / etc), join() produces a relative path that writeFileSync resolves against cwd. Result: every synthesize reverse-write spills into ${cwd}/companies/${slug}.md, ${cwd}/people/${slug}.md, etc. 
instead of the intended brainDir tempdir. Surfaced by the warm-narwhal wave when E2E test cleanup found orphan synthesize pages (companies/novamind.md, people/sarah-chen.md, meetings/2025-04-01-novamind-board-update.md) at the gbrain repo root from a runCycle({brainDir: '.'}) chain that ran during morning E2E execution. Fix at the function entry, single location, all callers protected: 1. Empty/whitespace brainDir → return failed(BRAINDIR_EMPTY) loud instead of silently resolving against cwd 2. Relative brainDir → resolve(opts.brainDir) before any read/write can use it. opts.brainDir mutated so writeReversePages, writeSummaryPage, and every join() downstream see the absolute path Regression test pins all 4 contracts: - empty string → fail(BRAINDIR_EMPTY) - whitespace-only → fail(BRAINDIR_EMPTY) - '.' → mutated to absolute on entry - already-absolute → unchanged Co-Authored-By: Claude Opus 4.7 --- src/core/cycle/synthesize.ts | 19 ++++- .../cycle-synthesize-braindir-resolve.test.ts | 79 +++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 test/cycle-synthesize-braindir-resolve.test.ts diff --git a/src/core/cycle/synthesize.ts b/src/core/cycle/synthesize.ts index e701bddc4..ea86f3959 100644 --- a/src/core/cycle/synthesize.ts +++ b/src/core/cycle/synthesize.ts @@ -28,7 +28,7 @@ import Anthropic from '@anthropic-ai/sdk'; import { readFileSync, existsSync, writeFileSync, mkdirSync } from 'node:fs'; -import { join, dirname } from 'node:path'; +import { join, dirname, isAbsolute, resolve } from 'node:path'; import type { BrainEngine } from '../engine.ts'; import type { PhaseResult, PhaseError } from '../cycle.ts'; import { MinionQueue } from '../minions/queue.ts'; @@ -239,6 +239,23 @@ export async function runPhaseSynthesize( opts: SynthesizePhaseOpts, ): Promise { const start = Date.now(); + // Normalize brainDir to an absolute path BEFORE any reverse-write. 
Without + // this, a relative or empty brainDir flows down to writeReversePages → + // `join(brainDir, '${slug}.md')` → relative path → resolves against cwd at + // writeFileSync time, spilling synthesize output into whatever directory + // the cycle ran from (e.g., `companies/novamind.md` at the repo root). + // Surfaced by the warm-narwhal wave when E2E test cleanup found orphan + // synthesize pages at repo root from a `runCycle({brainDir: '.'})` call + // chain. Return failed(BRAINDIR_EMPTY) on empty (silent cwd-resolution is worse than a loud + // failure); resolve if relative (`.` / `./brain` / `../sibling` all valid + // inputs but must canonicalize before the write). + if (!opts.brainDir || opts.brainDir.trim() === '') { + return failed(makeError('InternalError', 'BRAINDIR_EMPTY', + 'opts.brainDir is empty; refusing to run synthesize. Pass an absolute path.')); + } + if (!isAbsolute(opts.brainDir)) { + opts.brainDir = resolve(opts.brainDir); + } try { const config = await loadSynthConfig(engine); diff --git a/test/cycle-synthesize-braindir-resolve.test.ts b/test/cycle-synthesize-braindir-resolve.test.ts new file mode 100644 index 000000000..ac6ddf3d4 --- /dev/null +++ b/test/cycle-synthesize-braindir-resolve.test.ts @@ -0,0 +1,79 @@ +/** + * Regression: synthesize phase MUST NOT write reverse-pages through an empty or + * unresolved relative brainDir. Pre-fix, `runCycle({brainDir: '.'})` or any caller + * passing a relative path (or empty string) would silently let + * writeFileSync resolve against cwd, spilling synthesize output into + * `${cwd}/companies/novamind.md` etc. Surfaced by the warm-narwhal wave + * when E2E test cleanup found orphan synthesize pages at repo root. + * + * Two contracts pinned here: + * 1. Empty/whitespace-only brainDir → returns failed() with code + * `BRAINDIR_EMPTY` (loud, not silent cwd resolution). + * 2. Relative brainDir → resolved to absolute via path.resolve() before + * any reverse-write can use it. Verified by checking opts.brainDir + * after the call returns. 
+ * + * Doesn't drive Anthropic — synthesize hits the "not_configured" skip + * branch first (no corpus dir set), which is sufficient to exercise the + * brainDir gate at function entry. + */ + +import { describe, test, expect, beforeAll, afterAll } from 'bun:test'; +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join, isAbsolute } from 'node:path'; +import { PGLiteEngine } from '../src/core/pglite-engine.ts'; +import { runPhaseSynthesize } from '../src/core/cycle/synthesize.ts'; + +let engine: PGLiteEngine; +let tmpDir: string; + +beforeAll(async () => { + engine = new PGLiteEngine(); + await engine.connect({}); + await engine.initSchema(); + tmpDir = mkdtempSync(join(tmpdir(), 'synth-braindir-')); +}); + +afterAll(async () => { + await engine.disconnect(); + try { rmSync(tmpDir, { recursive: true, force: true }); } catch { /* */ } +}); + +describe('runPhaseSynthesize brainDir resolution (regression)', () => { + test('empty brainDir returns failed(BRAINDIR_EMPTY) instead of silently resolving against cwd', async () => { + const result = await runPhaseSynthesize(engine, { + brainDir: '', + dryRun: true, + }); + expect(result.status).toBe('fail'); + expect((result as { error?: { code?: string } }).error?.code).toBe('BRAINDIR_EMPTY'); + }); + + test('whitespace-only brainDir also fails BRAINDIR_EMPTY', async () => { + const result = await runPhaseSynthesize(engine, { + brainDir: ' ', + dryRun: true, + }); + expect(result.status).toBe('fail'); + expect((result as { error?: { code?: string } }).error?.code).toBe('BRAINDIR_EMPTY'); + }); + + test('relative brainDir gets resolved to absolute before any reverse-write', async () => { + const opts = { brainDir: '.', dryRun: true }; + // The phase will return early ('not_configured' — no corpus dir set on + // this fresh engine) but the normalization runs unconditionally at entry. 
+ await runPhaseSynthesize(engine, opts); + // After the call, opts.brainDir should be the resolved absolute path, + // proving the normalization fired. + expect(isAbsolute(opts.brainDir)).toBe(true); + expect(opts.brainDir).not.toBe('.'); + }); + + test('absolute brainDir is preserved unchanged', async () => { + const opts = { brainDir: tmpDir, dryRun: true }; + await runPhaseSynthesize(engine, opts); + // Already absolute → no mutation. + expect(opts.brainDir).toBe(tmpDir); + }); +}); From f4dc92b67a3235534b5b40cf74b639cb7a466c68 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 24 May 2026 22:20:23 -0700 Subject: [PATCH 19/19] fix(dream): resolve brainDir to absolute at CLI surface Defense-in-depth for the synthesize-braindir spillage bug class. The core fix lives in runPhaseSynthesize (commit 98222a08); this resolves brainDir one layer earlier so the entire 9-phase runCycle gets the absolute path, not just synthesize. Two paths in resolveBrainDir get path.resolve(): - explicit --dir argument (e.g., `gbrain dream --dir .`) - sync.repo_path config (in case it was ever stored relative) resolveBrainDir already checked existsSync; resolve() just canonicalizes before return. No behavior change for paths already absolute. Co-Authored-By: Claude Opus 4.7 --- src/commands/dream.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/commands/dream.ts b/src/commands/dream.ts index ab63457a6..a82e67ca4 100644 --- a/src/commands/dream.ts +++ b/src/commands/dream.ts @@ -31,6 +31,7 @@ import { type CycleReport, } from '../core/cycle.ts'; import { existsSync } from 'fs'; +import { resolve } from 'node:path'; interface DreamArgs { json: boolean; @@ -144,13 +145,15 @@ async function resolveBrainDir( console.error(`--dir path does not exist: ${explicit}`); process.exit(1); } - return explicit; + // Resolve to absolute so downstream writeFileSync(join(brainDir, slug)) + // can't silently land at cwd when explicit is `.` / `./brain` / etc. 
+ return resolve(explicit); } if (engine) { const configured = await engine.getConfig('sync.repo_path'); if (configured && existsSync(configured)) { - return configured; + return resolve(configured); } }