BrunooMoniz · BrunooMoniz · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -23,6 +23,40 @@ jobs:
         # Unit tests run without a DB or API keys (DB/Voyage-dependent cases self-skip).
         run: npm test
 
+  # Runs the DB/FTS slice of the suite against a real Postgres (pgvector) so
+  # those cases actually execute instead of self-skipping (ghost coverage). It
+  # exercises the storage layer, incl. the portuguese_unaccent FTS path that the
+  # jun/2026 diagnosis found broken and unguarded. Not required yet; promote once stable.
+  db-test:
+    runs-on: ubuntu-latest
+    services:
+      postgres:
+        image: pgvector/pgvector:pg16
+        env:
+          POSTGRES_PASSWORD: postgres
+          POSTGRES_DB: zinom_test
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd "pg_isready -U postgres"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+    env:
+      POSTGRES_URL: postgres://postgres:postgres@localhost:5432/zinom_test
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "22"
+          cache: "npm"
+      - name: Install
+        run: npm ci
+      - name: Migrate (creates pgvector/unaccent + portuguese_unaccent config)
+        run: npm run migrate
+      - name: DB-backed RAG tests (storage + FTS)
+        run: npx tsx --test src/rag/__tests__/storage.test.ts
+
   web-build:
     runs-on: ubuntu-latest
     defaults:

diff --git a/scripts/migrations/0002_hnsw_unaccent_pgtrgm.sql b/scripts/migrations/0002_hnsw_unaccent_pgtrgm.sql
@@ -8,25 +8,31 @@ BEGIN;
 CREATE EXTENSION IF NOT EXISTS unaccent;
 CREATE EXTENSION IF NOT EXISTS pg_trgm;
 
--- 2. IMMUTABLE accent-insensitive TS config via dictionary mapping
+-- 2. Drop the generated tsv column + its index FIRST. On a fresh DB, 0001 already
+--    created BOTH the portuguese_unaccent config AND a tsv column that depends on
+--    it, so the DROP CONFIGURATION below would fail with a dependency error if the
+--    column still existed. Dropping it here makes a from-scratch migrate run work
+--    (prod already recorded 0002 in schema_migrations and never re-runs it).
+DROP INDEX IF EXISTS brain_chunks_tsv_idx;
+ALTER TABLE brain_chunks DROP COLUMN IF EXISTS tsv;
+
+-- 3. IMMUTABLE accent-insensitive TS config via dictionary mapping
 --    (NOT a direct unaccent() call, which is only STABLE).
 DROP TEXT SEARCH CONFIGURATION IF EXISTS portuguese_unaccent;
 CREATE TEXT SEARCH CONFIGURATION portuguese_unaccent ( COPY = portuguese );
 ALTER TEXT SEARCH CONFIGURATION portuguese_unaccent
   ALTER MAPPING FOR hword, hword_part, word
   WITH unaccent, portuguese_stem;
 
--- 3. Swap ivfflat -> HNSW (cosine)
+-- 4. Swap ivfflat -> HNSW (cosine)
 DROP INDEX IF EXISTS brain_chunks_embedding_idx;
 CREATE INDEX brain_chunks_embedding_idx
   ON brain_chunks USING hnsw (embedding vector_cosine_ops)
   WITH (m = 16, ef_construction = 200);
 
--- 4. Rebuild the generated tsv column to use portuguese_unaccent.
---    A generated column's expression CANNOT be altered in place, so drop &
---    re-add (this REWRITES the table; fine for the small corpus).
-DROP INDEX IF EXISTS brain_chunks_tsv_idx;
-ALTER TABLE brain_chunks DROP COLUMN IF EXISTS tsv;
+-- 5. Re-add the generated tsv column using portuguese_unaccent.
+--    A generated column's expression CANNOT be altered in place, so drop (above)
+--    & re-add (this REWRITES the table; fine for the small corpus).
 ALTER TABLE brain_chunks
   ADD COLUMN tsv tsvector
   GENERATED ALWAYS AS (to_tsvector('portuguese_unaccent', text)) STORED;

diff --git a/src/__tests__/mcp-account-config.test.ts b/src/__tests__/mcp-account-config.test.ts
@@ -14,3 +14,13 @@ test("owner e friend instructions trazem a regra Zinom-first e os links", () =>
     assert.match(s, /zinom_setup_tasks/);
   }
 });
+
+test("owner e friend instructions ensinam brain_get_document p/ conteúdo íntegro (anti-gambiarra)", () => {
+  // Jun/2026 diagnosis: both instruction surfaces must advertise the
+  // whole-document tool and forbid reassembling a document via brain_search;
+  // otherwise the model falls back to the hack of stitching chunks together.
+  for (const s of [OWNER_INSTRUCTIONS, FRIEND_INSTRUCTIONS]) {
+    assert.match(s, /brain_get_document/);
+    assert.match(s, /NUNCA reconstrua um documento somando resultados de brain_search/);
+  }
+});
diff --git a/src/rag/__tests__/storage.test.ts b/src/rag/__tests__/storage.test.ts
@@ -63,6 +63,37 @@ test("searchKeyword uses the SAME ts config as the indexed tsv column (portugues
   assert.doesNotMatch(captured, /plainto_tsquery\('portuguese',\s*\$1\)/);
 });
 
+test("searchKeyword finds a doc by a real PT-BR term against the unaccent tsv (regression: ervilha)", async () => {
+  // Regression for the bug that started the jun/2026 diagnosis: keyword search
+  // for 'ervilha' came back empty. Upserts one chunk whose text contains the word,
+  // then asserts searchKeyword returns it. Self-skips (log + return) without HAS_PG.
+  if (!HAS_PG) {
+    console.log("skipping: no POSTGRES_URL");
+    return;
+  }
+  const sid = `${TEST_PREFIX}-ervilha`;
+  await upsertChunks([
+    {
+      id: `${sid}-0`,
+      source_type: "notion",
+      source_id: sid,
+      workspace: "personal",
+      db_name: "Roteiros",
+      parent_url: "https://notion.so/ervilha",
+      chunk_index: 0,
+      text: "[Roteiros · personal] A Ervilha\n\nVocê conhece a história da Princesa e a Ervilha?",
+      embedding: fakeEmbed(7),
+      metadata: {},
+      source_updated: null,
+    },
+  ]);
+  const hits = await searchKeyword("ervilha", undefined, 10);
+  assert.ok(
+    hits.some((h) => h.chunk.source_id === sid),
+    "searchKeyword('ervilha') deveria recuperar o doc indexado",
+  );
+});
+
 test("getDocumentChunks pins account_id, orders by chunk_index, and maps rows", async () => {
   // Full-document fetch (powers brain_get_document). Multi-tenant: account_id is
   // ALWAYS in the WHERE (never from input). Captures SQL via an injected pool.