BrunooMoniz · BrunooMoniz · Jun 19, 2026 · Jun 19, 2026
diff --git a/src/rag/__tests__/storage.test.ts b/src/rag/__tests__/storage.test.ts
@@ -42,11 +42,12 @@ after(async () => {
   await closePool();
 });
 
-test("searchKeyword uses the SAME ts config as the indexed tsv column (portuguese_unaccent)", async () => {
-  // The tsv GENERATED column is to_tsvector('portuguese_unaccent', text)
-  // (migrations 0001/0002). The query MUST use the same config; using
-  // 'portuguese' silently loses accent-insensitive matching on the query side.
-  // Runs WITHOUT a DB by capturing the SQL via an injected pool.
+test("searchKeyword queries with the eval-winning 'portuguese' config (NOT portuguese_unaccent)", async () => {
+  // Counter-intuitive but eval-backed: the query config is 'portuguese', which
+  // does NOT match the tsv column's 'portuguese_unaccent'. The matched config was
+  // tried and REGRESSED the golden set (Recall@5 0.95 -> 0.83, MRR 0.73 -> 0.60).
+  // This test pins the decision so nobody "fixes" the mismatch without re-running
+  // `npm run eval`. Runs WITHOUT a DB by capturing the SQL via an injected pool.
   let captured = "";
   __setPoolForTest({
     query: async (sql: unknown) => {
@@ -59,8 +60,8 @@ test("searchKeyword uses the SAME ts config as the indexed tsv column (portugues
   } finally {
     __setPoolForTest(null);
   }
-  assert.match(captured, /plainto_tsquery\('portuguese_unaccent',\s*\$1\)/);
-  assert.doesNotMatch(captured, /plainto_tsquery\('portuguese',\s*\$1\)/);
+  assert.match(captured, /plainto_tsquery\('portuguese',\s*\$1\)/);
+  assert.doesNotMatch(captured, /plainto_tsquery\('portuguese_unaccent',\s*\$1\)/);
 });
 
 test("searchKeyword finds a doc by a real PT-BR term against the unaccent tsv (regression: ervilha)", async () => {

diff --git a/src/rag/storage.ts b/src/rag/storage.ts
@@ -567,17 +567,22 @@ export async function searchKeyword(
 ): Promise<{ chunk: Chunk; rank: number; score: number }[]> {
   const p = getPool();
   const filterClauses = buildFilterClauses(filters, 3);
-  // Query config MUST match the tsv GENERATED column config (portuguese_unaccent,
-  // migrations 0001/0002), otherwise accent-insensitive matching is lost here.
+  // Query config is 'portuguese' on purpose. It does NOT match the tsv column's
+  // 'portuguese_unaccent' config — but `npm run eval` proved the matched config
+  // REGRESSES ranking on the golden set (Recall@5 0.95 -> 0.83, MRR 0.73 -> 0.60):
+  // unaccenting the query broadens the keyword leg, and RRF+rerank then rank worse
+  // on this corpus. Accent-insensitive keyword search is a known limitation to
+  // revisit only WITH an eval that shows a net gain. Do not "fix" this to
+  // portuguese_unaccent without re-running the eval. (Diagnosis: June 2026.)
   const sql = `
     SELECT
       id, source_type, source_id, workspace, db_name, parent_url, chunk_index,
       text, metadata, source_updated,
-      ts_rank(tsv, plainto_tsquery('portuguese_unaccent', $1)) AS score
+      ts_rank(tsv, plainto_tsquery('portuguese', $1)) AS score
     FROM brain_chunks
-    WHERE tsv @@ plainto_tsquery('portuguese_unaccent', $1)
+    WHERE tsv @@ plainto_tsquery('portuguese', $1)
       ${filterClauses.sql}
-    ORDER BY ts_rank(tsv, plainto_tsquery('portuguese_unaccent', $1)) DESC
+    ORDER BY ts_rank(tsv, plainto_tsquery('portuguese', $1)) DESC
     LIMIT $2
   `;
   const { rows } = await p.query<QueryRow>(sql, [