Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions src/core/store/sqlite.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { afterEach, describe, expect, it } from "vitest";
import { buildFtsQuery, _resetJiebaForTest, _setJiebaForTest } from "./sqlite.js";

/**
 * Tests for how `buildFtsQuery` treats the FTS5 operator words
 * (AND / OR / NOT / NEAR) when they show up in raw query text.
 *
 * Each case installs its own jieba override with `_setJiebaForTest`; the
 * override is reset after every case so cases cannot influence each other.
 */
describe("buildFtsQuery", () => {
  afterEach(() => {
    // Drop whatever tokenizer override the finished case installed.
    _resetJiebaForTest();
  });

  it("strips FTS5 operators before fallback tokenization", () => {
    // Override jieba with null; per the case title this targets the fallback tokenizer.
    _setJiebaForTest(null);

    // Operator words must vanish; the remaining words are quoted and OR-joined.
    expect(buildFtsQuery("alpha AND beta OR NOT gamma NEAR delta")).toBe(
      '"alpha" OR "beta" OR "gamma" OR "delta"',
    );
  });

  it("does not strip operator words embedded inside normal tokens", () => {
    _setJiebaForTest(null);

    // "android", "origin", "notable" and "nearby" merely contain
    // and / or / not / near as substrings, so they must survive intact.
    expect(buildFtsQuery("android origin notable nearby")).toBe(
      '"android" OR "origin" OR "notable" OR "nearby"',
    );
  });

  it("strips FTS5 operators before jieba tokenization", () => {
    // Records every string handed to the fake tokenizer.
    const seen: string[] = [];
    _setJiebaForTest({
      cutForSearch(text: string): string[] {
        seen.push(text);
        // Whitespace split is enough for this input; no real segmentation needed.
        return text.split(/\s+/);
      },
    });

    expect(buildFtsQuery("用户 AND TypeScript OR 记忆")).toBe(
      '"用户" OR "TypeScript" OR "记忆"',
    );

    // The tokenizer must be called exactly once, with the operators already
    // removed. Compare whitespace-separated words rather than the exact
    // string: buildFtsQuery replaces each operator word with a space while the
    // spaces around it stay in place, so the text jieba receives contains runs
    // of spaces rather than single ones.
    expect(seen.map((text) => text.trim().split(/\s+/))).toEqual([
      ["用户", "TypeScript", "记忆"],
    ]);
  });
});
7 changes: 5 additions & 2 deletions src/core/store/sqlite.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ const ZH_STOP_WORDS = new Set([
"吗", "吧", "呢", "啊", "呀", "哦", "嗯",
]);

/**
 * Matches the FTS5 operator words AND, OR, NOT and NEAR when they stand alone
 * as whole words (`\b` on both sides), so words that merely contain them,
 * such as "android" or "nearby", are left untouched.
 *
 * Flags: `g` so a single `replace` call handles every occurrence, `i` so the
 * lowercase spellings ("and", "or", ...) are matched as well.
 */
const FTS5_OPERATORS = new RegExp(
  `\\b(?:${["AND", "OR", "NOT", "NEAR"].join("|")})\\b`,
  "gi",
);

/**
* Build an FTS5 MATCH query from raw text.
*
Expand All @@ -196,14 +198,15 @@ const ZH_STOP_WORDS = new Set([
* "旅行计划 API" → '"旅行计划" OR "API"'
*/
export function buildFtsQuery(raw: string): string | null {
const cleaned = raw.replace(FTS5_OPERATORS, " ");
const jieba = getJieba();

let tokens: string[];
if (jieba) {
// jieba cutForSearch: splits long words further for better recall
// e.g. "北京烤鸭" → ["北京", "烤鸭", "北京烤鸭"]
tokens = jieba
.cutForSearch(raw, true)
.cutForSearch(cleaned, true)
.map((t) => t.trim())
.filter((t) => {
if (!t) return false;
Expand All @@ -218,7 +221,7 @@ export function buildFtsQuery(raw: string): string | null {
} else {
// Fallback: simple Unicode regex split
tokens =
raw
cleaned
.match(/[\p{L}\p{N}_]+/gu)
?.map((t) => t.trim())
.filter(Boolean) ?? [];
Expand Down