diff --git a/internal/query/duckdb.go b/internal/query/duckdb.go index d8ad07e3..a0666e5c 100644 --- a/internal/query/duckdb.go +++ b/internal/query/duckdb.go @@ -418,38 +418,6 @@ func escapeILIKE(s string) string { return s } -// startsWithWordChar reports whether s begins with a regex word character -// [a-zA-Z0-9_]. Used to decide whether \b is appropriate as a prefix. -func startsWithWordChar(s string) bool { - if len(s) == 0 { - return false - } - c := s[0] - return (c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9') || - c == '_' -} - -// escapeRegex escapes special regex characters for use in DuckDB's regexp_matches function -func escapeRegex(s string) string { - s = strings.ReplaceAll(s, "\\", "\\\\") - s = strings.ReplaceAll(s, ".", "\\.") - s = strings.ReplaceAll(s, "*", "\\*") - s = strings.ReplaceAll(s, "+", "\\+") - s = strings.ReplaceAll(s, "?", "\\?") - s = strings.ReplaceAll(s, "[", "\\[") - s = strings.ReplaceAll(s, "]", "\\]") - s = strings.ReplaceAll(s, "(", "\\(") - s = strings.ReplaceAll(s, ")", "\\)") - s = strings.ReplaceAll(s, "{", "\\{") - s = strings.ReplaceAll(s, "}", "\\}") - s = strings.ReplaceAll(s, "|", "\\|") - s = strings.ReplaceAll(s, "^", "\\^") - s = strings.ReplaceAll(s, "$", "\\$") - return s -} - // buildWhereClause builds WHERE conditions for Parquet queries. // Column references use msg. prefix to be explicit since aggregate queries join multiple CTEs. // buildAggregateSearchConditions builds SQL conditions for a search query in aggregate views. @@ -470,36 +438,47 @@ func (e *DuckDBEngine) buildAggregateSearchConditions(searchQuery string, keyCol // Text terms: always search subject + sender, plus the view's grouping // key columns when provided (e.g., label name in Labels view). - // Uses word-boundary regex (\b) for terms starting with word chars. - // Terms starting with non-word chars (e.g., +, @, #) skip \b - // since it requires a word/non-word transition that fails at - // string start or after whitespace. + // Uses ILIKE for performance on Parquet scans. for _, term := range q.TextTerms { - escaped := escapeRegex(term) - regexPattern := "(?i)" + escaped - if startsWithWordChar(term) { - regexPattern = "(?i)\\b" + escaped - } + termPattern := "%" + escapeILIKE(term) + "%" var parts []string - parts = append(parts, `regexp_matches(COALESCE(msg.subject, ''), ?)`) - args = append(args, regexPattern) - parts = append(parts, `regexp_matches(COALESCE(msg.snippet, ''), ?)`) - args = append(args, regexPattern) + parts = append(parts, `msg.subject ILIKE ? ESCAPE '\'`) + args = append(args, termPattern) + parts = append(parts, `COALESCE(msg.snippet, '') ILIKE ? ESCAPE '\'`) + args = append(args, termPattern) parts = append(parts, `EXISTS ( SELECT 1 FROM mr mr_search JOIN p p_search ON p_search.id = mr_search.participant_id WHERE mr_search.message_id = msg.id AND mr_search.recipient_type = 'from' - AND (regexp_matches(p_search.email_address, ?) OR regexp_matches(COALESCE(p_search.display_name, ''), ?)) + AND (p_search.email_address ILIKE ? ESCAPE '\' OR COALESCE(p_search.display_name, '') ILIKE ? ESCAPE '\') )`) - args = append(args, regexPattern, regexPattern) + args = append(args, termPattern, termPattern) for _, col := range keyColumns { - parts = append(parts, `regexp_matches(COALESCE(`+col+`, ''), ?)`) - args = append(args, regexPattern) + parts = append(parts, col+` ILIKE ? ESCAPE '\'`) + args = append(args, termPattern) } conditions = append(conditions, "("+strings.Join(parts, " OR ")+")") } + // Append non-text filters (from:, to:, subject:, label:, has:, dates, sizes). + nonTextConds, nonTextArgs := e.buildNonTextSearchConditions(q, keyColumns...) + conditions = append(conditions, nonTextConds...) + args = append(args, nonTextArgs...) + + return conditions, args +} + +// buildNonTextSearchConditions builds WHERE conditions for the non-text +// portion of a parsed search query (from:, to:, subject:, label:, has:, +// date/size filters). Extracted from buildAggregateSearchConditions so +// callers that handle text terms themselves (e.g. buildStatsSearchConditions) +// can append non-text filters without having to compute how many args +// the text-term portion produced. +func (e *DuckDBEngine) buildNonTextSearchConditions(q *search.Query, keyColumns ...string) ([]string, []interface{}) { + var conditions []string + var args []interface{} + // from: filter - match sender email for _, from := range q.FromAddrs { fromPattern := "%" + escapeILIKE(from) + "%" @@ -650,28 +629,15 @@ func (e *DuckDBEngine) buildStatsSearchConditions(searchQuery string, groupBy Vi } // Non-text filters (from:, to:, subject:, label:, etc.) are the same - // regardless of view — delegate to the standard builder with no key columns. - nonTextConds, nonTextArgs := e.buildAggregateSearchConditions(searchQuery) - // Remove text-term conditions from the standard builder output (they are - // the first len(q.TextTerms) entries). We already handled text terms above. - if len(q.TextTerms) > 0 && len(nonTextConds) > len(q.TextTerms) { - conditions = append(conditions, nonTextConds[len(q.TextTerms):]...) - args = append(args, nonTextArgs[countArgsForTextTerms(len(q.TextTerms)):]...) - } else if len(q.TextTerms) == 0 { - conditions = append(conditions, nonTextConds...) - args = append(args, nonTextArgs...) - } + // regardless of view — delegate to the non-text helper directly so we + // don't have to track how many args the text-term portion emits. + nonTextConds, nonTextArgs := e.buildNonTextSearchConditions(q) + conditions = append(conditions, nonTextConds...) + args = append(args, nonTextArgs...) return conditions, args } -// countArgsForTextTerms returns the number of args used by N text terms in -// buildAggregateSearchConditions with no keyColumns (4 args per term: -// subject + snippet + 2 sender). -func countArgsForTextTerms(n int) int { - return n * 4 -} - // keyColumns are passed through to buildAggregateSearchConditions to control // which columns text search terms filter on. func (e *DuckDBEngine) buildWhereClause(opts AggregateOptions, keyColumns ...string) (string, []interface{}) { @@ -2407,23 +2373,18 @@ func (e *DuckDBEngine) buildSearchConditions(q *search.Query, filter MessageFilt } // Text search terms - search subject, snippet, and from fields (fast path). - // Use word-boundary regex (\b) for terms starting with word chars. - // Terms starting with non-word chars skip \b (see startsWithWordChar). + // Uses ILIKE for performance on Parquet scans. if len(q.TextTerms) > 0 { for _, term := range q.TextTerms { - escaped := escapeRegex(term) - regexPattern := "(?i)" + escaped - if startsWithWordChar(term) { - regexPattern = "(?i)\\b" + escaped - } + termPattern := "%" + escapeILIKE(term) + "%" conditions = append(conditions, `( - regexp_matches(COALESCE(msg.subject, ''), ?) OR - regexp_matches(COALESCE(msg.snippet, ''), ?) OR - regexp_matches(COALESCE(ms.from_email, ds.from_email, ''), ?) OR - regexp_matches(COALESCE(ms.from_name, ds.from_name, ''), ?) OR - regexp_matches(COALESCE(ms.from_phone, ds.from_phone, ''), ?) + msg.subject ILIKE ? ESCAPE '\' OR + COALESCE(msg.snippet, '') ILIKE ? ESCAPE '\' OR + COALESCE(ms.from_email, ds.from_email, '') ILIKE ? ESCAPE '\' OR + COALESCE(ms.from_name, ds.from_name, '') ILIKE ? ESCAPE '\' OR + COALESCE(ms.from_phone, ds.from_phone, '') ILIKE ? ESCAPE '\' )`) - args = append(args, regexPattern, regexPattern, regexPattern, regexPattern, regexPattern) + args = append(args, termPattern, termPattern, termPattern, termPattern, termPattern) } } diff --git a/internal/query/duckdb_test.go b/internal/query/duckdb_test.go index 2616c233..f3e29f65 100644 --- a/internal/query/duckdb_test.go +++ b/internal/query/duckdb_test.go @@ -1839,7 +1839,7 @@ func TestBuildWhereClause_SearchOperators(t *testing.T) { { name: "text terms", searchQuery: "hello world", - wantClauses: []string{"regexp_matches(COALESCE(msg.subject"}, + wantClauses: []string{"msg.subject ILIKE"}, }, { name: "from operator", @@ -1899,39 +1899,36 @@ func TestBuildWhereClause_EscapedArgs(t *testing.T) { opts := AggregateOptions{SearchQuery: "100%_off"} _, args := engine.buildWhereClause(opts) - // With regex search, % and _ are not special so appear unescaped. - // Term starts with word char '1', so \b prefix is applied. + // With ILIKE search, % and _ are escaped with backslash. found := false for _, arg := range args { - if s, ok := arg.(string); ok && strings.Contains(s, "\\b100%_off") { + if s, ok := arg.(string); ok && strings.Contains(s, "100\\%\\_off") { found = true break } } if !found { - t.Errorf("expected regex pattern containing '\\b100%%_off' in args, got: %v", args) + t.Errorf("expected ILIKE pattern containing '100\\%%\\_off' in args, got: %v", args) } } -// TestBuildWhereClause_WordBoundaryPrefix verifies that \b is only applied -// when the search term starts with a word character [a-zA-Z0-9_]. Terms -// starting with non-word characters (+, @, #, etc.) must not get \b, as it -// requires a word/non-word transition that fails at string start. -func TestBuildWhereClause_WordBoundaryPrefix(t *testing.T) { +// TestBuildWhereClause_ILIKEEscape verifies that search terms are properly +// escaped for ILIKE patterns in aggregate search conditions. +func TestBuildWhereClause_ILIKEEscape(t *testing.T) { engine := &DuckDBEngine{} tests := []struct { - name string - term string - wantPrefix string // expected prefix in the regex arg + name string + term string + wantArg string // expected ILIKE arg pattern }{ - {"word_char_letter", "hello", "(?i)\\bhello"}, - {"word_char_digit", "123", "(?i)\\b123"}, - {"word_char_underscore", "_test", "(?i)\\b_test"}, - {"non_word_plus", "+15551234567", "(?i)\\+15551234567"}, - {"non_word_at", "@gmail.com", "(?i)@gmail\\.com"}, - {"non_word_hash", "#bug", "(?i)#bug"}, - {"non_word_paren", "(test)", "(?i)\\(test\\)"}, + {"word_char_letter", "hello", "%hello%"}, + {"word_char_digit", "123", "%123%"}, + {"word_char_underscore", "_test", "%\\_test%"}, + {"non_word_plus", "+15551234567", "%+15551234567%"}, + {"non_word_at", "@gmail.com", "%@gmail.com%"}, + {"non_word_hash", "#bug", "%#bug%"}, + {"wildcard_percent", "100%off", "%100\\%off%"}, } for _, tc := range tests { @@ -1941,14 +1938,14 @@ func TestBuildWhereClause_WordBoundaryPrefix(t *testing.T) { found := false for _, arg := range args { - if s, ok := arg.(string); ok && s == tc.wantPrefix { + if s, ok := arg.(string); ok && s == tc.wantArg { found = true break } } if !found { t.Errorf("term %q: expected arg %q, got %v", - tc.term, tc.wantPrefix, args) + tc.term, tc.wantArg, args) } }) } @@ -2049,7 +2046,7 @@ func TestAggregateByLabel_WithLabelSearch(t *testing.T) { } // TestBuildSearchConditions_EscapedWildcards verifies that buildSearchConditions -// escapes wildcards: regex for TextTerms, ILIKE ESCAPE for operators. +// escapes wildcards: ILIKE ESCAPE for TextTerms and operators. func TestBuildSearchConditions_EscapedWildcards(t *testing.T) { engine := &DuckDBEngine{} @@ -2064,8 +2061,8 @@ func TestBuildSearchConditions_EscapedWildcards(t *testing.T) { query: &search.Query{ TextTerms: []string{"100%_off"}, }, - wantClauses: []string{"regexp_matches(COALESCE(msg.subject"}, - wantInArgs: []string{"\\b100%_off"}, + wantClauses: []string{"msg.subject ILIKE"}, + wantInArgs: []string{"100\\%\\_off"}, }, { name: "from: with wildcards", @@ -2122,6 +2119,53 @@ func TestBuildSearchConditions_EscapedWildcards(t *testing.T) { } } +// TestBuildSearchConditions_UsesILIKENotRegex verifies that the fast search +// path uses ILIKE (fast on Parquet scans) instead of regexp_matches (slow). +func TestBuildSearchConditions_UsesILIKENotRegex(t *testing.T) { + engine := &DuckDBEngine{} + + q := &search.Query{TextTerms: []string{"hello"}} + conditions, args := engine.buildSearchConditions(q, MessageFilter{}) + where := strings.Join(conditions, " AND ") + + // Must use ILIKE, not regexp_matches + if strings.Contains(where, "regexp_matches") { + t.Errorf("fast search path should use ILIKE, not regexp_matches\ngot: %s", where) + } + if !strings.Contains(where, "ILIKE") { + t.Errorf("fast search path should contain ILIKE\ngot: %s", where) + } + + // Args should be ILIKE patterns (%%term%%), not regex patterns + for _, arg := range args { + if s, ok := arg.(string); ok && strings.Contains(s, "(?i)") { + t.Errorf("fast search args should not contain regex patterns, got: %q", s) + } + } +} + +// TestBuildAggregateSearchConditions_UsesILIKENotRegex verifies that the +// aggregate search path also uses ILIKE instead of regexp_matches. +func TestBuildAggregateSearchConditions_UsesILIKENotRegex(t *testing.T) { + engine := &DuckDBEngine{} + + conditions, args := engine.buildAggregateSearchConditions("hello world") + where := strings.Join(conditions, " AND ") + + if strings.Contains(where, "regexp_matches") { + t.Errorf("aggregate search should use ILIKE, not regexp_matches\ngot: %s", where) + } + if !strings.Contains(where, "ILIKE") { + t.Errorf("aggregate search should contain ILIKE\ngot: %s", where) + } + + for _, arg := range args { + if s, ok := arg.(string); ok && strings.Contains(s, "(?i)") { + t.Errorf("aggregate search args should not contain regex patterns, got: %q", s) + } + } +} + // ============================================================================= // RecipientName tests // ============================================================================= @@ -2347,6 +2391,89 @@ func TestDuckDBEngine_GetTotalStats_GroupByDefault(t *testing.T) { } } +// TestBuildStatsSearchConditions_PlaceholderArgCount is a regression test for +// the mismatch between the number of "?" placeholders emitted and the number +// of args appended when a stats search mixes text terms with non-text +// operators (e.g. "hello from:bob@example.com"). Previously, +// buildStatsSearchConditions called buildAggregateSearchConditions and sliced +// off the text-term prefix using a hand-tracked arg count — any drift between +// the count and the actual emit rate would cause aggregate stats queries to +// fail with unmatched placeholders. The helper now delegates directly to +// buildNonTextSearchConditions so there is nothing to slice; this test locks +// the invariant in place by counting "?" placeholders. +func TestBuildStatsSearchConditions_PlaceholderArgCount(t *testing.T) { + engine := &DuckDBEngine{} + + cases := []struct { + name string + query string + groupBy ViewType + }{ + {"text only (default)", "hello", ViewSenders}, + {"text only (recipients)", "hello", ViewRecipients}, + {"text only (labels)", "hello", ViewLabels}, + {"text + from (default)", "hello from:bob@example.com", ViewSenders}, + {"text + from (recipients)", "hello from:bob@example.com", ViewRecipients}, + {"text + from (labels)", "hello from:bob@example.com", ViewLabels}, + {"text + from + to", "hello from:a@b.com to:c@d.com", ViewSenders}, + {"text + subject + label", "hello subject:report label:work", ViewSenders}, + {"multi-text + from", "hello world from:bob@example.com", ViewSenders}, + {"non-text only", "from:bob@example.com", ViewSenders}, + {"empty query", "", ViewSenders}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + conditions, args := engine.buildStatsSearchConditions(tc.query, tc.groupBy) + where := strings.Join(conditions, " AND ") + placeholders := strings.Count(where, "?") + if placeholders != len(args) { + t.Errorf("placeholder/arg mismatch for query %q (groupBy=%v): %d placeholders vs %d args\nconditions: %s\nargs: %v", + tc.query, tc.groupBy, placeholders, len(args), where, args) + } + }) + } +} + +// TestBuildAggregateSearchConditions_PlaceholderArgCount locks the same +// invariant on buildAggregateSearchConditions: the number of "?" placeholders +// in the emitted WHERE conditions must match the number of args, for any +// combination of text terms, non-text filters, and keyColumns. +func TestBuildAggregateSearchConditions_PlaceholderArgCount(t *testing.T) { + engine := &DuckDBEngine{} + + cases := []struct { + name string + query string + keyColumns []string + }{ + {"text only, no keyColumns", "hello", nil}, + {"text only, one keyColumn", "hello", []string{"p.email_address"}}, + {"text only, label keyColumn", "hello", []string{"lbl.name"}}, + {"text + from", "hello from:bob@example.com", nil}, + {"text + from + to + subject", "hello from:a@b.com to:c@d.com subject:report", nil}, + {"text + label (label view)", "hello label:work", []string{"lbl.name"}}, + {"text + label (non-label view)", "hello label:work", nil}, + {"multi-text + from + keyColumns", "foo bar from:x@y.com", []string{"p.email_address", "p.display_name"}}, + {"has:attachment", "has:attachment", nil}, + {"date filter", "after:2024-01-01 before:2024-12-31", nil}, + {"size filter", "larger:1000 smaller:5000", nil}, + {"empty query", "", nil}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + conditions, args := engine.buildAggregateSearchConditions(tc.query, tc.keyColumns...) + where := strings.Join(conditions, " AND ") + placeholders := strings.Count(where, "?") + if placeholders != len(args) { + t.Errorf("placeholder/arg mismatch for query %q (keyColumns=%v): %d placeholders vs %d args\nconditions: %s\nargs: %v", + tc.query, tc.keyColumns, placeholders, len(args), where, args) + } + }) + } +} + // ============================================================================= // Aggregate and SubAggregate Table-Driven Tests // These tests cover the refactored aggregation helpers and time granularity logic. diff --git a/internal/tui/model.go b/internal/tui/model.go index 7f1f7d4e..b67b3aa1 100644 --- a/internal/tui/model.go +++ b/internal/tui/model.go @@ -1187,11 +1187,12 @@ func (m Model) handleSearchDebounce(msg searchDebounceMsg) (tea.Model, tea.Cmd) m.searchFilter.WithAttachmentsOnly = m.filters.attachmentsOnly m.searchFilter.HideDeletedFromSource = m.filters.hideDeletedFromSource m.searchRequestID++ + m.loadRequestID++ // Invalidate any in-flight loadMessages to prevent overwriting search results if msg.query == "" { // Empty query: reload unfiltered messages - m.loadRequestID++ return m, tea.Batch(spinCmd, m.loadMessages()) } + m.messages = nil // Clear stale messages immediately so they don't show during search return m, tea.Batch(spinCmd, m.loadSearch(msg.query)) } // Aggregate views: reload aggregates with search filter diff --git a/internal/tui/search_test.go b/internal/tui/search_test.go index 5fc4b4cf..f1148c00 100644 --- a/internal/tui/search_test.go +++ b/internal/tui/search_test.go @@ -1505,5 +1505,86 @@ func TestZeroSearchResultsRendersSearchBar(t *testing.T) { }) } +// TestStaleLoadMessagesDoesNotOverwriteSearch verifies that an in-flight +// loadMessages response (e.g., from pressing "a" or "v" before searching) +// does not overwrite search results. This was a race condition: the TUI would +// show stale "all messages" results instead of the filtered search results. +func TestStaleLoadMessagesDoesNotOverwriteSearch(t *testing.T) { + allMessages := makeMessages(50) + searchResults := []query.MessageSummary{ + {ID: 901, Subject: "Search Hit 1"}, + {ID: 902, Subject: "Search Hit 2"}, + } + + model := NewBuilder(). + WithMessages(allMessages...). + WithLevel(levelMessageList). + WithPageSize(20).WithSize(100, 30). + Build() + + // Simulate: user presses "v" (sort toggle) which fires loadMessages + // with loadRequestID=N. + staleLoadRequestID := model.loadRequestID + + // Simulate: user then activates search, which should increment loadRequestID + // to invalidate the pending loadMessages. + model.searchQuery = "test" + model.inlineSearchActive = true + model.searchRequestID++ + model.loadRequestID++ // This is the fix under test + + // Simulate: search results arrive and are applied. + model = applySearchResults(t, model, model.searchRequestID, searchResults, 2) + if len(model.messages) != 2 { + t.Fatalf("expected 2 search results, got %d", len(model.messages)) + } + + // Simulate: the stale loadMessages response arrives with the OLD requestID. + staleMsg := messagesLoadedMsg{ + messages: allMessages, + requestID: staleLoadRequestID, + } + model, _ = sendMsg(t, model, staleMsg) + + // The stale response must be ignored — search results should remain. + if len(model.messages) != 2 { + t.Errorf("stale loadMessages overwrote search results: expected 2 messages, got %d", len(model.messages)) + } + if model.messages[0].Subject != "Search Hit 1" { + t.Errorf("expected first message 'Search Hit 1', got %q", model.messages[0].Subject) + } +} + +// TestSearchClearsStaleMessages verifies that starting an inline search +// immediately clears the message list so stale "all messages" results +// are never visible during the search transition. +func TestSearchClearsStaleMessages(t *testing.T) { + allMessages := makeMessages(50) + + model := NewBuilder(). + WithMessages(allMessages...). + WithLevel(levelMessageList). + WithPageSize(20).WithSize(100, 30). + Build() + + if len(model.messages) != 50 { + t.Fatalf("expected 50 pre-loaded messages, got %d", len(model.messages)) + } + + // Simulate debounce firing with a search query. + debounceMsg := searchDebounceMsg{ + query: "test", + debounceID: model.inlineSearchDebounce, + } + model.inlineSearchActive = true + model, _ = sendMsg(t, model, debounceMsg) + + // Messages should be nil immediately — not showing stale results while + // waiting for the async search to complete. + if model.messages != nil { + t.Errorf("expected messages to be nil after search starts, got %d messages", len(model.messages)) + } +} + // TestHighlightedColumnsAligned verifies that highlighting search terms in // aggregate rows doesn't break column alignment.