Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 41 additions & 80 deletions internal/query/duckdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,38 +418,6 @@ func escapeILIKE(s string) string {
return s
}

// startsWithWordChar reports whether the first byte of s is a regex word
// character, i.e. one of [a-zA-Z0-9_]. An empty string yields false.
// Callers use the result to decide whether a \b prefix is appropriate.
func startsWithWordChar(s string) bool {
	if s == "" {
		return false
	}
	// Only the leading byte is inspected, so any non-ASCII lead byte
	// falls through to the default case and is rejected.
	switch first := s[0]; {
	case 'a' <= first && first <= 'z':
		return true
	case 'A' <= first && first <= 'Z':
		return true
	case '0' <= first && first <= '9':
		return true
	default:
		return first == '_'
	}
}

// escapeRegex escapes special regex characters for use in DuckDB's
// regexp_matches function. Each of the metacharacters
// \ . * + ? [ ] ( ) { } | ^ $ is prefixed with a backslash; every other
// byte is copied through unchanged.
//
// The escaping is done in a single pass rather than one full-string
// replacement per metacharacter, and input containing no metacharacters is
// returned as-is without allocating.
func escapeRegex(s string) string {
	const special = `\.*+?[](){}|^$`

	// Fast path: nothing to escape.
	if !strings.ContainsAny(s, special) {
		return s
	}

	var b strings.Builder
	b.Grow(len(s) + 8)
	// Byte-wise iteration is safe: every metacharacter is ASCII, and bytes
	// of multi-byte UTF-8 sequences are >= 0x80 so they never match.
	for i := 0; i < len(s); i++ {
		c := s[i]
		if strings.IndexByte(special, c) >= 0 {
			b.WriteByte('\\')
		}
		b.WriteByte(c)
	}
	return b.String()
}

// buildWhereClause builds WHERE conditions for Parquet queries.
// Column references use msg. prefix to be explicit since aggregate queries join multiple CTEs.
// buildAggregateSearchConditions builds SQL conditions for a search query in aggregate views.
Expand All @@ -470,36 +438,47 @@ func (e *DuckDBEngine) buildAggregateSearchConditions(searchQuery string, keyCol

// Text terms: always search subject + sender, plus the view's grouping
// key columns when provided (e.g., label name in Labels view).
// Uses word-boundary regex (\b) for terms starting with word chars.
// Terms starting with non-word chars (e.g., +, @, #) skip \b
// since it requires a word/non-word transition that fails at
// string start or after whitespace.
// Uses ILIKE for performance on Parquet scans.
for _, term := range q.TextTerms {
escaped := escapeRegex(term)
regexPattern := "(?i)" + escaped
if startsWithWordChar(term) {
regexPattern = "(?i)\\b" + escaped
}
termPattern := "%" + escapeILIKE(term) + "%"
var parts []string
parts = append(parts, `regexp_matches(COALESCE(msg.subject, ''), ?)`)
args = append(args, regexPattern)
parts = append(parts, `regexp_matches(COALESCE(msg.snippet, ''), ?)`)
args = append(args, regexPattern)
parts = append(parts, `msg.subject ILIKE ? ESCAPE '\'`)
args = append(args, termPattern)
parts = append(parts, `COALESCE(msg.snippet, '') ILIKE ? ESCAPE '\'`)
args = append(args, termPattern)
parts = append(parts, `EXISTS (
SELECT 1 FROM mr mr_search
JOIN p p_search ON p_search.id = mr_search.participant_id
WHERE mr_search.message_id = msg.id
AND mr_search.recipient_type = 'from'
AND (regexp_matches(p_search.email_address, ?) OR regexp_matches(COALESCE(p_search.display_name, ''), ?))
AND (p_search.email_address ILIKE ? ESCAPE '\' OR COALESCE(p_search.display_name, '') ILIKE ? ESCAPE '\')
)`)
args = append(args, regexPattern, regexPattern)
args = append(args, termPattern, termPattern)
for _, col := range keyColumns {
parts = append(parts, `regexp_matches(COALESCE(`+col+`, ''), ?)`)
args = append(args, regexPattern)
parts = append(parts, col+` ILIKE ? ESCAPE '\'`)
args = append(args, termPattern)
}
conditions = append(conditions, "("+strings.Join(parts, " OR ")+")")
}

// Append non-text filters (from:, to:, subject:, label:, has:, dates, sizes).
nonTextConds, nonTextArgs := e.buildNonTextSearchConditions(q, keyColumns...)
conditions = append(conditions, nonTextConds...)
args = append(args, nonTextArgs...)

return conditions, args
}

// buildNonTextSearchConditions builds WHERE conditions for the non-text
// portion of a parsed search query (from:, to:, subject:, label:, has:,
// date/size filters). Extracted from buildAggregateSearchConditions so
// callers that handle text terms themselves (e.g. buildStatsSearchConditions)
// can append non-text filters without having to compute how many args
// the text-term portion produced.
func (e *DuckDBEngine) buildNonTextSearchConditions(q *search.Query, keyColumns ...string) ([]string, []interface{}) {
var conditions []string
var args []interface{}

// from: filter - match sender email
for _, from := range q.FromAddrs {
fromPattern := "%" + escapeILIKE(from) + "%"
Expand Down Expand Up @@ -650,28 +629,15 @@ func (e *DuckDBEngine) buildStatsSearchConditions(searchQuery string, groupBy Vi
}

// Non-text filters (from:, to:, subject:, label:, etc.) are the same
// regardless of view — delegate to the standard builder with no key columns.
nonTextConds, nonTextArgs := e.buildAggregateSearchConditions(searchQuery)
// Remove text-term conditions from the standard builder output (they are
// the first len(q.TextTerms) entries). We already handled text terms above.
if len(q.TextTerms) > 0 && len(nonTextConds) > len(q.TextTerms) {
conditions = append(conditions, nonTextConds[len(q.TextTerms):]...)
args = append(args, nonTextArgs[countArgsForTextTerms(len(q.TextTerms)):]...)
} else if len(q.TextTerms) == 0 {
conditions = append(conditions, nonTextConds...)
args = append(args, nonTextArgs...)
}
// regardless of view — delegate to the non-text helper directly so we
// don't have to track how many args the text-term portion emits.
nonTextConds, nonTextArgs := e.buildNonTextSearchConditions(q)
conditions = append(conditions, nonTextConds...)
args = append(args, nonTextArgs...)

return conditions, args
}

// countArgsForTextTerms reports how many bound args n text terms contribute
// in buildAggregateSearchConditions when it is called with no keyColumns.
func countArgsForTextTerms(n int) int {
	// Per term: one arg for subject, one for snippet, two for the sender match.
	const argsPerTerm = 4
	return argsPerTerm * n
}

// keyColumns are passed through to buildAggregateSearchConditions to control
// which columns text search terms filter on.
func (e *DuckDBEngine) buildWhereClause(opts AggregateOptions, keyColumns ...string) (string, []interface{}) {
Expand Down Expand Up @@ -2407,23 +2373,18 @@ func (e *DuckDBEngine) buildSearchConditions(q *search.Query, filter MessageFilt
}

// Text search terms - search subject, snippet, and from fields (fast path).
// Use word-boundary regex (\b) for terms starting with word chars.
// Terms starting with non-word chars skip \b (see startsWithWordChar).
// Uses ILIKE for performance on Parquet scans.
if len(q.TextTerms) > 0 {
for _, term := range q.TextTerms {
escaped := escapeRegex(term)
regexPattern := "(?i)" + escaped
if startsWithWordChar(term) {
regexPattern = "(?i)\\b" + escaped
}
termPattern := "%" + escapeILIKE(term) + "%"
conditions = append(conditions, `(
regexp_matches(COALESCE(msg.subject, ''), ?) OR
regexp_matches(COALESCE(msg.snippet, ''), ?) OR
regexp_matches(COALESCE(ms.from_email, ds.from_email, ''), ?) OR
regexp_matches(COALESCE(ms.from_name, ds.from_name, ''), ?) OR
regexp_matches(COALESCE(ms.from_phone, ds.from_phone, ''), ?)
msg.subject ILIKE ? ESCAPE '\' OR
COALESCE(msg.snippet, '') ILIKE ? ESCAPE '\' OR
COALESCE(ms.from_email, ds.from_email, '') ILIKE ? ESCAPE '\' OR
COALESCE(ms.from_name, ds.from_name, '') ILIKE ? ESCAPE '\' OR
COALESCE(ms.from_phone, ds.from_phone, '') ILIKE ? ESCAPE '\'
)`)
args = append(args, regexPattern, regexPattern, regexPattern, regexPattern, regexPattern)
args = append(args, termPattern, termPattern, termPattern, termPattern, termPattern)
}
}

Expand Down
Loading