From df4506722442f1007e55e262709e2b41ba95ae0b Mon Sep 17 00:00:00 2001
From: Vitalii Bondar
Date: Mon, 8 Jun 2026 01:30:10 +0200
Subject: [PATCH] Fix search for non-Latin queries (\w is ASCII-only in Ruby)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Search::Query#remove_invalid_search_characters used gsub(/[^\w"]/, " ").
In Ruby, \w matches ASCII [a-zA-Z0-9_] only, so any query containing
non-Latin characters (Cyrillic, CJK, Greek, Arabic, …) was reduced to
whitespace. The blank query then failed Search::Query's presence
validation and Search::Record.for_query returned `none` — zero results,
even though the FTS index contains the content.

Switch to the POSIX [[:word:]] class, which is Unicode-aware in Ruby
(Onigmo), so word characters in any script are preserved. The same
ASCII-only \w appears in Search::Stemmer (Trilogy/MySQL path) and is
fixed for consistency.

Co-Authored-By: Claude Opus 4.8
---
 app/models/search/query.rb   | 2 +-
 app/models/search/stemmer.rb | 2 +-
 test/models/search_test.rb   | 7 +++++++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/app/models/search/query.rb b/app/models/search/query.rb
index 5fe0829e41..ce68438fc8 100644
--- a/app/models/search/query.rb
+++ b/app/models/search/query.rb
@@ -33,7 +33,7 @@ def sanitize(terms)
     end
 
     def remove_invalid_search_characters(terms)
-      terms.gsub(/[^\w"]/, " ")
+      terms.gsub(/[^[[:word:]]"]/, " ")
     end
 
     def remove_unbalanced_quotes(terms)
diff --git a/app/models/search/stemmer.rb b/app/models/search/stemmer.rb
index 21cb75f97c..dead5ae8cf 100644
--- a/app/models/search/stemmer.rb
+++ b/app/models/search/stemmer.rb
@@ -5,7 +5,7 @@ module Search::Stemmer
 
   def stem(value)
     if value.present?
-      value.gsub(/[^\w\s]/, " ").split(/\s+/).map { |word| STEMMER.stem(word.downcase) }.join(" ")
+      value.gsub(/[^[[:word:]]\s]/, " ").split(/\s+/).map { |word| STEMMER.stem(word.downcase) }.join(" ")
     else
       value
     end
diff --git a/test/models/search_test.rb b/test/models/search_test.rb
index c52a397f41..22957cccae 100644
--- a/test/models/search_test.rb
+++ b/test/models/search_test.rb
@@ -42,4 +42,11 @@ class SearchTest < ActiveSupport::TestCase
     results = Search::Record.for(@user.account_id).search("BC3-IOS-1D8B", user: @user)
     assert results.find { |it| it.card_id == card.id }
   end
+
+  test "search for non-Latin (e.g. Cyrillic) strings" do
+    card = @board.cards.create!(title: "фільтрувати картки", creator: @user, status: "published")
+
+    results = Search::Record.for(@user.account_id).search("картки", user: @user)
+    assert results.find { |it| it.card_id == card.id }
+  end
 end