44import ca .gc .tbs .repository .BadWordEntryRepository ;
55import java .util .*;
66import java .util .concurrent .ConcurrentHashMap ;
7+ import java .util .regex .Matcher ;
8+ import java .util .regex .Pattern ;
9+ import java .util .stream .Collectors ;
710import javax .annotation .PostConstruct ;
811import org .slf4j .Logger ;
912import org .slf4j .LoggerFactory ;
@@ -30,6 +33,8 @@ public class BadWords {
3033 // Combined set of all words to filter (profanity + threats)
3134 private final Set <String > allFilterWords = Collections .newSetFromMap (new ConcurrentHashMap <>());
3235
36+ private Pattern filterPattern ;
37+
3338 @ Autowired
3439 public BadWords (BadWordEntryRepository badWordEntryRepository ) {
3540 this .badWordEntryRepository = badWordEntryRepository ;
@@ -53,6 +58,7 @@ public void loadConfigs() {
5358 String word = entry .getWord ().trim ().toLowerCase ();
5459 profanityWords .add (word );
5560 allFilterWords .add (word );
61+ compileFilterPattern ();
5662 });
5763
5864 // Load threat words
@@ -138,33 +144,51 @@ public Set<String> getThreatWords() {
138144 return Collections .unmodifiableSet (threatWords );
139145 }
140146
147+
148+ private void compileFilterPattern () {
149+ if (allFilterWords .isEmpty ()) {
150+ filterPattern = null ;
151+ return ;
152+ }
153+ String patternString = allFilterWords .stream ()
154+ .filter (word -> word != null && !word .trim ().isEmpty ())
155+ .map (Pattern ::quote )
156+ .map (word -> "\\ b" + word + "\\ b" ) // exact whole word only
157+ .collect (Collectors .joining ("|" ));
158+ filterPattern = Pattern .compile (patternString , Pattern .CASE_INSENSITIVE | Pattern .UNICODE_CASE );
159+ }
160+
141161 /**
142162 * Censors profanity and threats in the given text by replacing them with asterisks.
143163 * Words in the allowed words list are never censored.
144- *
164+ *
145165 * @param text The text to censor
146166 * @return The censored text
147167 */
148168 public String censor (String text ) {
149169 if (text == null || text .isEmpty ()) {
150170 return text ;
151171 }
152-
153- StringBuilder result = new StringBuilder ();
154- for (String word : text .split ("\\ s+" )) {
155- String wordToCheck =
156- word .toLowerCase ()
157- .replaceAll ("[^a-zà-ÿ]" , "" ); // Including accented characters for French
158-
159- // Skip censoring if the word is in the allowed words list
160- boolean shouldCensor = allFilterWords .contains (wordToCheck ) &&
161- !allowedWords .contains (wordToCheck );
162-
163- result
164- .append (shouldCensor ? createMask (word ) : word )
165- .append (' ' );
172+ if (filterPattern == null ) {
173+ // No filter words loaded
174+ return text ;
175+ }
176+
177+ Matcher matcher = filterPattern .matcher (text );
178+ StringBuffer result = new StringBuffer ();
179+
180+ while (matcher .find ()) {
181+ String match = matcher .group ();
182+ String normalized = match .toLowerCase ().replaceAll ("[^a-zà-ÿ]" , "" );
183+
184+ if (allowedWords .contains (normalized )) {
185+ matcher .appendReplacement (result , Matcher .quoteReplacement (match ));
186+ } else {
187+ matcher .appendReplacement (result , Matcher .quoteReplacement (createMask (match )));
188+ }
166189 }
167- return result .toString ().trim ();
190+ matcher .appendTail (result );
191+ return result .toString ();
168192 }
169193
170194 /**
@@ -190,6 +214,7 @@ public void reload() {
190214 allowedWords .clear ();
191215 errorKeywords .clear ();
192216 allFilterWords .clear ();
217+ compileFilterPattern ();
193218
194219 // Reload from database
195220 loadConfigs ();
0 commit comments