Skip to content

Commit 607d4ef

Browse files
committed
Adding template_parts as a field in the template class
1 parent abee851 commit 607d4ef

File tree

6 files changed

+213
-7
lines changed

6 files changed

+213
-7
lines changed

.devcontainer/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM mcr.microsoft.com/vscode/devcontainers/python:3.12
1+
FROM mcr.microsoft.com/vscode/devcontainers/python:3.12-bookworm
22

33
ENV POETRY_VERSION="1.7.1"
44
ENV POETRY_VENV_PATH="/home/vscode/.venv/workspace"

.github/actions/waffles/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ docopt==0.6.2
22
Flask==2.3.3
33
markupsafe==2.1.5
44
setuptools==78.1.1 # required for distutils in Python 3.12
5-
git+https://github.com/cds-snc/notifier-utils.git@53.2.13#egg=notifications-utils
5+
git+https://github.com/cds-snc/notifier-utils.git@53.2.14#egg=notifications-utils

notifications_utils/sanitise_text.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ class SanitiseSMS(SanitiseText):
100100

101101
# Welsh characters not already included in GSM
102102
WELSH_NON_GSM_CHARACTERS = set("ÂâÊêÎîÔôÛûŴŵŶŷ")
103-
FRENCH_NON_GSM_CHARACTESR = set("ÀÂËÎÏÔŒÙÛâçêëîïôœû")
103+
FRENCH_NON_GSM_CHARACTERS = set("ÀÂËÎÏÔŒÙÛâçêëîïôœû")
104104
INUKTITUK_CHARACTERS = set(
105105
"ᐁᐯᑌᑫᕴᒉᒣᓀᓭᓓᔦᑦᔦᕓᕂᙯᖅᑫᙰᐃᐱᑎᑭᕵᒋᒥᓂᓯ𑪶𑪰ᓕᔨᑦᔨᖨᕕᕆᕿᖅᑭᖏᙱᖠᐄᐲᑏᑮᕶᒌᒦᓃᓰ𑪷𑪱ᓖᔩᑦᔩᖩᕖᕇᖀᖅᑮᖐᙲᖡᐅᐳᑐᑯᕷᒍᒧᓄᓱ𑪸𑪲ᓗᔪᑦᔪᖪᕗᕈᖁᖅᑯᖑᙳᖢᐊᐸᑕᑲᕹᒐᒪᓇᓴ𑪺𑪴ᓚᔭᑦᔭᖬᕙᕋᖃᖅᑲᖓᙵᖤᑉᑦᒃᕻᒡᒻᓐᔅᓪᔾᑦᔾᖮᕝᕐᖅᖅᒃᖕᖖᖦᖯᕼᑊ" # noqa: E501
106106
)
@@ -116,7 +116,7 @@ class SanitiseSMS(SanitiseText):
116116
+ "^{}\\[~]|€" # character set extension
117117
)
118118
| WELSH_NON_GSM_CHARACTERS
119-
| FRENCH_NON_GSM_CHARACTESR
119+
| FRENCH_NON_GSM_CHARACTERS
120120
| INUKTITUK_CHARACTERS
121121
| CREE_CHARACTERS
122122
| OJIBWE_CHARACTERS

notifications_utils/template.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,17 @@ def is_message_too_long(self):
225225
def is_name_too_long(self):
226226
return len(self.name) > self.NAME_CHAR_LIMIT
227227

228+
def message_parts(self):
229+
content_with_placeholders = str(self)
230+
character_count = self.content_count
231+
unicode = is_unicode(content_with_placeholders)
232+
fragment_count = get_sms_fragment_count(character_count, unicode)
233+
return {
234+
"character_count": character_count,
235+
"fragment_count": fragment_count,
236+
"unicode": unicode,
237+
}
238+
228239

229240
class SMSPreviewTemplate(SMSMessageTemplate):
230241
def __init__(
@@ -803,7 +814,7 @@ def get_sms_fragment_count(character_count, is_unicode):
803814

804815

805816
def is_unicode(content):
806-
return set(content) & set(SanitiseSMS.WELSH_NON_GSM_CHARACTERS)
817+
return set(content) & (set(SanitiseSMS.WELSH_NON_GSM_CHARACTERS) | set(SanitiseSMS.FRENCH_NON_GSM_CHARACTERS))
807818

808819

809820
def get_html_email_body(template_content, template_values, redact_missing_personalisation=False, html="escape"):

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "notifications-utils"
3-
version = "53.2.13"
3+
version = "53.2.14"
44
description = "Shared python code for Notification - Provides logging utils etc."
55
authors = ["Canadian Digital Service"]
66
license = "MIT license"

tests/test_template.py

Lines changed: 196 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22
from bs4 import BeautifulSoup
3-
from notifications_utils.template import get_html_email_body
3+
from notifications_utils.template import SMSMessageTemplate, get_html_email_body
44

55

66
def test_lang_tags_in_templates():
@@ -198,3 +198,198 @@ def test_rtl_tags_work_with_other_features(self, content: str, extra_tag: str):
198198
assert '<div dir="rtl">' in html
199199
assert "RTL CONTENT" in html
200200
assert "<{}".format(extra_tag) in html
201+
202+
203+
class TestTemplateParts:
204+
def test_message_parts_basic(self):
205+
template = {"content": "Hello world", "template_type": "sms"}
206+
sms = SMSMessageTemplate(template)
207+
parts = sms.message_parts()
208+
209+
assert parts["character_count"] == 11
210+
assert parts["fragment_count"] == 1
211+
assert parts["unicode"] == set() # Empty set for non-unicode
212+
213+
def test_message_parts_with_unicode(self):
214+
# Welsh character 'â' triggers unicode ('â' is 2 bytes in UTF-8)
215+
template = {"content": "Helo byd â", "template_type": "sms"}
216+
sms = SMSMessageTemplate(template)
217+
parts = sms.message_parts()
218+
219+
assert parts["character_count"] == 11 # 9 ASCII bytes + 2 bytes for 'â' = 11 total bytes
220+
assert parts["fragment_count"] == 1
221+
assert parts["unicode"] == {"â"} # Set containing unicode char
222+
223+
def test_message_parts_long_non_unicode_single_fragment(self):
224+
# 160 bytes is the limit for single non-unicode SMS
225+
template = {"content": "a" * 160, "template_type": "sms"}
226+
sms = SMSMessageTemplate(template)
227+
parts = sms.message_parts()
228+
229+
assert parts["character_count"] == 160
230+
assert parts["fragment_count"] == 1
231+
assert parts["unicode"] == set() # Empty set for non-unicode
232+
233+
def test_message_parts_long_non_unicode_multiple_fragments(self):
234+
# 161 bytes triggers multi-part SMS (153 bytes per fragment)
235+
template = {"content": "a" * 161, "template_type": "sms"}
236+
sms = SMSMessageTemplate(template)
237+
parts = sms.message_parts()
238+
239+
assert parts["character_count"] == 161
240+
assert parts["fragment_count"] == 2
241+
assert parts["unicode"] == set() # Empty set for non-unicode
242+
243+
def test_message_parts_long_unicode_single_fragment(self):
244+
# 70 bytes is the limit for single unicode SMS ('â' is 2 bytes each)
245+
template = {"content": "â" * 35, "template_type": "sms"}
246+
sms = SMSMessageTemplate(template)
247+
parts = sms.message_parts()
248+
249+
assert parts["character_count"] == 70 # 35 chars * 2 bytes each
250+
assert parts["fragment_count"] == 1
251+
assert parts["unicode"] == {"â"} # Set containing unicode char
252+
253+
def test_message_parts_long_unicode_multiple_fragments(self):
254+
# Anything over 70 bytes triggers multi-part unicode SMS (67 bytes per fragment); 36 x 'â' is 72 bytes
255+
template = {"content": "â" * 36, "template_type": "sms"}
256+
sms = SMSMessageTemplate(template)
257+
parts = sms.message_parts()
258+
259+
assert parts["character_count"] == 72 # 36 chars * 2 bytes each
260+
assert parts["fragment_count"] == 2
261+
assert parts["unicode"] == {"â"} # Set containing unicode char
262+
263+
def test_message_parts_with_placeholders(self):
264+
template = {"content": "Hello ((name))", "template_type": "sms"}
265+
sms = SMSMessageTemplate(template, values={"name": "Alice"})
266+
parts = sms.message_parts()
267+
268+
assert parts["character_count"] == 11 # "Hello Alice"
269+
assert parts["fragment_count"] == 1
270+
assert parts["unicode"] == set() # Empty set for non-unicode
271+
272+
def test_message_parts_with_unicode_placeholder(self):
273+
template = {"content": "Hello ((name))", "template_type": "sms"}
274+
sms = SMSMessageTemplate(template, values={"name": "Siân"})
275+
parts = sms.message_parts()
276+
277+
assert parts["character_count"] == 11 # "Hello Siân" (â is 2 bytes)
278+
assert parts["fragment_count"] == 1
279+
assert parts["unicode"] == {"â"} # Set containing unicode char
280+
281+
def test_message_parts_with_prefix(self):
282+
template = {"content": "Hello world", "template_type": "sms"}
283+
sms = SMSMessageTemplate(template, prefix="Service")
284+
parts = sms.message_parts()
285+
286+
# "Service: Hello world" = 20 bytes
287+
assert parts["character_count"] == 20
288+
assert parts["fragment_count"] == 1
289+
assert parts["unicode"] == set() # Empty set for non-unicode
290+
291+
def test_message_parts_with_prefix_hidden(self):
292+
template = {"content": "Hello world", "template_type": "sms"}
293+
sms = SMSMessageTemplate(template, prefix="Service", show_prefix=False)
294+
parts = sms.message_parts()
295+
296+
# Prefix not shown, so just "Hello world" = 11 bytes
297+
assert parts["character_count"] == 11
298+
assert parts["fragment_count"] == 1
299+
assert parts["unicode"] == set() # Empty set for non-unicode
300+
301+
@pytest.mark.parametrize(
302+
"content, byte_count, fragment_count, has_unicode",
303+
[
304+
# Non-unicode: single fragment up to 160 bytes, then 153 bytes per fragment
305+
("a" * 160, 160, 1, False),
306+
("a" * 161, 161, 2, False),
307+
("a" * 306, 306, 2, False),
308+
("a" * 307, 307, 3, False),
309+
# Unicode: single fragment up to 70 bytes, then 67 bytes per fragment
310+
# 'â' is 2 bytes in UTF-8
311+
("â" * 35, 70, 1, True), # 35 chars * 2 = 70 bytes
312+
("â" * 36, 72, 2, True), # 36 chars * 2 = 72 bytes (>70)
313+
("â" * 67, 134, 2, True), # 67 chars * 2 = 134 bytes
314+
("â" * 68, 136, 3, True), # 68 chars * 2 = 136 bytes (>134)
315+
],
316+
)
317+
def test_message_parts_fragment_boundaries(self, content, byte_count, fragment_count, has_unicode):
318+
template = {"content": content, "template_type": "sms"}
319+
sms = SMSMessageTemplate(template)
320+
parts = sms.message_parts()
321+
322+
assert parts["character_count"] == byte_count
323+
assert parts["fragment_count"] == fragment_count
324+
# Check if unicode set is empty or not
325+
assert bool(parts["unicode"]) == has_unicode
326+
327+
def test_message_parts_with_multiple_unicode_chars_near_250_bytes(self):
328+
# Test with 4 different French non-GSM unicode characters (each 2 bytes in UTF-8)
329+
# Using: â, ê, î, ô from FRENCH_NON_GSM_CHARACTERS
330+
# Unicode SMS fragments: 70 bytes for single, then 67 bytes per fragment
331+
# 4 fragments can hold up to 268 bytes (4 * 67); a single-fragment message holds up to 70 bytes, but from 71 bytes on the message is multi-part at 67 bytes per fragment
332+
# Boundary: 201 bytes = 3 fragments, 202 bytes = 4 fragments
333+
334+
# Create content with mix of 4 French non-GSM unicode characters: â, ê, î, ô
335+
# Each is 2 bytes, so we need 100 chars total = 200 bytes (just under boundary)
336+
content_200_bytes = "âêîô" * 25 # 4 chars * 25 = 100 chars * 2 bytes = 200 bytes
337+
template = {"content": content_200_bytes, "template_type": "sms"}
338+
sms = SMSMessageTemplate(template)
339+
parts = sms.message_parts()
340+
341+
assert parts["character_count"] == 200
342+
assert parts["fragment_count"] == 3 # 200 bytes = 3 fragments (67*2 = 134, need 3rd for remaining 66)
343+
assert len(parts["unicode"]) == 4 # 4 different non-GSM chars
344+
345+
# Now add one more unicode char to cross the boundary to 202 bytes
346+
content_202_bytes = content_200_bytes + "â" # +2 bytes = 202 bytes total
347+
template = {"content": content_202_bytes, "template_type": "sms"}
348+
sms = SMSMessageTemplate(template)
349+
parts = sms.message_parts()
350+
351+
assert parts["character_count"] == 202
352+
assert parts["fragment_count"] == 4 # 202 bytes crosses boundary, needs 4 fragments
353+
assert len(parts["unicode"]) == 4 # Still 4 different non-GSM chars
354+
355+
# Test at exactly 250 bytes (still in 4-fragment range: 202-268 bytes)
356+
# Need 125 chars * 2 bytes = 250 bytes
357+
content_250_bytes = "âêîô" * 31 + "âê" # (4*31 + 2) = 126 chars * 2 bytes = 252 bytes
358+
# Adjust: the string above is 126 chars (252 bytes), so use 125 chars instead for exactly 250 bytes
359+
content_250_bytes = "âêîô" * 31 + "â" # (4*31 + 1) = 125 chars * 2 bytes = 250 bytes
360+
template = {"content": content_250_bytes, "template_type": "sms"}
361+
sms = SMSMessageTemplate(template)
362+
parts = sms.message_parts()
363+
364+
assert parts["character_count"] == 250
365+
assert parts["fragment_count"] == 4 # 250 bytes = 4 fragments
366+
assert len(parts["unicode"]) == 4 # 4 different non-GSM chars
367+
368+
def test_message_parts_with_multiple_unicode_chars(self):
369+
# Real-world bilingual emergency test message with French accented characters
370+
content = (
371+
"NB- xxxxxxxx, 120 Harbourview Blvd: This is a test for the xxxxxxxx employees, "
372+
"and no action is required from you at this time. The purpose of this exercise is "
373+
"to ensure that our emergency communication system is functioning properly and that "
374+
"everyone is familiar with the process.\n"
375+
"Ceci est uniquement un test pour les employés xx xxxxxxxx et aucune action n'est "
376+
"requise de votre part pour le moment. L'objectif de cet exercice est de s'assurer "
377+
"que notre système de communication d'urgence fonctionne correctement et que chacun "
378+
"connaît la procédure."
379+
)
380+
381+
template = {"content": content, "template_type": "sms"}
382+
sms = SMSMessageTemplate(template)
383+
parts = sms.message_parts()
384+
385+
# Verify it's detected as unicode (has French accented characters)
386+
assert len(parts["unicode"]) > 0
387+
388+
# Content is large enough to require multiple SMS fragments
389+
# With unicode, a single fragment holds up to 70 bytes; a multi-part message holds 67 bytes per fragment
390+
assert parts["character_count"] > 500 # Should be around 580+ bytes
391+
assert parts["fragment_count"] >= 9 # Should need multiple fragments
392+
393+
# Verify the specific non-GSM character from the French text ('î' in "connaît")
394+
french_unicode_chars = {"î"}
395+
assert french_unicode_chars.issubset(parts["unicode"])

0 commit comments

Comments
 (0)