|
1 | 1 | import pytest |
2 | 2 | from bs4 import BeautifulSoup |
3 | | -from notifications_utils.template import get_html_email_body |
| 3 | +from notifications_utils.template import SMSMessageTemplate, get_html_email_body |
4 | 4 |
|
5 | 5 |
|
6 | 6 | def test_lang_tags_in_templates(): |
@@ -198,3 +198,198 @@ def test_rtl_tags_work_with_other_features(self, content: str, extra_tag: str): |
198 | 198 | assert '<div dir="rtl">' in html |
199 | 199 | assert "RTL CONTENT" in html |
200 | 200 | assert "<{}".format(extra_tag) in html |
| 201 | + |
| 202 | + |
class TestTemplateParts:
    """Tests for ``SMSMessageTemplate.message_parts()``.

    The assertions below treat the returned mapping as having three keys:
    ``character_count`` (compared against the UTF-8 byte length of the rendered
    message), ``fragment_count`` (number of SMS fragments) and ``unicode``
    (the set of characters that force unicode encoding).

    Fragment limits exercised by these tests:
      * non-unicode: 160 bytes in a single SMS, otherwise 153 bytes per fragment
      * unicode:      70 bytes in a single SMS, otherwise  67 bytes per fragment
    """

    @staticmethod
    def _message_parts(content, **template_kwargs):
        """Render ``content`` as an SMS template and return its ``message_parts()``.

        ``template_kwargs`` are forwarded unchanged to ``SMSMessageTemplate``
        (e.g. ``values``, ``prefix``, ``show_prefix``).
        """
        template = {"content": content, "template_type": "sms"}
        return SMSMessageTemplate(template, **template_kwargs).message_parts()

    def test_message_parts_basic(self):
        parts = self._message_parts("Hello world")

        assert parts["character_count"] == 11
        assert parts["fragment_count"] == 1
        assert parts["unicode"] == set()  # Empty set for non-unicode

    def test_message_parts_with_unicode(self):
        # Welsh character 'â' triggers unicode ('â' is 2 bytes in UTF-8)
        parts = self._message_parts("Helo byd â")

        assert parts["character_count"] == 11  # 9 ASCII bytes + 2 bytes for 'â' = 11 total bytes
        assert parts["fragment_count"] == 1
        assert parts["unicode"] == {"â"}  # Set containing unicode char

    def test_message_parts_long_non_unicode_single_fragment(self):
        # 160 bytes is the limit for single non-unicode SMS
        parts = self._message_parts("a" * 160)

        assert parts["character_count"] == 160
        assert parts["fragment_count"] == 1
        assert parts["unicode"] == set()  # Empty set for non-unicode

    def test_message_parts_long_non_unicode_multiple_fragments(self):
        # 161 bytes triggers multi-part SMS (153 bytes per fragment)
        parts = self._message_parts("a" * 161)

        assert parts["character_count"] == 161
        assert parts["fragment_count"] == 2
        assert parts["unicode"] == set()  # Empty set for non-unicode

    def test_message_parts_long_unicode_single_fragment(self):
        # 70 bytes is the limit for single unicode SMS ('â' is 2 bytes each)
        parts = self._message_parts("â" * 35)

        assert parts["character_count"] == 70  # 35 chars * 2 bytes each
        assert parts["fragment_count"] == 1
        assert parts["unicode"] == {"â"}  # Set containing unicode char

    def test_message_parts_long_unicode_multiple_fragments(self):
        # Anything over 70 bytes triggers multi-part unicode SMS (67 bytes per fragment)
        parts = self._message_parts("â" * 36)

        assert parts["character_count"] == 72  # 36 chars * 2 bytes each
        assert parts["fragment_count"] == 2
        assert parts["unicode"] == {"â"}  # Set containing unicode char

    def test_message_parts_with_placeholders(self):
        parts = self._message_parts("Hello ((name))", values={"name": "Alice"})

        assert parts["character_count"] == 11  # "Hello Alice"
        assert parts["fragment_count"] == 1
        assert parts["unicode"] == set()  # Empty set for non-unicode

    def test_message_parts_with_unicode_placeholder(self):
        parts = self._message_parts("Hello ((name))", values={"name": "Siân"})

        assert parts["character_count"] == 11  # "Hello Siân": 10 chars, 'â' is 2 bytes
        assert parts["fragment_count"] == 1
        assert parts["unicode"] == {"â"}  # Set containing unicode char

    def test_message_parts_with_prefix(self):
        parts = self._message_parts("Hello world", prefix="Service")

        # "Service: Hello world" = 20 bytes
        assert parts["character_count"] == 20
        assert parts["fragment_count"] == 1
        assert parts["unicode"] == set()  # Empty set for non-unicode

    def test_message_parts_with_prefix_hidden(self):
        parts = self._message_parts("Hello world", prefix="Service", show_prefix=False)

        # Prefix not shown, so just "Hello world" = 11 bytes
        assert parts["character_count"] == 11
        assert parts["fragment_count"] == 1
        assert parts["unicode"] == set()  # Empty set for non-unicode

    @pytest.mark.parametrize(
        "content, byte_count, fragment_count, has_unicode",
        [
            # Non-unicode: single fragment up to 160 bytes, then 153 bytes per fragment
            ("a" * 160, 160, 1, False),
            ("a" * 161, 161, 2, False),
            ("a" * 306, 306, 2, False),
            ("a" * 307, 307, 3, False),
            # Unicode: single fragment up to 70 bytes, then 67 bytes per fragment
            # 'â' is 2 bytes in UTF-8
            ("â" * 35, 70, 1, True),  # 35 chars * 2 = 70 bytes
            ("â" * 36, 72, 2, True),  # 36 chars * 2 = 72 bytes (>70)
            ("â" * 67, 134, 2, True),  # 67 chars * 2 = 134 bytes
            ("â" * 68, 136, 3, True),  # 68 chars * 2 = 136 bytes (>134)
        ],
    )
    def test_message_parts_fragment_boundaries(self, content, byte_count, fragment_count, has_unicode):
        parts = self._message_parts(content)

        assert parts["character_count"] == byte_count
        assert parts["fragment_count"] == fragment_count
        # Check if unicode set is empty or not
        assert bool(parts["unicode"]) == has_unicode

    def test_message_parts_with_multiple_unicode_chars_near_250_bytes(self):
        # Uses 4 different French non-GSM unicode characters (â, ê, î, ô), each 2 bytes in UTF-8.
        # Multi-part unicode SMS holds 67 bytes per fragment, so:
        #   3 fragments cover up to 201 bytes (3 * 67)
        #   4 fragments cover 202-268 bytes   (4 * 67)
        # Because every character here is 2 bytes, the reachable sizes around the
        # boundary are 200 bytes (3 fragments) and 202 bytes (4 fragments).
        four_chars = "âêîô"

        # 4 chars * 25 = 100 chars * 2 bytes = 200 bytes (just under the boundary)
        content_200_bytes = four_chars * 25
        parts = self._message_parts(content_200_bytes)

        assert parts["character_count"] == 200
        assert parts["fragment_count"] == 3  # 67 * 2 = 134, third fragment carries the remaining 66
        assert len(parts["unicode"]) == 4  # 4 different non-GSM chars

        # One more unicode char crosses the boundary: +2 bytes = 202 bytes total
        content_202_bytes = content_200_bytes + "â"
        parts = self._message_parts(content_202_bytes)

        assert parts["character_count"] == 202
        assert parts["fragment_count"] == 4  # 202 bytes crosses boundary, needs 4 fragments
        assert len(parts["unicode"]) == 4  # Still 4 different non-GSM chars

        # Exactly 250 bytes is still inside the 4-fragment range (202-268 bytes):
        # (4 * 31 + 1) = 125 chars * 2 bytes = 250 bytes
        content_250_bytes = four_chars * 31 + "â"
        parts = self._message_parts(content_250_bytes)

        assert parts["character_count"] == 250
        assert parts["fragment_count"] == 4  # 250 bytes = 4 fragments
        assert len(parts["unicode"]) == 4  # 4 different non-GSM chars

    def test_message_parts_with_multiple_unicode_chars(self):
        # Real-world bilingual emergency test message with French accented characters
        content = (
            "NB- xxxxxxxx, 120 Harbourview Blvd: This is a test for the xxxxxxxx employees, "
            "and no action is required from you at this time. The purpose of this exercise is "
            "to ensure that our emergency communication system is functioning properly and that "
            "everyone is familiar with the process.\n"
            "Ceci est uniquement un test pour les employés xx xxxxxxxx et aucune action n'est "
            "requise de votre part pour le moment. L'objectif de cet exercice est de s'assurer "
            "que notre système de communication d'urgence fonctionne correctement et que chacun "
            "connaît la procédure."
        )

        parts = self._message_parts(content)

        # Verify it's detected as unicode (has French accented characters)
        assert parts["unicode"]

        # The message is roughly 550 characters long, so with unicode fragments of
        # 67 bytes each it cannot fit in fewer than 9 fragments.
        assert parts["character_count"] > 500
        assert parts["fragment_count"] >= 9

        # 'î' (from "connaît") must be reported as a unicode character.
        # NOTE(review): 'é' and 'è' also appear in the text but are part of the GSM 7-bit
        # default alphabet, so presumably they are not reported - confirm against the library.
        french_unicode_chars = {"î"}
        assert french_unicode_chars.issubset(parts["unicode"])
0 commit comments