From df07f831dab9f12f9390ff72e287510a0b167d2c Mon Sep 17 00:00:00 2001
From: Bartosz Sokorski
Date: Fri, 17 Oct 2025 10:45:15 +0200
Subject: [PATCH] Add pure Python wcwidth util

---
 src/cleo/_utils.py  | 55 +++++++++++++++++++++++++++++++++++++++++++++
 tests/test_utils.py | 37 ++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

diff --git a/src/cleo/_utils.py b/src/cleo/_utils.py
index 0665f0b9..feb9a64f 100644
--- a/src/cleo/_utils.py
+++ b/src/cleo/_utils.py
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
 import math
+import unicodedata
 
 from dataclasses import dataclass
 from difflib import SequenceMatcher
+from functools import lru_cache
 from html.parser import HTMLParser
 
 
@@ -105,3 +107,56 @@ def format_time(secs: float) -> str:
         (fmt for fmt in _TIME_FORMATS if secs < fmt.threshold), _TIME_FORMATS[-1]
     )
     return time_format.apply(secs)
+
+
+@lru_cache(maxsize=100)
+def wcwidth(c: str) -> int:
+    """Determine how many columns are needed to display a character in a terminal.
+
+    Returns -1 if the character is a control character (NUL excepted, which is 0).
+    Returns 0, 1 or 2 for other characters.
+    """
+    o = ord(c)
+
+    # ASCII fast path: printable ASCII (0x20-0x7E) always takes one column.
+    if 0x20 <= o < 0x07F:
+        return 1
+
+    # NUL and some Cf/Zp/Zl characters which should be zero-width.
+    if (
+        o == 0x0000
+        or 0x200B <= o <= 0x200F
+        or 0x2028 <= o <= 0x202E
+        or 0x2060 <= o <= 0x2063
+    ):
+        return 0
+
+    category = unicodedata.category(c)
+
+    # Control characters (category Cc) are reported as not printable.
+    if category == "Cc":
+        return -1
+
+    # Enclosing (Me) and nonspacing (Mn) combining marks take no column.
+    if category in ("Me", "Mn"):
+        return 0
+
+    # Fullwidth (F) and Wide (W) East Asian characters take two columns.
+    if unicodedata.east_asian_width(c) in ("F", "W"):
+        return 2
+
+    return 1
+
+
+def wcswidth(s: str) -> int:
+    """Determine how many columns are needed to display a string in a terminal.
+
+    The string is NFC-normalized first. Returns -1 if any character is not printable.
+    """
+    width = 0
+    for c in unicodedata.normalize("NFC", s):
+        wc = wcwidth(c)
+        if wc < 0:
+            return -1
+        width += wc
+    return width
diff --git a/tests/test_utils.py b/tests/test_utils.py
index c9693431..96ca1fef 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -5,6 +5,8 @@
 from cleo._utils import find_similar_names
 from cleo._utils import format_time
 from cleo._utils import strip_tags
+from cleo._utils import wcswidth
+from cleo._utils import wcwidth
 
 
 @pytest.mark.parametrize(
@@ -45,3 +47,38 @@ def test_find_similar_names(name: str, expected: list[str]) -> None:
 )
 def test_strip_tags(value: str, expected: str) -> None:
     assert strip_tags(value) == expected
+
+
+@pytest.mark.parametrize(
+    ("c", "expected"),
+    [
+        ("\0", 0),
+        ("\n", -1),
+        ("a", 1),
+        ("1", 1),
+        ("א", 1),
+        ("\u200b", 0),
+        ("\u1abe", 0),
+        ("\u0591", 0),
+        ("🉐", 2),
+        ("\uff04", 2),  # FULLWIDTH DOLLAR SIGN; ASCII "$" would be width 1
+    ],
+)
+def test_wcwidth(c: str, expected: int) -> None:
+    assert wcwidth(c) == expected
+
+
+@pytest.mark.parametrize(
+    ("s", "expected"),
+    [
+        ("", 0),
+        ("hello, world!", 13),
+        ("hello, world!\n", -1),
+        ("0123456789", 10),
+        ("שלום, עולם!", 11),
+        ("שְבֻעָיים", 6),
+        ("🉐🉐🉐", 6),
+    ],
+)
+def test_wcswidth(s: str, expected: int) -> None:
+    assert wcswidth(s) == expected