From 0022ee55c5cd84d8105a7100a4b27453c7e7da40 Mon Sep 17 00:00:00 2001 From: Peter Reid Date: Mon, 12 May 2025 22:48:30 -0400 Subject: [PATCH] Ensure tokens don't end up with leading or trailing whitespace Previously, two spaces, for example between sentences, would lead to the token following the spaces being prefixed by a space. That would cause the token to register as not in the lexicon, and the space-prefixed word would then be passed into the fallback. --- misaki/en.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misaki/en.py b/misaki/en.py index 222c170..99d58b9 100644 --- a/misaki/en.py +++ b/misaki/en.py @@ -25,7 +25,7 @@ def merge_tokens(tokens: List[MToken], unk: Optional[str] = None) -> MToken: phonemes += ' ' phonemes += unk if tk.phonemes is None else tk.phonemes return MToken( - text=''.join(tk.text + tk.whitespace for tk in tokens[:-1]) + tokens[-1].text, + text=(''.join(tk.text + tk.whitespace for tk in tokens[:-1]) + tokens[-1].text).strip(), tag=max(tokens, key=lambda tk: sum(1 if c == c.lower() else 2 for c in tk.text)).tag, whitespace=tokens[-1].whitespace, phonemes=phonemes,