From 0022ee55c5cd84d8105a7100a4b27453c7e7da40 Mon Sep 17 00:00:00 2001
From: Peter Reid <peter@peterreid.net>
Date: Mon, 12 May 2025 22:48:30 -0400
Subject: [PATCH] Ensure tokens don't end up with leading or trailing
 whitespace

Previously, two consecutive spaces (for example, between sentences)
would cause the token following the spaces to be prefixed by a space.
That space-prefixed token would then register as not in the lexicon,
and the prefixed word would be passed into the fallback.
---
 misaki/en.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/misaki/en.py b/misaki/en.py
index 222c170..99d58b9 100644
--- a/misaki/en.py
+++ b/misaki/en.py
@@ -25,7 +25,7 @@ def merge_tokens(tokens: List[MToken], unk: Optional[str] = None) -> MToken:
                 phonemes += ' '
             phonemes += unk if tk.phonemes is None else tk.phonemes
     return MToken(
-        text=''.join(tk.text + tk.whitespace for tk in tokens[:-1]) + tokens[-1].text,
+        text=(''.join(tk.text + tk.whitespace for tk in tokens[:-1]) + tokens[-1].text).strip(),
         tag=max(tokens, key=lambda tk: sum(1 if c == c.lower() else 2 for c in tk.text)).tag,
         whitespace=tokens[-1].whitespace,
         phonemes=phonemes,