EveryVoiceTTS · joanise · Apr 28, 2026 · Apr 24, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/.github/workflows/matrix-tests.yml b/.github/workflows/matrix-tests.yml
@@ -27,7 +27,7 @@ jobs:
       - uses: awalsh128/cache-apt-pkgs-action@acb598e5ddbc6f68a970c5da0688d2f3a9f04d05 # v1.6.0
         with:
           packages: sox libsox-dev
-      - uses: FedericoCarboni/setup-ffmpeg@583042d32dd1cabb8bd09df03bde06080da5c87c # v2
+      - uses: FedericoCarboni/setup-ffmpeg@37062fbf7149fc5578d6c57e08aed62458b375d6 # @v3.1, with tool cache
       - name: Install dependencies and EveryVoice itself
         run: |
           CUDA_TAG=cpu pip install -r requirements.torch.txt --find-links https://download.pytorch.org/whl/torch_stable.html

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -26,7 +26,7 @@ jobs:
           packages: sox libsox-dev
       - name: Verify SoX installation
         run: sox --version
-      - uses: FedericoCarboni/setup-ffmpeg@583042d32dd1cabb8bd09df03bde06080da5c87c # v2
+      - uses: FedericoCarboni/setup-ffmpeg@37062fbf7149fc5578d6c57e08aed62458b375d6 # @v3.1, with tool cache
       - uses: actions/setup-python@v6
         with:
           python-version: "3.10"
@@ -100,7 +100,7 @@ jobs:
       - uses: actions/checkout@v6
         with:
           submodules: recursive
-      - uses: FedericoCarboni/setup-ffmpeg@583042d32dd1cabb8bd09df03bde06080da5c87c # v2
+      - uses: FedericoCarboni/setup-ffmpeg@37062fbf7149fc5578d6c57e08aed62458b375d6 # @v3.1, with tool cache
       - uses: actions/setup-python@v6
         with:
           python-version: "3.10"
@@ -170,7 +170,7 @@ jobs:
       - name: Run license check overall
         run: |
           licensecheck --requirements-paths pyproject.toml --zero \
-            --ignore-packages text-unidecode pympi-ling pyworld pyworld-prebuilt pysdtw audioread anytree gradio hf-gradio \
+            --ignore-packages text-unidecode pympi-ling pyworld pyworld-prebuilt audioread anytree gradio hf-gradio \
             --skip-dependencies llvmlite \
             --ignore-licenses OTHER/PROPRIETARY || \
           ! echo "Package(s) listed with an X above is/are potentially a problem. Please review their licenses for compatibility with EveryVoice."

diff --git a/everyvoice/model/aligner/wav2vec2aligner b/everyvoice/model/aligner/wav2vec2aligner
diff --git a/everyvoice/tests/test_custom_g2p.py b/everyvoice/tests/test_custom_g2p.py
@@ -95,6 +95,15 @@ def test_basic_g2p(self):
         with self.assertRaises(NotImplementedError):
             get_g2p_engine("boop")
 
+    def test_unusual_ipa_code(self):
+        # Unusual case: the IPA code for sal-apa is sal-ipa, not sal-apa-ipa
+        sal_apa_g2p = get_g2p_engine("sal-apa")
+        self.assertEqual(sal_apa_g2p("ac"), list("ats"))
+
+        # Regular case: the IPA code for iku-sro is iku-sro-ipa, not iku-ipa
+        iku_sro_g2p = get_g2p_engine("iku-sro")
+        self.assertEqual(iku_sro_g2p("akaq"), list("akaq"))
+
     def test_phonemizer_normalization(self):
         moh_g2p = get_g2p_engine("moh")
         self.assertEqual(moh_g2p("\u00e9"), ["\u00e9"])

diff --git a/everyvoice/text/phonemizer.py b/everyvoice/text/phonemizer.py
@@ -13,7 +13,7 @@
 DEFAULT_G2P = "DEFAULT_G2P"
 
 
-def make_default_g2p_engines():
+def make_default_g2p_engines() -> dict[str, str | G2PCallable]:
     return {k: DEFAULT_G2P for k in get_arpabet_langs()[0]}
 
 
@@ -32,9 +32,24 @@ def make_default_g2p_engines():
 class CachingG2PEngine:
     """caching tokenizing g2p engine"""
 
-    def __init__(self, lang_id):
-        self._cache = {}
-        self.phonemizer = make_g2p(lang_id, f"{lang_id}-ipa")
+    def __init__(self, lang_id: str) -> None:
+        self._cache: dict[str, list[str]] = {}  # keyed by input token
+        self.phonemizer = make_g2p(lang_id, self.get_ipa_code(lang_id))
+
+    def get_ipa_code(self, lang_id: str) -> str:
+        """Given a lang ID in get_arpabet_langs()[0], find its IPA language code.
+
+        Most languages in the g2p library have a three-letter code lll mapped to
+        lll-ipa, but a few do not, e.g., sal-apa -> sal-ipa, oji-syl -> oji-ipa.
+
+        Copied from g2p.get_ipa_code(), for compatibility with any version of g2p."""
+
+        from g2p.mappings.langs import LANGS_NETWORK
+
+        if lang_id + "-ipa" in LANGS_NETWORK.nodes:  # e.g. iku-sro -> iku-sro-ipa
+            return lang_id + "-ipa"
+        else:  # e.g. sal-apa -> sal-ipa
+            return lang_id.split("-", 1)[0] + "-ipa"
 
     def process_one_token(self, input_token: str) -> list[str]:
         """Process one input token, dumbly split on whitespace.
@@ -62,7 +77,7 @@ def process_one_token(self, input_token: str) -> list[str]:
 
     def __call__(self, normalized_input_text: str) -> list[str]:
         input_tokens = re.split(r"(\s+)", normalized_input_text)
-        output_tokens = []
+        output_tokens: list[str] = []
         for token in input_tokens:
             cached = self._cache.get(token, None)
             if cached is None: