From d60f33fa408d2b13697d782d0e27f5be493e2718 Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Thu, 19 Feb 2026 12:51:40 -0500 Subject: [PATCH 1/4] feat: implement preprocessing just characters in process_text() --- everyvoice/preprocessor/preprocessor.py | 49 +++++++++++++++---------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/everyvoice/preprocessor/preprocessor.py b/everyvoice/preprocessor/preprocessor.py index 26e99fa8..8f15b3ad 100644 --- a/everyvoice/preprocessor/preprocessor.py +++ b/everyvoice/preprocessor/preprocessor.py @@ -767,9 +767,12 @@ def process_text( Returns: tuple[Optional[str], Optional[str], Optional[npt.NDArray[np.float32]]]|tuple[Optional[list[int]], Optional[list[int]], Optional[npt.NDArray[np.float32]]]: if encode_as_string is true, returns an optional characters string, an optional phones string, and an optional multi-hot phonological feature vector. if encode_as_string is false, returns a list of ints for characters and phones """ - if specific_text_representation is not None: + if specific_text_representation not in ( + None, + TargetTrainingTextRepresentationLevel.characters, + ): raise NotImplementedError( - "Sorry 'specific_text_representation' isn't implemented yet, please set it to None." + "Sorry 'specific_text_representation' is only implemented for characters, please set it to None or characters." ) # TODO: refactor so that you don't *need* to generate all possible representations, to make synthesis faster. 
if text_processor is None: raise NotImplementedError( @@ -785,6 +788,8 @@ def process_text( if ( DatasetTextRepresentation.arpabet.value in item and DatasetTextRepresentation.ipa_phones.value not in item + and specific_text_representation + != TargetTrainingTextRepresentationLevel.characters ): tokens = text_processor.encode_text( text=ARPABET_TO_IPA_TRANSDUCER( @@ -813,6 +818,8 @@ def process_text( if ( item["language"] in AVAILABLE_G2P_ENGINES and DatasetTextRepresentation.ipa_phones.value not in item + and specific_text_representation + != TargetTrainingTextRepresentationLevel.characters ): tokens = text_processor.encode_text( text=item[DatasetTextRepresentation.characters.value], @@ -824,23 +831,27 @@ def process_text( ) assert isinstance(tokens, list) phone_tokens = tokens - # if dataset is phones - if DatasetTextRepresentation.ipa_phones.value in item: - tokens = text_processor.encode_text( - text=item[DatasetTextRepresentation.ipa_phones.value], - dataset_label=dataset_label, - apply_g2p=False, - encode_as_phonological_features=False, - quiet=True, - ) - assert isinstance(tokens, list) - phone_tokens = tokens - # calculate pfs - if phone_tokens and use_pfs: - pfs = text_processor.calculate_phonological_features( - text_processor.token_sequence_to_text_sequence(phone_tokens), - apply_punctuation_rules=True, - ) + if ( + specific_text_representation + != TargetTrainingTextRepresentationLevel.characters + ): + # if dataset is phones + if DatasetTextRepresentation.ipa_phones.value in item: + tokens = text_processor.encode_text( + text=item[DatasetTextRepresentation.ipa_phones.value], + dataset_label=dataset_label, + apply_g2p=False, + encode_as_phonological_features=False, + quiet=True, + ) + assert isinstance(tokens, list) + phone_tokens = tokens + # calculate pfs + if phone_tokens and use_pfs: + pfs = text_processor.calculate_phonological_features( + text_processor.token_sequence_to_text_sequence(phone_tokens), + apply_punctuation_rules=True, + ) # encode to 
string if encode_as_string: if phone_tokens is not None: From be840b326fa2bc8e10f550ae565eb195c906c921 Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Tue, 3 Feb 2026 17:01:28 -0500 Subject: [PATCH 2/4] feat: everyvoice check-text-config checks for missing symbols in config First step in implementing #385 --- everyvoice/cli.py | 146 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 2 deletions(-) diff --git a/everyvoice/cli.py b/everyvoice/cli.py index 0d76bc15..e374c647 100644 --- a/everyvoice/cli.py +++ b/everyvoice/cli.py @@ -15,13 +15,14 @@ from rich.panel import Panel from everyvoice._version import VERSION -from everyvoice.base_cli.checkpoint import inspect, rename_speaker +from everyvoice.base_cli.checkpoint import inspect, load_checkpoint, rename_speaker from everyvoice.base_cli.interfaces import ( inference_base_command_interface, typer_directory_option, typer_file_argument, typer_file_option, ) +from everyvoice.config.type_definitions import TargetTrainingTextRepresentationLevel from everyvoice.model.aligner.wav2vec2aligner.aligner.cli import ( ALIGN_SINGLE_LONG_HELP, ALIGN_SINGLE_SHORT_HELP, @@ -62,7 +63,7 @@ ) from everyvoice.model.vocoder.HiFiGAN_iSTFT_lightning.hfgl.cli import train as train_hfg from everyvoice.run_tests import SUITE_NAMES, run_tests -from everyvoice.utils import spinner +from everyvoice.utils import generic_psv_filelist_reader, spinner from everyvoice.wizard import ( PREPROCESSING_CONFIG_FILENAME_PREFIX, SPEC_TO_WAV_CONFIG_FILENAME_PREFIX, @@ -897,5 +898,146 @@ def g2p( print(g2p(line)) +@app.command() +def check_text_config( # noqa: C901 + config: Annotated[ + Optional[Path], + typer.Option( + help="path to text config, i.e., everyvoice-shared-text.yaml", + exists=True, + dir_okay=False, + file_okay=True, + ), + ] = None, + model: Annotated[ + Optional[Path], + typer.Option( + help="path to a model whose text config will be used", + exists=True, + dir_okay=False, + file_okay=True, + ), + ] = None, + 
text_file: Annotated[ + Optional[Path], + typer.Option( + help="path to a plain text file to check", + exists=True, + dir_okay=False, + file_okay=True, + ), + ] = None, + psv_file: Annotated[ + Optional[Path], + typer.Option( + help="path to a psv file to check", + exists=True, + dir_okay=False, + file_okay=True, + ), + ] = None, + language: Annotated[ + Optional[str], + typer.Option( + help="language id, required with --text-file, or for a csv file without a language column. " + + "Declaring the language is always required, because text normalization can be language specific, and g2p is always language specific." + ), + ] = None, +): + """ + Inspect a text configuration for compatibility with an input file + + Test processing input_file against the text configuration provided, or the text + configuration found in model, and report any incompatibilities. + """ + if config and model: + raise typer.BadParameter("Please specify only one of --config or --model") + if not config and not model: + raise typer.BadParameter("One of --config or --model is required") + if text_file and psv_file: + raise typer.BadParameter("Please specify only one of --csv-file or --text-file") + if not text_file and not psv_file: + raise typer.BadParameter("One of --text-file or --csv-file is required") + + if text_file: + with open(text_file, "r", encoding="utf8") as f: + text_lines = list(f) + # print(text_lines) + if language is None: + raise typer.BadParameter("--language is required with --text-file") + records = [{"characters": line, "language": language} for line in text_lines] + elif psv_file: + records = generic_psv_filelist_reader(psv_file) + if "language" not in records[0]: + if language is None: + raise typer.BadParameter( + "--language is required for a csv file without a language column" + ) + for record in records: + record["language"] = language + + # Expensive imports are deferred so we fail fast where we can + with spinner("Loading software"): + from
everyvoice.config.text_config import TextConfig + from everyvoice.preprocessor.preprocessor import Preprocessor + from everyvoice.text.text_processor import TextProcessor + from everyvoice.text.utils import guess_graphemes_in_text + + if config: + text_config: TextConfig = TextConfig.load_config_from_path(config) + elif model: + with spinner("Loading model"): + checkpoint = load_checkpoint(model) + # print("Looking for text config") + model_config = checkpoint["hyper_parameters"]["config"] + if "text" in model_config: + # Question: FS2 models have text config, do any others have it? + # For other models that have it, are they in the same place in the metadata? + text_config = TextConfig(**model_config["text"]) + else: + # Models without text config, e.g., a HiFiGan Vocoder, are not accepted here + raise typer.BadParameter( + f"Model/checkpoint {model} does not have an embedded text configuration" + ) + + # print(text_config) + + text_processor_chars_only = TextProcessor(text_config) + text_processor_all = TextProcessor(text_config) + with spinner("Analyzing text"): + for record in records: + # print(record) + # Process just the text to calculate missing characters + _ = Preprocessor.process_text( + record, + text_processor_chars_only, + specific_text_representation=TargetTrainingTextRepresentationLevel.characters, + ) + # Process all to also calculate missing phones + _ = Preprocessor.process_text(record, text_processor_all) + + missing_characters = text_processor_chars_only.missing_symbols + missing_phones = text_processor_all.missing_symbols - missing_characters + missing_symbol_groups = list(missing_characters) + for missing_symbol_group in missing_symbol_groups: + split_symbols = guess_graphemes_in_text(missing_symbol_group) + if len(split_symbols) > 1: + count = missing_characters.pop(missing_symbol_group) + for symbol in split_symbols: + missing_characters[symbol] += count + # print("Missing characters", missing_characters) + # print("Missing phones", 
missing_phones) + if missing_characters: + print( + "The following characters are missing from your text config:", + sorted(missing_characters), + ) + if missing_phones: + print( + "The following phones are missing from your text config:", + sorted(missing_phones), + ) + + if __name__ == "__main__": app() From 259fa63ec59c0270ec8f2db053c918647a4f5df7 Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Tue, 24 Feb 2026 11:35:50 -0500 Subject: [PATCH 3/4] refactor: clean up the check_text_config code --- everyvoice/cli.py | 115 +++++++++++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 46 deletions(-) diff --git a/everyvoice/cli.py b/everyvoice/cli.py index e374c647..51c85c37 100644 --- a/everyvoice/cli.py +++ b/everyvoice/cli.py @@ -785,7 +785,7 @@ def demo( print("\t config loaded") except Exception as e: raise typer.BadParameter( - f"Your config file {ui_config_file} has errors\n {e}" + f"Your config file {ui_config_file} has errors.\n {e}" ) else: print(" - UI Config file path: None") @@ -898,8 +898,69 @@ def g2p( print(g2p(line)) +def require_exactly_one_of(arg1: Any, arg1_name: str, arg2: Any, arg2_name: str): + if arg1 and arg2: + raise typer.BadParameter( + f"Please specify only one of {arg1_name} or {arg2_name}." + ) + if not arg1 and not arg2: + raise typer.BadParameter(f"One of {arg1_name} and {arg2_name} is required.") + + +def open_text_or_psv_file( + text_file: Optional[Path], psv_file: Optional[Path], language: Optional[str] +) -> list[dict[str, str]]: + """helper for check_text_config: Open a text or psv file into records. 
+ + Language is required if not already in the psv + + raises: typer.BadParameter if something is wrong""" + if text_file: + with open(text_file, "r", encoding="utf8") as f: + text_lines = list(f) + # print(text_lines) + if language is None: + raise typer.BadParameter("--language is required with --text-file.") + records = [{"characters": line, "language": language} for line in text_lines] + elif psv_file: + records = generic_psv_filelist_reader(psv_file) + if "language" not in records[0]: + if language is None: + raise typer.BadParameter( + "--language is required for a psv file without a language column." + ) + for record in records: + record["language"] = language + else: + assert False + return records + + +def get_text_config_from_config_or_model(config: Optional[Path], model: Optional[Path]): + """Helper for check_text_config: load a TextConfig from a config file or model file""" + from everyvoice.config.text_config import TextConfig + + if config: + text_config: TextConfig = TextConfig.load_config_from_path(config) + elif model: + with spinner("Loading model"): + checkpoint = load_checkpoint(model) + # print("Looking for text config") + model_config = checkpoint["hyper_parameters"]["config"] + if "text" in model_config: + # Question: FS2 models have text config, do any others have it? + # For other models that have it, are they in the same place in the metadata? + text_config = TextConfig(**model_config["text"]) + else: + # Models without text config, e.g., a HiFiGan Vocoder, are not accepted here + raise typer.BadParameter( + f"Model/checkpoint {model} does not have an embedded text configuration." + ) + return text_config + + + @app.command() -def check_text_config( # noqa: C901 +def check_text_config( config: Annotated[ Optional[Path], typer.Option( @@ -939,7 +1000,7 @@ def check_text_config( # noqa: C901 language: Annotated[ Optional[str], typer.Option( - help="language id, required with --text-file, or for a csv file without a language column.
" + help="language id, required with --text-file, or for a psv file without a language column. " + "Declaring the language is always required, because text normalization can be language specific, and g2p is always language specific." ), ] = None, @@ -950,56 +1011,18 @@ def check_text_config( # noqa: C901 Test processing input_file against the text configuration provided, or the text configuration found in model, and report any incompatibilities. """ - if config and model: - raise typer.BadParameter("Please specify only one of --config or --model") - if not config and not model: - raise typer.BadParameter("One of --config or --model is required") - if text_file and psv_file: - raise typer.BadParameter("Please specify only one of --csv-file or --text-file") - if not text_file and not psv_file: - raise typer.BadParameter("One of --text-file or --csv-file is required") - - if text_file: - with open(text_file, "r", encoding="utf8") as f: - text_lines = list(f) - # print(text_lines) - if language is None: - raise typer.BadParameter("--language is required with --text-file") - records = [{"characters": line, "language": language} for line in text_lines] - elif psv_file: - records = generic_psv_filelist_reader(psv_file) - if "language" not in records[0]: - if language is None: - raise typer.BadParameter( - "--language is required for a csv file without a language column" - ) - for record in records: - record["language"] = language + require_exactly_one_of(config, "--config", model, "--model") + require_exactly_one_of(text_file, "--text-file", psv_file, "--psv-file") + records = open_text_or_psv_file(text_file, psv_file, language) # Expensive imports are deferred so we fail fast where we can with spinner("Loading software"): - from everyvoice.config.text_config import TextConfig + from everyvoice.config.text_config import TextConfig # noqa F401 from everyvoice.preprocessor.preprocessor import Preprocessor from everyvoice.text.text_processor import TextProcessor from 
everyvoice.text.utils import guess_graphemes_in_text - if config: - text_config: TextConfig = TextConfig.load_config_from_path(config) - elif model: - with spinner("Loading model"): - checkpoint = load_checkpoint(model) - # print("Looking for text config") - model_config = checkpoint["hyper_parameters"]["config"] - if "text" in model_config: - # Question: FS2 models have text config, do any others have it? - # For other models that have it, are they in the same place in the metadata? - text_config = TextConfig(**model_config["text"]) - else: - # Models without text config, e.g., a HiFiGan Vocoder, are not accepted here - raise typer.BadParameter( - f"Model/checkpoint {model} does not have an embedded text configuration" - ) - + text_config = get_text_config_from_config_or_model(config, model) # print(text_config) text_processor_chars_only = TextProcessor(text_config) From 31706a7bace7ecdd8371e6d66daebf1ef9ff9ad7 Mon Sep 17 00:00:00 2001 From: Eric Joanis Date: Tue, 24 Feb 2026 13:49:53 -0500 Subject: [PATCH 4/4] refactor: use typer_file_option in check_text_config --- everyvoice/cli.py | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/everyvoice/cli.py b/everyvoice/cli.py index 51c85c37..df04ece8 100644 --- a/everyvoice/cli.py +++ b/everyvoice/cli.py @@ -963,39 +963,21 @@ def get_text_config_from_config_or_model(config: Optional[Path], model: Optional def check_text_config( config: Annotated[ Optional[Path], - typer.Option( - help="path to text config, i.e., everyvoice-shared-text.yaml", - exists=True, - dir_okay=False, - file_okay=True, + typer_file_option( + help="path to text config, i.e., everyvoice-shared-text.yaml" ), ] = None, model: Annotated[ Optional[Path], - typer.Option( - help="path to a model whose text config will be used", - exists=True, - dir_okay=False, - file_okay=True, - ), + typer_file_option(help="path to a model whose text config will be used"), ] = None, text_file: Annotated[ Optional[Path], - 
typer.Option( - help="path to a plain text file to check", - exists=True, - dir_okay=False, - file_okay=True, - ), + typer_file_option(help="path to a plain text file to check"), ] = None, psv_file: Annotated[ Optional[Path], - typer.Option( - help="path to a psv file to check", - exists=True, - dir_okay=False, - file_okay=True, - ), + typer_file_option(help="path to a psv file to check"), ] = None, language: Annotated[ Optional[str],