diff --git a/README.md b/README.md index 2cbaaed..fa6742b 100644 --- a/README.md +++ b/README.md @@ -3,37 +3,17 @@

- - Release - - - Downloads - - Coverage + Release + Downloads + ShredGuard + Jira

- - ShredGuard - -

- -

- - Ubuntu Tests - - - macOS Tests - - - Windows Tests - - - Code Quality - - - Jira - + Ubuntu Tests + macOS Tests + Windows Tests + Code Quality

> [!IMPORTANT] diff --git a/src/voxkit/analyzers/audio_format_profile.py b/src/voxkit/analyzers/audio_format_profile.py index 4844d81..068d1b1 100644 --- a/src/voxkit/analyzers/audio_format_profile.py +++ b/src/voxkit/analyzers/audio_format_profile.py @@ -18,6 +18,8 @@ from pathlib import Path from typing import Any, Dict, List +from voxkit.storage.constants import SUPERSET_AUDIO_EXTENSIONS + from .base import DatasetAnalyzer logger = logging.getLogger(__name__) @@ -38,7 +40,7 @@ def analyze(self, dataset_path: str) -> List[Dict[str, Any]]: import torchaudio results = [] - audio_extensions = {".wav", ".flac", ".mp3", ".ogg", ".m4a"} + audio_extensions = SUPERSET_AUDIO_EXTENSIONS try: for entry in os.scandir(dataset_path): diff --git a/src/voxkit/analyzers/clip_duration_statistics.py b/src/voxkit/analyzers/clip_duration_statistics.py index 5f8d259..8b5b348 100644 --- a/src/voxkit/analyzers/clip_duration_statistics.py +++ b/src/voxkit/analyzers/clip_duration_statistics.py @@ -17,6 +17,8 @@ from pathlib import Path from typing import Any, Dict, List +from voxkit.storage.constants import SUPERSET_AUDIO_EXTENSIONS + from .base import DatasetAnalyzer logger = logging.getLogger(__name__) @@ -37,7 +39,7 @@ def analyze(self, dataset_path: str) -> List[Dict[str, Any]]: import torchaudio results = [] - audio_extensions = {".wav", ".flac", ".mp3", ".ogg", ".m4a"} + audio_extensions = SUPERSET_AUDIO_EXTENSIONS try: for entry in os.scandir(dataset_path): diff --git a/src/voxkit/analyzers/default_analyzer.py b/src/voxkit/analyzers/default_analyzer.py index d9b7383..c7ab6b5 100644 --- a/src/voxkit/analyzers/default_analyzer.py +++ b/src/voxkit/analyzers/default_analyzer.py @@ -17,6 +17,8 @@ from pathlib import Path from typing import Any, Dict, List +from voxkit.storage.constants import SUPERSET_AUDIO_EXTENSIONS + from .base import DatasetAnalyzer @@ -43,7 +45,7 @@ def analyze(self, dataset_path: str) -> List[Dict[str, Any]]: ``audio_file_count``. 
""" results = [] - audio_extensions = {".wav", ".flac", ".mp3", ".ogg", ".m4a"} + audio_extensions = SUPERSET_AUDIO_EXTENSIONS try: for entry in os.scandir(dataset_path): diff --git a/src/voxkit/config/__init__.py b/src/voxkit/config/__init__.py index 0cc437e..b1a6203 100644 --- a/src/voxkit/config/__init__.py +++ b/src/voxkit/config/__init__.py @@ -28,6 +28,7 @@ get_profile_config_path, resolve_config_file, ) +from voxkit.config.constants import DEFAULT_HELP_URL from voxkit.config.logging_config import ( LOG_FILE, reset_logging, @@ -40,7 +41,6 @@ get_pipeline_config, ) from voxkit.config.startup_config import ( - HELP_URL, STARTUP_SCRIPT, AppName, Defaults, @@ -63,7 +63,7 @@ "UIConfig", "get_pipeline_config", # Startup config - "HELP_URL", + "DEFAULT_HELP_URL", "AppName", "Dimensions", "Defaults", diff --git a/src/voxkit/config/app_config.py b/src/voxkit/config/app_config.py index 3201aff..aa46fe4 100644 --- a/src/voxkit/config/app_config.py +++ b/src/voxkit/config/app_config.py @@ -14,6 +14,8 @@ import yaml +from voxkit.config.constants import DEFAULT_HELP_URL + def get_config_root() -> Path: """Get the path to the config root directory. @@ -96,30 +98,12 @@ def resolve_config_file(filename: str) -> Path: if default_path.exists(): return default_path - # Fall back to legacy location (config root) - legacy_path = config_root / filename - if legacy_path.exists(): - return legacy_path - + # Throw error if not found in either location raise FileNotFoundError( - f"Config file '{filename}' not found in profile '{profile}', " - f"default profile, or config root" + f"Config file '{filename}' not found in profile '{profile}' or default profile" ) -# Legacy alias for backwards compatibility -def get_config_path() -> Path: - """Get the path to the config directory. - - Deprecated: Use get_profile_config_path() for profile-aware loading, - or get_config_root() for the config root directory. 
- - Returns: - Path to the active profile's config directory - """ - return get_profile_config_path() - - @dataclass class AppConfig: """Application configuration data class.""" @@ -128,7 +112,7 @@ class AppConfig: version: str description: str introduction: str - help_url: str = "https://voxkit-web.vercel.app/help" + help_url: str | None = None release_date: Optional[str] = None release_notes: Optional[str] = None log_max_bytes: int = 5 * 1024 * 1024 @@ -164,7 +148,7 @@ def from_yaml(cls, config_path: Path) -> "AppConfig": version=version, description=data.get("description", ""), introduction=data.get("introduction", ""), - help_url=data.get("help_url", "https://voxkit-web.vercel.app/help"), + help_url=data.get("help_url", DEFAULT_HELP_URL), release_date=data.get("release_date"), release_notes=data.get("release_notes"), log_max_bytes=int(data.get("log_max_bytes", 5 * 1024 * 1024)), diff --git a/src/voxkit/config/constants.py b/src/voxkit/config/constants.py new file mode 100644 index 0000000..65ed8f4 --- /dev/null +++ b/src/voxkit/config/constants.py @@ -0,0 +1,8 @@ +"""Constants relevant to configuration and setup. 
+ +Constants +--------- +- **DEFAULT_HELP_URL**: URL for user help documentation +""" + +DEFAULT_HELP_URL = "https://voxkit-web.vercel.app/help" diff --git a/src/voxkit/config/startup_config.py b/src/voxkit/config/startup_config.py index 6bd2491..334c520 100644 --- a/src/voxkit/config/startup_config.py +++ b/src/voxkit/config/startup_config.py @@ -3,7 +3,7 @@ from voxkit.services.mfa import download_acoustic_model from voxkit.storage import models -from voxkit.storage.config import MODELS_ROOT +from voxkit.storage.constants import MODELS_ROOT from voxkit.storage.models import download_and_copy_huggingface_model from voxkit.storage.utils import get_storage_root @@ -18,7 +18,6 @@ } Mode = Literal["MFAENGINE", "W2TGENGINE"] -HELP_URL = "https://voxkit-web.vercel.app/help" def startup_routine(): diff --git a/src/voxkit/engines/__init__.py b/src/voxkit/engines/__init__.py index 04d5aed..2b2ecde 100644 --- a/src/voxkit/engines/__init__.py +++ b/src/voxkit/engines/__init__.py @@ -6,7 +6,7 @@ - **EngineManager.list_engines**: List registered engine IDs - **EngineManager.get_engine**: Retrieve engine instance by ID - **EngineManager.get_tool_providers**: Get engines providing a specific tool type -- **ToolType**: Literal type for compatible tool types +- **AVAILABLE_TOOLS**: Literal type for compatible tool types Available Engines ----------------- @@ -48,7 +48,8 @@ from typing import List -from .base import AlignmentEngine, ToolType +from .base import AlignmentEngine +from .constants import AVAILABLE_TOOLS from .faster_whisper_engine import FasterWhisperEngine from .mfa_engine import MFAEngine from .w2tg_engine import W2TGEngine @@ -82,7 +83,7 @@ def get_engine(self, engine_id: str) -> AlignmentEngine: except KeyError: raise ValueError(f"No engine with id: {engine_id}") - def get_tool_providers(self, tool: ToolType) -> dict[str, AlignmentEngine]: + def get_tool_providers(self, tool: AVAILABLE_TOOLS) -> dict[str, AlignmentEngine]: """Return a list of engines that provide 
the specified tool type.""" engines = {} for _, engine in self._engines.items(): @@ -97,4 +98,4 @@ def get_tool_providers(self, tool: ToolType) -> dict[str, AlignmentEngine]: faster_whisper = FasterWhisperEngine(id="FASTERWHISPERENGINE") engines = EngineManager({mfa.id: mfa, faster_whisper.id: faster_whisper, w2tg.id: w2tg}) -__all__ = ["engines", "ToolType"] +__all__ = ["engines", "AVAILABLE_TOOLS"] diff --git a/src/voxkit/engines/base.py b/src/voxkit/engines/base.py index ae34871..bc23134 100644 --- a/src/voxkit/engines/base.py +++ b/src/voxkit/engines/base.py @@ -32,8 +32,9 @@ def align(self, dataset_id: str, model_id: str) -> None: import json from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Literal +from typing import Any +from voxkit.engines.constants import AVAILABLE_TOOLS from voxkit.storage.utils import get_storage_root """ @@ -42,18 +43,16 @@ def align(self, dataset_id: str, model_id: str) -> None: has its own settings that are stored in a JSON file. """ -ToolType = Literal["train", "align", "transcribe"] - class AlignmentEngine(ABC): """ Abstract base class for alignment engines. - Subclasses must implement at least one ToolType operation and provide + Subclasses must implement at least one AVAILABLE_TOOLS operation and provide specific validation criteria. Attributes: - settings_configurations (dict[ToolType, Any]): Mapping of + settings_configurations (dict[AVAILABLE_TOOLS, Any]): Mapping of tool type names ("train"/"align") to their store configuration. reference_url (str | None): Optional reference URL for the engine. description (str | None): Human-readable description of the engine. 
@@ -63,7 +62,7 @@ class AlignmentEngine(ABC): def __init__( self, - settings_configurations: dict[ToolType, Any], + settings_configurations: dict[AVAILABLE_TOOLS, Any], reference_url: str | None = None, description: str | None = None, human_readable_name: str | None = None, @@ -218,7 +217,7 @@ def _get_default_settings(self, cfg: Any) -> dict: """ return {field.name: field.default_value for field in (cfg.fields or [])} - def get_settings(self, tool_type: ToolType) -> dict: + def get_settings(self, tool_type: AVAILABLE_TOOLS) -> dict: """ Load and validate settings for a specific tool. @@ -269,7 +268,7 @@ def get_settings(self, tool_type: ToolType) -> dict: return settings - def get_settings_config(self, tool_type: ToolType) -> Any: + def get_settings_config(self, tool_type: AVAILABLE_TOOLS) -> Any: """ Return the :class:`Any` for a tool type. @@ -287,7 +286,7 @@ def get_settings_config(self, tool_type: ToolType) -> Any: raise ValueError(f"No settings configuration found for tool type: {tool_type}") return config - def has_tool(self, tool_type: ToolType) -> bool: + def has_tool(self, tool_type: AVAILABLE_TOOLS) -> bool: """Check if the engine has a tool of the specified type.""" return tool_type in self.settings_configurations diff --git a/src/voxkit/engines/constants.py b/src/voxkit/engines/constants.py new file mode 100644 index 0000000..ac8429d --- /dev/null +++ b/src/voxkit/engines/constants.py @@ -0,0 +1,4 @@ +from typing import Literal + +# New engines can implement these tools or a subset of them +AVAILABLE_TOOLS = Literal["train", "align", "transcribe"] diff --git a/src/voxkit/gui/pages/pipeline/viewer_stacker.py b/src/voxkit/gui/pages/pipeline/viewer_stacker.py index 293b7c9..b39eef5 100644 --- a/src/voxkit/gui/pages/pipeline/viewer_stacker.py +++ b/src/voxkit/gui/pages/pipeline/viewer_stacker.py @@ -40,6 +40,7 @@ from voxkit.gui.pages.pipeline.base_stacker import BaseStacker from voxkit.gui.styles import Buttons, Colors, Containers, Labels from 
voxkit.storage import alignments, datasets +from voxkit.storage.constants import SUPERSET_AUDIO_EXTENSIONS if TYPE_CHECKING: from PyQt6.QtMultimedia import QAudioOutput, QMediaPlayer @@ -52,7 +53,7 @@ MULTIMEDIA_AVAILABLE = False -_AUDIO_EXTENSIONS = {".wav", ".flac", ".mp3", ".ogg", ".m4a"} +_AUDIO_EXTENSIONS = SUPERSET_AUDIO_EXTENSIONS _SILENCE_LABELS = {"", "sp", "sil", "", "spn"} # --------------------------------------------------------------------------- diff --git a/src/voxkit/storage/alignments.py b/src/voxkit/storage/alignments.py index 618ca68..dd1cf9a 100644 --- a/src/voxkit/storage/alignments.py +++ b/src/voxkit/storage/alignments.py @@ -38,7 +38,7 @@ from pathlib import Path from typing import List, Literal, Tuple, TypedDict -from .config import ALIGNMENTS_ROOT +from .constants import ALIGNMENTS_ROOT, SUPERSET_AUDIO_EXTENSIONS from .datasets import _get_dataset_root, get_dataset_metadata from .models import ModelMetadata, get_model_metadata from .utils import generate_unique_id, readable_from_unique_id @@ -192,7 +192,7 @@ def create_alignment( return False, f"Failed to create alignment metadata: {str(e)}" -_AUDIO_EXTS = (".wav", ".flac", ".mp3", ".ogg", ".m4a") +_AUDIO_EXTS = tuple(SUPERSET_AUDIO_EXTENSIONS) def validate_hand_alignments(dataset_path: Path, hand_path: Path) -> Tuple[bool, str]: diff --git a/src/voxkit/storage/config.py b/src/voxkit/storage/config.py deleted file mode 100644 index 9f3312f..0000000 --- a/src/voxkit/storage/config.py +++ /dev/null @@ -1,20 +0,0 @@ -"""This module contains constants for the VoxKit storage system. 
- -Constants ---------- -- **STORAGE_ROOT**: Root directory for all VoxKit storage (~/.voxkit) -- **MODELS_ROOT**: Subdirectory for model storage relative to engine directory -- **DATASETS_ROOT**: Subdirectory for dataset storage relative to STORAGE_ROOT -- **ALIGNMENTS_ROOT**: Subdirectory for alignments relative to dataset directory - -Notes ------ -- STORAGE_ROOT uses tilde (~) notation to reference the user's home directory -- All paths are relative to appropriate parent directories in the hierarchy -- The directory structure is created automatically on first use -""" - -STORAGE_ROOT = "~/.voxkit" # Root directory for all storage -MODELS_ROOT = "train" # Path from STORAGE_ROOT to models -DATASETS_ROOT = "datasets" # Path from STORAGE_ROOT to datasets -ALIGNMENTS_ROOT = "alignments" # Path from STORAGE_ROOT/DATASETS_ROOT to alignments diff --git a/src/voxkit/storage/constants.py b/src/voxkit/storage/constants.py new file mode 100644 index 0000000..6270649 --- /dev/null +++ b/src/voxkit/storage/constants.py @@ -0,0 +1,21 @@ +"""This module contains constants for the VoxKit storage layer. 
+ +Constants +--------- +- **STORAGE_ROOT**: Root directory for all VoxKit storage (~/.voxkit) +- **MODELS_ROOT**: Subdirectory for model storage relative to engine directory +- **DATASETS_ROOT**: Subdirectory for dataset storage relative to STORAGE_ROOT +- **ALIGNMENTS_ROOT**: Subdirectory for alignments relative to dataset directory +- **SUPERSET_AUDIO_EXTENSIONS**: Comprehensive set of audio file extensions + +Notes +----- +- STORAGE_ROOT uses tilde (~) notation to reference the user's home directory +- All paths are relative to appropriate parent directories in the hierarchy +""" + +STORAGE_ROOT: str = "~/.voxkit" # Root directory for all storage +MODELS_ROOT: str = "train" # Path from STORAGE_ROOT to models +DATASETS_ROOT: str = "datasets" # Path from STORAGE_ROOT to datasets +ALIGNMENTS_ROOT: str = "alignments" # Path from STORAGE_ROOT/DATASETS_ROOT to alignments +SUPERSET_AUDIO_EXTENSIONS: frozenset[str] = frozenset({".wav", ".flac", ".mp3", ".ogg", ".m4a"}) diff --git a/src/voxkit/storage/datasets.py b/src/voxkit/storage/datasets.py index 2017112..b67a8fb 100644 --- a/src/voxkit/storage/datasets.py +++ b/src/voxkit/storage/datasets.py @@ -43,7 +43,7 @@ from pathlib import Path from typing import Any, List, Literal, Tuple, TypedDict -from voxkit.storage.config import ALIGNMENTS_ROOT, DATASETS_ROOT +from voxkit.storage.constants import ALIGNMENTS_ROOT, DATASETS_ROOT, SUPERSET_AUDIO_EXTENSIONS from voxkit.storage.utils import generate_unique_id, get_storage_root, readable_from_unique_id @@ -627,9 +627,7 @@ def validate_dataset(dataset_path: Path, transcribed: bool = True) -> Tuple[bool for speaker in speaker_dirs: speaker_path = os.path.join(dataset_path, speaker) audio_files = [ - f - for f in os.listdir(speaker_path) - if f.endswith((".wav", ".flac", ".mp3", ".ogg", ".m4a")) + f for f in os.listdir(speaker_path) if f.endswith(tuple(SUPERSET_AUDIO_EXTENSIONS)) ] if not audio_files: diff --git a/src/voxkit/storage/models.py b/src/voxkit/storage/models.py index 
b197e94..93e4a79 100644 --- a/src/voxkit/storage/models.py +++ b/src/voxkit/storage/models.py @@ -40,7 +40,7 @@ from voxkit.storage.utils import generate_unique_id, get_storage_root, readable_from_unique_id -from .config import MODELS_ROOT +from .constants import MODELS_ROOT class ModelMetadata(TypedDict): diff --git a/src/voxkit/storage/utils.py b/src/voxkit/storage/utils.py index 3f25184..7a297a5 100644 --- a/src/voxkit/storage/utils.py +++ b/src/voxkit/storage/utils.py @@ -25,7 +25,7 @@ from pathlib import Path from typing import Any -from .config import STORAGE_ROOT +from .constants import STORAGE_ROOT _id_lock = threading.Lock() _last_id_dt: datetime | None = None diff --git a/tests/config/test_app_config.py b/tests/config/test_app_config.py index 1bb288d..bb72ebe 100644 --- a/tests/config/test_app_config.py +++ b/tests/config/test_app_config.py @@ -6,7 +6,6 @@ AppConfig, get_active_profile, get_app_config, - get_config_path, get_config_root, get_profile_config_path, ) @@ -35,10 +34,6 @@ def test_get_profile_config_path_is_inside_profiles(self): # Should be config/profiles/ assert result.parent.name == "profiles" - def test_get_config_path_is_alias_for_profile_path(self): - # get_config_path is now an alias for get_profile_config_path - assert get_config_path() == get_profile_config_path() - class TestAppConfig: def test_dataclass_fields(self): @@ -52,7 +47,7 @@ def test_dataclass_fields(self): assert config.version == "1.0.0" assert config.description == "Test description" assert config.introduction == "Test intro" - assert config.help_url == "https://voxkit-web.vercel.app/help" + assert config.help_url is None config = AppConfig( app_name="TestApp", version="2.0.0", diff --git a/tests/engines/test_engine_manager.py b/tests/engines/test_engine_manager.py index 1356ea0..93318e4 100644 --- a/tests/engines/test_engine_manager.py +++ b/tests/engines/test_engine_manager.py @@ -35,7 +35,7 @@ def test_get_engine_not_found(self): assert "No engine with id" in 
str(exc_info.value) def test_get_tool_providers_align(self): - # ToolType is Literal["train", "align", "transcribe"] + # AVAILABLE_TOOLS is Literal["train", "align", "transcribe"] providers = engines.get_tool_providers("align") assert isinstance(providers, dict) # At least some engines should provide alignment diff --git a/tests/storage/test_models.py b/tests/storage/test_models.py index 95f325b..17ee9ba 100644 --- a/tests/storage/test_models.py +++ b/tests/storage/test_models.py @@ -631,7 +631,7 @@ def test_import_models_success(self, monkeypatch): import json from voxkit.storage import models - from voxkit.storage.config import MODELS_ROOT + from voxkit.storage.constants import MODELS_ROOT from voxkit.storage.models import import_models monkeypatch.setattr(models, "get_storage_root", mock_get_storage_root) @@ -674,7 +674,7 @@ def test_import_models_paths_rewritten(self, monkeypatch): import json from voxkit.storage import models - from voxkit.storage.config import MODELS_ROOT + from voxkit.storage.constants import MODELS_ROOT from voxkit.storage.models import import_models, list_models monkeypatch.setattr(models, "get_storage_root", mock_get_storage_root) @@ -737,7 +737,7 @@ def test_import_models_engine_mismatch(self, monkeypatch): import json from voxkit.storage import models - from voxkit.storage.config import MODELS_ROOT + from voxkit.storage.constants import MODELS_ROOT from voxkit.storage.models import import_models monkeypatch.setattr(models, "get_storage_root", mock_get_storage_root) diff --git a/tests/storage/test_setup.py b/tests/storage/test_setup.py index aee3508..ba6f518 100644 --- a/tests/storage/test_setup.py +++ b/tests/storage/test_setup.py @@ -1,7 +1,7 @@ import shutil from pathlib import Path -from voxkit.storage.config import MODELS_ROOT +from voxkit.storage.constants import MODELS_ROOT ENGINE_IDS = ["ENGINE_A", "ENGINE_B", "ENGINE_C"]