diff --git a/config/pipeline_definitions.yaml b/config/pipeline_definitions.yaml index 6c813a4..56c7476 100644 --- a/config/pipeline_definitions.yaml +++ b/config/pipeline_definitions.yaml @@ -38,6 +38,8 @@ pipeline: - **TranscriptionStacker**: Transcribe audio files using speech-to-text engines - **TrainingStacker**: Train acoustic models on labeled datasets - **PredictionStacker**: Generate forced alignments using trained/pretrained models + - **ViewerStacker**: Browse alignments by speaker/file; view TextGrid tiers, transcripts, and audio + - **ComparisonStacker**: Compare two alignments with four phoneme-level plots; export as PNGs - **PLLRStacker**: Extract Goodness of Pronunciation (PLLR) scores from alignments - **MarkdownStacker**: Display informational markdown content (this step) @@ -121,10 +123,39 @@ pipeline: # ====== - # PIPELINE STEP D: PLLR EXTRACTION + # PIPELINE STEP D: ALIGNMENT VIEWER + # ====== + - id: "viewer" + label: "Ⓓ View Alignments" + stacker_class: "ViewerStacker" + enabled: true + collapsible_sections: + "About the Alignment Viewer": | + Use this step to inspect alignment results. Select a dataset and one of its + completed alignments, then browse by speaker and audio file to view the + transcript (.lab), the TextGrid tier intervals (phones, words, etc.), and + play back the original audio side-by-side. + + # ====== + # PIPELINE STEP E: ALIGNMENT COMPARISON + # ====== + - id: "comparison" + label: "Ⓔ Compare Alignments" + stacker_class: "ComparisonStacker" + enabled: true + collapsible_sections: + "About Alignment Comparison": | + Use this step to compare two forced alignments side-by-side. Select a + dataset and one of its alignments for each side (A and B), then click + Compare to view four phoneme-level plots: Phoneme Counts, Overlap (IoU), + Overlap Rate, and Substitution Scatter. Any subset of plots can be + exported as PNGs to a folder you choose. 
+ + # ====== + # PIPELINE STEP F: PLLR EXTRACTION # ====== - id: "pllr" - label: "Ⓓ Extract PLLR Scoring" + label: "Ⓕ Extract PLLR Scoring" stacker_class: "PLLRStacker" enabled: true collapsible_sections: diff --git a/config/profiles/default/pipeline_definitions.yaml b/config/profiles/default/pipeline_definitions.yaml index 6c813a4..56c7476 100644 --- a/config/profiles/default/pipeline_definitions.yaml +++ b/config/profiles/default/pipeline_definitions.yaml @@ -38,6 +38,8 @@ pipeline: - **TranscriptionStacker**: Transcribe audio files using speech-to-text engines - **TrainingStacker**: Train acoustic models on labeled datasets - **PredictionStacker**: Generate forced alignments using trained/pretrained models + - **ViewerStacker**: Browse alignments by speaker/file; view TextGrid tiers, transcripts, and audio + - **ComparisonStacker**: Compare two alignments with four phoneme-level plots; export as PNGs - **PLLRStacker**: Extract Goodness of Pronunciation (PLLR) scores from alignments - **MarkdownStacker**: Display informational markdown content (this step) @@ -121,10 +123,39 @@ pipeline: # ====== - # PIPELINE STEP D: PLLR EXTRACTION + # PIPELINE STEP D: ALIGNMENT VIEWER + # ====== + - id: "viewer" + label: "Ⓓ View Alignments" + stacker_class: "ViewerStacker" + enabled: true + collapsible_sections: + "About the Alignment Viewer": | + Use this step to inspect alignment results. Select a dataset and one of its + completed alignments, then browse by speaker and audio file to view the + transcript (.lab), the TextGrid tier intervals (phones, words, etc.), and + play back the original audio side-by-side. + + # ====== + # PIPELINE STEP E: ALIGNMENT COMPARISON + # ====== + - id: "comparison" + label: "Ⓔ Compare Alignments" + stacker_class: "ComparisonStacker" + enabled: true + collapsible_sections: + "About Alignment Comparison": | + Use this step to compare two forced alignments side-by-side. 
Select a + dataset and one of its alignments for each side (A and B), then click + Compare to view four phoneme-level plots: Phoneme Counts, Overlap (IoU), + Overlap Rate, and Substitution Scatter. Any subset of plots can be + exported as PNGs to a folder you choose. + + # ====== + # PIPELINE STEP F: PLLR EXTRACTION # ====== - id: "pllr" - label: "Ⓓ Extract PLLR Scoring" + label: "Ⓕ Extract PLLR Scoring" stacker_class: "PLLRStacker" enabled: true collapsible_sections: diff --git a/config/profiles/explanatory/pipeline_definitions.yaml b/config/profiles/explanatory/pipeline_definitions.yaml index b82bfdc..bd1ff9b 100644 --- a/config/profiles/explanatory/pipeline_definitions.yaml +++ b/config/profiles/explanatory/pipeline_definitions.yaml @@ -2,7 +2,7 @@ # This profile provides detailed glossary-style explanations for each form element. # # Form Element Reference System: -# Letter = Pipeline step (Ⓐ Ⓑ Ⓒ Ⓓ) +# Letter = Pipeline step (Ⓐ Ⓑ Ⓒ Ⓓ Ⓔ Ⓕ) # Number = Form element within that step (① ② ③ ④ ⑤ ⑥) # Example: "B-3" refers to element ③ in the Training stacker (Ⓑ) @@ -49,7 +49,21 @@ pipeline: --- - ### Ⓓ Extract PLLR Scoring + ### Ⓓ View Alignments + **Purpose**: Inspect alignment results file-by-file. + + Browse completed alignments by speaker and audio file. View the transcript (.lab), TextGrid tier intervals (phones, words, etc.), and play back the original audio side-by-side. Useful for quality-checking alignments before downstream analysis. + + --- + + ### Ⓔ Compare Alignments + **Purpose**: Visualize differences between two forced alignments of the same dataset. + + Select two alignments (A and B) and click Compare to view four phoneme-level plots: Phoneme Counts, Overlap (IoU), Overlap Rate, and Substitution Scatter. Any subset of plots can be exported as PNGs. Useful for comparing pretrained vs. custom-trained model outputs. + + --- + + ### Ⓕ Extract PLLR Scoring **Purpose**: Calculate pronunciation quality scores from alignments. 
PLLR (Probabilistic Linear Likelihood Ratio) scores indicate how well each phoneme was pronounced. Outputs CSV files with per-phoneme and per-frame probability scores for clinical or research analysis. @@ -64,8 +78,11 @@ pipeline: **Custom model workflow**: Ⓐ → Ⓒ → Ⓑ → Ⓒ *Transcribe, align with pretrained model, train custom model, re-align with custom model.* - **Full analysis workflow**: Ⓐ → Ⓒ → Ⓓ - *Transcribe, align, then extract pronunciation scores.* + **Alignment review workflow**: Ⓐ → Ⓒ → Ⓓ → Ⓔ + *Transcribe, align, inspect results, then compare two alignment runs side-by-side.* + + **Full analysis workflow**: Ⓐ → Ⓒ → Ⓓ → Ⓕ + *Transcribe, align, review alignments, then extract pronunciation scores.* collapsible_sections: "Element Reference Notation": | @@ -81,7 +98,7 @@ pipeline: Some form elements depend on selections made in previous elements. Ⓑ-④ (Choose an Alignment) only populates after selecting a dataset in Ⓑ-③. - Ⓓ-② (Choose an Alignment) only populates after selecting a dataset in Ⓓ-①. + Ⓕ-② (Choose an Alignment) only populates after selecting a dataset in Ⓕ-①. If a dropdown shows "Select a dataset first", complete the previous selection. @@ -247,10 +264,100 @@ pipeline: When to adjust: Default settings work for most cases. Adjust if you experience issues or have specific requirements. # ====== - # PIPELINE STEP D: PLLR EXTRACTION + # PIPELINE STEP D: ALIGNMENT VIEWER + # ====== + - id: "viewer" + label: "Ⓓ View Alignments" + stacker_class: "ViewerStacker" + enabled: true + collapsible_sections: + "About This Step": | + The Alignment Viewer lets you inspect forced alignment results file-by-file. + Select a dataset and one of its completed alignments, then browse by speaker + and audio file to view the transcript (.lab), TextGrid tier intervals + (phones, words, etc.), and play back the original audio side-by-side. + + Use this step to quality-check alignments before running PLLR extraction or + comparing two alignment runs. 
+ + "Ⓓ-① Choose a Dataset": | + What it does: Selects which registered dataset to browse. + + Requirements: The dataset must have at least one completed alignment from step Ⓒ. + + What happens next: After selection, the alignment dropdown (Ⓓ-②) will populate + with all completed alignments for this dataset. + + "Ⓓ-② Choose an Alignment": | + What it does: Selects which alignment run to inspect. + + Depends on: Your selection in Ⓓ-① (dataset). This dropdown only populates after + selecting a dataset. + + Shows: Engine ID, model name, alignment date, and status. + + "Ⓓ-③ Speaker and File Navigation": | + What it does: Lets you drill down from dataset → speaker → audio file. + + Speaker list: Populated from the directory structure within the alignment. + File list: Shows all audio files for the selected speaker. + + Selecting a file loads the transcript and TextGrid tiers in the viewer panel. + + # ====== + # PIPELINE STEP E: ALIGNMENT COMPARISON + # ====== + - id: "comparison" + label: "Ⓔ Compare Alignments" + stacker_class: "ComparisonStacker" + enabled: true + collapsible_sections: + "About This Step": | + The Alignment Comparison step visualizes phoneme-level differences between two + forced alignments of the same dataset. Select a dataset and two of its alignments + (A and B), then click Compare to generate four diagnostic plots. + + Use this step to compare pretrained vs. custom-trained model outputs, or to + evaluate alignment quality across different engine configurations. + + "Ⓔ-① Choose a Dataset": | + What it does: Selects which dataset to compare alignments within. + + Requirements: The dataset must have at least two completed alignments from step Ⓒ. + + What happens next: After selection, both alignment dropdowns (A and B) will + populate with all completed alignments for this dataset. + + "Ⓔ-② Alignment A and Alignment B": | + What they do: Select the two alignment runs to compare side-by-side. + + Depends on: Your dataset selection in Ⓔ-①. 
+ + Shows: Engine ID, model name, alignment date, and status for each alignment. + + Note: A and B can reference the same alignment — useful for verifying the + comparison tool itself, but typically you will choose two different runs. + + "Ⓔ-③ Compare Button": | + What it does: Runs the comparison and generates four phoneme-level plots. + + Plots generated: + Phoneme Counts: Bar chart of how many times each phoneme appears in A vs. B. + Overlap (IoU): Mean intersection-over-union of the A and B intervals for each phoneme. + Overlap Rate: Fraction of phoneme intervals whose IoU meets the IoU threshold (default 0.50). + Substitution Scatter: Scatter plot of phoneme-pair substitutions between A and B. + + "Ⓔ-④ Export Button": | + What it does: Saves a selected subset of the four plots as PNG files. + + How to use: Check the plots you want to export, click Select Folder… to choose + an output directory, then click Download. Files are named by plot type. + + # ====== + # PIPELINE STEP F: PLLR EXTRACTION # ====== - id: "pllr" - label: "Ⓓ Extract PLLR Scoring" + label: "Ⓕ Extract PLLR Scoring" stacker_class: "PLLRStacker" enabled: true collapsible_sections: @@ -262,23 +369,23 @@ pipeline: These scores can be used for clinical assessment, research analysis, or identifying pronunciation patterns. - "D-① Choose a PLLR Dataset": | + "Ⓕ-① Choose a PLLR Dataset": | What it does: Selects which dataset to analyze for pronunciation scoring. Requirements: The dataset must have completed alignments from step Ⓒ. What happens next: After selection, element ② will populate with available alignments for this dataset. - "D-② Choose an Alignment": | + "Ⓕ-② Choose an Alignment": | What it does: Selects which alignment provides the TextGrids for scoring. - Depends on: Your selection in D-① (dataset). This dropdown only populates after selecting a dataset. + Depends on: Your selection in Ⓕ-① (dataset). This dropdown only populates after selecting a dataset. Shows: Engine ID, model name, alignment date, and status. 
Multiple alignments: If you've aligned the same dataset with different models, you can extract PLLR scores from each to compare results. - "D-③ Output Path": | + "Ⓕ-③ Output Path": | What it does: Specifies where to save the extracted score files. Output files created: @@ -289,7 +396,7 @@ pipeline: Recommendation: Use a dedicated output folder to keep results organized. - "D-④ Start PLLR Extraction Button": | + "Ⓕ-④ Start PLLR Extraction Button": | What it does: Begins the PLLR score computation. Processing: Each audio file is analyzed against its TextGrid alignment using a pronunciation scoring model (typically wav2vec2-based). @@ -310,6 +417,7 @@ pipeline: When to adjust: Default settings work for standard English analysis. Adjust for different languages or specialized scoring needs. + # ====== # UI CONFIGURATION # ====== diff --git a/pyproject.toml b/pyproject.toml index 523b140..cde8ec2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "pyyaml>=6.0.0", "rich>=14.2.0", "faster-whisper>=1.1.0", + "alignment-comparison-plots", ] [dependency-groups] @@ -194,3 +195,6 @@ show_missing = true [tool.setuptools.packages.find] where = ["src"] + +[tool.uv.sources] +alignment-comparison-plots = { git = "https://github.com/WISCLab/alignment-comparison-plots" } diff --git a/src/voxkit/gui/pages/pipeline/__init__.py b/src/voxkit/gui/pages/pipeline/__init__.py index 0ec6341..2d369e7 100644 --- a/src/voxkit/gui/pages/pipeline/__init__.py +++ b/src/voxkit/gui/pages/pipeline/__init__.py @@ -92,11 +92,13 @@ def on_settings(self): from voxkit.config.pipeline_config import PipelineConfig from .base_stacker import BaseStacker +from .comparison_stacker import ComparisonStacker from .markdown_stacker import MarkdownStacker from .pllr_stacker import PLLRStacker from .prediction_stacker import PredictionStacker from .training_stacker import TrainingStacker from .transcription_stacker import TranscriptionStacker +from .viewer_stacker import ViewerStacker # 
Mapping of stacker class names to actual classes STACKER_REGISTRY = { @@ -105,6 +107,8 @@ def on_settings(self): "PLLRStacker": PLLRStacker, "MarkdownStacker": MarkdownStacker, "TranscriptionStacker": TranscriptionStacker, + "ViewerStacker": ViewerStacker, + "ComparisonStacker": ComparisonStacker, } @@ -280,6 +284,14 @@ def reload(self): if hasattr(stacker_widget, "reload_datasets"): stacker_widget.reload_datasets() + elif stacker_class == "ViewerStacker": + if hasattr(stacker_widget, "reload_datasets"): + stacker_widget.reload_datasets() + + elif stacker_class == "ComparisonStacker": + if hasattr(stacker_widget, "reload_datasets"): + stacker_widget.reload_datasets() + def change_page(self, index): """Change the displayed page based on menu selection with animation""" if index >= 0: # Valid index @@ -303,5 +315,7 @@ def set_current_page_index(self, index): "PLLRStacker", "TranscriptionStacker", "MarkdownStacker", + "ViewerStacker", + "ComparisonStacker", "STACKER_REGISTRY", ] diff --git a/src/voxkit/gui/pages/pipeline/comparison_stacker.py b/src/voxkit/gui/pages/pipeline/comparison_stacker.py new file mode 100644 index 0000000..86878a9 --- /dev/null +++ b/src/voxkit/gui/pages/pipeline/comparison_stacker.py @@ -0,0 +1,569 @@ +"""Comparison Stacker Module. + +Pipeline page for comparing two forced-alignment outputs from the same dataset +using four phoneme-level visualizations from the alignment-comparison-plots library: +counts, mean IoU overlap, overlap rate, and phoneme-pair substitution scatter. 
+ +API +--- +- **ComparisonStacker**: Alignment comparison workflow UI +""" + +import glob +from pathlib import Path + +from PyQt6.QtWidgets import ( + QCheckBox, + QDoubleSpinBox, + QFileDialog, + QGroupBox, + QHBoxLayout, + QLabel, + QLineEdit, + QPushButton, + QScrollArea, + QSizePolicy, + QTabWidget, + QVBoxLayout, + QWidget, +) + +from voxkit.gui.components import MultiColumnComboBox +from voxkit.gui.pages.pipeline.base_stacker import BaseStacker +from voxkit.gui.styles import Buttons, Colors, Containers, Labels +from voxkit.storage import alignments, datasets + + +def _get_tg_paths(alignment_meta: dict) -> list[str]: + """Glob all TextGrid files under an alignment's tg_path directory.""" + tg_root = Path(alignment_meta["tg_path"]) + return glob.glob(str(tg_root / "**" / "*.TextGrid"), recursive=True) + + +def _make_scrollable(widget: QWidget) -> QScrollArea: + """Wrap a widget in a scroll area that fills the tab uniformly. + + Setting Expanding size policy lets setWidgetResizable grow the chart to + fill the full tab area; minimum size is still honoured so scrollbars + appear only when the chart is genuinely wider/taller than the viewport. + """ + widget.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Expanding) + scroll = QScrollArea() + scroll.setWidget(widget) + scroll.setWidgetResizable(True) + scroll.setFrameShape(QScrollArea.Shape.NoFrame) + scroll.setStyleSheet("QScrollArea { background: transparent; border: none; }") + return scroll + + +class ComparisonStacker(BaseStacker): + """Alignment comparison pipeline page. + + Select a dataset, then pick two of its alignments (A and B). Click Compare + to view four phoneme-level plots in a tab widget. Export any subset of plots + as PNGs to a folder you choose. 
+ """ + + def __init__(self, parent=None): + # Shared dataset state + self._dataset_dropdown: MultiColumnComboBox | None = None + self._dataset_meta: dict | None = None + + # A-side alignment state + self._a_alignment_dropdown: MultiColumnComboBox | None = None + self._a_alignment_meta: dict | None = None + + # B-side alignment state + self._b_alignment_dropdown: MultiColumnComboBox | None = None + self._b_alignment_meta: dict | None = None + + # Options + self._tier_input: QLineEdit | None = None + self._aggregate_cb: QCheckBox | None = None + self._threshold_spin: QDoubleSpinBox | None = None + self._compare_btn: QPushButton | None = None + + # Results + self._results_widget: QWidget | None = None + self._tab_widget: QTabWidget | None = None + + # Download + self._dl_folder: str = "" + self._dl_folder_label: QLabel | None = None + self._dl_counts_cb: QCheckBox | None = None + self._dl_overlap_cb: QCheckBox | None = None + self._dl_rate_cb: QCheckBox | None = None + self._dl_scatter_cb: QCheckBox | None = None + self._download_btn: QPushButton | None = None + + # Last comparison parameters (populated on successful compare) + self._last_comparison: dict | None = None + + super().__init__(parent) + + # ── BaseStacker overrides ──────────────────────────────────────────────── + + def get_title(self) -> str: + return "Alignment Comparison" + + def has_status_label(self) -> bool: + return True + + def build_ui(self): + # ── ① Dataset ──────────────────────────────────────────────────────── + self.content_layout.addWidget(self._make_section_label("① Choose a Dataset")) + + self._dataset_dropdown = MultiColumnComboBox() + self._dataset_dropdown.setStyleSheet(Containers.COMBOBOX_STANDARD) + self._dataset_dropdown.currentIndexChanged.connect(self._on_dataset_changed) + self.content_layout.addWidget(self._dataset_dropdown) + + # ── ② Alignment selectors (side by side) ───────────────────────────── + self.content_layout.addWidget( + self._make_section_label("② Choose Two 
Alignments to Compare") + ) + + al_row = QHBoxLayout() + al_row.setSpacing(16) + al_row.addWidget(self._make_alignment_box("A", is_a=True), stretch=1) + al_row.addWidget(self._make_alignment_box("B", is_a=False), stretch=1) + self.content_layout.addLayout(al_row) + + # ── Options + Compare row ───────────────────────────────────────────── + opts = QHBoxLayout() + opts.setSpacing(12) + + tier_lbl = QLabel("Tier:") + tier_lbl.setStyleSheet(Labels.SECTION_LABEL) + opts.addWidget(tier_lbl) + + self._tier_input = QLineEdit("phones") + self._tier_input.setFixedWidth(90) + self._tier_input.setStyleSheet( + f"QLineEdit {{ border: 1px solid {Colors.BORDER}; border-radius: 4px; " + f"padding: 4px 6px; font-size: 12px; background: white; }}" + f"QLineEdit:focus {{ border-color: {Colors.PRIMARY}; }}" + ) + opts.addWidget(self._tier_input) + + self._aggregate_cb = QCheckBox("Aggregate stress (AH1 → AH)") + self._aggregate_cb.setStyleSheet( + "QCheckBox { spacing: 6px; font-size: 12px; color: #2c3e50; }" + ) + self._aggregate_cb.setChecked(True) + opts.addWidget(self._aggregate_cb) + + opts.addStretch() + + threshold_lbl = QLabel("IoU threshold:") + threshold_lbl.setStyleSheet(Labels.SECTION_LABEL) + opts.addWidget(threshold_lbl) + + self._threshold_spin = QDoubleSpinBox() + self._threshold_spin.setRange(0.0, 1.0) + self._threshold_spin.setSingleStep(0.05) + self._threshold_spin.setValue(0.5) + self._threshold_spin.setDecimals(2) + self._threshold_spin.setFixedWidth(70) + self._threshold_spin.setStyleSheet( + "QDoubleSpinBox { border: 1px solid #d0d0d0; border-radius: 4px; " + "padding: 4px; font-size: 12px; color: black; background: white; selection-color: black; selection-background-color: #cce5ff; }" + ) + opts.addWidget(self._threshold_spin) + + self._compare_btn = QPushButton("Compare") + self._compare_btn.setStyleSheet(Buttons.PRIMARY) + self._compare_btn.setFixedWidth(100) + self._compare_btn.setEnabled(False) + self._compare_btn.clicked.connect(self._run_comparison) + 
opts.addWidget(self._compare_btn) + + self.content_layout.addLayout(opts) + + # ── Results section (hidden until comparison runs) ──────────────────── + self._results_widget = QWidget() + results_col = QVBoxLayout(self._results_widget) + results_col.setContentsMargins(0, 8, 0, 0) + results_col.setSpacing(10) + + self._tab_widget = QTabWidget() + self._tab_widget.setFixedHeight(520) + self._tab_widget.setSizePolicy( + QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed + ) + results_col.addWidget(self._tab_widget) + + # Download group ────────────────────────────────────────────────────── + dl_group = QGroupBox("Download Plots") + dl_group.setStyleSheet(Containers.GROUP_BOX) + dl_layout = QVBoxLayout(dl_group) + dl_layout.setSpacing(8) + + checks_row = QHBoxLayout() + self._dl_counts_cb = QCheckBox("Phoneme Counts") + self._dl_overlap_cb = QCheckBox("Overlap (IoU)") + self._dl_rate_cb = QCheckBox("Overlap Rate") + self._dl_scatter_cb = QCheckBox("Substitutions") + for cb in (self._dl_counts_cb, self._dl_overlap_cb, self._dl_rate_cb, self._dl_scatter_cb): + cb.setChecked(True) + cb.setStyleSheet("QCheckBox { spacing: 6px; font-size: 12px; color: #2c3e50; }") + checks_row.addWidget(cb) + checks_row.addStretch() + dl_layout.addLayout(checks_row) + + folder_row = QHBoxLayout() + folder_row.setSpacing(8) + + self._dl_folder_label = QLabel("No folder selected") + self._dl_folder_label.setStyleSheet(Labels.INFO_SMALL) + self._dl_folder_label.setSizePolicy( + QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Preferred + ) + folder_row.addWidget(self._dl_folder_label, stretch=1) + + browse_btn = QPushButton("Select Folder…") + browse_btn.setStyleSheet(Buttons.BROWSE) + browse_btn.setFixedWidth(130) + browse_btn.clicked.connect(self._browse_output_folder) + folder_row.addWidget(browse_btn) + + self._download_btn = QPushButton("Download") + self._download_btn.setStyleSheet(Buttons.SUCCESS_SMALL) + self._download_btn.setFixedWidth(100) + self._download_btn.setEnabled(False) + 
self._download_btn.clicked.connect(self._download_plots) + folder_row.addWidget(self._download_btn) + + dl_layout.addLayout(folder_row) + results_col.addWidget(dl_group) + + self._results_widget.setVisible(False) + self.content_layout.addWidget(self._results_widget) + + self.reload_datasets() + + # ── Alignment box builder ──────────────────────────────────────────────── + + def _make_alignment_box(self, side: str, *, is_a: bool) -> QGroupBox: + """Build a labeled group box with a single alignment dropdown.""" + box = QGroupBox(f"Alignment {side}") + box.setStyleSheet(Containers.GROUP_BOX) + layout = QVBoxLayout(box) + layout.setSpacing(6) + + al_dd = MultiColumnComboBox() + al_dd.setStyleSheet(Containers.COMBOBOX_STANDARD) + al_dd.set_data( + [{"id": None, "data": ("Select a dataset first", "", "", "")}], + ["Engine", "Model", "Date", "Status"], + placeholder="Select a dataset first", + ) + al_dd.setEnabled(False) + + if is_a: + self._a_alignment_dropdown = al_dd + al_dd.currentIndexChanged.connect(self._on_a_alignment_changed) + else: + self._b_alignment_dropdown = al_dd + al_dd.currentIndexChanged.connect(self._on_b_alignment_changed) + + layout.addWidget(al_dd) + return box + + # ── Reload hook ────────────────────────────────────────────────────────── + + def reload_datasets(self): + """Refresh the dataset dropdown from storage.""" + if self._dataset_dropdown is None: + return + + self._dataset_meta = None + self._a_alignment_meta = None + self._b_alignment_meta = None + + self._dataset_dropdown.clear() + metas = datasets.list_datasets_metadata() + if metas: + rows = [ + {"id": m["id"], "data": (m["name"], m["registration_date"], m["description"])} + for m in metas + ] + self._dataset_dropdown.set_data( + rows, ["Name", "Date", "Description"], placeholder="Select a dataset" + ) + self._dataset_dropdown.setEnabled(True) + else: + self._dataset_dropdown.set_data( + [{"id": None, "data": ("No datasets registered", "", "")}], + ["Name", "Date", "Description"], + 
placeholder="No datasets registered", + ) + self._dataset_dropdown.setEnabled(False) + + for al_dd in (self._a_alignment_dropdown, self._b_alignment_dropdown): + al_dd.set_data( + [{"id": None, "data": ("Select a dataset first", "", "", "")}], + ["Engine", "Model", "Date", "Status"], + placeholder="Select a dataset first", + ) + al_dd.setEnabled(False) + + self._update_compare_btn() + + # ── Selection handlers ─────────────────────────────────────────────────── + + def _on_dataset_changed(self): + dataset_id = self._dataset_dropdown.itemData(self._dataset_dropdown.currentIndex()) + self._dataset_meta = None + self._a_alignment_meta = None + self._b_alignment_meta = None + + if not dataset_id: + for al_dd in (self._a_alignment_dropdown, self._b_alignment_dropdown): + al_dd.set_data( + [{"id": None, "data": ("Select a dataset first", "", "", "")}], + ["Engine", "Model", "Date", "Status"], + placeholder="Select a dataset first", + ) + al_dd.setEnabled(False) + self._update_compare_btn() + return + + self._dataset_meta = datasets.get_dataset_metadata(dataset_id) + al_list = alignments.list_alignments(dataset_id) + + if al_list: + rows = [ + { + "id": a["id"], + "data": ( + a["engine_id"], + a["model_metadata"]["name"], + a["alignment_date"], + a["status"], + ), + } + for a in al_list + ] + for al_dd in (self._a_alignment_dropdown, self._b_alignment_dropdown): + al_dd.set_data( + rows, ["Engine", "Model", "Date", "Status"], placeholder="Select an alignment" + ) + al_dd.setEnabled(True) + else: + for al_dd in (self._a_alignment_dropdown, self._b_alignment_dropdown): + al_dd.set_data( + [{"id": None, "data": ("No alignments found", "", "", "")}], + ["Engine", "Model", "Date", "Status"], + placeholder="No alignments found", + ) + al_dd.setEnabled(False) + + self._update_compare_btn() + + def _on_a_alignment_changed(self): + al_id = self._a_alignment_dropdown.itemData(self._a_alignment_dropdown.currentIndex()) + self._a_alignment_meta = None + if al_id and 
self._dataset_meta: + self._a_alignment_meta = alignments.get_alignment_metadata( + self._dataset_meta["id"], al_id + ) + self._update_compare_btn() + + def _on_b_alignment_changed(self): + al_id = self._b_alignment_dropdown.itemData(self._b_alignment_dropdown.currentIndex()) + self._b_alignment_meta = None + if al_id and self._dataset_meta: + self._b_alignment_meta = alignments.get_alignment_metadata( + self._dataset_meta["id"], al_id + ) + self._update_compare_btn() + + def _update_compare_btn(self) -> None: + if self._compare_btn: + self._compare_btn.setEnabled( + bool(self._a_alignment_meta and self._b_alignment_meta) + ) + + # ── Comparison ─────────────────────────────────────────────────────────── + + def _run_comparison(self) -> None: + if not self._a_alignment_meta or not self._b_alignment_meta: + return + + paths_a = _get_tg_paths(self._a_alignment_meta) + paths_b = _get_tg_paths(self._b_alignment_meta) + + if not paths_a: + self.set_status("No TextGrid files found for Alignment A.", "error") + return + if not paths_b: + self.set_status("No TextGrid files found for Alignment B.", "error") + return + + tier = self._tier_input.text().strip() or "phones" + aggregate = self._aggregate_cb.isChecked() + threshold = self._threshold_spin.value() + + label_a = ( + f"{self._a_alignment_meta['engine_id']} / " + f"{self._a_alignment_meta['model_metadata']['name']}" + ) + label_b = ( + f"{self._b_alignment_meta['engine_id']} / " + f"{self._b_alignment_meta['model_metadata']['name']}" + ) + + self.set_status("Running comparison…", "working") + + try: + from alignment_comparison_plots.phoneme_counts import BarChartWidget, count_phonemes + from alignment_comparison_plots.phoneme_overlap import ( + OverlapChartWidget, + OverlapRateWidget, + PairScatterWidget, + compute_phoneme_overlap, + compute_phoneme_overlap_rate, + compute_phoneme_pair_overlap, + ) + except ImportError: + self.set_status( + "alignment-comparison-plots not installed. 
Run: uv add alignment-comparison-plots", + "error", + ) + return + + try: + counts_a = count_phonemes(paths_a, tier_name=tier, normalize=aggregate) + counts_b = count_phonemes(paths_b, tier_name=tier, normalize=aggregate) + overlap = compute_phoneme_overlap( + paths_a, paths_b, tier_name=tier, normalize=aggregate + ) + rates = compute_phoneme_overlap_rate( + paths_a, paths_b, tier_name=tier, normalize=aggregate, threshold=threshold + ) + pairs = compute_phoneme_pair_overlap( + paths_a, paths_b, tier_name=tier, normalize=aggregate + ) + except Exception as exc: + self.set_status(f"Comparison failed: {exc}", "error") + return + + # Cache parameters for download + self._last_comparison = { + "paths_a": paths_a, + "paths_b": paths_b, + "label_a": label_a, + "label_b": label_b, + "tier": tier, + "aggregate": aggregate, + "threshold": threshold, + } + + # Rebuild tab widget — each chart is wrapped in a scroll area so it + # can expand horizontally/vertically without being clipped. + self._tab_widget.clear() + self._tab_widget.addTab( + _make_scrollable(BarChartWidget(counts_a, counts_b, label_a, label_b)), + "Phoneme Counts", + ) + self._tab_widget.addTab( + _make_scrollable(OverlapChartWidget(overlap)), + "Overlap (IoU)", + ) + self._tab_widget.addTab( + _make_scrollable(OverlapRateWidget(rates, threshold)), + f"Overlap Rate ≥{threshold:.2f}", + ) + self._tab_widget.addTab( + _make_scrollable(PairScatterWidget(pairs)), + "Substitutions", + ) + + self._results_widget.setVisible(True) + self._download_btn.setEnabled(True) + + self.set_status( + f"Compared {len(paths_a)} + {len(paths_b)} TextGrids · tier: {tier}", + "success", + ) + + # ── Download ───────────────────────────────────────────────────────────── + + def _browse_output_folder(self) -> None: + folder = QFileDialog.getExistingDirectory(self, "Select Output Folder", str(Path.home())) + if folder: + self._dl_folder = folder + self._dl_folder_label.setText(folder) + + def _download_plots(self) -> None: + if 
self._last_comparison is None: + return + if not self._dl_folder: + self.set_status("Select an output folder first.", "ready") + self._browse_output_folder() + if not self._dl_folder: + return + + from alignment_comparison_plots import ( + plot_phoneme_counts, + plot_phoneme_overlap, + plot_phoneme_overlap_rate, + plot_phoneme_pair_scatter, + ) + + c = self._last_comparison + out = Path(self._dl_folder) + saved: list[str] = [] + errors: list[str] = [] + + specs = [ + (self._dl_counts_cb, plot_phoneme_counts, "phoneme_counts.png", {}), + (self._dl_overlap_cb, plot_phoneme_overlap, "phoneme_overlap.png", {}), + ( + self._dl_rate_cb, + plot_phoneme_overlap_rate, + "phoneme_overlap_rate.png", + {"threshold": c["threshold"]}, + ), + (self._dl_scatter_cb, plot_phoneme_pair_scatter, "phoneme_pair_scatter.png", {}), + ] + + for cb, plot_fn, filename, extra_kwargs in specs: + if not cb.isChecked(): + continue + try: + plot_fn( + paths_a=c["paths_a"], + paths_b=c["paths_b"], + label_a=c["label_a"], + label_b=c["label_b"], + tier_name=c["tier"], + aggregate_emphasis=c["aggregate"], + save_png=str(out / filename), + exec_=False, + **extra_kwargs, + ) + saved.append(filename) + except Exception as exc: + errors.append(f"{filename}: {exc}") + + if errors: + self.set_status( + f"Saved {len(saved)} plot(s); errors: {'; '.join(errors)}", "error" + ) + elif saved: + self.set_status(f"Saved {len(saved)} plot(s) to {self._dl_folder}", "success") + else: + self.set_status("No plots selected for download.", "ready") + + # ── Helpers ────────────────────────────────────────────────────────────── + + @staticmethod + def _make_section_label(text: str) -> QLabel: + lbl = QLabel(text) + lbl.setStyleSheet(Labels.SECTION_LABEL) + return lbl + + +__all__ = ["ComparisonStacker"] diff --git a/src/voxkit/gui/pages/pipeline/pllr_stacker.py b/src/voxkit/gui/pages/pipeline/pllr_stacker.py index ee8f72d..b599cbc 100644 --- a/src/voxkit/gui/pages/pipeline/pllr_stacker.py +++ 
b/src/voxkit/gui/pages/pipeline/pllr_stacker.py @@ -214,7 +214,7 @@ def init_ui(self): header_layout = QHBoxLayout() # Title - title = QLabel("Ⓒ Extract PLLR Scoring") + title = QLabel("Extract PLLR Scoring") title.setStyleSheet(Labels.PAGE_TITLE) header_layout.addWidget(title) diff --git a/src/voxkit/gui/pages/pipeline/prediction_stacker.py b/src/voxkit/gui/pages/pipeline/prediction_stacker.py index 157434f..c0217cd 100644 --- a/src/voxkit/gui/pages/pipeline/prediction_stacker.py +++ b/src/voxkit/gui/pages/pipeline/prediction_stacker.py @@ -41,7 +41,7 @@ def __init__(self, parent): def get_title(self) -> str: """Return the stacker's title.""" - return "Ⓑ Predict Alignments" + return "Predict Alignments" def has_settings(self) -> bool: """This stacker has settings.""" diff --git a/src/voxkit/gui/pages/pipeline/training_stacker.py b/src/voxkit/gui/pages/pipeline/training_stacker.py index 814856e..ba3dfcc 100644 --- a/src/voxkit/gui/pages/pipeline/training_stacker.py +++ b/src/voxkit/gui/pages/pipeline/training_stacker.py @@ -50,7 +50,7 @@ def __init__(self, parent): def get_title(self) -> str: """Return the stacker's title.""" - return "Ⓐ Train Aligners" + return "Train Aligners" def has_settings(self) -> bool: """This stacker has settings.""" diff --git a/src/voxkit/gui/pages/pipeline/viewer_stacker.py b/src/voxkit/gui/pages/pipeline/viewer_stacker.py new file mode 100644 index 0000000..cdc7c53 --- /dev/null +++ b/src/voxkit/gui/pages/pipeline/viewer_stacker.py @@ -0,0 +1,960 @@ +"""Viewer Stacker Module. + +Pipeline page for browsing alignment results — synchronized view of TextGrid +tiers, transcript, and audio playback for any speaker/file in a registered +alignment. + +API +--- +- **TextGridTimeline**: Custom painted widget rendering TextGrid tiers as a + time-aligned view with a live playhead and click-to-seek. 
+- **ViewerStacker**: Alignment viewer workflow UI +""" + +import re +import subprocess +import sys +from pathlib import Path + +from PyQt6.QtCore import Qt, QPoint, QUrl, pyqtSignal +from PyQt6.QtGui import QColor, QFont, QPainter, QPen, QPolygon +from PyQt6.QtWidgets import ( + QComboBox, + QHBoxLayout, + QLabel, + QLineEdit, + QListWidget, + QPushButton, + QSizePolicy, + QSlider, + QTextEdit, + QVBoxLayout, + QWidget, +) + +from voxkit.gui.components import MultiColumnComboBox +from voxkit.gui.pages.pipeline.base_stacker import BaseStacker +from voxkit.gui.styles import Buttons, Colors, Containers, Labels +from voxkit.storage import alignments, datasets +from voxkit.storage.datasets import _get_dataset_root + +try: + from PyQt6.QtMultimedia import QAudioOutput, QMediaPlayer + + MULTIMEDIA_AVAILABLE = True +except ImportError: + MULTIMEDIA_AVAILABLE = False + + +_AUDIO_EXTENSIONS = {".wav", ".flac", ".mp3", ".ogg", ".m4a"} +_SILENCE_LABELS = {"", "sp", "sil", "", "spn"} + +# --------------------------------------------------------------------------- +# TextGrid parser +# --------------------------------------------------------------------------- + + +def _parse_textgrid(filepath: str) -> list[dict]: + """Parse a Praat TextGrid file into a list of tier dicts. + + Each tier dict has keys: ``name``, ``class``, ``intervals`` (list of dicts + with ``start``/``end``/``label`` for IntervalTier, or ``time``/``label`` + for TextTier). 
+ """ + try: + with open(filepath, "r", encoding="utf-8-sig") as f: + content = f.read() + except UnicodeDecodeError: + with open(filepath, "r", encoding="latin-1") as f: + content = f.read() + + tiers: list[dict] = [] + tier_blocks = re.split(r"item\s*\[\d+\]:", content)[1:] + + for block in tier_blocks: + name_m = re.search(r'name\s*=\s*"([^"]*)"', block) + class_m = re.search(r'class\s*=\s*"([^"]*)"', block) + + tier: dict = { + "name": name_m.group(1) if name_m else "unknown", + "class": class_m.group(1) if class_m else "unknown", + "intervals": [], + } + + if "IntervalTier" in tier["class"]: + for ib in re.split(r"intervals\s*\[\d+\]:", block)[1:]: + xmin = re.search(r"xmin\s*=\s*([0-9.e+\-]+)", ib) + xmax = re.search(r"xmax\s*=\s*([0-9.e+\-]+)", ib) + text = re.search(r'text\s*=\s*"([^"]*)"', ib) + if xmin and xmax and text: + tier["intervals"].append( + { + "start": float(xmin.group(1)), + "end": float(xmax.group(1)), + "label": text.group(1), + } + ) + elif "TextTier" in tier["class"]: + for pb in re.split(r"points\s*\[\d+\]:", block)[1:]: + time = re.search(r"time\s*=\s*([0-9.e+\-]+)", pb) + mark = re.search(r'mark\s*=\s*"([^"]*)"', pb) + if time and mark: + tier["intervals"].append( + {"time": float(time.group(1)), "label": mark.group(1)} + ) + + tiers.append(tier) + + return tiers + + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + + +def _dataset_data_path(meta: dict) -> Path: + """Return the directory containing speaker subdirs (audio + .lab files).""" + if meta.get("cached"): + root = _get_dataset_root(meta["id"]) + if root: + return root / "cache" + return Path(meta["original_path"]) + + +def _find_textgrid(tg_root: Path, speaker: str, stem: str) -> Path | None: + """Probe common TextGrid layouts and return the first match.""" + candidates = [ + tg_root / speaker / f"{stem}.TextGrid", + tg_root / f"{stem}.TextGrid", + 
tg_root / "cache" / speaker / f"{stem}.TextGrid", + tg_root / "cache" / f"{stem}.TextGrid", + ] + return next((c for c in candidates if c.exists()), None) + + +def _find_lab(data_root: Path, speaker: str, stem: str) -> Path | None: + """Return the transcript file (.lab or .txt) for the given audio stem.""" + for ext in (".lab", ".txt"): + p = data_root / speaker / f"{stem}{ext}" + if p.exists(): + return p + return None + + +# --------------------------------------------------------------------------- +# TextGridTimeline +# --------------------------------------------------------------------------- + + +class TextGridTimeline(QWidget): + """Custom painted widget showing TextGrid tiers as a synchronized timeline. + + - One row per tier, each with time-scaled labeled interval blocks. + - A vertical red playhead tracks the current audio position. + - Clicking anywhere emits ``seek_requested`` with the target time in seconds. + """ + + seek_requested = pyqtSignal(float) # seconds + + TIER_HEIGHT = 36 + RULER_HEIGHT = 26 + LEFT_MARGIN = 92 # space reserved for tier name labels + RIGHT_MARGIN = 8 + + # Fixed colors for well-known tier names (case-insensitive match) + _TIER_COLOR_MAP: dict[str, QColor] = { + "words": QColor("#3498db"), # blue + "phones": QColor("#27ae60"), # green + } + + # Fallback palette for unknown tiers (indexed by position after known tiers) + _TIER_COLORS = [ + QColor("#e67e22"), # orange + QColor("#9b59b6"), # purple + QColor("#16a085"), # teal + QColor("#c0392b"), # red + QColor("#2980b9"), # dark blue + QColor("#8e44ad"), # dark purple + ] + + def __init__(self, parent=None): + super().__init__(parent) + self._tiers: list[dict] = [] + self._duration: float = 0.0 + self._current_time: float = 0.0 + self.setCursor(Qt.CursorShape.PointingHandCursor) + self.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed) + self.setFixedHeight(self.RULER_HEIGHT) + + _TIER_ORDER = {"phones": 0, "words": 1} + + def set_data(self, tiers: 
list[dict], duration: float) -> None: + self._tiers = sorted( + tiers, + key=lambda t: self._TIER_ORDER.get(t["name"].lower(), 2), + ) + self._duration = duration + self._current_time = 0.0 + self.setFixedHeight(self.RULER_HEIGHT + max(1, len(tiers)) * self.TIER_HEIGHT) + self.update() + + def set_current_time(self, seconds: float) -> None: + if abs(seconds - self._current_time) > 0.008: + self._current_time = seconds + self.update() + + def clear(self) -> None: + self._tiers = [] + self._duration = 0.0 + self._current_time = 0.0 + self.setFixedHeight(self.RULER_HEIGHT) + self.update() + + # ── coordinate conversion ───────────────────────────────────────────────── + + def _time_to_x(self, t: float) -> int: + if self._duration <= 0: + return self.LEFT_MARGIN + span = self.width() - self.LEFT_MARGIN - self.RIGHT_MARGIN + return self.LEFT_MARGIN + int(t / self._duration * span) + + def _x_to_time(self, x: float) -> float: + span = self.width() - self.LEFT_MARGIN - self.RIGHT_MARGIN + if span <= 0: + return 0.0 + return max(0.0, min(self._duration, (x - self.LEFT_MARGIN) / span * self._duration)) + + # ── painting ────────────────────────────────────────────────────────────── + + def paintEvent(self, _event): + painter = QPainter(self) + painter.setRenderHint(QPainter.RenderHint.Antialiasing, False) + w, h = self.width(), self.height() + + # Overall background + painter.fillRect(0, 0, w, h, QColor("#f8f9fa")) + + # ── Ruler ───────────────────────────────────────────────────────────── + painter.fillRect(0, 0, w, self.RULER_HEIGHT, QColor("#2c3e50")) + + if self._duration > 0: + ruler_font = QFont() + ruler_font.setPointSize(8) + ruler_font.setFamily("Courier") + painter.setFont(ruler_font) + painter.setPen(QColor("#ecf0f1")) + + available = w - self.LEFT_MARGIN - self.RIGHT_MARGIN + approx_ticks = max(2, available // 72) + raw_step = self._duration / approx_ticks + for nice in (0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30, 60): + if raw_step <= nice: + step = nice + break + 
else: + step = raw_step + + t = 0.0 + while t <= self._duration + step * 0.01: + x = self._time_to_x(t) + painter.drawLine(x, self.RULER_HEIGHT - 5, x, self.RULER_HEIGHT) + lbl = f"{t:.2f}s" if t < 1 else ( + f"{t:.1f}s" if t < 60 else f"{int(t // 60)}:{int(t % 60):02d}" + ) + painter.drawText( + x - 26, 1, 52, self.RULER_HEIGHT - 6, + Qt.AlignmentFlag.AlignHCenter | Qt.AlignmentFlag.AlignVCenter, + lbl, + ) + t += step + if t > self._duration * 20: + break + + # Left margin background + painter.fillRect( + 0, self.RULER_HEIGHT, + self.LEFT_MARGIN, h - self.RULER_HEIGHT, + QColor("#ecf0f1"), + ) + painter.setPen(QPen(QColor("#bdc3c7"), 1)) + painter.drawLine(self.LEFT_MARGIN, self.RULER_HEIGHT, self.LEFT_MARGIN, h) + + # ── Tiers ───────────────────────────────────────────────────────────── + name_font = QFont() + name_font.setPointSize(8) + name_font.setBold(True) + iv_font = QFont() + iv_font.setPointSize(8) + + fallback_idx = 0 + for idx, tier in enumerate(self._tiers): + y = self.RULER_HEIGHT + idx * self.TIER_HEIGHT + tier_key = tier["name"].lower() + if tier_key in self._TIER_COLOR_MAP: + color = self._TIER_COLOR_MAP[tier_key] + else: + color = self._TIER_COLORS[fallback_idx % len(self._TIER_COLORS)] + fallback_idx += 1 + is_interval = tier["class"] == "IntervalTier" + + # Alternating row background + row_bg = QColor("#ffffff") if idx % 2 == 0 else QColor("#f5f6fa") + painter.fillRect(self.LEFT_MARGIN, y, w - self.LEFT_MARGIN, self.TIER_HEIGHT, row_bg) + + # Tier name + painter.setFont(name_font) + painter.setPen(color.darker(150)) + painter.drawText( + 4, y, self.LEFT_MARGIN - 6, self.TIER_HEIGHT, + Qt.AlignmentFlag.AlignVCenter | Qt.AlignmentFlag.AlignLeft, + tier["name"], + ) + + # Intervals + painter.setFont(iv_font) + pad = 2 + for iv in tier.get("intervals", []): + if is_interval: + t_start, t_end = iv["start"], iv["end"] + iv_label = iv["label"] + else: + t_start = t_end = iv["time"] + iv_label = iv["label"] + + x1 = self._time_to_x(t_start) + x2 = 
self._time_to_x(t_end) if is_interval else x1 + 3 + bw = max(1, x2 - x1) + + active = is_interval and t_start <= self._current_time < t_end + silent = iv_label in _SILENCE_LABELS + + if active: + fill = color + elif silent: + fill = color.lighter(195) + else: + fill = color.lighter(155) + + painter.fillRect(x1, y + pad, bw, self.TIER_HEIGHT - pad * 2, fill) + + # Block border + painter.setPen(QPen(color.darker(115) if active else color.lighter(120), 0.5)) + painter.drawRect(x1, y + pad, bw, self.TIER_HEIGHT - pad * 2) + + # Label inside block + if bw > 10 and iv_label: + text_color = ( + QColor("white") if (active or not silent) + else color.darker(140) + ) + painter.setPen(text_color) + painter.drawText( + x1 + 2, y + pad, bw - 4, self.TIER_HEIGHT - pad * 2, + Qt.AlignmentFlag.AlignCenter, + iv_label, + ) + + # Row bottom border + painter.setPen(QPen(QColor("#dde1e7"), 1)) + painter.drawLine(self.LEFT_MARGIN, y + self.TIER_HEIGHT, w, y + self.TIER_HEIGHT) + + # ── Playhead ────────────────────────────────────────────────────────── + if self._duration > 0: + px = self._time_to_x(self._current_time) + painter.setPen(QPen(QColor("#e74c3c"), 2)) + painter.drawLine(px, 0, px, h) + + # Small downward triangle at ruler + ts = 5 + painter.setBrush(QColor("#e74c3c")) + painter.setPen(Qt.PenStyle.NoPen) + painter.drawPolygon( + QPolygon([QPoint(px - ts, 0), QPoint(px + ts, 0), QPoint(px, ts * 2)]) + ) + + def mousePressEvent(self, event): + if event.button() == Qt.MouseButton.LeftButton and self._duration > 0: + self.seek_requested.emit(self._x_to_time(event.position().x())) + + def resizeEvent(self, event): + super().resizeEvent(event) + self.update() + + +# --------------------------------------------------------------------------- +# ViewerStacker +# --------------------------------------------------------------------------- + + +class ViewerStacker(BaseStacker): + """Alignment viewer pipeline page. 
+ + Walk through: dataset → alignment → speaker → audio file, then display a + synchronized view of the TextGrid tiers (as a time-scaled interactive + timeline), the full transcript, and audio playback controls — all visible + at once. + """ + + def __init__(self, parent=None): + # Pre-declare all attributes so build_ui() (called by super().__init__) + # can reference them safely. + self._dataset_dropdown: MultiColumnComboBox | None = None + self._alignment_dropdown: MultiColumnComboBox | None = None + self._speaker_dropdown: QComboBox | None = None + self._file_list: QListWidget | None = None + self._file_search = None # QLineEdit, set in build_ui + self._all_audio_files: list[str] = [] + self._selection_section: QWidget | None = None + self._viewer_section: QWidget | None = None + self._timeline: TextGridTimeline | None = None + self._active_label: QLabel | None = None + self._transcript_edit: QTextEdit | None = None + self._audio_path_label: QLabel | None = None + self._current_dataset_meta: dict | None = None + self._current_alignment_meta: dict | None = None + self._current_data_path: Path | None = None + self._current_audio_path: Path | None = None + self._loaded_tiers: list[dict] = [] + # Multimedia (may remain None if QtMultimedia is unavailable) + self._player = None + self._audio_output = None + self._play_btn: QPushButton | None = None + self._seek_slider: QSlider | None = None + self._time_label: QLabel | None = None + super().__init__(parent) + + # ── BaseStacker overrides ──────────────────────────────────────────────── + + def get_title(self) -> str: + return "Alignment Viewer" + + def has_status_label(self) -> bool: + return True + + def build_ui(self): + # ── ① Dataset ──────────────────────────────────────────────────────── + self.content_layout.addWidget(self._make_section_label("① Choose a Dataset")) + + self._dataset_dropdown = MultiColumnComboBox() + self._dataset_dropdown.setStyleSheet(Containers.COMBOBOX_STANDARD) + 
self._dataset_dropdown.currentIndexChanged.connect(self._on_dataset_changed) + self.content_layout.addWidget(self._dataset_dropdown) + + # ── ② Alignment ────────────────────────────────────────────────────── + self.content_layout.addWidget(self._make_section_label("② Choose an Alignment")) + + self._alignment_dropdown = MultiColumnComboBox() + self._alignment_dropdown.setStyleSheet(Containers.COMBOBOX_STANDARD) + self._alignment_dropdown.set_data( + [{"id": None, "data": ("Select a dataset first", "", "", "")}], + ["Engine", "Model", "Date", "Status"], + placeholder="Select a dataset first", + ) + self._alignment_dropdown.setEnabled(False) + self._alignment_dropdown.currentIndexChanged.connect(self._on_alignment_changed) + self.content_layout.addWidget(self._alignment_dropdown) + + # ── ③/④ Speaker + File (hidden until alignment selected) ───────────── + self._selection_section = QWidget() + sel_col = QVBoxLayout(self._selection_section) + sel_col.setContentsMargins(0, 4, 0, 0) + sel_col.setSpacing(4) + + # Labels row — same stretch ratios as the controls row below so the + # ③ and ④ numbers land at the same x-positions as ① and ②. 
+ lbl_row = QHBoxLayout() + lbl_row.setContentsMargins(0, 0, 0, 0) + lbl_row.setSpacing(12) + lbl_row.addWidget(self._make_section_label("③ Speaker"), stretch=1) + lbl_row.addWidget(self._make_section_label("④ Audio File"), stretch=2) + sel_col.addLayout(lbl_row) + + # Controls row + ctrl_row = QHBoxLayout() + ctrl_row.setContentsMargins(0, 0, 0, 0) + ctrl_row.setSpacing(12) + + self._speaker_dropdown = QComboBox() + self._speaker_dropdown.setStyleSheet(Containers.COMBOBOX_STANDARD) + self._speaker_dropdown.currentTextChanged.connect(self._on_speaker_changed) + + spk_wrapper = QVBoxLayout() + spk_wrapper.setContentsMargins(0, 0, 0, 0) + spk_wrapper.setSpacing(0) + spk_wrapper.addWidget(self._speaker_dropdown) + spk_wrapper.addStretch() + ctrl_row.addLayout(spk_wrapper, stretch=1) + + file_controls = QVBoxLayout() + file_controls.setContentsMargins(0, 0, 0, 0) + file_controls.setSpacing(4) + + self._file_search = QLineEdit() + self._file_search.setPlaceholderText("Search files...") + self._file_search.setClearButtonEnabled(True) + self._file_search.setStyleSheet( + f"QLineEdit {{ border: 1px solid {Colors.BORDER}; border-radius: 4px; " + f"padding: 4px 6px; font-size: 12px; background: white; }}" + f"QLineEdit:focus {{ border-color: {Colors.PRIMARY}; }}" + ) + self._file_search.textChanged.connect(self._filter_file_list) + file_controls.addWidget(self._file_search) + + self._file_list = QListWidget() + self._file_list.setFixedHeight(96) + self._file_list.setStyleSheet(Containers.TABLE_WIDGET) + self._file_list.currentItemChanged.connect(self._on_file_selected) + file_controls.addWidget(self._file_list) + + ctrl_row.addLayout(file_controls, stretch=2) + sel_col.addLayout(ctrl_row) + + self._selection_section.setSizePolicy( + QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Maximum + ) + self._selection_section.setVisible(False) + self.content_layout.addWidget(self._selection_section) + + # ── Viewer (hidden until file selected) ────────────────────────────── + 
self._viewer_section = QWidget() + view_col = QVBoxLayout(self._viewer_section) + view_col.setContentsMargins(0, 6, 0, 0) + view_col.setSpacing(4) + + # Audio controls row ────────────────────────────────────────────────── + audio_row = QHBoxLayout() + audio_row.setSpacing(8) + + self._audio_path_label = QLabel("No file selected") + self._audio_path_label.setStyleSheet(Labels.INFO_SMALL) + self._audio_path_label.setWordWrap(True) + audio_row.addWidget(self._audio_path_label, stretch=1) + + if MULTIMEDIA_AVAILABLE: + self._play_btn = QPushButton("▶ Play") + self._play_btn.setFixedWidth(82) + self._play_btn.setStyleSheet(Buttons.PRIMARY) + self._play_btn.clicked.connect(self._toggle_playback) + audio_row.addWidget(self._play_btn) + + stop_btn = QPushButton("■ Stop") + stop_btn.setFixedWidth(72) + stop_btn.setStyleSheet(Buttons.SECONDARY) + stop_btn.clicked.connect(self._stop_playback) + audio_row.addWidget(stop_btn) + + self._time_label = QLabel("0:00 / 0:00") + self._time_label.setStyleSheet(Labels.INFO_SMALL) + self._time_label.setFixedWidth(92) + audio_row.addWidget(self._time_label) + else: + open_btn = QPushButton("Open Audio") + open_btn.setFixedWidth(100) + open_btn.setStyleSheet(Buttons.SECONDARY) + open_btn.clicked.connect(self._open_audio_externally) + audio_row.addWidget(open_btn) + + view_col.addLayout(audio_row) + + if MULTIMEDIA_AVAILABLE: + self._seek_slider = QSlider(Qt.Orientation.Horizontal) + self._seek_slider.setRange(0, 0) + self._seek_slider.sliderMoved.connect(self._seek_to_ms) + view_col.addWidget(self._seek_slider) + + # TextGrid timeline ─────────────────────────────────────────────────── + tg_header = QHBoxLayout() + tg_header.setContentsMargins(0, 4, 0, 0) + tg_lbl = QLabel("TextGrid Alignment") + tg_lbl.setStyleSheet(Labels.SECTION_LABEL) + tg_header.addWidget(tg_lbl) + tg_header.addStretch() + view_col.addLayout(tg_header) + + self._timeline = TextGridTimeline() + self._timeline.seek_requested.connect(self._seek_to_seconds) + 
self._timeline.setStyleSheet( + f"border: 1px solid {Colors.BORDER}; border-radius: 4px;" + ) + view_col.addWidget(self._timeline) + + # Active-segment indicator ──────────────────────────────────────────── + self._active_label = QLabel("") + self._active_label.setStyleSheet( + f"QLabel {{ font-size: 12px; font-weight: bold; color: {Colors.PRIMARY}; " + f"background-color: #ebf5fb; border-left: 3px solid {Colors.PRIMARY}; " + f"border-radius: 3px; padding: 3px 8px; }}" + ) + self._active_label.setWordWrap(True) + self._active_label.setVisible(False) + view_col.addWidget(self._active_label) + + # Transcript ────────────────────────────────────────────────────────── + tr_lbl = QLabel("Transcript") + tr_lbl.setStyleSheet(Labels.SECTION_LABEL) + view_col.addWidget(tr_lbl) + + self._transcript_edit = QTextEdit() + self._transcript_edit.setReadOnly(True) + self._transcript_edit.setFixedHeight(72) + self._transcript_edit.setPlaceholderText("No transcript (.lab) found for this file") + self._transcript_edit.setStyleSheet( + f"QTextEdit {{ border: 1px solid {Colors.BORDER}; border-radius: 4px; " + f"padding: 6px; font-size: 13px; background: white; }}" + ) + view_col.addWidget(self._transcript_edit) + + self._viewer_section.setVisible(False) + self.content_layout.addWidget(self._viewer_section) + + # Initialise multimedia player ──────────────────────────────────────── + if MULTIMEDIA_AVAILABLE: + self._audio_output = QAudioOutput() + self._player = QMediaPlayer() + self._player.setAudioOutput(self._audio_output) + self._player.playbackStateChanged.connect(self._on_playback_state_changed) + self._player.positionChanged.connect(self._on_position_changed) + self._player.durationChanged.connect(self._on_duration_changed) + + self.reload_datasets() + + # ── Reload hooks ───────────────────────────────────────────────────────── + + def reload_datasets(self): + """Refresh the dataset dropdown from storage.""" + if self._dataset_dropdown is None: + return + + 
self._dataset_dropdown.clear() + metas = datasets.list_datasets_metadata() + if metas: + rows = [ + {"id": m["id"], "data": (m["name"], m["registration_date"], m["description"])} + for m in metas + ] + self._dataset_dropdown.set_data( + rows, ["Name", "Date", "Description"], placeholder="Select a dataset" + ) + self._dataset_dropdown.setEnabled(True) + else: + self._dataset_dropdown.set_data( + [{"id": None, "data": ("No datasets registered", "", "")}], + ["Name", "Date", "Description"], + placeholder="No datasets registered", + ) + self._dataset_dropdown.setEnabled(False) + + if self._alignment_dropdown: + self._alignment_dropdown.set_data( + [{"id": None, "data": ("Select a dataset first", "", "", "")}], + ["Engine", "Model", "Date", "Status"], + placeholder="Select a dataset first", + ) + self._alignment_dropdown.setEnabled(False) + + if self._selection_section: + self._selection_section.setVisible(False) + if self._viewer_section: + self._viewer_section.setVisible(False) + + # ── Selection handlers ─────────────────────────────────────────────────── + + def _on_dataset_changed(self): + dataset_id = self._dataset_dropdown.itemData(self._dataset_dropdown.currentIndex()) + + self._selection_section.setVisible(False) + self._viewer_section.setVisible(False) + self._alignment_dropdown.clear() + + if not dataset_id: + self._alignment_dropdown.set_data( + [{"id": None, "data": ("Select a dataset first", "", "", "")}], + ["Engine", "Model", "Date", "Status"], + placeholder="Select a dataset first", + ) + self._alignment_dropdown.setEnabled(False) + return + + self._current_dataset_meta = datasets.get_dataset_metadata(dataset_id) + if not self._current_dataset_meta: + return + + self._current_data_path = _dataset_data_path(self._current_dataset_meta) + + al_list = alignments.list_alignments(dataset_id) + if al_list: + rows = [ + { + "id": a["id"], + "data": ( + a["engine_id"], + a["model_metadata"]["name"], + a["alignment_date"], + a["status"], + ), + } + for a in 
al_list + ] + self._alignment_dropdown.set_data( + rows, ["Engine", "Model", "Date", "Status"], placeholder="Select an alignment" + ) + self._alignment_dropdown.setEnabled(True) + else: + self._alignment_dropdown.set_data( + [{"id": None, "data": ("No alignments found", "", "", "")}], + ["Engine", "Model", "Date", "Status"], + placeholder="No alignments found", + ) + self._alignment_dropdown.setEnabled(False) + + def _on_alignment_changed(self): + alignment_id = self._alignment_dropdown.itemData(self._alignment_dropdown.currentIndex()) + + self._selection_section.setVisible(False) + self._viewer_section.setVisible(False) + + if not alignment_id or not self._current_dataset_meta: + return + + meta = alignments.get_alignment_metadata( + self._current_dataset_meta["id"], alignment_id + ) + if not meta: + return + + self._current_alignment_meta = meta + self._populate_speakers() + self._selection_section.setVisible(True) + self.set_status("Select a speaker and audio file to view alignment", "ready") + + def _populate_speakers(self): + self._speaker_dropdown.clear() + if not self._current_data_path or not self._current_data_path.exists(): + return + speakers = sorted( + d.name + for d in self._current_data_path.iterdir() + if d.is_dir() and not d.name.startswith(".") + ) + self._speaker_dropdown.addItems(speakers) + + def _on_speaker_changed(self, speaker: str): + self._file_list.clear() + self._all_audio_files = [] + self._viewer_section.setVisible(False) + if self._file_search: + self._file_search.blockSignals(True) + self._file_search.clear() + self._file_search.blockSignals(False) + + if not speaker or not self._current_data_path: + return + + spk_path = self._current_data_path / speaker + if not spk_path.exists(): + return + + self._all_audio_files = sorted( + f.name for f in spk_path.iterdir() if f.suffix.lower() in _AUDIO_EXTENSIONS + ) + self._file_list.addItems(self._all_audio_files) + + def _filter_file_list(self, query: str): + """Show only files whose names 
contain the search query (case-insensitive).""" + self._file_list.clear() + q = query.strip().lower() + matches = [f for f in self._all_audio_files if q in f.lower()] if q else self._all_audio_files + self._file_list.addItems(matches) + # Hide viewer if the previously selected file is no longer visible + if self._viewer_section and self._viewer_section.isVisible(): + self._viewer_section.setVisible(False) + + def _on_file_selected(self, item, _prev=None): + if not item: + self._viewer_section.setVisible(False) + return + + speaker = self._speaker_dropdown.currentText() + filename = item.text() + stem = Path(filename).stem + + if not self._current_data_path or not self._current_alignment_meta: + return + + audio_path = self._current_data_path / speaker / filename + tg_root = Path(self._current_alignment_meta["tg_path"]) + lab_path = _find_lab(self._current_data_path, speaker, stem) + tg_path = _find_textgrid(tg_root, speaker, stem) + + self._load_viewer(audio_path, lab_path, tg_path) + self._viewer_section.setVisible(True) + + # ── Viewer loading ──────────────────────────────────────────────────────── + + def _load_viewer( + self, + audio_path: Path, + lab_path: Path | None, + tg_path: Path | None, + ): + """Populate audio player, timeline, active-segment label, and transcript.""" + # ── Audio ───────────────────────────────────────────────────────────── + if audio_path.exists(): + self._audio_path_label.setText(str(audio_path)) + self._current_audio_path = audio_path + if MULTIMEDIA_AVAILABLE and self._player: + if self._player.playbackState() != QMediaPlayer.PlaybackState.StoppedState: + self._player.stop() + self._player.setSource(QUrl.fromLocalFile(str(audio_path))) + if self._play_btn: + self._play_btn.setText("▶ Play") + else: + self._audio_path_label.setText(f"Audio not found: {audio_path}") + self._current_audio_path = None + + # ── Transcript ──────────────────────────────────────────────────────── + if lab_path and lab_path.exists(): + 
self._transcript_edit.setPlainText(lab_path.read_text(encoding="utf-8").strip()) + else: + self._transcript_edit.setPlainText("") + self._transcript_edit.setPlaceholderText( + f"No .lab/.txt transcript found for {audio_path.stem}" + ) + + # ── TextGrid timeline ───────────────────────────────────────────────── + self._loaded_tiers = [] + self._timeline.clear() + self._active_label.setVisible(False) + + if tg_path and tg_path.exists(): + try: + tiers = _parse_textgrid(str(tg_path)) + if tiers: + # Derive duration from the last interval's end time + duration = 0.0 + for tier in tiers: + if tier["class"] == "IntervalTier" and tier["intervals"]: + duration = max(duration, tier["intervals"][-1]["end"]) + if duration <= 0 and MULTIMEDIA_AVAILABLE and self._player: + duration = self._player.duration() / 1000.0 + + self._loaded_tiers = tiers + self._timeline.set_data(tiers, duration) + self._active_label.setVisible(True) + except Exception as exc: + self._audio_path_label.setText( + f"{self._audio_path_label.text()} [TextGrid parse error: {exc}]" + ) + else: + self._audio_path_label.setText( + self._audio_path_label.text() + + f" [TextGrid not found in {Path(self._current_alignment_meta['tg_path'])}]" + ) + + # ── Status ──────────────────────────────────────────────────────────── + parts = [] + if audio_path.exists(): + parts.append("audio ready") + if lab_path and lab_path.exists(): + parts.append("transcript loaded") + if self._loaded_tiers: + parts.append(f"TextGrid loaded ({len(self._loaded_tiers)} tiers)") + self.set_status(" · ".join(parts) if parts else "File loaded", "success") + + # ── Audio player ────────────────────────────────────────────────────────── + + def _toggle_playback(self): + if not self._player: + return + if self._player.playbackState() == QMediaPlayer.PlaybackState.PlayingState: + self._player.pause() + else: + self._player.play() + + def _stop_playback(self): + if self._player: + self._player.stop() + + def _seek_to_ms(self, ms: int): + if 
self._player: + self._player.setPosition(ms) + + def _seek_to_seconds(self, seconds: float): + if self._player: + self._player.setPosition(int(seconds * 1000)) + + def _on_playback_state_changed(self, state): + if self._play_btn is None: + return + if state == QMediaPlayer.PlaybackState.PlayingState: + self._play_btn.setText("⏸ Pause") + else: + self._play_btn.setText("▶ Play") + + def _on_position_changed(self, position_ms: int): + # Update seek slider + if self._seek_slider: + self._seek_slider.blockSignals(True) + self._seek_slider.setValue(position_ms) + self._seek_slider.blockSignals(False) + + # Update time label + if self._time_label and self._player: + self._time_label.setText( + f"{self._fmt_ms(position_ms)} / {self._fmt_ms(self._player.duration())}" + ) + + # Advance timeline playhead + secs = position_ms / 1000.0 + if self._timeline: + self._timeline.set_current_time(secs) + + # Update active-segment label + if self._active_label and self._active_label.isVisible() and self._loaded_tiers: + parts = [] + for tier in self._loaded_tiers: + if tier["class"] == "IntervalTier": + for iv in tier["intervals"]: + if iv["start"] <= secs < iv["end"] and iv["label"] not in _SILENCE_LABELS: + parts.append(f"{tier['name']}: {iv['label']}") + break + self._active_label.setText(" | ".join(parts)) + + def _on_duration_changed(self, duration_ms: int): + if self._seek_slider: + self._seek_slider.setRange(0, duration_ms) + # Update timeline duration if tiers didn't provide a reliable value + if self._timeline and self._loaded_tiers and duration_ms > 0: + dur_s = duration_ms / 1000.0 + # Only override if timeline duration seems shorter than the audio + existing = self._timeline._duration + if existing < dur_s * 0.95: + self._timeline.set_data(self._loaded_tiers, dur_s) + + def _open_audio_externally(self): + path = self._current_audio_path + if not path: + return + if sys.platform == "darwin": + subprocess.Popen(["open", str(path)]) + elif sys.platform == "win32": + 
subprocess.Popen(["start", "", str(path)], shell=True) + else: + subprocess.Popen(["xdg-open", str(path)]) + + # ── Helpers ─────────────────────────────────────────────────────────────── + + @staticmethod + def _fmt_ms(ms: int) -> str: + s = ms // 1000 + return f"{s // 60}:{s % 60:02d}" + + @staticmethod + def _make_section_label(text: str) -> QLabel: + lbl = QLabel(text) + lbl.setStyleSheet(Labels.SECTION_LABEL) + return lbl + + +__all__ = ["TextGridTimeline", "ViewerStacker"] diff --git a/tests/gui/test_worker_thread.py b/tests/gui/test_worker_thread.py index 0136ca7..cc3655b 100644 --- a/tests/gui/test_worker_thread.py +++ b/tests/gui/test_worker_thread.py @@ -7,6 +7,7 @@ def test_success_signal(self, qtbot): with qtbot.waitSignal(worker.finished, timeout=2000) as blocker: worker.start() + worker.wait() success, message = blocker.args assert success is True @@ -17,6 +18,7 @@ def test_success_none_returns_default_message(self, qtbot): with qtbot.waitSignal(worker.finished, timeout=2000) as blocker: worker.start() + worker.wait() success, message = blocker.args assert success is True @@ -30,6 +32,7 @@ def failing(): with qtbot.waitSignal(worker.finished, timeout=2000) as blocker: worker.start() + worker.wait() success, message = blocker.args assert success is False @@ -43,6 +46,7 @@ def bad_op(): with qtbot.waitSignal(worker.finished, timeout=2000) as blocker: worker.start() + worker.wait() success, message = blocker.args assert success is False diff --git a/uv.lock b/uv.lock index 3401869..78c3ae6 100644 --- a/uv.lock +++ b/uv.lock @@ -158,6 +158,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, ] +[[package]] +name = "alignment-comparison-plots" +version = "0.1.0" +source = { git = 
"https://github.com/WISCLab/alignment-comparison-plots#02b4d077470313f091de22216b87787bd7046c96" } +dependencies = [ + { name = "matplotlib" }, + { name = "praat-textgrids" }, + { name = "pyqt6" }, +] + [[package]] name = "altgraph" version = "0.17.5" @@ -717,7 +727,7 @@ name = "cryptography" version = "46.0.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, + { name = "cffi", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" } wheels = [ @@ -1577,7 +1587,7 @@ name = "macholib" version = "1.16.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "altgraph" }, + { name = "altgraph", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/10/2f/97589876ea967487978071c9042518d28b958d87b17dceb7cdc1d881f963/macholib-1.16.4.tar.gz", hash = "sha256:f408c93ab2e995cd2c46e34fe328b130404be143469e41bc366c807448979362", size = 59427, upload-time = "2025-11-22T08:28:38.373Z" } wheels = [ @@ -2185,7 +2195,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size 
= 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -2196,7 +2206,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -2223,9 +2233,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, @@ -2236,7 +2246,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = 
"https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, @@ -2533,6 +2543,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/2d/d4bf65e47cea8ff2c794a600c4fd1273a7902f268757c531e0ee9f18aa58/pooch-1.9.0-py3-none-any.whl", hash = "sha256:f265597baa9f760d25ceb29d0beb8186c243d6607b0f60b83ecf14078dbc703b", size = 67175, upload-time = "2026-01-30T19:15:08.36Z" }, ] +[[package]] +name = "praat-textgrids" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a0/15/8bb4cc6198a46ea9727fb6049ae343e5d306286004bae55f86351c9b1a94/praat-textgrids-1.4.0.tar.gz", hash = "sha256:57d86adcbb01722e732a898e37c85833a6326731e2c97802b18793ef1a64602c", size = 25473, upload-time = "2022-10-12T19:59:53.586Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/19/8b/de9acaf09cf119f3526dc9fdee6aa36f6ea12c70997e1b219bf5992549c9/praat_textgrids-1.4.0-py3-none-any.whl", hash = "sha256:288ebf4061f2994adc0a6110df28eebd3b7fb0c68e3196e1f222d6b4f33fa823", size = 25669, upload-time = "2022-10-12T19:59:51.199Z" }, +] + [[package]] name = "praatio" version = "6.2.2" @@ -2939,6 +2958,7 @@ version = "0.1.0" source = { editable = "." 
} dependencies = [ { name = "accelerate" }, + { name = "alignment-comparison-plots" }, { name = "datasets" }, { name = "faster-whisper" }, { name = "keyring" }, @@ -2976,6 +2996,7 @@ installation = [ [package.metadata] requires-dist = [ { name = "accelerate", specifier = ">=1.11.0" }, + { name = "alignment-comparison-plots", git = "https://github.com/WISCLab/alignment-comparison-plots" }, { name = "datasets", specifier = ">=4.3.0" }, { name = "faster-whisper", specifier = ">=1.1.0" }, { name = "keyring", specifier = ">=25.6.0" }, @@ -3608,8 +3629,8 @@ name = "secretstorage" version = "3.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cryptography" }, - { name = "jeepney" }, + { name = "cryptography", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "jeepney", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } wheels = [ @@ -4017,7 +4038,7 @@ name = "triton" version = "3.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools" }, + { name = "setuptools", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/7d/39/43325b3b651d50187e591eefa22e236b2981afcebaefd4f2fc0ea99df191/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467", size = 155531138, upload-time = "2025-07-30T19:58:29.908Z" },