Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions .github/workflows/tests-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@ jobs:
python-version: '3.11'

- name: Install uv
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
shell: bash
uses: astral-sh/setup-uv@v3

- name: Configure Git for private repos
run: |
Expand Down
30 changes: 25 additions & 5 deletions .github/workflows/tests-ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,31 @@ jobs:
with:
python-version: '3.11'

- name: Install uv
- name: Install Qt system dependencies
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
shell: bash
sudo apt-get update
sudo apt-get install -y \
libegl1 \
libgl1 \
libglib2.0-0 \
libdbus-1-3 \
libxkbcommon0 \
libxcb-icccm4 \
libxcb-image0 \
libxcb-keysyms1 \
libxcb-randr0 \
libxcb-render-util0 \
libxcb-shape0 \
libxcb-sync1 \
libxcb-xfixes0 \
libxcb-xinerama0 \
libxcb-cursor0 \
libfontconfig1 \
libxkbcommon-x11-0 \
xvfb

- name: Install uv
uses: astral-sh/setup-uv@v3

- name: Configure Git for private repos
run: |
Expand All @@ -40,7 +60,7 @@ jobs:

- name: Run tests
run: |
uv run invoke run-tests
xvfb-run -a uv run invoke run-tests

- name: Run linting
run: |
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/tests-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@ jobs:
python-version: '3.11'

- name: Install uv
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
shell: bash
uses: astral-sh/setup-uv@v3

- name: Configure Git for private repos
run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ ENV/
build/
dist/
downloads/
installer/windows/output/
tools/
.eggs/
.eggs/**
*.egg-info/
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ repos:
hooks:
- id: shredguard-check
name: shredguard check
entry: shredguard check
entry: uv run --only-group dev shredguard check
language: system
types: [text]

Expand Down
Binary file added assets/voxkit.ico
Binary file not shown.
8 changes: 8 additions & 0 deletions installer/linux/VoxKit.desktop
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[Desktop Entry]
Name=VoxKit
Exec=VoxKit
Icon=voxkit
Type=Application
Categories=Science;Audio;Education;
Comment=AI/ML Speech Pathology Analysis
Terminal=false
42 changes: 42 additions & 0 deletions installer/windows/VoxKit.iss
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; Inno Setup script that wraps the built VoxKit executable (dist\VoxKit.exe,
; referenced relative to this script) into a Windows installer.

; Preprocessor constants; referenced below as {#Name}.
; NOTE(review): AppVersion is hard-coded here - presumably it has to be bumped
; together with the project version on each release; confirm the release process.
#define AppName "VoxKit"
#define AppVersion "0.4.1"
#define AppPublisher "Brain Behavior Analytics Lab"
#define AppURL "https://github.com/BrainBehaviorAnalyticsLab/voxkit-desktop"
#define AppExeName "VoxKit.exe"

[Setup]
; The doubled '{{' is the escaped form of a literal '{' in Inno Setup values.
; NOTE(review): AppId should stay constant across releases so upgrades are
; recognised as the same application - confirm before ever changing it.
AppId={{A3F2C1D4-8B7E-4F6A-9D2E-1C5B3A7E8F90}
AppName={#AppName}
AppVersion={#AppVersion}
AppPublisher={#AppPublisher}
AppPublisherURL={#AppURL}
AppSupportURL={#AppURL}
AppUpdatesURL={#AppURL}
DefaultDirName={autopf}\{#AppName}
DefaultGroupName={#AppName}
DisableProgramGroupPage=yes
; Relative paths: the compiled installer goes to an "output" folder and the
; wizard icon comes from the repository-level assets folder two levels up.
OutputDir=output
OutputBaseFilename=VoxKit-setup-windowsOS
SetupIconFile=..\..\assets\voxkit.ico
Compression=lzma
SolidCompression=yes
WizardStyle=modern
; Installs with the lowest privilege level.
; NOTE(review): presumably this makes the {auto*} constants used in this script
; resolve to per-user locations - confirm against the Inno Setup documentation.
PrivilegesRequired=lowest
; NOTE(review): the "x64compatible" identifier presumably needs a recent
; Inno Setup 6.x compiler - confirm the version used by the build machine.
ArchitecturesAllowed=x64compatible
ArchitecturesInstallIn64BitMode=x64compatible

[Languages]
Name: "english"; MessagesFile: "compiler:Default.isl"

[Tasks]
; Optional task; the desktop shortcut in [Icons] is tied to it.
Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"

[Files]
; The single executable is the only payload copied into {app}.
Source: "..\..\dist\{#AppExeName}"; DestDir: "{app}"; Flags: ignoreversion

[Icons]
; First entry is unconditional; the desktop entry is created only when the
; "desktopicon" task is selected. Both reuse the icon embedded in the exe.
Name: "{autoprograms}\{#AppName}"; Filename: "{app}\{#AppExeName}"; IconFilename: "{app}\{#AppExeName}"
Name: "{autodesktop}\{#AppName}"; Filename: "{app}\{#AppExeName}"; IconFilename: "{app}\{#AppExeName}"; Tasks: desktopicon

[Run]
; Post-install launch entry (nowait, skipped for silent installs). StringChange
; doubles any '&' in the app name before it is placed in the label text.
Filename: "{app}\{#AppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(AppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent
37 changes: 31 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,37 @@
import sys
import multiprocessing
import os

# CRITICAL: Must run before any heavy imports (voxkit, torch, PyQt6, etc.).
# When PyTorch DataLoader workers spawn on Windows, each child re-launches the
# frozen exe and re-imports this module. freeze_support() short-circuits the
# child before it re-runs main() and tries to open another GUI window.
if __name__ == "__main__":
multiprocessing.freeze_support()

# In a --windowed PyInstaller build there is no console, so sys.stdout and
# sys.stderr are None. Libraries like tqdm (used by transformers' Trainer)
# crash with AttributeError: 'NoneType' object has no attribute 'write' when
# they try to print progress. Redirect to devnull so writes silently no-op.
if sys.stdout is None:
sys.stdout = open(os.devnull, "w", encoding="utf-8")
if sys.stderr is None:
sys.stderr = open(os.devnull, "w", encoding="utf-8")

import faulthandler
import logging
import os
import multiprocessing

# Windows: configure console and stdout/stderr for UTF-8 before any output.
# Without this, rich's legacy renderer falls back to cp1252 and chokes on
# Unicode characters (e.g., circled letters in pipeline config YAML).
if sys.platform == 'win32':
try:
import ctypes
ctypes.windll.kernel32.SetConsoleOutputCP(65001)
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
sys.stderr.reconfigure(encoding='utf-8', errors='replace')
except Exception:
pass

# Apply patches for frozen (PyInstaller) environment BEFORE other imports
if getattr(sys, 'frozen', False):
Expand All @@ -17,10 +46,6 @@
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
log = logging.getLogger("voxkit.main")

# CRITICAL: Must be at the top for frozen apps using multiprocessing
if __name__ == "__main__":
multiprocessing.freeze_support()

# Enable detailed crash reports
faulthandler.enable()

Expand Down
17 changes: 13 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ dependencies = [
"pyqt6>=6.9.1",
"torch==2.8.0",
"torchaudio==2.8.0",
"pypllrcomputer @ git+https://github.com/BrainBehaviorAnalyticsLab/PyPLLRComputer.git@48a0c934e75f73235ba5a002538b8588fd6697e7",
"wav2textgrid @ git+https://github.com/pkadambi/Wav2TextGrid.git@3862476185a7558d481f1ae1c9cbca2e48c825a0",
"pypllrcomputer",
"wav2textgrid",
"datasets>=4.3.0",
"accelerate>=1.11.0",
"keyring>=25.6.0",
Expand All @@ -28,7 +28,8 @@ dependencies = [
"pyyaml>=6.0.0",
"rich>=14.2.0",
"faster-whisper>=1.1.0",
"alignment-comparison-plots==0.1.1",
"alignment-comparison-plots",
"speechbrain>=1.0.3",
]

[dependency-groups]
Expand Down Expand Up @@ -58,6 +59,12 @@ Homepage = "https://github.com/BrainBehaviorAnalyticsLab/voxkit-desktop"
Repository = "https://github.com/BrainBehaviorAnalyticsLab/voxkit-desktop"
Issues = "https://github.com/BrainBehaviorAnalyticsLab/voxkit-desktop/issues"

[tool.uv.sources]
pypllrcomputer = { git = "https://github.com/pkadambi/PyPhonemePronunciationScorer", branch = "voxkit-windows-variant" }
wav2textgrid = { git = "https://github.com/pkadambi/Wav2TextGrid", branch = "voxkit-windows-variant" }
alignment-comparison-plots = { git = "https://github.com/WISCLab/alignment-comparison-plots", branch = "voxkit-windows-variant" }
speechbrain = { git = "https://github.com/BeckettFrey/speechbrain.git", branch = "fix/windows-lazy-import-inspect-path" }

[tool.ruff]
line-length = 100
target-version = "py310"
Expand Down Expand Up @@ -120,7 +127,9 @@ exclude = [
"build.py",
"example_startup_script.py",
"test_imports.py",
"tests/"
"tests/",
"build/",
"dist/",
]


Expand Down
23 changes: 21 additions & 2 deletions src/voxkit/analyzers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@
Extracts speaker count and audio file counts per speaker directory.
Includes a bar chart visualization for quick dataset overview.

**ClipDurationStatisticsAnalyzer**
Reads audio metadata to compute total, average, min, and max clip duration
per speaker. Includes a bar chart visualization of total duration.

**AudioFormatProfileAnalyzer**
Reads audio metadata to surface each speaker's dominant sample rate and
channel count, and counts the files whose sample rate deviates from it.

Output Structure
----------------
Analyzer output is stored alongside dataset metadata::
Expand All @@ -35,7 +43,9 @@

from typing import List

from .audio_format_profile import AudioFormatProfileAnalyzer
from .base import DatasetAnalyzer
from .clip_duration_statistics import ClipDurationStatisticsAnalyzer
from .default_analyzer import DefaultAnalyzer


Expand Down Expand Up @@ -69,9 +79,18 @@ def get_analyzer(self, analyzer_id: str) -> DatasetAnalyzer:
raise ValueError(f"No analyzer with id: {analyzer_id}")


default_analyzer_instance = DefaultAnalyzer()
_default = DefaultAnalyzer()
_duration = ClipDurationStatisticsAnalyzer()
_format = AudioFormatProfileAnalyzer()

# Singleton instance for unified export/interface
ManageAnalyzers = AnalyzerManager({default_analyzer_instance.name: default_analyzer_instance})
ManageAnalyzers = AnalyzerManager(
{
_default.name: _default,
_duration.name: _duration,
_format.name: _format,
}
)

__all__ = [
"ManageAnalyzers",
Expand Down
84 changes: 84 additions & 0 deletions src/voxkit/analyzers/audio_format_profile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""Audio Format Profile Analyzer.

Reads audio metadata to surface sample rate and channel count per speaker,
decoding a file only when its metadata reports no usable sample rate. Reports
how many of each speaker's files deviate from that speaker's dominant sample rate.

Output Columns
--------------
- **speaker_id**: Name of the speaker subdirectory
- **file_count**: Number of audio files scanned
- **dominant_sample_rate_hz**: Most common sample rate across the speaker's files
- **dominant_channels**: Most common channel count (1 = mono, 2 = stereo)
- **inconsistent_files**: Number of files that differ from the dominant sample rate
"""

import logging
import os
from collections import Counter
from pathlib import Path
from typing import Any, Dict, List

from .base import DatasetAnalyzer

logger = logging.getLogger(__name__)


class AudioFormatProfileAnalyzer(DatasetAnalyzer):
    """Per-speaker audio format profile: sample rate, channels, and consistency.

    Every immediate subdirectory of the dataset is treated as one speaker.
    For each speaker the analyzer records the most common sample rate and
    channel count among its audio files, plus how many files use a sample
    rate other than the dominant one.
    """

    # Lower-case file suffixes that are treated as audio files.
    _AUDIO_EXTENSIONS = frozenset({".wav", ".flac", ".mp3", ".ogg", ".m4a"})

    @property
    def name(self) -> str:
        """Human-readable analyzer name."""
        return "Audio Format Profile"

    @property
    def description(self) -> str:
        """One-line summary of what the analyzer reports."""
        return "Sample rate, channel count, and format consistency per speaker"

    def analyze(self, dataset_path: str) -> List[Dict[str, Any]]:
        """Scan ``dataset_path`` and return one row per speaker directory.

        Args:
            dataset_path: Directory whose immediate subdirectories each hold
                one speaker's audio files.

        Returns:
            A list of dicts with the keys ``speaker_id``, ``file_count``,
            ``dominant_sample_rate_hz``, ``dominant_channels`` and
            ``inconsistent_files``. Speakers without any readable audio file
            are omitted. Failures are logged rather than raised, so the list
            may be partial or empty.
        """
        # Imported lazily so that merely importing this module does not load
        # torchaudio.
        # NOTE(review): torchaudio.info/load are reportedly deprecated in
        # recent torchaudio releases - confirm before upgrading the pin.
        import torchaudio

        results: List[Dict[str, Any]] = []
        try:
            # The context manager closes the directory handle even on error.
            with os.scandir(dataset_path) as speaker_entries:
                for entry in speaker_entries:
                    if not entry.is_dir():
                        continue
                    try:
                        row = self._profile_speaker(torchaudio, entry)
                    except OSError as e:
                        # One unreadable speaker directory must not abort
                        # the scan of the remaining speakers.
                        logger.warning("Skipping speaker %s: %s", entry.path, e)
                        continue
                    if row:
                        results.append(row)
        except Exception as e:
            # Use the logger instead of print(): stdout may be unavailable
            # in a windowed build, and the per-file path already logs.
            logger.exception("Error analyzing dataset: %s", e)

        return results

    def _profile_speaker(self, torchaudio: Any, entry: Any) -> Dict[str, Any]:
        """Build the result row for one speaker directory.

        Args:
            torchaudio: The already-imported ``torchaudio`` module.
            entry: ``os.DirEntry`` of the speaker directory.

        Returns:
            The row dict, or an empty dict when no audio file in the
            directory could be read.
        """
        sample_rates: List[int] = []
        channels: List[int] = []

        with os.scandir(entry.path) as file_entries:
            for f in file_entries:
                if not f.is_file():
                    continue
                if Path(f.name).suffix.lower() not in self._AUDIO_EXTENSIONS:
                    continue
                try:
                    info = torchaudio.info(f.path)
                    if info.sample_rate > 0:
                        sample_rates.append(info.sample_rate)
                        channels.append(info.num_channels)
                    else:
                        # Metadata carried no usable sample rate: decode the
                        # file and read the values off the loaded waveform
                        # (first dimension is taken as the channel count).
                        waveform, sr = torchaudio.load(f.path)
                        sample_rates.append(sr)
                        channels.append(waveform.shape[0])
                except Exception as e:
                    # Best effort: an unreadable file is skipped, not fatal.
                    logger.warning("Skipping %s: %s", f.path, e)

        if not sample_rates:
            return {}

        dominant_sr = Counter(sample_rates).most_common(1)[0][0]
        dominant_ch = Counter(channels).most_common(1)[0][0]
        # Consistency is judged on sample rate only; channel count is
        # reported but not compared.
        inconsistent = sum(1 for sr in sample_rates if sr != dominant_sr)

        return {
            "speaker_id": entry.name,
            # Counts only the files whose format could actually be read.
            "file_count": len(sample_rates),
            "dominant_sample_rate_hz": dominant_sr,
            "dominant_channels": dominant_ch,
            "inconsistent_files": inconsistent,
        }
Loading
Loading