From b4dc946a2677533af47532d79d03e6c6e6ad3141 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Sun, 8 Mar 2026 17:02:59 +0000 Subject: [PATCH 1/5] Silence auto-stop: consider both system audio and mic levels Add silence timeout feature that auto-stops recording after sustained silence. The Swift helper monitors audio levels from both system audio and mic (when --mic is used), only triggering timeout when both sources are silent. Mic uses a higher noise threshold (1e-2) to filter ambient room noise. Muted mic counts as silence. Also adds sounddevice backend support for silence timeout, README docs, and config option (silence_timeout = 300 default). --- README.md | 22 ++- src/ownscribe/audio/base.py | 5 + src/ownscribe/audio/coreaudio.py | 19 ++- src/ownscribe/audio/sounddevice_recorder.py | 30 +++- src/ownscribe/cli.py | 4 + src/ownscribe/config.py | 2 + src/ownscribe/pipeline.py | 29 +++- swift/Sources/AudioCapture.swift | 143 +++++++++++++++----- 8 files changed, 207 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 04d7ac5..9d1e752 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,20 @@ Record, transcribe, and summarize meetings and system audio entirely on your mac > System audio capture requires **macOS 14.2 or later**. Other platforms can use the sounddevice backend with an external audio source. +## Table of Contents + +- [Privacy](#privacy) +- [Features](#features) +- [Requirements](#requirements) +- [Installation](#installation) +- [Usage](#usage) +- [Configuration](#configuration) +- [Summarization Templates](#summarization-templates) +- [Speaker Diarization](#speaker-diarization) +- [Acknowledgments](#acknowledgments) +- [Contributing](#contributing) +- [License](#license) + ## Privacy ownscribe **does not**: @@ -36,6 +50,7 @@ All audio, transcripts, and summaries remain local. - **Summarization templates** — built-in presets for meetings, lectures, and quick briefs; define your own in config - **Ask your meetings** — ask natural-language questions across all your meeting notes; uses a two-stage LLM pipeline with keyword fallback
ownscribe ask demo +- **Silence auto-stop** — automatically stops recording after sustained silence (default: 5 minutes, configurable) - **One command** — just run `ownscribe`, press Ctrl+C when done, get transcript + summary ## Requirements @@ -90,7 +105,7 @@ ownscribe # records system audio, Ctrl+C to stop ``` This will: -1. Capture system audio until you press Ctrl+C +1. Capture system audio until you press Ctrl+C (or auto-stop after 5 minutes of silence) 2. Transcribe with WhisperX 3. Summarize with your local LLM 4. Save everything to `~/ownscribe/YYYY-MM-DD_HHMMSS/` @@ -105,11 +120,13 @@ ownscribe --mic-device "MacBook Pro Microphone" # capture system audio + specifi ownscribe --device "MacBook Pro Microphone" # use mic instead of system audio ownscribe --no-summarize # skip LLM summarization ownscribe --diarize # enable speaker identification -ownscribe --language en # set transcription language (default: auto-detect) +ownscribe --language en # set transcription language (default: auto-detect) ownscribe --model large-v3 # use a larger Whisper model ownscribe --format json # output as JSON instead of markdown ownscribe --no-keep-recording # auto-delete WAV files after transcription ownscribe --template lecture # use the lecture summarization template +ownscribe --silence-timeout 600 # auto-stop after 10 minutes of silence +ownscribe --silence-timeout 0 # disable silence auto-stop ``` ### Subcommands @@ -160,6 +177,7 @@ backend = "coreaudio" # "coreaudio" or "sounddevice" device = "" # empty = system audio mic = false # also capture microphone input mic_device = "" # specific mic device name (empty = default) +silence_timeout = 300 # seconds of silence before auto-stop; 0 = disabled [transcription] model = "base" # tiny, base, small, medium, large-v3 diff --git a/src/ownscribe/audio/base.py b/src/ownscribe/audio/base.py index 5011f00..ec3be99 100644 --- a/src/ownscribe/audio/base.py +++ b/src/ownscribe/audio/base.py @@ -28,3 +28,8 @@ def toggle_mute(self) -> None: # noqa: B027 def is_muted(self) -> bool: """Whether the microphone is currently muted.""" return False + + @property + def is_recording(self) -> bool: + """Whether the recorder is still actively recording.""" + return True diff --git a/src/ownscribe/audio/coreaudio.py b/src/ownscribe/audio/coreaudio.py index 8401247..801bd2c 100644 --- a/src/ownscribe/audio/coreaudio.py +++ b/src/ownscribe/audio/coreaudio.py @@ -78,12 +78,14 @@ def _find_binary() -> Path | None: class CoreAudioRecorder(AudioRecorder): """Records system audio using the ownscribe-audio Swift helper.""" - def __init__(self, mic: bool = False, mic_device: str = "") -> None: + def __init__(self, mic: bool = False, mic_device: str = "", silence_timeout: int = 0) -> None: self._mic = mic self._mic_device = mic_device + self._silence_timeout = silence_timeout self._process: subprocess.Popen | None = None self._binary = _find_binary() self._silence_warning: bool = False + self._silence_timed_out: bool = False self._muted: bool = False def is_available(self) -> bool: @@ -98,6 +100,8 @@ def start(self, output_path: Path) -> None: cmd.append("--mic") if self._mic_device: cmd.extend(["--mic-device", self._mic_device]) + if self._silence_timeout > 0: + cmd.extend(["--silence-timeout", str(self._silence_timeout)]) self._process = subprocess.Popen( cmd, @@ -121,6 +125,14 @@ def toggle_mute(self) -> None: def is_muted(self) -> bool: return self._muted + @property + def is_recording(self) -> bool: + return self._process is not None and self._process.poll() is None + + @property + def silence_timed_out(self) -> bool: + return self._silence_timed_out + def stop(self) -> None: if self._process and self._process.poll() is None: self._process.send_signal(signal.SIGINT) @@ -138,11 +150,14 @@ def stop(self) -> None: if stderr_output: if "[SILENCE_WARNING]" in stderr_output: self._silence_warning = True + if "[SILENCE_TIMEOUT]" in stderr_output: + self._silence_timed_out = True # Filter out mute toggles and known informational lines _NOISE_PREFIXES = ("Recording ", "Saved ", "Merged audio saved") + _NOISE_LINES = ("[MIC_MUTED]", "[MIC_UNMUTED]", "[SILENCE_TIMEOUT]") lines = [ line for line in stderr_output.strip().splitlines() - if line not in ("[MIC_MUTED]", "[MIC_UNMUTED]") + if line not in _NOISE_LINES and not line.startswith(_NOISE_PREFIXES) ] if lines: diff --git a/src/ownscribe/audio/sounddevice_recorder.py b/src/ownscribe/audio/sounddevice_recorder.py index 39c737e..119e1aa 100644 --- a/src/ownscribe/audio/sounddevice_recorder.py +++ b/src/ownscribe/audio/sounddevice_recorder.py @@ -3,24 +3,37 @@ from __future__ import annotations import threading +import time as _time from pathlib import Path +import numpy as np import sounddevice as sd import soundfile as sf from ownscribe.audio.base import AudioRecorder +_SILENCE_THRESHOLD = 1e-4 # ~-80 dB + class SoundDeviceRecorder(AudioRecorder): """Records from any audio input device using sounddevice + soundfile.""" - def __init__(self, device: str | int | None = None, samplerate: int = 48000, channels: int = 1) -> None: + def __init__( + self, + device: str | int | None = None, + samplerate: int = 48000, + channels: int = 1, + silence_timeout: int = 0, + ) -> None: self._device = device self._samplerate = samplerate self._channels = channels + self._silence_timeout = silence_timeout self._stream: sd.InputStream | None = None self._file: sf.SoundFile | None = None self._lock = threading.Lock() + self._last_loud_time: float = 0.0 + self._timed_out: bool = False def is_available(self) -> bool: try: @@ -30,6 +43,9 @@ def is_available(self) -> bool: return False def start(self, output_path: Path) -> None: + self._last_loud_time = _time.monotonic() + self._timed_out = False + self._file = sf.SoundFile( str(output_path), mode="w", @@ -44,6 +60,14 @@ def callback(indata, frames, time, status): if self._file is not None: self._file.write(indata.copy()) + # Silence tracking + if self._silence_timeout > 0: + peak = np.max(np.abs(indata)) + if peak > _SILENCE_THRESHOLD: + self._last_loud_time = _time.monotonic() + elif _time.monotonic() - self._last_loud_time > self._silence_timeout: + self._timed_out = True + self._stream = sd.InputStream( device=self._device, samplerate=self._samplerate, @@ -52,6 +76,10 @@ def callback(indata, frames, time, status): ) self._stream.start() + @property + def is_recording(self) -> bool: + return not self._timed_out and self._stream is not None + def stop(self) -> None: if self._stream is not None: self._stream.stop() diff --git a/src/ownscribe/cli.py b/src/ownscribe/cli.py index 3bcb35f..57cf7b3 100644 --- a/src/ownscribe/cli.py +++ b/src/ownscribe/cli.py @@ -45,6 +45,7 @@ def _dir_size(path: str) -> str: help="Keep or delete WAV recordings after transcription.", ) @click.option("--template", default=None, help="Summarization template (meeting, lecture, brief, or custom).") +@click.option("--silence-timeout", default=None, type=int, help="Seconds of silence before auto-stopping recording (0 to disable).") @click.pass_context def cli( ctx: click.Context, @@ -58,6 +59,7 @@ def cli( mic_device: str | None, keep_recording: bool | None, template: str | None, + silence_timeout: int | None, ) -> None: """Fully local meeting transcription and summarization. @@ -89,6 +91,8 @@ def cli( config.output.keep_recording = keep_recording if template: config.summarization.template = template + if silence_timeout is not None: + config.audio.silence_timeout = silence_timeout ctx.obj["config"] = config diff --git a/src/ownscribe/config.py b/src/ownscribe/config.py index f7dacd5..7233fad 100644 --- a/src/ownscribe/config.py +++ b/src/ownscribe/config.py @@ -16,6 +16,7 @@ device = "" # empty = system audio; or device name/index for sounddevice mic = false # also capture microphone input mic_device = "" # specific mic device name (empty = default) +silence_timeout = 300 # seconds of silence before auto-stop; 0 = disabled [transcription] model = "base" # whisper model: tiny, base, small, medium, large-v3 @@ -55,6 +56,7 @@ class AudioConfig: device: str = "" mic: bool = False mic_device: str = "" + silence_timeout: int = 300 # seconds of silence before auto-stop; 0 = disabled @dataclass diff --git a/src/ownscribe/pipeline.py b/src/ownscribe/pipeline.py index dff6f43..d26060e 100644 --- a/src/ownscribe/pipeline.py +++ b/src/ownscribe/pipeline.py @@ -71,7 +71,11 @@ def _create_recorder(config: Config): if config.audio.backend == "coreaudio" and not config.audio.device: from ownscribe.audio.coreaudio import CoreAudioRecorder - recorder = CoreAudioRecorder(mic=config.audio.mic, mic_device=config.audio.mic_device) + recorder = CoreAudioRecorder( + mic=config.audio.mic, + mic_device=config.audio.mic_device, + silence_timeout=config.audio.silence_timeout, + ) if recorder.is_available(): return recorder click.echo("Core Audio helper not found, falling back to sounddevice.") @@ -82,7 +86,7 @@ def _create_recorder(config: Config): # Try to parse as int (device index) if isinstance(device, str) and device.isdigit(): device = int(device) - return SoundDeviceRecorder(device=device) + return SoundDeviceRecorder(device=device, silence_timeout=config.audio.silence_timeout) def _create_transcriber(config: Config, progress=None): @@ -159,10 +163,18 @@ def run_pipeline(config: Config) -> None: can_mute = isinstance(recorder, CoreAudioRecorder) and config.audio.mic is_tty = sys.stdin.isatty() - hint = " Press Ctrl+C to stop." + hints = [] if can_mute and is_tty: - hint = " Press 'm' to mute/unmute mic, Ctrl+C to stop." - click.echo(f"Starting recording...{hint}\n") + hints.append("Press 'm' to mute/unmute mic.") + silence_timeout = config.audio.silence_timeout + if silence_timeout > 0: + mins_timeout = silence_timeout // 60 + if mins_timeout > 0: + hints.append(f"Auto-stops after {mins_timeout}m of silence.") + else: + hints.append(f"Auto-stops after {silence_timeout}s of silence.") + hints.append("Press Ctrl+C to stop.") + click.echo(f"Starting recording... {' '.join(hints)}\n") recorder.start(audio_path) start_time = time.time() @@ -182,7 +194,7 @@ def on_interrupt(sig, frame): warned_no_data = False try: - while not stop_event: + while not stop_event and recorder.is_recording: elapsed = time.time() - start_time mins, secs = divmod(int(elapsed), 60) mute_indicator = " [MIC MUTED]" if recorder.is_muted else "" @@ -216,7 +228,10 @@ def on_interrupt(sig, frame): termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_termios) signal.signal(signal.SIGINT, original_handler) - click.echo("\n\nStopping recording...") + if not stop_event: + click.echo("\n\nRecording auto-stopped after silence timeout.") + else: + click.echo("\n\nStopping recording...") recorder.stop() if not audio_path.exists() or audio_path.stat().st_size <= _WAV_HEADER_SIZE: diff --git a/swift/Sources/AudioCapture.swift b/swift/Sources/AudioCapture.swift index 4b09f7d..9a24825 100644 --- a/swift/Sources/AudioCapture.swift +++ b/swift/Sources/AudioCapture.swift @@ -18,6 +18,16 @@ class MicCapture { private var _isMuted = false private var _muteLock = os_unfair_lock_s() + // Level tracking for silence timeout (mirrors SystemAudioCapture pattern) + private var _lastLoudTime = Date() + private var _lastLoudTimeLock = os_unfair_lock_s() + + var lastLoudTime: Date { + os_unfair_lock_lock(&_lastLoudTimeLock) + defer { os_unfair_lock_unlock(&_lastLoudTimeLock) } + return _lastLoudTime + } + var isMuted: Bool { os_unfair_lock_lock(&_muteLock) defer { os_unfair_lock_unlock(&_muteLock) } @@ -65,13 +75,31 @@ class MicCapture { if self.startHostTime == 0 { self.startHostTime = time.hostTime } - if self.isMuted, let channelData = buffer.floatChannelData { + let muted = self.isMuted + if muted, let channelData = buffer.floatChannelData { let channels = Int(buffer.format.channelCount) let frames = Int(buffer.frameLength) for ch in 0...size) } } + // Track peak level for silence timeout (muted mic = silence) + if !muted, let channelData = buffer.floatChannelData { + let channels = Int(buffer.format.channelCount) + let frames = Int(buffer.frameLength) + var peak: Float = 0 + for ch in 0.. peak { peak = v } + } + } + if peak > 1e-2 { + os_unfair_lock_lock(&self._lastLoudTimeLock) + self._lastLoudTime = Date() + os_unfair_lock_unlock(&self._lastLoudTimeLock) + } + } try? self.audioFile?.write(from: buffer) } try engine.start() @@ -165,6 +193,14 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS private var silenceChecked: Bool = false private var silenceWarned: Bool = false + // Silence timeout auto-stop + var silenceTimeout: TimeInterval = 0 // seconds; 0 = disabled + var onSilenceTimeout: (() -> Void)? + var micCapture: MicCapture? // checked by silence timer + private var lastLoudTime = Date() + private var lastLoudTimeLock = os_unfair_lock_s() + private var silenceTimer: DispatchSourceTimer? + // Picker continuation private var startContinuation: CheckedContinuation? @@ -243,6 +279,36 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS self.stream = stream fputs("Recording system audio to \(outputPath)... Press Ctrl+C to stop.\n", stderr) + + // Start silence timeout timer if configured + if silenceTimeout > 0 { + lastLoudTime = Date() + + let timer = DispatchSource.makeTimerSource(queue: .main) + timer.schedule(deadline: .now() + 1, repeating: 1.0) + timer.setEventHandler { [weak self] in + guard let self else { return } + os_unfair_lock_lock(&self.lastLoudTimeLock) + var effectiveLastLoud = self.lastLoudTime + os_unfair_lock_unlock(&self.lastLoudTimeLock) + // If mic is active, use the more recent of the two + if let mic = self.micCapture { + let micLastLoud = mic.lastLoudTime + if micLastLoud > effectiveLastLoud { + effectiveLastLoud = micLastLoud + } + } + let elapsed = Date().timeIntervalSince(effectiveLastLoud) + if elapsed > self.silenceTimeout { + fputs("[SILENCE_TIMEOUT]\n", stderr) + self.silenceTimer?.cancel() + self.silenceTimer = nil + self.onSilenceTimeout?() + } + } + timer.resume() + silenceTimer = timer + } } // MARK: - SCStreamOutput @@ -300,16 +366,25 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS totalFrames += Int64(frameCount) // Peak detection on float channel data + var bufferPeak: Float = 0.0 if let channelData = pcmBuffer.floatChannelData { let channelCount = Int(sampleFormat.channelCount) for ch in 0.. peakLevel { peakLevel = absVal } + if absVal > bufferPeak { bufferPeak = absVal } } } } + if bufferPeak > peakLevel { peakLevel = bufferPeak } + + // Update last loud time for silence timeout + if bufferPeak > 1e-4 { + os_unfair_lock_lock(&lastLoudTimeLock) + lastLoudTime = Date() + os_unfair_lock_unlock(&lastLoudTimeLock) + } // Check for silence after ~3 seconds of data if !silenceChecked && totalFrames > 48000 * 3 { @@ -657,6 +732,7 @@ func main() { var outputPath: String? var enableMic = false var micDeviceName: String? + var silenceTimeout: TimeInterval = 0 var i = 2 while i < args.count { @@ -678,6 +754,13 @@ func main() { } micDeviceName = args[i] enableMic = true // --mic-device implies --mic + case "--silence-timeout": + i += 1 + guard i < args.count, let val = TimeInterval(args[i]) else { + fputs("Error: --silence-timeout requires a number of seconds\n", stderr) + exit(1) + } + silenceTimeout = val default: fputs("Unknown option: \(args[i])\n", stderr) printUsage() @@ -697,6 +780,7 @@ func main() { let micPath = output + ".mic.tmp.wav" let capture = SystemAudioCapture(outputPath: systemPath) + capture.silenceTimeout = silenceTimeout var micCapture: MicCapture? if enableMic { @@ -708,27 +792,14 @@ func main() { exit(1) } micCapture = mic + capture.micCapture = mic } - // Toggle mic mute on SIGUSR1 (sent by Python wrapper) - var _sigusr1Source: DispatchSourceSignal? // retained to keep source alive - if let mic = micCapture { - signal(SIGUSR1, SIG_IGN) - let src = DispatchSource.makeSignalSource(signal: SIGUSR1, queue: .main) - src.setEventHandler { mic.toggleMute() } - src.resume() - _sigusr1Source = src - } - _ = _sigusr1Source - - // Handle Ctrl+C gracefully - let sigintSource = DispatchSource.makeSignalSource(signal: SIGINT, queue: .main) - signal(SIGINT, SIG_IGN) - sigintSource.setEventHandler { + // Shared shutdown logic for SIGINT, SIGTERM, and silence timeout + let shutdown: () -> Void = { capture.stop() if let mic = micCapture { mic.stop() - // Merge the two files do { try mergeAudioFiles( systemPath: systemPath, @@ -742,27 +813,29 @@ func main() { } exit(0) } + + capture.onSilenceTimeout = shutdown + + // Toggle mic mute on SIGUSR1 (sent by Python wrapper) + var _sigusr1Source: DispatchSourceSignal? // retained to keep source alive + if let mic = micCapture { + signal(SIGUSR1, SIG_IGN) + let src = DispatchSource.makeSignalSource(signal: SIGUSR1, queue: .main) + src.setEventHandler { mic.toggleMute() } + src.resume() + _sigusr1Source = src + } + _ = _sigusr1Source + + // Handle Ctrl+C gracefully + let sigintSource = DispatchSource.makeSignalSource(signal: SIGINT, queue: .main) + signal(SIGINT, SIG_IGN) + sigintSource.setEventHandler { shutdown() } sigintSource.resume() let sigtermSource = DispatchSource.makeSignalSource(signal: SIGTERM, queue: .main) signal(SIGTERM, SIG_IGN) - sigtermSource.setEventHandler { - capture.stop() - if let mic = micCapture { - mic.stop() - do { - try mergeAudioFiles( - systemPath: systemPath, - micPath: micPath, - systemStartHostTime: capture.startHostTime, - micStartHostTime: mic.startHostTime, - outputPath: output) - } catch { - fputs("Error merging audio: \(error)\n", stderr) - } - } - exit(0) - } + sigtermSource.setEventHandler { shutdown() } sigtermSource.resume() Task { From b57e0959547dd5458631c5451d01fc273c6b9d76 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Sun, 8 Mar 2026 17:25:09 +0000 Subject: [PATCH 2/5] Address PR review feedback: fix data race, add tests, improve UX - Fix data race: init lastLoudTime before startCapture() in Swift - Cancel silence timer in stop() to prevent firing during shutdown - Add --silence-timeout to Swift helper usage text - Use silence_timed_out property for stop message instead of stop_event - Raise sd.CallbackStop in sounddevice backend on silence timeout - Add silence_timed_out property to SoundDeviceRecorder - Fix lint: split long --silence-timeout CLI option line - Add tests: config default, CLI flag, recorder passthrough --- src/ownscribe/audio/sounddevice_recorder.py | 5 +++++ src/ownscribe/cli.py | 5 ++++- src/ownscribe/pipeline.py | 2 +- swift/Sources/AudioCapture.swift | 13 +++++++++-- tests/test_cli.py | 16 +++++++++++++ tests/test_config.py | 4 ++++ tests/test_pipeline.py | 25 +++++++++++++++++++++ 7 files changed, 66 insertions(+), 4 deletions(-) diff --git a/src/ownscribe/audio/sounddevice_recorder.py b/src/ownscribe/audio/sounddevice_recorder.py index 119e1aa..0afc9b3 100644 --- a/src/ownscribe/audio/sounddevice_recorder.py +++ b/src/ownscribe/audio/sounddevice_recorder.py @@ -67,6 +67,7 @@ def callback(indata, frames, time, status): self._last_loud_time = _time.monotonic() elif _time.monotonic() - self._last_loud_time > self._silence_timeout: self._timed_out = True + raise sd.CallbackStop self._stream = sd.InputStream( device=self._device, @@ -80,6 +81,10 @@ def callback(indata, frames, time, status): def is_recording(self) -> bool: return not self._timed_out and self._stream is not None + @property + def silence_timed_out(self) -> bool: + return self._timed_out + def stop(self) -> None: if self._stream is not None: self._stream.stop() diff --git a/src/ownscribe/cli.py b/src/ownscribe/cli.py index 57cf7b3..698f596 100644 --- a/src/ownscribe/cli.py +++ b/src/ownscribe/cli.py @@ -45,7 +45,10 @@ def _dir_size(path: str) -> str: help="Keep or delete WAV recordings after transcription.", ) @click.option("--template", default=None, help="Summarization template (meeting, lecture, brief, or custom).") -@click.option("--silence-timeout", default=None, type=int, help="Seconds of silence before auto-stopping recording (0 to disable).") +@click.option( + "--silence-timeout", default=None, type=int, + help="Seconds of silence before auto-stopping recording (0 to disable).", +) @click.pass_context def cli( ctx: click.Context, diff --git a/src/ownscribe/pipeline.py b/src/ownscribe/pipeline.py index d26060e..1b91ade 100644 --- a/src/ownscribe/pipeline.py +++ b/src/ownscribe/pipeline.py @@ -228,7 +228,7 @@ def on_interrupt(sig, frame): termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_termios) signal.signal(signal.SIGINT, original_handler) - if not stop_event: + if getattr(recorder, "silence_timed_out", False): click.echo("\n\nRecording auto-stopped after silence timeout.") else: click.echo("\n\nStopping recording...") diff --git a/swift/Sources/AudioCapture.swift b/swift/Sources/AudioCapture.swift index 9a24825..945abd1 100644 --- a/swift/Sources/AudioCapture.swift +++ b/swift/Sources/AudioCapture.swift @@ -275,6 +275,12 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS // Create and start stream let stream = SCStream(filter: filter, configuration: config, delegate: self) try stream.addStreamOutput(self, type: .audio, sampleHandlerQueue: captureQueue) + + // Initialize last-loud time before starting capture (no lock needed — callbacks haven't started) + if silenceTimeout > 0 { + lastLoudTime = Date() + } + try await stream.startCapture() self.stream = stream @@ -282,7 +288,6 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS // Start silence timeout timer if configured if silenceTimeout > 0 { - lastLoudTime = Date() let timer = DispatchSource.makeTimerSource(queue: .main) timer.schedule(deadline: .now() + 1, repeating: 1.0) @@ -406,6 +411,9 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS // MARK: - Stop func stop() { + silenceTimer?.cancel() + silenceTimer = nil + let sem = DispatchSemaphore(value: 0) Task.detached { [stream] in try? await stream?.stopCapture() @@ -690,7 +698,7 @@ func printUsage() { ownscribe-audio — system audio capture helper USAGE: - ownscribe-audio capture --output FILE [--mic] [--mic-device NAME] + ownscribe-audio capture --output FILE [--mic] [--mic-device NAME] [--silence-timeout N] ownscribe-audio list-apps ownscribe-audio list-devices @@ -698,6 +706,7 @@ func printUsage() { --output, -o FILE Output WAV file path (required for capture) --mic Also capture microphone input --mic-device NAME Use specific mic input device (implies --mic) + --silence-timeout N Auto-stop after N seconds of silence (0 = disabled) --help, -h Show this help SUBCOMMANDS: diff --git a/tests/test_cli.py b/tests/test_cli.py index 35bc4be..edecc7f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -63,6 +63,22 @@ def test_language_flag(self): config = mock_run.call_args[0][0] assert config.transcription.language == "de" + def test_silence_timeout_flag(self): + runner = CliRunner() + with _mock_config(), mock.patch("ownscribe.pipeline.run_pipeline") as mock_run: + result = runner.invoke(cli, ["--silence-timeout", "60"]) + assert result.exit_code == 0 + config = mock_run.call_args[0][0] + assert config.audio.silence_timeout == 60 + + def test_silence_timeout_disable(self): + runner = CliRunner() + with _mock_config(), mock.patch("ownscribe.pipeline.run_pipeline") as mock_run: + result = runner.invoke(cli, ["--silence-timeout", "0"]) + assert result.exit_code == 0 + config = mock_run.call_args[0][0] + assert config.audio.silence_timeout == 0 + class TestSubcommandHelp: def test_transcribe_help(self): diff --git a/tests/test_config.py b/tests/test_config.py index 811c4a1..977c00a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -49,6 +49,10 @@ def test_default_templates_empty(self): cfg = Config() assert cfg.templates == {} + def test_default_silence_timeout(self): + cfg = Config() + assert cfg.audio.silence_timeout == 300 + class TestMergeToml: def test_full_override(self): diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 67e004e..a2c42d2 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -49,6 +49,31 @@ def test_sounddevice_when_device_set(self): recorder = _create_recorder(config) assert recorder == mock_sd.return_value + def test_silence_timeout_passed_to_coreaudio(self): + from ownscribe.pipeline import _create_recorder + + config = Config() + config.audio.backend = "coreaudio" + config.audio.device = "" + config.audio.silence_timeout = 120 + + with mock.patch("ownscribe.audio.coreaudio.CoreAudioRecorder") as mock_cls: + mock_cls.return_value.is_available.return_value = True + _create_recorder(config) + mock_cls.assert_called_once_with(mic=False, mic_device="", silence_timeout=120) + + def test_silence_timeout_passed_to_sounddevice(self): + from ownscribe.pipeline import _create_recorder + + config = Config() + config.audio.backend = "coreaudio" + config.audio.device = "USB Mic" + config.audio.silence_timeout = 60 + + with mock.patch("ownscribe.audio.sounddevice_recorder.SoundDeviceRecorder") as mock_sd: + _create_recorder(config) + mock_sd.assert_called_once_with(device="USB Mic", silence_timeout=60) + class TestFormatOutput: def test_markdown_format(self, sample_transcript): From 51475b2a022a94b565592130b07823ee31e948fe Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Sun, 8 Mar 2026 17:42:16 +0000 Subject: [PATCH 3/5] Fix silence timeout: move stop() before check, use monotonic clock, fix test label - Call recorder.stop() before checking silence_timed_out so CoreAudio stderr is parsed before the flag is read - Replace Date() with DispatchTime.now().uptimeNanoseconds in Swift silence timing to avoid wall-clock jumps from NTP sync or sleep/wake - Fix test_silence_timeout_passed_to_sounddevice to use "sounddevice" backend for clarity --- src/ownscribe/pipeline.py | 2 +- swift/Sources/AudioCapture.swift | 15 ++++++++------- tests/test_pipeline.py | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/ownscribe/pipeline.py b/src/ownscribe/pipeline.py index 1b91ade..d9f4d5e 100644 --- a/src/ownscribe/pipeline.py +++ b/src/ownscribe/pipeline.py @@ -228,11 +228,11 @@ def on_interrupt(sig, frame): termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_termios) signal.signal(signal.SIGINT, original_handler) + recorder.stop() if getattr(recorder, "silence_timed_out", False): click.echo("\n\nRecording auto-stopped after silence timeout.") else: click.echo("\n\nStopping recording...") - recorder.stop() if not audio_path.exists() or audio_path.stat().st_size <= _WAV_HEADER_SIZE: click.echo( diff --git a/swift/Sources/AudioCapture.swift b/swift/Sources/AudioCapture.swift index 945abd1..cf3a55c 100644 --- a/swift/Sources/AudioCapture.swift +++ b/swift/Sources/AudioCapture.swift @@ -19,10 +19,10 @@ class MicCapture { private var _muteLock = os_unfair_lock_s() // Level tracking for silence timeout (mirrors SystemAudioCapture pattern) - private var _lastLoudTime = Date() + private var _lastLoudTime: UInt64 = DispatchTime.now().uptimeNanoseconds private var _lastLoudTimeLock = os_unfair_lock_s() - var lastLoudTime: Date { + var lastLoudTime: UInt64 { os_unfair_lock_lock(&_lastLoudTimeLock) defer { os_unfair_lock_unlock(&_lastLoudTimeLock) } return _lastLoudTime @@ -96,7 +96,7 @@ class MicCapture { } if peak > 1e-2 { os_unfair_lock_lock(&self._lastLoudTimeLock) - self._lastLoudTime = Date() + self._lastLoudTime = DispatchTime.now().uptimeNanoseconds os_unfair_lock_unlock(&self._lastLoudTimeLock) } } @@ -197,7 +197,7 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS var silenceTimeout: TimeInterval = 0 // seconds; 0 = disabled var onSilenceTimeout: (() -> Void)? var micCapture: MicCapture? // checked by silence timer - private var lastLoudTime = Date() + private var lastLoudTime: UInt64 = DispatchTime.now().uptimeNanoseconds private var lastLoudTimeLock = os_unfair_lock_s() private var silenceTimer: DispatchSourceTimer? @@ -278,7 +278,7 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS // Initialize last-loud time before starting capture (no lock needed — callbacks haven't started) if silenceTimeout > 0 { - lastLoudTime = Date() + lastLoudTime = DispatchTime.now().uptimeNanoseconds } try await stream.startCapture() @@ -293,6 +293,7 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS timer.schedule(deadline: .now() + 1, repeating: 1.0) timer.setEventHandler { [weak self] in guard let self else { return } + let now = DispatchTime.now().uptimeNanoseconds os_unfair_lock_lock(&self.lastLoudTimeLock) var effectiveLastLoud = self.lastLoudTime os_unfair_lock_unlock(&self.lastLoudTimeLock) @@ -303,7 +304,7 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS effectiveLastLoud = micLastLoud } } - let elapsed = Date().timeIntervalSince(effectiveLastLoud) + let elapsed = Double(now - effectiveLastLoud) / 1_000_000_000.0 if elapsed > self.silenceTimeout { fputs("[SILENCE_TIMEOUT]\n", stderr) self.silenceTimer?.cancel() @@ -387,7 +388,7 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS // Update last loud time for silence timeout if bufferPeak > 1e-4 { os_unfair_lock_lock(&lastLoudTimeLock) - lastLoudTime = Date() + lastLoudTime = DispatchTime.now().uptimeNanoseconds os_unfair_lock_unlock(&lastLoudTimeLock) } diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index a2c42d2..b99c15e 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -66,7 +66,7 @@ def test_silence_timeout_passed_to_sounddevice(self): from ownscribe.pipeline import _create_recorder config = Config() - config.audio.backend = "coreaudio" + config.audio.backend = "sounddevice" config.audio.device = "USB Mic" config.audio.silence_timeout = 60 From 1936f00234edcb7ed2380c5442bfb930c70f08cb Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Sun, 8 Mar 2026 18:09:12 +0000 Subject: [PATCH 4/5] Address third round of PR review: abstract is_recording, input validation, code cleanup - Make is_recording abstract on AudioRecorder base class - Filter SILENCE_WARNING lines from Swift stderr, print clean Python-side message - Check stream.active in SoundDeviceRecorder.is_recording - Validate --silence-timeout as non-negative (click.IntRange in Python, explicit check in Swift) - Fix time formatting to show "Xm Ys" instead of truncating to whole minutes - Extract computePeakLevel() helper and named threshold constants in Swift - Add comments explaining silence timer logic --- src/ownscribe/audio/base.py | 2 +- src/ownscribe/audio/coreaudio.py | 5 +- src/ownscribe/audio/sounddevice_recorder.py | 6 +- src/ownscribe/cli.py | 2 +- src/ownscribe/pipeline.py | 17 ++++-- swift/Sources/AudioCapture.swift | 62 ++++++++++++--------- tests/test_mute.py | 8 +++ 7 files changed, 68 insertions(+), 34 deletions(-) diff --git a/src/ownscribe/audio/base.py b/src/ownscribe/audio/base.py index ec3be99..f738ba0 100644 --- a/src/ownscribe/audio/base.py +++ b/src/ownscribe/audio/base.py @@ -30,6 +30,6 @@ def is_muted(self) -> bool: return False @property + @abc.abstractmethod def is_recording(self) -> bool: """Whether the recorder is still actively recording.""" - return True diff --git a/src/ownscribe/audio/coreaudio.py b/src/ownscribe/audio/coreaudio.py index 801bd2c..9147793 100644 --- a/src/ownscribe/audio/coreaudio.py +++ b/src/ownscribe/audio/coreaudio.py @@ -153,7 +153,10 @@ def stop(self) -> None: if "[SILENCE_TIMEOUT]" in stderr_output: self._silence_timed_out = True # Filter out mute toggles and known informational lines - _NOISE_PREFIXES = ("Recording ", "Saved ", "Merged audio saved") + _NOISE_PREFIXES = ( + "Recording ", "Saved ", "Merged audio saved", + "[SILENCE_WARNING]", "Check: ", + ) _NOISE_LINES = ("[MIC_MUTED]", "[MIC_UNMUTED]", "[SILENCE_TIMEOUT]") lines = [ line for line in stderr_output.strip().splitlines() diff --git a/src/ownscribe/audio/sounddevice_recorder.py b/src/ownscribe/audio/sounddevice_recorder.py index 0afc9b3..bfc3b47 100644 --- a/src/ownscribe/audio/sounddevice_recorder.py +++ b/src/ownscribe/audio/sounddevice_recorder.py @@ -79,7 +79,11 @@ def callback(indata, frames, time, status): @property def is_recording(self) -> bool: - return not self._timed_out and self._stream is not None + return ( + not self._timed_out + and self._stream is not None + and getattr(self._stream, "active", False) + ) @property def silence_timed_out(self) -> bool: diff --git a/src/ownscribe/cli.py b/src/ownscribe/cli.py index 698f596..10b6cd6 100644 --- a/src/ownscribe/cli.py +++ b/src/ownscribe/cli.py @@ -46,7 +46,7 @@ def _dir_size(path: str) -> str: ) @click.option("--template", default=None, help="Summarization template (meeting, lecture, brief, or custom).") @click.option( - "--silence-timeout", default=None, type=int, + "--silence-timeout", default=None, type=click.IntRange(min=0), help="Seconds of silence before auto-stopping recording (0 to disable).", ) @click.pass_context diff --git a/src/ownscribe/pipeline.py b/src/ownscribe/pipeline.py index d9f4d5e..1eb5cf9 100644 --- a/src/ownscribe/pipeline.py +++ b/src/ownscribe/pipeline.py @@ -168,9 +168,11 @@ def run_pipeline(config: Config) -> None: hints.append("Press 'm' to mute/unmute mic.") silence_timeout = config.audio.silence_timeout if silence_timeout > 0: - mins_timeout = silence_timeout // 60 - if mins_timeout > 0: - hints.append(f"Auto-stops after {mins_timeout}m of silence.") + mins, secs = divmod(int(silence_timeout), 60) + if mins > 0 and secs > 0: + hints.append(f"Auto-stops after {mins}m {secs}s of silence.") + elif mins > 0: + hints.append(f"Auto-stops after {mins}m of silence.") else: hints.append(f"Auto-stops after {silence_timeout}s of silence.") hints.append("Press Ctrl+C to stop.") @@ -245,8 +247,13 @@ def on_interrupt(sig, frame): click.echo(f"Audio saved to {audio_path}\n") # Check for silent audio before spending time on transcription - # Skip if the recorder already reported a silence warning (CoreAudio helper) - if not getattr(recorder, "silence_warning", False): + if getattr(recorder, "silence_warning", False): + click.echo( + "Warning: audio may be silent — check Screen Recording permissions " + "(System Settings > Privacy & Security > Screen Recording).", + err=True, + ) + else: _check_audio_silence(audio_path) # 2. Transcribe diff --git a/swift/Sources/AudioCapture.swift b/swift/Sources/AudioCapture.swift index cf3a55c..13e5ff0 100644 --- a/swift/Sources/AudioCapture.swift +++ b/swift/Sources/AudioCapture.swift @@ -7,6 +7,26 @@ import AppKit import CoreAudio import AudioToolbox +// MARK: - Constants + +/// Minimum peak amplitude to consider microphone audio "loud" (silence timeout). +private let kMicLoudThreshold: Float = 1e-2 +/// Minimum peak amplitude to consider system audio "loud" (silence timeout). +private let kSystemLoudThreshold: Float = 1e-4 + +/// Compute peak amplitude across all channels of float audio data. +func computePeakLevel(in channelData: UnsafePointer>, + channels: Int, frames: Int) -> Float { + var peak: Float = 0 + for ch in 0.. peak { peak = v } + } + } + return peak +} + // MARK: - Mic Capture via AVAudioEngine class MicCapture { @@ -85,16 +105,10 @@ class MicCapture { } // Track peak level for silence timeout (muted mic = silence) if !muted, let channelData = buffer.floatChannelData { - let channels = Int(buffer.format.channelCount) - let frames = Int(buffer.frameLength) - var peak: Float = 0 - for ch in 0.. peak { peak = v } - } - } - if peak > 1e-2 { + let peak = computePeakLevel(in: channelData, + channels: Int(buffer.format.channelCount), + frames: Int(buffer.frameLength)) + if peak > kMicLoudThreshold { os_unfair_lock_lock(&self._lastLoudTimeLock) self._lastLoudTime = DispatchTime.now().uptimeNanoseconds os_unfair_lock_unlock(&self._lastLoudTimeLock) @@ -286,9 +300,11 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS fputs("Recording system audio to \(outputPath)... Press Ctrl+C to stop.\n", stderr) - // Start silence timeout timer if configured + // Start silence timeout timer if configured. + // Checks every 1s whether both system audio and mic (if active) have been + // quiet longer than silenceTimeout. Uses the most recent "loud" timestamp + // from either source so that activity on either channel prevents auto-stop. if silenceTimeout > 0 { - let timer = DispatchSource.makeTimerSource(queue: .main) timer.schedule(deadline: .now() + 1, repeating: 1.0) timer.setEventHandler { [weak self] in @@ -372,21 +388,13 @@ class SystemAudioCapture: NSObject, SCStreamOutput, SCStreamDelegate, SCContentS totalFrames += Int64(frameCount) // Peak detection on float channel data - var bufferPeak: Float = 0.0 - if let channelData = pcmBuffer.floatChannelData { - let channelCount = Int(sampleFormat.channelCount) - for ch in 0.. bufferPeak { bufferPeak = absVal } - } - } - } - if bufferPeak > peakLevel { peakLevel = bufferPeak } + let bufferPeak: Float = pcmBuffer.floatChannelData.map { + computePeakLevel(in: $0, channels: Int(sampleFormat.channelCount), frames: Int(frameCount)) + } ?? 0.0 + if bufferPeak > self.peakLevel { self.peakLevel = bufferPeak } // Update last loud time for silence timeout - if bufferPeak > 1e-4 { + if bufferPeak > kSystemLoudThreshold { os_unfair_lock_lock(&lastLoudTimeLock) lastLoudTime = DispatchTime.now().uptimeNanoseconds os_unfair_lock_unlock(&lastLoudTimeLock) @@ -770,6 +778,10 @@ func main() { fputs("Error: --silence-timeout requires a number of seconds\n", stderr) exit(1) } + if val < 0 { + fputs("Error: --silence-timeout must be zero (disabled) or a positive number of seconds\n", stderr) + exit(1) + } silenceTimeout = val default: fputs("Unknown option: \(args[i])\n", stderr) diff --git a/tests/test_mute.py b/tests/test_mute.py index 9cc5bac..f42cb85 100644 --- a/tests/test_mute.py +++ b/tests/test_mute.py @@ -23,6 +23,10 @@ def stop(self): def is_available(self): return True + @property + def is_recording(self): + return True + recorder = DummyRecorder() recorder.toggle_mute() # should not raise @@ -37,6 +41,10 @@ def stop(self): def is_available(self): return True + @property + def is_recording(self): + return True + recorder = DummyRecorder() assert recorder.is_muted is False From 943fa8d5ca448647bc92b9670124e6c9c1b19c1a Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Sun, 8 Mar 2026 18:15:23 +0000 Subject: [PATCH 5/5] Revert SILENCE_WARNING filtering so warning shows during recording The Swift helper writes SILENCE_WARNING to stderr during capture, and piped stderr passes it through in real time. Filtering it in stop() and replacing with a Python message would delay it until after recording finishes, defeating its purpose. --- src/ownscribe/audio/coreaudio.py | 5 +---- src/ownscribe/pipeline.py | 9 ++------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/ownscribe/audio/coreaudio.py b/src/ownscribe/audio/coreaudio.py index 9147793..801bd2c 100644 --- a/src/ownscribe/audio/coreaudio.py +++ b/src/ownscribe/audio/coreaudio.py @@ -153,10 +153,7 @@ def stop(self) -> None: if "[SILENCE_TIMEOUT]" in stderr_output: self._silence_timed_out = True # Filter out mute toggles and known informational lines - _NOISE_PREFIXES = ( - "Recording ", "Saved ", "Merged audio saved", - "[SILENCE_WARNING]", "Check: ", - ) + _NOISE_PREFIXES = ("Recording ", "Saved ", "Merged audio saved") _NOISE_LINES = ("[MIC_MUTED]", "[MIC_UNMUTED]", "[SILENCE_TIMEOUT]") lines = [ line for line in stderr_output.strip().splitlines() diff --git a/src/ownscribe/pipeline.py b/src/ownscribe/pipeline.py index 1eb5cf9..c6ca807 100644 --- a/src/ownscribe/pipeline.py +++ b/src/ownscribe/pipeline.py @@ -247,13 +247,8 @@ def on_interrupt(sig, frame): click.echo(f"Audio saved to {audio_path}\n") # Check for silent audio before spending time on transcription - if getattr(recorder, "silence_warning", False): - click.echo( - "Warning: audio may be silent — check Screen Recording permissions " - "(System Settings > Privacy & Security > Screen Recording).", - err=True, - ) - else: + # Skip if the recorder already reported a silence warning (CoreAudio helper) + if not getattr(recorder, "silence_warning", False): _check_audio_silence(audio_path) # 2. Transcribe