Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@ Record, transcribe, and summarize meetings and system audio entirely on your mac

> System audio capture requires **macOS 14.2 or later**. Other platforms can use the sounddevice backend with an external audio source.

## Table of Contents

- [Privacy](#privacy)
- [Features](#features)
- [Requirements](#requirements)
- [Installation](#installation)
- [Usage](#usage)
- [Configuration](#configuration)
- [Summarization Templates](#summarization-templates)
- [Speaker Diarization](#speaker-diarization)
- [Acknowledgments](#acknowledgments)
- [Contributing](#contributing)
- [License](#license)

## Privacy

ownscribe **does not**:
Expand All @@ -36,6 +50,7 @@ All audio, transcripts, and summaries remain local.
- **Summarization templates** — built-in presets for meetings, lectures, and quick briefs; define your own in config
- **Ask your meetings** — ask natural-language questions across all your meeting notes; uses a two-stage LLM pipeline with keyword fallback
<br><img src="docs/demo-ask.gif" alt="ownscribe ask demo" width="700">
- **Silence auto-stop** — automatically stops recording after sustained silence (default: 5 minutes, configurable)
- **One command** — just run `ownscribe`, press Ctrl+C when done, get transcript + summary

## Requirements
Expand Down Expand Up @@ -90,7 +105,7 @@ ownscribe # records system audio, Ctrl+C to stop
```

This will:
1. Capture system audio until you press Ctrl+C
1. Capture system audio until you press Ctrl+C (or until recording auto-stops after 5 minutes of silence, the default)
2. Transcribe with WhisperX
3. Summarize with your local LLM
4. Save everything to `~/ownscribe/YYYY-MM-DD_HHMMSS/`
Expand All @@ -105,11 +120,13 @@ ownscribe --mic-device "MacBook Pro Microphone" # capture system audio + specifi
ownscribe --device "MacBook Pro Microphone" # use mic instead of system audio
ownscribe --no-summarize # skip LLM summarization
ownscribe --diarize # enable speaker identification
ownscribe --language en # set transcription language (default: auto-detect)
ownscribe --language en # set transcription language (default: auto-detect)
ownscribe --model large-v3 # use a larger Whisper model
ownscribe --format json # output as JSON instead of markdown
ownscribe --no-keep-recording # auto-delete WAV files after transcription
ownscribe --template lecture # use the lecture summarization template
ownscribe --silence-timeout 600 # auto-stop after 10 minutes of silence
ownscribe --silence-timeout 0 # disable silence auto-stop
```

### Subcommands
Expand Down Expand Up @@ -160,6 +177,7 @@ backend = "coreaudio" # "coreaudio" or "sounddevice"
device = "" # empty = system audio
mic = false # also capture microphone input
mic_device = "" # specific mic device name (empty = default)
silence_timeout = 300 # seconds of silence before auto-stop; 0 = disabled

[transcription]
model = "base" # tiny, base, small, medium, large-v3
Expand Down
5 changes: 5 additions & 0 deletions src/ownscribe/audio/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,8 @@ def toggle_mute(self) -> None: # noqa: B027
def is_muted(self) -> bool:
"""Whether the microphone is currently muted."""
return False

@property
@abc.abstractmethod
def is_recording(self) -> bool:
    """Whether the recorder is still actively recording.

    Abstract: every concrete recorder must provide this property.
    The pipeline's wait loop polls it alongside the Ctrl+C flag, so an
    implementation should report ``False`` once capture has ended on its
    own (for example after a silence auto-stop), not only after
    ``stop()`` was called.
    """
19 changes: 17 additions & 2 deletions src/ownscribe/audio/coreaudio.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,14 @@ def _find_binary() -> Path | None:
class CoreAudioRecorder(AudioRecorder):
"""Records system audio using the ownscribe-audio Swift helper."""

def __init__(self, mic: bool = False, mic_device: str = "") -> None:
def __init__(self, mic: bool = False, mic_device: str = "", silence_timeout: int = 0) -> None:
self._mic = mic
self._mic_device = mic_device
self._silence_timeout = silence_timeout
self._process: subprocess.Popen | None = None
self._binary = _find_binary()
self._silence_warning: bool = False
self._silence_timed_out: bool = False
self._muted: bool = False

def is_available(self) -> bool:
Expand All @@ -98,6 +100,8 @@ def start(self, output_path: Path) -> None:
cmd.append("--mic")
if self._mic_device:
cmd.extend(["--mic-device", self._mic_device])
if self._silence_timeout > 0:
cmd.extend(["--silence-timeout", str(self._silence_timeout)])

self._process = subprocess.Popen(
cmd,
Expand All @@ -121,6 +125,14 @@ def toggle_mute(self) -> None:
def is_muted(self) -> bool:
return self._muted

@property
def is_recording(self) -> bool:
    """Report whether the helper subprocess is currently running.

    Returns ``False`` when no helper has been launched yet, or when the
    launched helper has already exited (``poll()`` yields an exit code).
    """
    helper = self._process
    if helper is None:
        return False
    # poll() returns None for as long as the child process is alive.
    return helper.poll() is None

@property
def silence_timed_out(self) -> bool:
    """Whether the helper reported that it stopped due to a silence timeout.

    Initialised to ``False`` in ``__init__``. In the code visible here the
    flag is only raised by ``stop()``, when ``[SILENCE_TIMEOUT]`` appears in
    the helper's stderr output, so read it after ``stop()`` has run.
    """
    return self._silence_timed_out

def stop(self) -> None:
if self._process and self._process.poll() is None:
self._process.send_signal(signal.SIGINT)
Expand All @@ -138,11 +150,14 @@ def stop(self) -> None:
if stderr_output:
if "[SILENCE_WARNING]" in stderr_output:
self._silence_warning = True
if "[SILENCE_TIMEOUT]" in stderr_output:
self._silence_timed_out = True
# Filter out mute toggles and known informational lines
_NOISE_PREFIXES = ("Recording ", "Saved ", "Merged audio saved")
_NOISE_LINES = ("[MIC_MUTED]", "[MIC_UNMUTED]", "[SILENCE_TIMEOUT]")
lines = [
line for line in stderr_output.strip().splitlines()
if line not in ("[MIC_MUTED]", "[MIC_UNMUTED]")
if line not in _NOISE_LINES
and not line.startswith(_NOISE_PREFIXES)
]
if lines:
Expand Down
39 changes: 38 additions & 1 deletion src/ownscribe/audio/sounddevice_recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,37 @@
from __future__ import annotations

import threading
import time as _time
from pathlib import Path

import numpy as np
import sounddevice as sd
import soundfile as sf

from ownscribe.audio.base import AudioRecorder

_SILENCE_THRESHOLD = 1e-4 # ~-80 dB


class SoundDeviceRecorder(AudioRecorder):
"""Records from any audio input device using sounddevice + soundfile."""

def __init__(self, device: str | int | None = None, samplerate: int = 48000, channels: int = 1) -> None:
def __init__(
self,
device: str | int | None = None,
samplerate: int = 48000,
channels: int = 1,
silence_timeout: int = 0,
) -> None:
self._device = device
self._samplerate = samplerate
self._channels = channels
self._silence_timeout = silence_timeout
self._stream: sd.InputStream | None = None
self._file: sf.SoundFile | None = None
self._lock = threading.Lock()
self._last_loud_time: float = 0.0
self._timed_out: bool = False

def is_available(self) -> bool:
try:
Expand All @@ -30,6 +43,9 @@ def is_available(self) -> bool:
return False

def start(self, output_path: Path) -> None:
self._last_loud_time = _time.monotonic()
self._timed_out = False

self._file = sf.SoundFile(
str(output_path),
mode="w",
Expand All @@ -44,6 +60,15 @@ def callback(indata, frames, time, status):
if self._file is not None:
self._file.write(indata.copy())

# Silence tracking
if self._silence_timeout > 0:
peak = np.max(np.abs(indata))
if peak > _SILENCE_THRESHOLD:
self._last_loud_time = _time.monotonic()
elif _time.monotonic() - self._last_loud_time > self._silence_timeout:
self._timed_out = True
raise sd.CallbackStop

self._stream = sd.InputStream(
device=self._device,
samplerate=self._samplerate,
Expand All @@ -52,6 +77,18 @@ def callback(indata, frames, time, status):
)
self._stream.start()

@property
def is_recording(self) -> bool:
    """Report whether the input stream is still capturing audio.

    Returns ``False`` once the silence timeout has fired or when no
    stream has been created; otherwise mirrors the stream's ``active``
    attribute (treated as ``False`` if the stream object lacks one).
    """
    if self._timed_out:
        return False
    stream = self._stream
    if stream is None:
        return False
    return getattr(stream, "active", False)

@property
def silence_timed_out(self) -> bool:
    """Whether recording ended because the silence timeout elapsed.

    Set to ``True`` by the stream callback when no sample has exceeded the
    silence threshold for longer than ``silence_timeout`` seconds; reset to
    ``False`` each time ``start()`` is called.
    """
    return self._timed_out

def stop(self) -> None:
if self._stream is not None:
self._stream.stop()
Expand Down
7 changes: 7 additions & 0 deletions src/ownscribe/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ def _dir_size(path: str) -> str:
help="Keep or delete WAV recordings after transcription.",
)
@click.option("--template", default=None, help="Summarization template (meeting, lecture, brief, or custom).")
@click.option(
"--silence-timeout", default=None, type=click.IntRange(min=0),
help="Seconds of silence before auto-stopping recording (0 to disable).",
)
@click.pass_context
def cli(
ctx: click.Context,
Expand All @@ -58,6 +62,7 @@ def cli(
mic_device: str | None,
keep_recording: bool | None,
template: str | None,
silence_timeout: int | None,
) -> None:
"""Fully local meeting transcription and summarization.

Expand Down Expand Up @@ -89,6 +94,8 @@ def cli(
config.output.keep_recording = keep_recording
if template:
config.summarization.template = template
if silence_timeout is not None:
config.audio.silence_timeout = silence_timeout

ctx.obj["config"] = config

Expand Down
2 changes: 2 additions & 0 deletions src/ownscribe/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
device = "" # empty = system audio; or device name/index for sounddevice
mic = false # also capture microphone input
mic_device = "" # specific mic device name (empty = default)
silence_timeout = 300 # seconds of silence before auto-stop; 0 = disabled

[transcription]
model = "base" # whisper model: tiny, base, small, medium, large-v3
Expand Down Expand Up @@ -55,6 +56,7 @@ class AudioConfig:
device: str = ""
mic: bool = False
mic_device: str = ""
silence_timeout: int = 300 # seconds of silence before auto-stop; 0 = disabled


@dataclass
Expand Down
31 changes: 24 additions & 7 deletions src/ownscribe/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def _create_recorder(config: Config):
if config.audio.backend == "coreaudio" and not config.audio.device:
from ownscribe.audio.coreaudio import CoreAudioRecorder

recorder = CoreAudioRecorder(mic=config.audio.mic, mic_device=config.audio.mic_device)
recorder = CoreAudioRecorder(
mic=config.audio.mic,
mic_device=config.audio.mic_device,
silence_timeout=config.audio.silence_timeout,
)
if recorder.is_available():
return recorder
click.echo("Core Audio helper not found, falling back to sounddevice.")
Expand All @@ -82,7 +86,7 @@ def _create_recorder(config: Config):
# Try to parse as int (device index)
if isinstance(device, str) and device.isdigit():
device = int(device)
return SoundDeviceRecorder(device=device)
return SoundDeviceRecorder(device=device, silence_timeout=config.audio.silence_timeout)


def _create_transcriber(config: Config, progress=None):
Expand Down Expand Up @@ -159,10 +163,20 @@ def run_pipeline(config: Config) -> None:
can_mute = isinstance(recorder, CoreAudioRecorder) and config.audio.mic
is_tty = sys.stdin.isatty()

hint = " Press Ctrl+C to stop."
hints = []
if can_mute and is_tty:
hint = " Press 'm' to mute/unmute mic, Ctrl+C to stop."
click.echo(f"Starting recording...{hint}\n")
hints.append("Press 'm' to mute/unmute mic.")
silence_timeout = config.audio.silence_timeout
if silence_timeout > 0:
mins, secs = divmod(int(silence_timeout), 60)
if mins > 0 and secs > 0:
hints.append(f"Auto-stops after {mins}m {secs}s of silence.")
elif mins > 0:
hints.append(f"Auto-stops after {mins}m of silence.")
else:
hints.append(f"Auto-stops after {silence_timeout}s of silence.")
hints.append("Press Ctrl+C to stop.")
click.echo(f"Starting recording... {' '.join(hints)}\n")
recorder.start(audio_path)

start_time = time.time()
Expand All @@ -182,7 +196,7 @@ def on_interrupt(sig, frame):

warned_no_data = False
try:
while not stop_event:
while not stop_event and recorder.is_recording:
elapsed = time.time() - start_time
mins, secs = divmod(int(elapsed), 60)
mute_indicator = " [MIC MUTED]" if recorder.is_muted else ""
Expand Down Expand Up @@ -216,8 +230,11 @@ def on_interrupt(sig, frame):
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_termios)
signal.signal(signal.SIGINT, original_handler)

click.echo("\n\nStopping recording...")
recorder.stop()
if getattr(recorder, "silence_timed_out", False):
click.echo("\n\nRecording auto-stopped after silence timeout.")
else:
click.echo("\n\nStopping recording...")

if not audio_path.exists() or audio_path.stat().st_size <= _WAV_HEADER_SIZE:
click.echo(
Expand Down
Loading