From af1416c962c9695a431ef2b0f05b1080baf670cf Mon Sep 17 00:00:00 2001
From: Clay Sheaff <claysheaff@protonmail.com>
Date: Sun, 15 Feb 2026 09:48:00 -0800
Subject: [PATCH 1/9] Add typing-tool fallback chain to fix ydotool virtual
 input device leak (#7)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each ydotool invocation without ydotoold creates a kernel virtual input
device that persists until reboot. Add an auto-detection fallback chain
(wtype → ydotool+daemon → xdotool → ydotool with warning) that can be
overridden via TALKTYPE_TYPE_CMD (default: auto), matching the existing
ffmpeg/pw-record pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/ci.yml |  2 +-
 CLAUDE.md                | 71 ++++++++++++++++++++++++++++++++++++++++
 Makefile                 |  2 +-
 README.md                | 16 ++++++---
 talktype                 | 47 +++++++++++++++++++++++---
 test/mocks/wtype         |  3 ++
 test/mocks/xdotool       |  3 ++
 test/talktype.bats       | 56 +++++++++++++++++++++++--------
 8 files changed, 176 insertions(+), 24 deletions(-)
 create mode 100644 CLAUDE.md
 create mode 100755 test/mocks/wtype
 create mode 100755 test/mocks/xdotool

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b44c8c8..a6f1614 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,7 +30,7 @@ jobs:
         run: sudo apt-get update && sudo apt-get install -y bats
 
       - name: Install test dependencies
-        run: sudo apt-get install -y ydotool pipewire libnotify-bin socat
+        run: sudo apt-get install -y wtype ydotool pipewire libnotify-bin socat
 
       - name: Run tests
         run: bats test/talktype.bats test/server.bats
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..091ac61
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,71 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## What is talktype
+
+Push-to-talk speech-to-text for Linux. Press a hotkey to record, press again to transcribe and type at cursor. No GUI — just a keyboard shortcut bound to the `talktype` script. Works on Wayland (GNOME, Sway, Hyprland) and X11.
+
+## Build and install
+
+```bash
+make install      # Full setup: system deps + Python venv + symlink to ~/.local/bin/talktype
+make deps         # System packages only (requires sudo): ydotool, ffmpeg, pipewire, etc.
+make venv         # Python venv with faster-whisper only
+make parakeet     # Install Parakeet backend venv (in backends/.parakeet-venv/)
+make moonshine    # Install Moonshine backend venv (in backends/.moonshine-venv/)
+make model        # Pre-download Whisper model
+make clean        # Remove .venv
+make uninstall    # Remove ~/.local/bin/talktype symlink
+```
+
+## Testing
+
+Tests use [BATS](https://github.com/bats-core/bats-core) (Bash Automated Testing System):
+
+```bash
+make test                    # Run all tests
+bats test/talktype.bats      # Core tests (recording lifecycle, transcription, error handling)
+bats test/server.bats        # Server mode tests (daemon lifecycle, socket communication)
+bats test/backends.bats      # Integration tests against real backends + NASA audio fixture
+```
+
+Tests use mocks in `test/mocks/` to avoid requiring actual GPU, models, or system tools. The mock daemon (`test/mock-daemon.py`) simulates server backends.
+
+## Linting
+
+CI runs ShellCheck on all Bash scripts and Python syntax checks on all Python files:
+
+```bash
+shellcheck talktype transcribe-server backends/*-server
+python3 -m py_compile transcribe whisper-daemon.py backends/*-daemon.py
+```
+
+## Architecture
+
+**Core flow:** hotkey → `talktype` (Bash) → record audio (ffmpeg/pw-record) → call `$TALKTYPE_CMD` with WAV path → type result via ydotool.
+
+**Main script** (`talktype`, ~116 lines Bash): manages recording state via PID file (`$TALKTYPE_DIR/rec.pid`), sends desktop notifications, delegates transcription to `$TALKTYPE_CMD`.
+
+**Backend pattern — two modes per backend:**
+- **Direct invocation** (`transcribe`, `backends/parakeet`, `backends/moonshine`): Python scripts that load model, transcribe, exit. Simple but slow (model reload each time).
+- **Server mode** (`transcribe-server`, `backends/*-server` + `*-daemon.py`): Bash wrapper manages a Python Unix socket daemon that keeps the model in memory. Subcommands: `start`, `stop`, `transcribe`. Auto-starts daemon if not running.
+
+**Adding a custom backend:** Any executable that takes a WAV file path as its last argument and prints text to stdout. Set `TALKTYPE_CMD` in config.
+
+## Configuration
+
+Config file: `~/.config/talktype/config` (sourced as shell script by `talktype`). Key variables:
+
+- `TALKTYPE_CMD` — transcription command (default: direct faster-whisper via `transcribe`)
+- `TALKTYPE_VENV` — Python venv path (default: `.venv` in script dir)
+- `TALKTYPE_DIR` — runtime dir for PID/audio files (default: `$XDG_RUNTIME_DIR/talktype`)
+- `TALKTYPE_TYPE_CMD` — typing tool (`auto`, `wtype`, `ydotool`, `xdotool`, or custom command; default: `auto`)
+- `WHISPER_MODEL`, `WHISPER_LANG`, `WHISPER_DEVICE`, `WHISPER_COMPUTE` — Whisper settings
+
+## Key conventions
+
+- Core is intentionally pure Bash. Python is only used for ML model invocation.
+- Follows Unix philosophy: small scripts, stdin/stdout interfaces, pluggable components.
+- Server daemons communicate via Unix sockets using `socat`.
+- State files (PID, audio, notification ID) live in `$TALKTYPE_DIR` (XDG runtime dir).
diff --git a/Makefile b/Makefile
index e01b5e8..c24c3f6 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ install: deps venv
 
 # Install system dependencies (requires sudo)
 deps:
-	sudo apt install -y ydotool ffmpeg pipewire libnotify-bin python3-venv socat
+	sudo apt install -y wtype ydotool ffmpeg pipewire libnotify-bin python3-venv socat
 
 # Create Python venv with faster-whisper (default backend)
 venv: .venv/.done
diff --git a/README.md b/README.md
index eae8364..b6733dc 100644
--- a/README.md
+++ b/README.md
@@ -20,8 +20,10 @@ Or bring your own — anything that reads a WAV and prints text works.
 
 - Linux (Wayland or X11)
 - Audio recorder: [ffmpeg](https://ffmpeg.org/) (preferred) or PipeWire (`pw-record`)
-- [ydotool](https://github.com/ReimuNotMoe/ydotool) for typing text
-  (user must be in the `input` group — see Install)
+- Typing tool (one of):
+  - [wtype](https://github.com/atx/wtype) — recommended for Wayland, no daemon needed
+  - [ydotool](https://github.com/ReimuNotMoe/ydotool) + `ydotoold` — Wayland & X11
+  - [xdotool](https://github.com/jordansissel/xdotool) — X11 only
 - [socat](https://linux.die.net/man/1/socat) (for server-backed transcription)
 
 For the default backend (faster-whisper):
@@ -36,12 +38,14 @@ make install
 ```
 
 This will:
-1. Install system packages (`ydotool`, etc.)
+1. Install system packages (`wtype`, `ydotool`, etc.)
 2. Create a Python venv with `faster-whisper`
 3. Symlink `talktype` into `~/.local/bin/`
 
 ### ydotool permissions
 
+> **Note:** Only needed if you use ydotool. If you use wtype (Wayland) or xdotool (X11), skip this.
+
 `ydotool` needs access to `/dev/uinput`. Add yourself to the `input` group:
 
 ```bash
@@ -74,6 +78,10 @@ EOF
 Any `TALKTYPE_*` variable can go in this file. Environment variables still work
 and are applied after the config file, so they override it.
 
+Set `TALKTYPE_TYPE_CMD` to control which typing tool is used (`auto`, `wtype`,
+`ydotool`, `xdotool`, or any custom command). Default is `auto`, which picks
+the best available tool: wtype (Wayland) → ydotool+daemon → xdotool (X11) → ydotool (bare, with warning).
+
 ## Setup
 
 Bind `talktype` to a keyboard shortcut:
@@ -182,7 +190,7 @@ contract — use whatever model, language, or runtime you want.
                                             ↓
                                      $TALKTYPE_CMD audio.wav
                                             ↓
-                                     ydotool type → text appears at cursor
+                                     type_text → text appears at cursor
 ```
 
 The `talktype` script is ~80 lines of bash. Transcription backends are
diff --git a/talktype b/talktype
index 7e84c9d..47011a4 100755
--- a/talktype
+++ b/talktype
@@ -9,7 +9,7 @@
 # Transcription is pluggable: set TALKTYPE_CMD to any command that
 # takes a WAV file path as its last argument and prints text to stdout.
 #
-# Requires: ydotool, pw-record (PipeWire)
+# Requires: wtype/ydotool/xdotool, ffmpeg/pw-record
 #
 set -euo pipefail
 
@@ -58,10 +58,49 @@ notify_close() {
     fi
 }
 
+# ── Typing tool selection ──
+warn_ydotool_no_daemon() {
+    local warnfile="$TALKTYPE_DIR/.ydotool-warned"
+    [ -f "$warnfile" ] && return
+    touch "$warnfile"
+    echo "Warning: ydotool without ydotoold leaks virtual input devices (see issue #7). Install wtype (Wayland) or run ydotoold." >&2
+    notify-send -t 5000 -i dialog-warning "TalkType" "ydotool without daemon — may leak input devices" 2>/dev/null || true
+}
+
+type_text() {
+    local text="$1"
+    local cmd="${TALKTYPE_TYPE_CMD:-auto}"
+
+    if [ "$cmd" = "auto" ]; then
+        if [ -n "${WAYLAND_DISPLAY:-}" ] && command -v wtype &>/dev/null; then
+            cmd=wtype
+        elif command -v ydotool &>/dev/null && pgrep -x ydotoold &>/dev/null; then
+            cmd=ydotool
+        elif [ -n "${DISPLAY:-}" ] && command -v xdotool &>/dev/null; then
+            cmd=xdotool
+        elif command -v ydotool &>/dev/null; then
+            warn_ydotool_no_daemon
+            cmd=ydotool
+        fi
+    fi
+
+    case "$cmd" in
+        wtype)    wtype -- "$text" ;;
+        ydotool)  ydotool type --key-delay 20 -- "$text" ;;
+        xdotool)  xdotool type -- "$text" ;;
+        *)        $cmd "$text" ;;
+    esac
+}
+
 # ── Check core dependencies ──
 check_deps() {
     local missing=()
-    command -v ydotool    &>/dev/null || missing+=(ydotool)
+    local type_cmd="${TALKTYPE_TYPE_CMD:-auto}"
+    if [ "$type_cmd" = "auto" ]; then
+        command -v wtype &>/dev/null || command -v ydotool &>/dev/null || command -v xdotool &>/dev/null || missing+=("wtype, ydotool, or xdotool")
+    else
+        command -v "$type_cmd" &>/dev/null || missing+=("$type_cmd")
+    fi
     command -v ffmpeg &>/dev/null || command -v pw-record &>/dev/null || missing+=("ffmpeg or pipewire")
     command -v notify-send &>/dev/null || missing+=(libnotify-bin)
 
@@ -96,8 +135,8 @@ if [ -f "$PIDFILE" ]; then
 
     notify_close
 
-    # Type text at cursor via ydotool
-    ydotool type --key-delay 20 -- "$TEXT"
+    # Type text at cursor
+    type_text "$TEXT"
 
 # ── Otherwise → start recording ──
 else
diff --git a/test/mocks/wtype b/test/mocks/wtype
new file mode 100755
index 0000000..f9b6873
--- /dev/null
+++ b/test/mocks/wtype
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Mock wtype: log the command and args
+echo "$@" >> "$TALKTYPE_DIR/wtype.log"
diff --git a/test/mocks/xdotool b/test/mocks/xdotool
new file mode 100755
index 0000000..bdcf4cb
--- /dev/null
+++ b/test/mocks/xdotool
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Mock xdotool: log the command and args
+echo "$@" >> "$TALKTYPE_DIR/xdotool.log"
diff --git a/test/talktype.bats b/test/talktype.bats
index 3dfa480..ef983f6 100644
--- a/test/talktype.bats
+++ b/test/talktype.bats
@@ -8,6 +8,7 @@ setup() {
     export TALKTYPE_CONFIG="/dev/null"
     export TALKTYPE_DIR="$BATS_TEST_TMPDIR/talktype"
     export TALKTYPE_CMD="$BATS_TEST_DIRNAME/mock-transcribe"
+    export WAYLAND_DISPLAY=wayland-0
 
     # Put mocks on PATH before real commands
     export PATH="$BATS_TEST_DIRNAME/mocks:$PATH"
@@ -65,6 +66,7 @@ start_fake_recording() {
 
 @test "transcribed text is typed via ydotool" {
     start_fake_recording
+    export TALKTYPE_TYPE_CMD=ydotool
 
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
@@ -76,6 +78,7 @@ start_fake_recording() {
 @test "custom TALKTYPE_CMD is used for transcription" {
     start_fake_recording
     export TALKTYPE_CMD="$BATS_TEST_DIRNAME/mock-transcribe-custom"
+    export TALKTYPE_TYPE_CMD=ydotool
 
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
@@ -92,8 +95,9 @@ start_fake_recording() {
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
 
-    # ydotool should NOT have been called
+    # No typing tool should have been called
     [ ! -f "$TALKTYPE_DIR/ydotool.log" ]
+    [ ! -f "$TALKTYPE_DIR/wtype.log" ]
 }
 
 # ── Error handling ──
@@ -102,6 +106,7 @@ start_fake_recording() {
     # Simulate a crashed recording: PID file points to a dead process
     echo "99999" > "$TALKTYPE_DIR/rec.pid"
     echo "audio data" > "$TALKTYPE_DIR/rec.wav"
+    export TALKTYPE_TYPE_CMD=ydotool
 
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
@@ -120,8 +125,9 @@ start_fake_recording() {
     # Script should fail (set -e catches the non-zero exit)
     [ "$status" -ne 0 ]
 
-    # ydotool should NOT have been called
+    # No typing tool should have been called
     [ ! -f "$TALKTYPE_DIR/ydotool.log" ]
+    [ ! -f "$TALKTYPE_DIR/wtype.log" ]
 }
 
 # ── Recorder selection ──
@@ -161,25 +167,47 @@ start_fake_recording() {
     [[ "$(cat "$TALKTYPE_DIR/recorder.log")" == "pw-record" ]]
 }
 
+# ── Typing tool selection ──
+
+@test "wtype is preferred on Wayland when available" {
+    start_fake_recording
+
+    run "$TALKTYPE"
+    [ "$status" -eq 0 ]
+
+    [ -f "$TALKTYPE_DIR/wtype.log" ]
+    [ ! -f "$TALKTYPE_DIR/ydotool.log" ]
+    [[ "$(cat "$TALKTYPE_DIR/wtype.log")" == *"hello world"* ]]
+}
+
+@test "TALKTYPE_TYPE_CMD overrides auto-detection" {
+    start_fake_recording
+    export TALKTYPE_TYPE_CMD=xdotool
+
+    run "$TALKTYPE"
+    [ "$status" -eq 0 ]
+
+    [ -f "$TALKTYPE_DIR/xdotool.log" ]
+    [ ! -f "$TALKTYPE_DIR/wtype.log" ]
+    [[ "$(cat "$TALKTYPE_DIR/xdotool.log")" == *"hello world"* ]]
+}
+
 # ── Dependency checking ──
 
-@test "fails when a required tool is missing" {
-    # Create a minimal PATH with only the tools we want (no ydotool)
+@test "fails when no typing tool is available" {
+    # Create a minimal PATH with only recorder + notify (no typing tools)
     local sparse="$BATS_TEST_TMPDIR/sparse_path"
     mkdir -p "$sparse"
-    ln -sf "$(command -v pw-record)" "$sparse/pw-record"
-    ln -sf "$(command -v notify-send)" "$sparse/notify-send"
-    ln -sf "$(command -v bash)" "$sparse/bash"
-    ln -sf "$(command -v mkdir)" "$sparse/mkdir"
-    ln -sf "$(command -v cat)" "$sparse/cat"
-    ln -sf "$(command -v kill)" "$sparse/kill"
-    ln -sf "$(command -v sleep)" "$sparse/sleep"
-    ln -sf "$(command -v echo)" "$sparse/echo"
-    ln -sf "$(command -v rm)" "$sparse/rm"
+    ln -sf "$BATS_TEST_DIRNAME/mocks/pw-record" "$sparse/pw-record"
+    ln -sf "$BATS_TEST_DIRNAME/mocks/notify-send" "$sparse/notify-send"
+    for cmd in bash mkdir cat kill sleep echo rm pgrep; do
+        local path
+        path=$(command -v "$cmd" 2>/dev/null) && ln -sf "$path" "$sparse/$cmd"
+    done
 
     PATH="$sparse"
 
     run "$TALKTYPE"
     [ "$status" -eq 1 ]
-    [[ "$output" == *"Missing"*"ydotool"* ]]
+    [[ "$output" == *"Missing"* ]]
 }

From 61c8284a85bcdf6b27dac16e975fc459304a2e6c Mon Sep 17 00:00:00 2001
From: Clay Sheaff <claysheaff@protonmail.com>
Date: Sun, 15 Feb 2026 10:05:49 -0800
Subject: [PATCH 2/9] Address review: add pgrep mock, ydotool+daemon test, warn
 test, fix edge cases

- Add pgrep mock for deterministic tests (default: no ydotoold running)
- Add test for ydotool+daemon detection path
- Add test for bare ydotool warning (once-per-session)
- Error explicitly if auto-detection finds no typing tool
- Update line counts in CLAUDE.md and README.md
- Update architecture description in CLAUDE.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CLAUDE.md          |  4 ++--
 README.md          |  2 +-
 talktype           |  3 +++
 test/mocks/pgrep   |  4 ++++
 test/talktype.bats | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 54 insertions(+), 4 deletions(-)
 create mode 100755 test/mocks/pgrep

diff --git a/CLAUDE.md b/CLAUDE.md
index 091ac61..6fe0daf 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -43,9 +43,9 @@ python3 -m py_compile transcribe whisper-daemon.py backends/*-daemon.py
 
 ## Architecture
 
-**Core flow:** hotkey → `talktype` (Bash) → record audio (ffmpeg/pw-record) → call `$TALKTYPE_CMD` with WAV path → type result via ydotool.
+**Core flow:** hotkey → `talktype` (Bash) → record audio (ffmpeg/pw-record) → call `$TALKTYPE_CMD` with WAV path → type result via `type_text` (wtype/ydotool/xdotool).
 
-**Main script** (`talktype`, ~116 lines Bash): manages recording state via PID file (`$TALKTYPE_DIR/rec.pid`), sends desktop notifications, delegates transcription to `$TALKTYPE_CMD`.
+**Main script** (`talktype`, ~160 lines Bash): manages recording state via PID file (`$TALKTYPE_DIR/rec.pid`), sends desktop notifications, delegates transcription to `$TALKTYPE_CMD`.
 
 **Backend pattern — two modes per backend:**
 - **Direct invocation** (`transcribe`, `backends/parakeet`, `backends/moonshine`): Python scripts that load model, transcribe, exit. Simple but slow (model reload each time).
diff --git a/README.md b/README.md
index b6733dc..2b8bfd6 100644
--- a/README.md
+++ b/README.md
@@ -193,7 +193,7 @@ contract — use whatever model, language, or runtime you want.
                                      type_text → text appears at cursor
 ```
 
-The `talktype` script is ~80 lines of bash. Transcription backends are
+The `talktype` script is ~160 lines of bash. Transcription backends are
 swappable. Server mode uses Unix sockets to keep models in memory.
 
 ## License
diff --git a/talktype b/talktype
index 47011a4..6201ec1 100755
--- a/talktype
+++ b/talktype
@@ -81,6 +81,9 @@ type_text() {
         elif command -v ydotool &>/dev/null; then
             warn_ydotool_no_daemon
             cmd=ydotool
+        else
+            echo "Error: no typing tool found (install wtype, ydotool, or xdotool)" >&2
+            return 1
         fi
     fi
 
diff --git a/test/mocks/pgrep b/test/mocks/pgrep
new file mode 100755
index 0000000..afbb142
--- /dev/null
+++ b/test/mocks/pgrep
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+# Mock pgrep: always report no matching process (exit 1)
+# Override MOCK_PGREP_EXIT=0 in tests that need ydotoold detection
+exit "${MOCK_PGREP_EXIT:-1}"
diff --git a/test/talktype.bats b/test/talktype.bats
index ef983f6..09fb17f 100644
--- a/test/talktype.bats
+++ b/test/talktype.bats
@@ -192,6 +192,48 @@ start_fake_recording() {
     [[ "$(cat "$TALKTYPE_DIR/xdotool.log")" == *"hello world"* ]]
 }
 
+@test "ydotool is preferred when ydotoold is running" {
+    start_fake_recording
+    unset WAYLAND_DISPLAY
+    export MOCK_PGREP_EXIT=0
+
+    run "$TALKTYPE"
+    [ "$status" -eq 0 ]
+
+    [ -f "$TALKTYPE_DIR/ydotool.log" ]
+    [ ! -f "$TALKTYPE_DIR/wtype.log" ]
+    [[ "$(cat "$TALKTYPE_DIR/ydotool.log")" == *"hello world"* ]]
+}
+
+@test "bare ydotool warns about missing daemon once per session" {
+    start_fake_recording
+    unset WAYLAND_DISPLAY
+    unset DISPLAY
+
+    # Remove wtype and xdotool from PATH so only bare ydotool remains
+    local sparse="$BATS_TEST_TMPDIR/bare_ydotool"
+    mkdir -p "$sparse"
+    for mock in "$BATS_TEST_DIRNAME"/mocks/*; do
+        name=$(basename "$mock")
+        [ "$name" = "wtype" ] && continue
+        [ "$name" = "xdotool" ] && continue
+        ln -sf "$mock" "$sparse/$name"
+    done
+    for cmd in bash mkdir cat kill sleep echo rm touch; do
+        local path
+        path=$(command -v "$cmd" 2>/dev/null) && ln -sf "$path" "$sparse/$cmd"
+    done
+    PATH="$sparse"
+
+    run "$TALKTYPE"
+    [ "$status" -eq 0 ]
+
+    # Warning file should exist
+    [ -f "$TALKTYPE_DIR/.ydotool-warned" ]
+    # Warning should be in stderr (captured in output by bats)
+    [[ "$output" == *"ydotool without ydotoold"* ]]
+}
+
 # ── Dependency checking ──
 
 @test "fails when no typing tool is available" {
@@ -200,7 +242,8 @@ start_fake_recording() {
     mkdir -p "$sparse"
     ln -sf "$BATS_TEST_DIRNAME/mocks/pw-record" "$sparse/pw-record"
     ln -sf "$BATS_TEST_DIRNAME/mocks/notify-send" "$sparse/notify-send"
-    for cmd in bash mkdir cat kill sleep echo rm pgrep; do
+    ln -sf "$BATS_TEST_DIRNAME/mocks/pgrep" "$sparse/pgrep"
+    for cmd in bash mkdir cat kill sleep echo rm; do
         local path
         path=$(command -v "$cmd" 2>/dev/null) && ln -sf "$path" "$sparse/$cmd"
     done

From c174f7b4309c25dfc75008054d7323e1ef5e5e54 Mon Sep 17 00:00:00 2001
From: Clay Sheaff <claysheaff@protonmail.com>
Date: Sun, 15 Feb 2026 10:27:46 -0800
Subject: [PATCH 3/9] Make type_text try-and-fallback instead of
 detect-then-run

wtype can be installed but fail at runtime if the compositor doesn't
support virtual-keyboard-unstable-v1 (e.g. GNOME). Change auto mode
to actually try each tool and fall through on failure, rather than
just checking if the binary exists.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 talktype | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/talktype b/talktype
index 6201ec1..0913861 100755
--- a/talktype
+++ b/talktype
@@ -71,28 +71,34 @@ type_text() {
     local text="$1"
     local cmd="${TALKTYPE_TYPE_CMD:-auto}"
 
-    if [ "$cmd" = "auto" ]; then
-        if [ -n "${WAYLAND_DISPLAY:-}" ] && command -v wtype &>/dev/null; then
-            cmd=wtype
-        elif command -v ydotool &>/dev/null && pgrep -x ydotoold &>/dev/null; then
-            cmd=ydotool
-        elif [ -n "${DISPLAY:-}" ] && command -v xdotool &>/dev/null; then
-            cmd=xdotool
-        elif command -v ydotool &>/dev/null; then
-            warn_ydotool_no_daemon
-            cmd=ydotool
-        else
-            echo "Error: no typing tool found (install wtype, ydotool, or xdotool)" >&2
-            return 1
-        fi
+    # Explicit tool — use it directly
+    if [ "$cmd" != "auto" ]; then
+        case "$cmd" in
+            wtype)    wtype -- "$text" ;;
+            ydotool)  ydotool type --key-delay 20 -- "$text" ;;
+            xdotool)  xdotool type -- "$text" ;;
+            *)        $cmd "$text" ;;
+        esac
+        return
     fi
 
-    case "$cmd" in
-        wtype)    wtype -- "$text" ;;
-        ydotool)  ydotool type --key-delay 20 -- "$text" ;;
-        xdotool)  xdotool type -- "$text" ;;
-        *)        $cmd "$text" ;;
-    esac
+    # Auto-detect: try each tool, fall through on failure
+    # (wtype may be installed but unsupported by the compositor)
+    if [ -n "${WAYLAND_DISPLAY:-}" ] && command -v wtype &>/dev/null; then
+        wtype -- "$text" 2>/dev/null && return
+    fi
+    if command -v ydotool &>/dev/null && pgrep -x ydotoold &>/dev/null; then
+        ydotool type --key-delay 20 -- "$text" && return
+    fi
+    if [ -n "${DISPLAY:-}" ] && command -v xdotool &>/dev/null; then
+        xdotool type -- "$text" && return
+    fi
+    if command -v ydotool &>/dev/null; then
+        warn_ydotool_no_daemon
+        ydotool type --key-delay 20 -- "$text" && return
+    fi
+    echo "Error: no typing tool found (install wtype, ydotool, or xdotool)" >&2
+    return 1
 }
 
 # ── Check core dependencies ──

From 3a3dc3390712c5d495f49277a92582429669dc8c Mon Sep 17 00:00:00 2001
From: Clay Sheaff <claysheaff@protonmail.com>
Date: Sun, 15 Feb 2026 10:33:06 -0800
Subject: [PATCH 4/9] Refuse bare ydotool in auto mode, remove Parakeet from
 README

Running ydotool without ydotoold leaks a kernel input device on every
invocation, which can crash the system. Auto mode now uses only safe
tools (wtype, ydotool+daemon, xdotool) and fails with clear instructions
if none of them works. Users can still opt in to bare ydotool via
TALKTYPE_TYPE_CMD=ydotool; that path warns when ydotoold is not running.

Also remove the Parakeet backend from the README (its CTC model outputs
lowercase text without punctuation, which is not useful for typing) and
add xdotool to the packages installed by `make deps`.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 Makefile           |  2 +-
 README.md          | 42 +++++++++++++++---------------------------
 talktype           | 18 ++++++++++++------
 test/talktype.bats | 13 ++++++++++---
 4 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/Makefile b/Makefile
index c24c3f6..67677b6 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ install: deps venv
 
 # Install system dependencies (requires sudo)
 deps:
-	sudo apt install -y wtype ydotool ffmpeg pipewire libnotify-bin python3-venv socat
+	sudo apt install -y wtype xdotool ydotool ffmpeg pipewire libnotify-bin python3-venv socat
 
 # Create Python venv with faster-whisper (default backend)
 venv: .venv/.done
diff --git a/README.md b/README.md
index 2b8bfd6..641697d 100644
--- a/README.md
+++ b/README.md
@@ -9,9 +9,8 @@ app to keep running — just a keyboard shortcut.
 - **~100 lines of bash** — easy to read, easy to hack on
 
 Ships with [faster-whisper](https://github.com/SYSTRAN/faster-whisper) by
-default, plus optional [Parakeet](https://huggingface.co/nvidia/parakeet-ctc-1.1b)
-and [Moonshine](https://huggingface.co/UsefulSensors/moonshine-base) backends.
-Or bring your own — anything that reads a WAV and prints text works.
+default, plus an optional [Moonshine](https://huggingface.co/UsefulSensors/moonshine-base)
+backend for CPU. Or bring your own — anything that reads a WAV and prints text works.
 
 > **Note:** This project is in early development — expect rough edges. If you
 > run into issues, please [open a bug](https://github.com/csheaff/talktype/issues).
@@ -21,9 +20,13 @@ Or bring your own — anything that reads a WAV and prints text works.
 - Linux (Wayland or X11)
 - Audio recorder: [ffmpeg](https://ffmpeg.org/) (preferred) or PipeWire (`pw-record`)
 - Typing tool (one of):
-  - [wtype](https://github.com/atx/wtype) — recommended for Wayland, no daemon needed
-  - [ydotool](https://github.com/ReimuNotMoe/ydotool) + `ydotoold` — Wayland & X11
-  - [xdotool](https://github.com/jordansissel/xdotool) — X11 only
+  - [wtype](https://github.com/atx/wtype) — Wayland (Sway, Hyprland; not GNOME)
+  - [xdotool](https://github.com/jordansissel/xdotool) — X11 and XWayland (works on GNOME)
+  - [ydotool](https://github.com/ReimuNotMoe/ydotool) ≥1.0 + `ydotoold` — Wayland & X11
+
+  > **Warning:** ydotool **without** `ydotoold` leaks a kernel input device on
+  > every keystroke, which can crash your system. talktype will not use bare
+  > ydotool automatically — you must opt in via `TALKTYPE_TYPE_CMD=ydotool`.
 - [socat](https://linux.die.net/man/1/socat) (for server-backed transcription)
 
 For the default backend (faster-whisper):
@@ -80,7 +83,7 @@ and are applied after the config file, so they override it.
 
 Set `TALKTYPE_TYPE_CMD` to control which typing tool is used (`auto`, `wtype`,
 `ydotool`, `xdotool`, or any custom command). Default is `auto`, which picks
-the best available tool: wtype (Wayland) → ydotool+daemon → xdotool (X11) → ydotool (bare, with warning).
+the best available tool: wtype (Wayland) → ydotool+daemon → xdotool (X11).
 
 ## Setup
 
@@ -104,8 +107,8 @@ bindsym $mod+d exec talktype
 
 ## Backends
 
-Three backends are included. Server backends auto-start on first use — the
-model loads once and stays in memory for fast subsequent transcriptions.
+Server backends auto-start on first use — the model loads once and stays in
+memory for fast subsequent transcriptions.
 
 ### Whisper (default)
 
@@ -126,21 +129,6 @@ TALKTYPE_CMD="/path/to/talktype/transcribe-server transcribe"
 | `WHISPER_DEVICE` | `cuda` | `cuda` or `cpu` |
 | `WHISPER_COMPUTE` | `float16` | `float16` (GPU), `int8` or `float32` (CPU) |
 
-### Parakeet (GPU, best word accuracy)
-
-[NVIDIA Parakeet CTC 1.1B](https://huggingface.co/nvidia/parakeet-ctc-1.1b)
-via HuggingFace Transformers. 1.1B params, excellent word accuracy.
-Note: CTC model — outputs lowercase text without punctuation.
-
-```bash
-make parakeet
-```
-
-```bash
-# ~/.config/talktype/config
-TALKTYPE_CMD="/path/to/talktype/backends/parakeet-server transcribe"
-```
-
 ### Moonshine (CPU, lightweight)
 
 [Moonshine](https://huggingface.co/UsefulSensors/moonshine-base) by Useful
@@ -158,14 +146,14 @@ TALKTYPE_CMD="/path/to/talktype/backends/moonshine-server transcribe"
 Set `MOONSHINE_MODEL=UsefulSensors/moonshine-tiny` for an even smaller 27M
 param model.
 
-### Manual server management
+### Server management
 
 The server starts automatically on first transcription. You can also manage
 it directly:
 
 ```bash
-./backends/parakeet-server start   # start manually
-./backends/parakeet-server stop    # stop the server
+./transcribe-server start   # start manually
+./transcribe-server stop    # stop the server
 ```
 
 ### Custom backends
diff --git a/talktype b/talktype
index 0913861..0faf9fc 100755
--- a/talktype
+++ b/talktype
@@ -73,6 +73,9 @@ type_text() {
 
     # Explicit tool — use it directly
     if [ "$cmd" != "auto" ]; then
+        if [ "$cmd" = "ydotool" ] && ! pgrep -x ydotoold &>/dev/null; then
+            warn_ydotool_no_daemon
+        fi
         case "$cmd" in
             wtype)    wtype -- "$text" ;;
             ydotool)  ydotool type --key-delay 20 -- "$text" ;;
@@ -82,7 +85,7 @@ type_text() {
         return
     fi
 
-    # Auto-detect: try each tool, fall through on failure
+    # Auto-detect: try each safe tool, fall through on failure
     # (wtype may be installed but unsupported by the compositor)
     if [ -n "${WAYLAND_DISPLAY:-}" ] && command -v wtype &>/dev/null; then
         wtype -- "$text" 2>/dev/null && return
@@ -93,11 +96,14 @@ type_text() {
     if [ -n "${DISPLAY:-}" ] && command -v xdotool &>/dev/null; then
         xdotool type -- "$text" && return
     fi
-    if command -v ydotool &>/dev/null; then
-        warn_ydotool_no_daemon
-        ydotool type --key-delay 20 -- "$text" && return
-    fi
-    echo "Error: no typing tool found (install wtype, ydotool, or xdotool)" >&2
+    # Bare ydotool (without ydotoold) leaks a kernel input device per
+    # invocation, which can crash the system.  Refuse to use it in auto
+    # mode — the user must opt in via TALKTYPE_TYPE_CMD=ydotool.
+    echo "Error: no safe typing tool found." >&2
+    echo "  Install xdotool (apt install xdotool), or" >&2
+    echo "  install ydotool ≥1.0 and run ydotoold, or" >&2
+    echo "  set TALKTYPE_TYPE_CMD=ydotool to accept the risk." >&2
+    notify-send -t 5000 -i dialog-error "TalkType" "No safe typing tool — see terminal for details" 2>/dev/null || true
     return 1
 }
 
diff --git a/test/talktype.bats b/test/talktype.bats
index 09fb17f..7be4d4f 100644
--- a/test/talktype.bats
+++ b/test/talktype.bats
@@ -205,7 +205,7 @@ start_fake_recording() {
     [[ "$(cat "$TALKTYPE_DIR/ydotool.log")" == *"hello world"* ]]
 }
 
-@test "bare ydotool warns about missing daemon once per session" {
+@test "auto mode refuses bare ydotool without daemon" {
     start_fake_recording
     unset WAYLAND_DISPLAY
     unset DISPLAY
@@ -225,12 +225,19 @@ start_fake_recording() {
     done
     PATH="$sparse"
 
+    run "$TALKTYPE"
+    [ "$status" -ne 0 ]
+    [[ "$output" == *"no safe typing tool"* ]]
+}
+
+@test "explicit TALKTYPE_TYPE_CMD=ydotool warns without daemon" {
+    start_fake_recording
+    export TALKTYPE_TYPE_CMD=ydotool
+
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
 
-    # Warning file should exist
     [ -f "$TALKTYPE_DIR/.ydotool-warned" ]
-    # Warning should be in stderr (captured in output by bats)
     [[ "$output" == *"ydotool without ydotoold"* ]]
 }
 

From 45751ca9576d78f712dcb0a368b0b7d101251660 Mon Sep 17 00:00:00 2001
From: Clay Sheaff <claysheaff@protonmail.com>
Date: Sun, 15 Feb 2026 10:38:36 -0800
Subject: [PATCH 5/9] Allow bare ydotool as last-resort fallback with warning

On GNOME Wayland, wtype fails (no virtual-keyboard protocol) and
xdotool fails (X11 events ignored by Wayland apps). Bare ydotool is
the only working option. Testing shows ydotool 0.1.8-3build1 cleans
up its virtual input devices, so allow it as a last resort with a
once-per-session warning about the potential device leak.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 talktype           | 13 +++++--------
 test/talktype.bats | 15 +++++----------
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/talktype b/talktype
index 0faf9fc..16a4fc5 100755
--- a/talktype
+++ b/talktype
@@ -96,14 +96,11 @@ type_text() {
     if [ -n "${DISPLAY:-}" ] && command -v xdotool &>/dev/null; then
         xdotool type -- "$text" && return
     fi
-    # Bare ydotool (without ydotoold) leaks a kernel input device per
-    # invocation, which can crash the system.  Refuse to use it in auto
-    # mode — the user must opt in via TALKTYPE_TYPE_CMD=ydotool.
-    echo "Error: no safe typing tool found." >&2
-    echo "  Install xdotool (apt install xdotool), or" >&2
-    echo "  install ydotool ≥1.0 and run ydotoold, or" >&2
-    echo "  set TALKTYPE_TYPE_CMD=ydotool to accept the risk." >&2
-    notify-send -t 5000 -i dialog-error "TalkType" "No safe typing tool — see terminal for details" 2>/dev/null || true
+    if command -v ydotool &>/dev/null; then
+        warn_ydotool_no_daemon
+        ydotool type --key-delay 20 -- "$text" && return
+    fi
+    echo "Error: no typing tool found (install wtype, ydotool, or xdotool)" >&2
     return 1
 }
 
diff --git a/test/talktype.bats b/test/talktype.bats
index 7be4d4f..a3b159e 100644
--- a/test/talktype.bats
+++ b/test/talktype.bats
@@ -205,7 +205,7 @@ start_fake_recording() {
     [[ "$(cat "$TALKTYPE_DIR/ydotool.log")" == *"hello world"* ]]
 }
 
-@test "auto mode refuses bare ydotool without daemon" {
+@test "bare ydotool is used as last resort with warning" {
     start_fake_recording
     unset WAYLAND_DISPLAY
     unset DISPLAY
@@ -225,19 +225,14 @@ start_fake_recording() {
     done
     PATH="$sparse"
 
-    run "$TALKTYPE"
-    [ "$status" -ne 0 ]
-    [[ "$output" == *"no safe typing tool"* ]]
-}
-
-@test "explicit TALKTYPE_TYPE_CMD=ydotool warns without daemon" {
-    start_fake_recording
-    export TALKTYPE_TYPE_CMD=ydotool
-
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
 
+    # Should have typed via ydotool
+    [[ "$(cat "$TALKTYPE_DIR/ydotool.log")" == *"hello world"* ]]
+    # Warning file should exist
     [ -f "$TALKTYPE_DIR/.ydotool-warned" ]
+    # Warning should be in output
     [[ "$output" == *"ydotool without ydotoold"* ]]
 }
 

From 5262a5687db8c4e1358ba9d5677097f2c67c33f2 Mon Sep 17 00:00:00 2001
From: Clay Sheaff <claysheaff@protonmail.com>
Date: Sun, 15 Feb 2026 10:41:07 -0800
Subject: [PATCH 6/9] Skip xdotool on Wayland sessions (returns 0 but types
 nothing)

xdotool sends X11 key events that Wayland-native apps silently ignore,
yet returns exit 0. Only try xdotool on pure X11 sessions (DISPLAY set,
WAYLAND_DISPLAY unset) so it doesn't block the ydotool fallback.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 talktype | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/talktype b/talktype
index 16a4fc5..4ac0e62 100755
--- a/talktype
+++ b/talktype
@@ -93,7 +93,7 @@ type_text() {
     if command -v ydotool &>/dev/null && pgrep -x ydotoold &>/dev/null; then
         ydotool type --key-delay 20 -- "$text" && return
     fi
-    if [ -n "${DISPLAY:-}" ] && command -v xdotool &>/dev/null; then
+    if [ -n "${DISPLAY:-}" ] && [ -z "${WAYLAND_DISPLAY:-}" ] && command -v xdotool &>/dev/null; then
         xdotool type -- "$text" && return
     fi
     if command -v ydotool &>/dev/null; then

From c98c5ea3122e6eef8a7fbe79174d71436dc21fed Mon Sep 17 00:00:00 2001
From: Clay Sheaff <claysheaff@protonmail.com>
Date: Sun, 15 Feb 2026 10:43:08 -0800
Subject: [PATCH 7/9] Fix README: update line count, correct typing tool
 descriptions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 README.md | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 641697d..d8744e2 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ app to keep running — just a keyboard shortcut.
 
 - **Pluggable backends** — swap transcription models without changing anything else
 - **Works everywhere** — GNOME, Sway, Hyprland, i3, X11
-- **~100 lines of bash** — easy to read, easy to hack on
+- **~160 lines of bash** — easy to read, easy to hack on
 
 Ships with [faster-whisper](https://github.com/SYSTRAN/faster-whisper) by
 default, plus an optional [Moonshine](https://huggingface.co/UsefulSensors/moonshine-base)
@@ -19,14 +19,11 @@ backend for CPU. Or bring your own — anything that reads a WAV and prints text
 
 - Linux (Wayland or X11)
 - Audio recorder: [ffmpeg](https://ffmpeg.org/) (preferred) or PipeWire (`pw-record`)
-- Typing tool (one of):
+- Typing tool (auto-detected, best available is used):
   - [wtype](https://github.com/atx/wtype) — Wayland (Sway, Hyprland; not GNOME)
-  - [xdotool](https://github.com/jordansissel/xdotool) — X11 and XWayland (works on GNOME)
-  - [ydotool](https://github.com/ReimuNotMoe/ydotool) ≥1.0 + `ydotoold` — Wayland & X11
-
-  > **Warning:** ydotool **without** `ydotoold` leaks a kernel input device on
-  > every keystroke, which can crash your system. talktype will not use bare
-  > ydotool automatically — you must opt in via `TALKTYPE_TYPE_CMD=ydotool`.
+  - [ydotool](https://github.com/ReimuNotMoe/ydotool) + `ydotoold` — Wayland & X11 (preferred with daemon)
+  - [xdotool](https://github.com/jordansissel/xdotool) — X11 only (not Wayland)
+  - ydotool without daemon — last resort, with warning
 - [socat](https://linux.die.net/man/1/socat) (for server-backed transcription)
 
 For the default backend (faster-whisper):

From 0dec1d96c2ad038c1a1c635409c41d1f27c788d7 Mon Sep 17 00:00:00 2001
From: Clay Sheaff <claysheaff@protonmail.com>
Date: Sat, 21 Feb 2026 14:49:34 -0800
Subject: [PATCH 8/9] Source user config in server scripts so model settings
 propagate

Server scripts (transcribe-server, moonshine-server, parakeet-server)
now source ~/.config/talktype/config directly. Previously, env vars
like WHISPER_MODEL were shell-local in talktype and didn't propagate
to server subprocesses, causing the daemon to load the wrong model.

Also bumps startup timeout from 30s to 60s for whisper and moonshine
servers (large models + cold CUDA init can exceed 30s on reboot).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backends/moonshine-server |  8 +++++++-
 backends/parakeet-server  |  8 +++++++-
 test/server.bats          | 13 +++++++------
 transcribe-server         |  9 ++++++++-
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/backends/moonshine-server b/backends/moonshine-server
index 27439b3..e25518a 100755
--- a/backends/moonshine-server
+++ b/backends/moonshine-server
@@ -8,6 +8,12 @@
 #   TALKTYPE_CMD="backends/moonshine-server transcribe" talktype
 
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+# Source user config so env vars are available even when invoked directly.
+TALKTYPE_CONFIG="${TALKTYPE_CONFIG:-${XDG_CONFIG_HOME:-$HOME/.config}/talktype/config}"
+# shellcheck disable=SC1090
+[ -f "$TALKTYPE_CONFIG" ] && source "$TALKTYPE_CONFIG"
+
 VENV="$SCRIPT_DIR/.moonshine-venv"
 SOCK="${XDG_RUNTIME_DIR:-/tmp}/moonshine.sock"
 PIDFILE="${XDG_RUNTIME_DIR:-/tmp}/moonshine-server.pid"
@@ -29,7 +35,7 @@ case "${1:-}" in
         PID=$!
         disown "$PID"
         echo "$PID" > "$PIDFILE"
-        for i in $(seq 1 30); do
+        for _ in $(seq 1 60); do
             [ -S "$SOCK" ] && break
             sleep 1
         done
diff --git a/backends/parakeet-server b/backends/parakeet-server
index 479e432..e98b083 100755
--- a/backends/parakeet-server
+++ b/backends/parakeet-server
@@ -8,6 +8,12 @@
 #   TALKTYPE_CMD="backends/parakeet-server transcribe" talktype
 
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+# Source user config so env vars are available even when invoked directly.
+TALKTYPE_CONFIG="${TALKTYPE_CONFIG:-${XDG_CONFIG_HOME:-$HOME/.config}/talktype/config}"
+# shellcheck disable=SC1090
+[ -f "$TALKTYPE_CONFIG" ] && source "$TALKTYPE_CONFIG"
+
 VENV="$SCRIPT_DIR/.parakeet-venv"
 SOCK="${XDG_RUNTIME_DIR:-/tmp}/parakeet.sock"
 PIDFILE="${XDG_RUNTIME_DIR:-/tmp}/parakeet-server.pid"
@@ -28,7 +34,7 @@ case "${1:-}" in
         PID=$!
         disown "$PID"
         echo "$PID" > "$PIDFILE"
-        for i in $(seq 1 60); do
+        for _ in $(seq 1 60); do
             [ -S "$SOCK" ] && break
             sleep 1
         done
diff --git a/test/server.bats b/test/server.bats
index ac9055b..108f8a8 100644
--- a/test/server.bats
+++ b/test/server.bats
@@ -8,6 +8,8 @@ REPO_DIR="$BATS_TEST_DIRNAME/.."
 setup() {
     export SOCK="$BATS_TEST_TMPDIR/test-server.sock"
     export PIDFILE="$BATS_TEST_TMPDIR/test-server.pid"
+    # Isolate from user config so real venv/model settings don't bleed in
+    export TALKTYPE_CONFIG=/dev/null
 }
 
 teardown() {
@@ -74,12 +76,11 @@ start_mock_daemon() {
 # ── Server wrapper logic ──
 
 @test "transcribe auto-start fails gracefully when backend not installed" {
-    # With no venv installed, transcribe should attempt auto-start and fail
-    for server in transcribe-server backends/parakeet-server backends/moonshine-server; do
-        run "$REPO_DIR/$server" transcribe /tmp/test.wav
-        [ "$status" -eq 1 ]
-        [[ "$output" == *"not installed"* ]]
-    done
+    # Point whisper venv to nonexistent path to simulate missing install
+    export TALKTYPE_VENV="$BATS_TEST_TMPDIR/no-such-venv"
+    run "$REPO_DIR/transcribe-server" transcribe /tmp/test.wav
+    [ "$status" -eq 1 ]
+    [[ "$output" == *"not installed"* ]]
 }
 
 @test "stop reports not running when no pidfile exists" {
diff --git a/transcribe-server b/transcribe-server
index d887d57..1230255 100755
--- a/transcribe-server
+++ b/transcribe-server
@@ -7,6 +7,13 @@
 #   TALKTYPE_CMD="transcribe-server transcribe" talktype
 
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+# Source user config so env vars (WHISPER_MODEL, etc.) are available
+# even when invoked directly (not via talktype).
+TALKTYPE_CONFIG="${TALKTYPE_CONFIG:-${XDG_CONFIG_HOME:-$HOME/.config}/talktype/config}"
+# shellcheck disable=SC1090
+[ -f "$TALKTYPE_CONFIG" ] && source "$TALKTYPE_CONFIG"
+
 VENV="${TALKTYPE_VENV:-$SCRIPT_DIR/.venv}"
 SOCK="${XDG_RUNTIME_DIR:-/tmp}/talktype-whisper.sock"
 PIDFILE="${XDG_RUNTIME_DIR:-/tmp}/talktype-whisper.pid"
@@ -32,7 +39,7 @@ case "${1:-}" in
         PID=$!
         disown "$PID"
         echo "$PID" > "$PIDFILE"
-        for i in $(seq 1 30); do
+        for _ in $(seq 1 60); do
             [ -S "$SOCK" ] && break
             sleep 1
         done

From 33a4e4b9c07a37cba19692088067e647aff7f62c Mon Sep 17 00:00:00 2001
From: Clay Sheaff <claysheaff@protonmail.com>
Date: Sat, 21 Feb 2026 15:16:43 -0800
Subject: [PATCH 9/9] Detect dead daemon and add socat timeout in server
 transcribe

If the daemon crashes (e.g. CUDA not ready right after reboot), the
socket file remains but nobody is listening. Previously socat would
hang forever. Now the transcribe command checks the PID is alive before
connecting, cleans up stale state if not, and restarts the daemon.
Also adds a socat -T 30 inactivity timeout as a safety net.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backends/moonshine-server | 5 ++++-
 backends/parakeet-server  | 5 ++++-
 transcribe-server         | 6 +++++-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/backends/moonshine-server b/backends/moonshine-server
index e25518a..9bef3b6 100755
--- a/backends/moonshine-server
+++ b/backends/moonshine-server
@@ -56,10 +56,13 @@ case "${1:-}" in
         fi
         ;;
     transcribe)
+        if [ -S "$SOCK" ] && [ -f "$PIDFILE" ] && ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
+            rm -f "$PIDFILE" "$SOCK"
+        fi
         if [ ! -S "$SOCK" ]; then
             "$0" start >&2 || exit 1
         fi
-        echo "$2" | socat - UNIX-CONNECT:"$SOCK"
+        echo "$2" | socat -T 30 - UNIX-CONNECT:"$SOCK"
         ;;
     *)
         echo "Usage: moonshine-server {start|stop|transcribe <audio.wav>}" >&2
diff --git a/backends/parakeet-server b/backends/parakeet-server
index e98b083..2590b9f 100755
--- a/backends/parakeet-server
+++ b/backends/parakeet-server
@@ -55,10 +55,13 @@ case "${1:-}" in
         fi
         ;;
     transcribe)
+        if [ -S "$SOCK" ] && [ -f "$PIDFILE" ] && ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
+            rm -f "$PIDFILE" "$SOCK"
+        fi
         if [ ! -S "$SOCK" ]; then
             "$0" start >&2 || exit 1
         fi
-        echo "$2" | socat - UNIX-CONNECT:"$SOCK"
+        echo "$2" | socat -T 30 - UNIX-CONNECT:"$SOCK"
         ;;
     *)
         echo "Usage: parakeet-server {start|stop|transcribe <audio.wav>}" >&2
diff --git a/transcribe-server b/transcribe-server
index 1230255..27e8ccd 100755
--- a/transcribe-server
+++ b/transcribe-server
@@ -60,10 +60,14 @@ case "${1:-}" in
         fi
         ;;
     transcribe)
+        # Ensure daemon is alive (not just a stale socket from a crash)
+        if [ -S "$SOCK" ] && [ -f "$PIDFILE" ] && ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
+            rm -f "$PIDFILE" "$SOCK"
+        fi
         if [ ! -S "$SOCK" ]; then
             "$0" start >&2 || exit 1
         fi
-        echo "$2" | socat - UNIX-CONNECT:"$SOCK"
+        echo "$2" | socat -T 30 - UNIX-CONNECT:"$SOCK"
         ;;
     *)
         echo "Usage: transcribe-server {start|stop|transcribe <audio.wav>}" >&2