diff --git a/.github/workflows/clips-desktop-build-check.yml b/.github/workflows/clips-desktop-build-check.yml
index 02043567ff..c640a99748 100644
--- a/.github/workflows/clips-desktop-build-check.yml
+++ b/.github/workflows/clips-desktop-build-check.yml
@@ -58,6 +58,20 @@ jobs:
       - name: Install dependencies
         run: pnpm install --frozen-lockfile
 
+      # whisper-rs compiles whisper.cpp (CMake + C/C++) and runs bindgen, which
+      # needs libclang. windows-latest ships LLVM and CMake preinstalled; point
+      # bindgen at libclang and fall back to a Chocolatey install if missing.
+      - name: Set up libclang for whisper-rs bindgen (Windows)
+        if: runner.os == 'Windows'
+        shell: bash
+        run: |
+          # Install LLVM only when the runner image does not already ship libclang.
+          if [ ! -f "/c/Program Files/LLVM/bin/libclang.dll" ]; then
+            choco install llvm --no-progress -y
+          fi
+          # Either way, export the LLVM bin directory as LIBCLANG_PATH for later steps.
+          echo "LIBCLANG_PATH=C:\\Program Files\\LLVM\\bin" >> "$GITHUB_ENV"
+
       - name: Build frontend
         working-directory: ${{ env.TAURI_APP_PATH }}
         run: pnpm run build
diff --git a/.github/workflows/clips-desktop-release.yml b/.github/workflows/clips-desktop-release.yml
index 9db6d057b3..59cb507ce1 100644
--- a/.github/workflows/clips-desktop-release.yml
+++ b/.github/workflows/clips-desktop-release.yml
@@ -123,6 +123,21 @@ jobs:
           V=$(node -p "require('./${{ env.TAURI_APP_PATH }}/package.json').version")
           echo "version=$V" >> "$GITHUB_OUTPUT"
 
+      # whisper-rs compiles whisper.cpp (CMake + C/C++) and runs bindgen, which
+      # needs libclang. windows-latest ships LLVM and CMake preinstalled; point
+      # bindgen at libclang and fall back to a Chocolatey install if the image
+      # ever drops it. macOS already has clang via Xcode.
+      - name: Set up libclang for whisper-rs bindgen (Windows)
+        if: runner.os == 'Windows'
+        shell: bash
+        run: |
+          # Install LLVM only when the runner image does not already ship libclang.
+          if [ ! -f "/c/Program Files/LLVM/bin/libclang.dll" ]; then
+            choco install llvm --no-progress -y
+          fi
+          # Either way, export the LLVM bin directory as LIBCLANG_PATH for later steps.
+          echo "LIBCLANG_PATH=C:\\Program Files\\LLVM\\bin" >> "$GITHUB_ENV"
+
       - name: Build and release (tauri-action)
         uses: tauri-apps/tauri-action@84b9d35b5fc46c1e45415bdb6144030364f7ebc5 # v0
         env:
diff --git a/templates/clips/desktop/README.md b/templates/clips/desktop/README.md
index 5665c1cb37..b8a73d3c03 100644
--- a/templates/clips/desktop/README.md
+++ b/templates/clips/desktop/README.md
@@ -6,6 +6,37 @@ A small Tauri 2.x menu-bar app that lives in the macOS menu bar / Windows system
 - **Recent** — your three most recent recordings
 - Quick links to **Open library** and **Settings**
 
+## Meeting transcription (macOS + Windows)
+
+When the calendar watcher detects a meeting, the popover surfaces a "Start
+notes" notification. Accepting it captures **both** the microphone and the
+system (speaker) audio, transcribes them locally with Whisper (whisper.cpp, no
+cloud round-trip), and streams a live mic/system-labeled transcript.
+
+Audio capture is platform-dispatched in `src-tauri/src/capture/`:
+
+- **macOS** — AVAudioEngine with VoiceProcessingIO acoustic echo cancellation
+  (mic) and ScreenCaptureKit (system audio).
+- **Windows** — [`cpal`](https://crates.io/crates/cpal): the default WASAPI
+  input device (mic) and WASAPI **loopback** of the default output device
+  (system audio). Loopback needs no OS permission prompt.
+
+The Whisper engine (`src-tauri/src/whisper_speech.rs`) and the meeting
+detection, notification, and transcript-rendering flows are otherwise identical
+across platforms.
+
+### Known Windows limitations (v1)
+
+- **No mic echo cancellation.** macOS applies VoiceProcessingIO AEC so the mic
+  stream doesn't echo the system audio. `cpal` delivers the raw mic, so expect
+  some speaker bleed into the mic transcript when not on headphones. Mic and
+  system are transcribed and labeled separately, so this is cosmetic.
+- **No sleep / call-ended auto-stop.** The macOS sleep and call-ended watchers
+  are no-ops on Windows; the silence-based auto-stop still works. Stop notes
+  manually or via the silence timeout.
+- macOS-only features stay macOS-only: screen/window video recording, EventKit
+  local calendar, and Accessibility-based personal-vocabulary auto-learn.
+
 ## Develop
 
 First install the desktop workspace's own deps (this folder is outside the monorepo's `templates/*` glob because it ships its own Tauri/Vite toolchain):
diff --git a/templates/clips/desktop/src-tauri/Cargo.toml b/templates/clips/desktop/src-tauri/Cargo.toml
index 2b04b12e54..f9cf897784 100644
--- a/templates/clips/desktop/src-tauri/Cargo.toml
+++ b/templates/clips/desktop/src-tauri/Cargo.toml
@@ -112,13 +112,27 @@ screencapturekit = { version = "2.0.0", features = ["macos_15_0"] }
 # AudioBufferList type so we can hand SCK's CMSampleBuffer audio bytes to
 # AVAudioPCMBuffer for the speech recognizer.
 objc2-core-audio-types = "0.3"
-whisper-rs = { version = "0.16.0" }
 # NOTE: We hand-roll the few EventKit selectors we need via raw `objc2`
 # msg_send! in `eventkit.rs` rather than depending on the `objc2-event-kit`
 # crate. Its API surface drifts between minor 0.3.x releases (and not
 # every version exposes `EKEventStore::eventsMatchingPredicate`); the
 # slice we need is small enough that hand-rolling is more durable.
 
+# Windows-only dependency on `cpal`, a cross-platform audio crate. It covers
+# both the microphone (default WASAPI input) and system audio (WASAPI loopback
+# — an input stream on the default output device), i.e. both capture streams.
+# NOTE: verify this version pin against crates.io before release; network was
+# unavailable in the dev environment where this was added.
+[target."cfg(target_os = \"windows\")".dependencies]
+cpal = "0.15"
+
+# Local meeting transcription via whisper.cpp. Cross-platform (bundles its own
+# native C/C++ build via CMake), needed only on the two platforms that run the
+# Whisper meeting engine. Scoped here rather than the macOS-only block (it is
+# not macOS-specific) but kept off Linux, which has no meeting capture.
+[target."cfg(any(target_os = \"macos\", target_os = \"windows\"))".dependencies]
+whisper-rs = { version = "0.16.0" }
+
 [profile.release]
 panic = "abort"
 codegen-units = 1
diff --git a/templates/clips/desktop/src-tauri/src/capture/macos.rs b/templates/clips/desktop/src-tauri/src/capture/macos.rs
new file mode 100644
index 0000000000..95e907e000
--- /dev/null
+++ b/templates/clips/desktop/src-tauri/src/capture/macos.rs
@@ -0,0 +1,10 @@
+//! macOS capture backend — thin re-exports of the existing native impls.
+//!
+//! The high-quality macOS path is left entirely untouched: the microphone runs
+//! through `native_speech` (AVAudioEngine + VoiceProcessingIO AEC) and the
+//! system audio through `system_audio` (ScreenCaptureKit). This file only
+//! surfaces them under the platform-agnostic names the `capture` contract and
+//! `whisper_speech.rs` expect.
+
+pub(crate) use crate::native_speech::macos::{start_raw_mic_capture, RawMicCapture};
+pub(crate) use crate::system_audio::macos::{start_raw_system_capture, RawSystemCapture};
diff --git a/templates/clips/desktop/src-tauri/src/capture/mod.rs b/templates/clips/desktop/src-tauri/src/capture/mod.rs
new file mode 100644
index 0000000000..d3382303d0
--- /dev/null
+++ b/templates/clips/desktop/src-tauri/src/capture/mod.rs
@@ -0,0 +1,39 @@
+//! Platform-dispatched audio capture for meeting transcription.
+//!
+//! The local Whisper engine (`whisper_speech.rs`) needs exactly two capture
+//! primitives — a microphone stream and a system-audio (loopback) stream —
+//! each forwarding mono `f32` samples to a callback and exposing the hardware
+//! sample rate plus a `stop()`. Everything else in the meeting pipeline
+//! (detection, notifications, transcript rendering) is already cross-platform.
+//!
+//! This module owns that public contract and dispatches to the right backend
+//! at compile time:
+//!
+//!   - macOS  → thin re-exports of the proven `native_speech` (AVAudioEngine +
+//!              VPIO AEC) mic path and `system_audio` (ScreenCaptureKit) loopback.
+//!   - Windows → `cpal` mic + WASAPI loopback (see `windows.rs`).
+//!
+//! Both backends expose the same names so `whisper_speech.rs` stays
+//! platform-agnostic:
+//!
+//! ```ignore
+//! start_raw_mic_capture(app, mic_device_id, mic_device_label, on_samples) -> RawMicCapture
+//! start_raw_system_capture(app, on_samples) -> RawSystemCapture
+//! ```
+//!
+//! Each handle exposes `stop()`, so session teardown in `whisper_speech.rs` works
+//! unchanged across platforms. On Windows only `RawMicCapture` has `sample_rate() -> f64`.
+
+#[cfg(target_os = "macos")]
+mod macos;
+#[cfg(target_os = "macos")]
+pub(crate) use macos::{
+    start_raw_mic_capture, start_raw_system_capture, RawMicCapture, RawSystemCapture,
+};
+
+#[cfg(target_os = "windows")]
+mod windows;
+#[cfg(target_os = "windows")]
+pub(crate) use windows::{
+    start_raw_mic_capture, start_raw_system_capture, RawMicCapture, RawSystemCapture,
+};
diff --git a/templates/clips/desktop/src-tauri/src/capture/windows.rs b/templates/clips/desktop/src-tauri/src/capture/windows.rs
new file mode 100644
index 0000000000..552154df3b
--- /dev/null
+++ b/templates/clips/desktop/src-tauri/src/capture/windows.rs
@@ -0,0 +1,333 @@
+//! Windows capture backend — `cpal` microphone + WASAPI loopback system audio.
+//!
+//! `cpal` is the single cross-platform crate that covers both primitives the
+//! Whisper engine needs on Windows:
+//!
+//!   - **Microphone:** the default (or label-matched) WASAPI input device.
+//!   - **System audio:** WASAPI *loopback* — `cpal` builds an input stream on
+//!     the default *output* device, which sets `AUDCLNT_STREAMFLAGS_LOOPBACK`
+//!     and captures whatever the speakers are playing. No OS permission prompt.
+//!
+//! Both paths down-mix the device's native interleaved format (any
+//! `SampleFormat`, any channel count) to mono `f32`, forward it to
+//! `on_samples`, and emit `voice:audio-level` on the same cadence as the macOS
+//! backend so the silence detector and waveform UI behave identically. The real
+//! mic device sample rate is reported via `RawMicCapture::sample_rate()`;
+//! `whisper_speech.rs` resamples to 16 kHz from there (the loopback rate is not exposed).
+//!
+//! ## Known limitation (v1): no mic AEC
+//!
+//! macOS applies acoustic echo cancellation via VoiceProcessingIO so the
+//! mic stream doesn't echo the system audio. `cpal` delivers the raw mic with
+//! no AEC, so expect some speaker bleed into the mic transcript when the user
+//! is not on headphones. Acceptable for v1 — mic and system are transcribed and
+//! labeled separately. No software AEC is added here.
+
+use std::sync::atomic::{AtomicU32, Ordering};
+use std::sync::Arc;
+
+use cpal::traits::{DeviceTrait, HostTrait};
+use cpal::{FromSample, SampleFormat, SizedSample, Stream, StreamConfig};
+use serde::Serialize;
+use tauri::{AppHandle, Emitter};
+
+/// Mirrors the macOS `voice:audio-level` payload so the waveform meter and the
+/// silence detector consume an identical event shape on every platform.
+#[derive(Serialize, Clone)]
+struct AudioLevelPayload {
+    level: f32, // peak absolute amplitude of one mono buffer (see `peak_level`)
+    source: &'static str, // stream tag: "mic" or "system" in this file
+}
+
+// ---- public handles ---------------------------------------------------------
+
+/// Handle for a running cpal microphone capture. Dropping the inner `Stream`
+/// (via `stop()`) tears down the WASAPI client and its callback thread.
+pub(crate) struct RawMicCapture {
+    stream: Stream, // owning the stream keeps the capture running
+    sample_rate: f64, // Hz, read from the device's default input config
+}
+
+// SAFETY: cpal's WASAPI `Stream` holds COM interface pointers and is `!Send`.
+// We never call methods on it from another thread — the only cross-thread
+// operation is moving the handle into the session `Mutex` and later dropping
+// it, which signals the capture thread to stop. This mirrors the macOS handles'
+// `unsafe impl Send`.
+unsafe impl Send for RawMicCapture {}
+
+impl RawMicCapture {
+    /// Mic stream rate in Hz (e.g. 48000); the Whisper engine resamples from it to 16 kHz.
+    pub(crate) fn sample_rate(&self) -> f64 {
+        self.sample_rate
+    }
+
+    /// Consume the handle and drop the stream, which ends the capture.
+    pub(crate) fn stop(self) {
+        drop(self.stream);
+    }
+}
+
+/// Handle for a running WASAPI loopback (system audio) capture.
+pub(crate) struct RawSystemCapture {
+    stream: Stream, // dropping it (see `stop`) ends the capture
+    #[allow(dead_code)] // never read — NOTE(review): confirm consumers don't assume a fixed loopback rate
+    sample_rate: f64, // Hz, from the output device's default config
+}
+
+// SAFETY: same argument as `RawMicCapture`.
+unsafe impl Send for RawSystemCapture {}
+
+impl RawSystemCapture {
+    pub(crate) fn stop(self) {
+        drop(self.stream); // consuming `self` means the handle cannot be stopped twice
+    }
+}
+
+// ---- sample conversion ------------------------------------------------------
+
+/// Collapse an interleaved buffer into mono `f32`, whatever the sample format
+/// (`i16`/`u16`/`f32`/…) or channel count cpal delivered. Each frame becomes
+/// the mean of all its channels, so neither stereo music nor multi-channel
+/// mixes lose a side.
+fn interleaved_to_mono_f32<T>(data: &[T], channels: usize) -> Vec<f32>
+where
+    T: Copy,
+    f32: FromSample<T>,
+{
+    // Mono input (or a reported channel count of 0) only needs format
+    // conversion; there is nothing to average.
+    if channels <= 1 {
+        return data.iter().map(|&sample| f32::from_sample(sample)).collect();
+    }
+    let divisor = channels as f32;
+    // `chunks_exact` yields whole frames only, so a trailing partial frame is
+    // dropped rather than averaged.
+    data.chunks_exact(channels)
+        .map(|frame| {
+            let sum = frame.iter().fold(0.0f32, |acc, &s| acc + f32::from_sample(s));
+            sum / divisor
+        })
+        .collect()
+}
+
+/// Largest absolute sample value in a mono buffer (0.0 when empty) — the level
+/// the waveform meter and silence detector expect.
+fn peak_level(mono: &[f32]) -> f32 {
+    mono.iter().map(|sample| sample.abs()).fold(0.0f32, f32::max)
+}
+
+// ---- stream construction ----------------------------------------------------
+
+/// Open a cpal input stream whose callback receives samples of type `T`. Every
+/// callback buffer is down-mixed to mono `f32` and passed to `mono_cb`; a
+/// failure to build the stream is returned as a `String` error.
+fn build_stream<T>(
+    device: &cpal::Device,
+    config: &StreamConfig,
+    mono_cb: impl Fn(&[f32]) + Send + 'static,
+) -> Result<Stream, String>
+where
+    T: SizedSample,
+    f32: FromSample<T>,
+{
+    // Copied out of `config` so the `move` closure owns everything it needs.
+    let channel_count = config.channels as usize;
+    let on_data = move |buffer: &[T], _info: &cpal::InputCallbackInfo| {
+        mono_cb(&interleaved_to_mono_f32(buffer, channel_count));
+    };
+    // Runtime stream errors are only logged; they never reach the caller.
+    let on_error = |err: cpal::StreamError| eprintln!("[capture-win] stream error: {err}");
+    // `None` leaves cpal's optional timeout argument unset.
+    device
+        .build_input_stream(config, on_data, on_error, None)
+        .map_err(|e| format!("build_input_stream failed: {e}"))
+}
+
+/// Build the per-buffer closure: every mono buffer goes to `on_samples` (the
+/// Whisper engine), and every `level_every`-th buffer also emits a
+/// `voice:audio-level` event tagged with `source`. `level_every == 0` disables
+/// level events.
+fn make_callback(
+    app: AppHandle,
+    source: &'static str,
+    level_every: u32,
+    on_samples: Arc<dyn Fn(&[f32]) + Send + Sync>,
+) -> impl Fn(&[f32]) + Send + 'static {
+    // Atomic so the closure can stay `Fn` while still counting buffers.
+    let buffers_seen = AtomicU32::new(0);
+    move |mono: &[f32]| {
+        on_samples(mono);
+        let index = buffers_seen.fetch_add(1, Ordering::Relaxed);
+        if level_every == 0 || index % level_every != 0 {
+            return;
+        }
+        let level = peak_level(mono);
+        // Emit failures are ignored: the level event is best-effort.
+        let _ = app.emit("voice:audio-level", AudioLevelPayload { level, source });
+    }
+}
+
+/// Pick the `build_stream::<T>` instantiation matching cpal's runtime
+/// `SampleFormat`. Every arm moves `mono_cb`, which is fine: only one arm runs.
+fn build_for_format(
+    device: &cpal::Device,
+    config: &StreamConfig,
+    sample_format: SampleFormat,
+    mono_cb: impl Fn(&[f32]) + Send + 'static,
+) -> Result<Stream, String> {
+    match sample_format {
+        SampleFormat::F32 => build_stream::<f32>(device, config, mono_cb),
+        SampleFormat::F64 => build_stream::<f64>(device, config, mono_cb),
+        SampleFormat::I8 => build_stream::<i8>(device, config, mono_cb),
+        SampleFormat::I16 => build_stream::<i16>(device, config, mono_cb),
+        SampleFormat::I32 => build_stream::<i32>(device, config, mono_cb),
+        SampleFormat::I64 => build_stream::<i64>(device, config, mono_cb),
+        SampleFormat::U8 => build_stream::<u8>(device, config, mono_cb),
+        SampleFormat::U16 => build_stream::<u16>(device, config, mono_cb),
+        SampleFormat::U32 => build_stream::<u32>(device, config, mono_cb),
+        SampleFormat::U64 => build_stream::<u64>(device, config, mono_cb),
+        other => Err(format!("unsupported sample format: {other:?}")), // any format not listed above becomes an error, not a panic
+    }
+}
+
+// ---- device selection -------------------------------------------------------
+
+/// Resolve the microphone device. The renderer passes a web
+/// `enumerateDevices()` label, which is not guaranteed to equal cpal's device
+/// name (presumably the WebView may decorate it, e.g. with a "Default - "
+/// prefix — verify), so the match is case-insensitive (Unicode-aware) and works
+/// in either direction: label inside name, or name inside label. Falls back to
+/// the system default input when no device matches.
+fn resolve_input_device(
+    host: &cpal::Host,
+    mic_device_label: Option<&str>,
+) -> Result<cpal::Device, String> {
+    let wanted = mic_device_label.map(|label| label.trim().to_lowercase());
+    let matched = wanted.filter(|label| !label.is_empty()).and_then(|wanted| {
+        // Enumeration or name errors just mean "no match"; never fail on them.
+        host.input_devices().ok()?.find(|device| {
+            let name = device.name().map(|n| n.trim().to_lowercase());
+            // An empty name would be a substring of every label: skip it.
+            name.is_ok_and(|n| !n.is_empty() && (n.contains(&wanted) || wanted.contains(&n)))
+        })
+    });
+    matched
+        .or_else(|| host.default_input_device())
+        .ok_or_else(|| "no default microphone device available".to_string())
+}
+
+// ---- public entry points ----------------------------------------------------
+
+/// Start microphone capture and forward every mono `f32` buffer to
+/// `on_samples`. `mic_device_id` is currently unused on Windows (the web device
+/// id has no cpal equivalent); selection is by `mic_device_label`.
+/// Returns `Err` with a message if no device, config, or stream can be set up.
+pub(crate) fn start_raw_mic_capture(
+    app: AppHandle,
+    mic_device_id: Option<String>,
+    mic_device_label: Option<String>,
+    on_samples: Arc<dyn Fn(&[f32]) + Send + Sync>,
+) -> Result<RawMicCapture, String> {
+    use cpal::traits::StreamTrait; // brings `Stream::play` into scope
+    let _ = mic_device_id;
+    let host = cpal::default_host();
+    let device = resolve_input_device(&host, mic_device_label.as_deref())?;
+    let supported = device
+        .default_input_config()
+        .map_err(|e| format!("default_input_config failed: {e}"))?;
+    let sample_format = supported.sample_format();
+    let sample_rate = f64::from(supported.sample_rate().0);
+    let config: StreamConfig = supported.into();
+    // `app` is moved, not cloned: nothing below needs it. Levels every 2nd buffer.
+    let cb = make_callback(app, "mic", 2, on_samples);
+    let stream = build_for_format(&device, &config, sample_format, cb)?;
+    stream
+        .play()
+        .map_err(|e| format!("mic stream play failed: {e}"))?;
+    eprintln!(
+        "[capture-win] mic capture started: {} Hz, {} ch, {:?}",
+        sample_rate as u32, config.channels, sample_format
+    );
+    Ok(RawMicCapture {
+        stream,
+        sample_rate,
+    })
+}
+
+/// Start system-audio capture via WASAPI loopback. cpal builds an *input*
+/// stream on the default *output* device, which captures the speaker mix. No OS
+/// permission prompt is required for loopback.
+/// Returns `Err` with a message if no device, config, or stream can be set up.
+pub(crate) fn start_raw_system_capture(
+    app: AppHandle,
+    on_samples: Arc<dyn Fn(&[f32]) + Send + Sync>,
+) -> Result<RawSystemCapture, String> {
+    use cpal::traits::StreamTrait; // brings `Stream::play` into scope
+    let host = cpal::default_host();
+    let device = host
+        .default_output_device()
+        .ok_or_else(|| "no default output device available for loopback".to_string())?;
+    // The loopback stream uses the output device's native render format.
+    let supported = device
+        .default_output_config()
+        .map_err(|e| format!("default_output_config failed: {e}"))?;
+    let sample_format = supported.sample_format();
+    let sample_rate = f64::from(supported.sample_rate().0);
+    let config: StreamConfig = supported.into();
+    // `app` is moved, not cloned: nothing below needs it. Levels every 3rd buffer.
+    let cb = make_callback(app, "system", 3, on_samples);
+    let stream = build_for_format(&device, &config, sample_format, cb)?;
+    stream
+        .play()
+        .map_err(|e| format!("loopback stream play failed: {e}"))?;
+    eprintln!(
+        "[capture-win] system loopback started: {} Hz, {} ch, {:?}",
+        sample_rate as u32, config.channels, sample_format
+    );
+    Ok(RawSystemCapture {
+        stream,
+        sample_rate,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn mono_passthrough() {
+        let data: [f32; 3] = [0.1, -0.2, 0.3];
+        let out = interleaved_to_mono_f32(&data, 1); // one channel: nothing to average
+        assert_eq!(out, vec![0.1, -0.2, 0.3]);
+    }
+
+    #[test]
+    fn stereo_f32_averaged_to_mono() {
+        // Two L/R frames: (1.0, -1.0) averages to 0.0; (0.5, 0.5) averages to 0.5.
+        let data: [f32; 4] = [1.0, -1.0, 0.5, 0.5];
+        let out = interleaved_to_mono_f32(&data, 2);
+        assert_eq!(out, vec![0.0, 0.5]);
+    }
+
+    #[test]
+    fn stereo_i16_converted_and_averaged() {
+        // Frame 1: i16::MAX on both channels -> ~1.0. Frame 2: MAX and MIN -> ~0.0.
+        let data: [i16; 4] = [i16::MAX, i16::MAX, i16::MAX, i16::MIN];
+        let out = interleaved_to_mono_f32(&data, 2);
+        assert!((out[0] - 1.0).abs() < 1e-3, "got {}", out[0]);
+        assert!(out[1].abs() < 1e-3, "got {}", out[1]);
+    }
+
+    #[test]
+    fn four_channel_averaged() {
+        let data: [f32; 4] = [1.0, 1.0, 1.0, 1.0];
+        let out = interleaved_to_mono_f32(&data, 4); // four samples form a single frame
+        assert_eq!(out, vec![1.0]);
+    }
+
+    #[test]
+    fn peak_level_picks_max_abs() {
+        assert_eq!(peak_level(&[0.1, -0.9, 0.4]), 0.9); // sign is ignored
+        assert_eq!(peak_level(&[]), 0.0); // empty buffer reports silence
+    }
+}
diff --git a/templates/clips/desktop/src-tauri/src/lib.rs b/templates/clips/desktop/src-tauri/src/lib.rs
index 493e2ccf1e..a3055184ff 100644
--- a/templates/clips/desktop/src-tauri/src/lib.rs
+++ b/templates/clips/desktop/src-tauri/src/lib.rs
@@ -5,6 +5,7 @@
 //! is served by the Vite-built React UI (see `../dist`).
 
 mod accessibility;
+mod capture;
 mod clips;
 mod config;
 mod debug;
@@ -202,8 +203,9 @@ pub fn run() {
 
             // Pre-download the Whisper model in the background so the first
             // meeting doesn't pay the ~142 MB download cost mid-call. Skipped
-            // when the user has disabled the model in Settings.
-            #[cfg(target_os = "macos")]
+            // when the user has disabled the model in Settings. Runs on both
+            // macOS and Windows (the two platforms with meeting transcription).
+            #[cfg(any(target_os = "macos", target_os = "windows"))]
             {
                 let cfg = config::feature_config(app.handle());
                 if cfg.whisper_model_enabled
diff --git a/templates/clips/desktop/src-tauri/src/system_audio.rs b/templates/clips/desktop/src-tauri/src/system_audio.rs
index 6295402ff4..45713c70ce 100644
--- a/templates/clips/desktop/src-tauri/src/system_audio.rs
+++ b/templates/clips/desktop/src-tauri/src/system_audio.rs
@@ -58,12 +58,22 @@ pub fn system_audio_version_status() -> VersionStatus {
     {
         macos::version_status()
     }
-    #[cfg(not(target_os = "macos"))]
+    #[cfg(target_os = "windows")]
+    {
+        // WASAPI loopback is available on all supported Windows versions and
+        // needs no special OS capability check.
+        VersionStatus {
+            supported: true,
+            os_version: std::env::consts::OS.to_string(),
+            reason: None,
+        }
+    }
+    #[cfg(not(any(target_os = "macos", target_os = "windows")))]
     {
         VersionStatus {
             supported: false,
             os_version: std::env::consts::OS.to_string(),
-            reason: Some("System audio capture is only supported on macOS.".into()),
+            reason: Some("System audio capture is only supported on macOS and Windows.".into()),
         }
     }
 }
@@ -85,9 +95,16 @@ pub async fn system_audio_request_permission() -> Result<bool, String> {
         }
         macos::request_screen_capture_access().await
     }
-    #[cfg(not(target_os = "macos"))]
+    #[cfg(target_os = "windows")]
+    {
+        // WASAPI loopback requires no permission grant; the mic permission is
+        // handled by the OS at first capture. Report success so the renderer
+        // proceeds straight to capture.
+        Ok(true)
+    }
+    #[cfg(not(any(target_os = "macos", target_os = "windows")))]
     {
-        Err("System audio capture is only supported on macOS.".into())
+        Err("System audio capture is only supported on macOS and Windows.".into())
     }
 }
 
diff --git a/templates/clips/desktop/src-tauri/src/whisper_speech.rs b/templates/clips/desktop/src-tauri/src/whisper_speech.rs
index bba6644c7e..5c3d1869bf 100644
--- a/templates/clips/desktop/src-tauri/src/whisper_speech.rs
+++ b/templates/clips/desktop/src-tauri/src/whisper_speech.rs
@@ -7,10 +7,10 @@
 //! by `source`. whisper.cpp has no such limit: we run one whisper context with
 //! a per-stream worker thread, fully offline.
 //!
-//! Capture is reused from the existing modules:
-//!   - mic    → `native_speech::macos::start_raw_mic_capture` (AVAudioEngine +
-//!              VoiceProcessingIO AEC, other-audio ducking off)
-//!   - system → `system_audio::macos::start_raw_system_capture` (ScreenCaptureKit)
+//! Capture is delegated to the platform-dispatched `crate::capture` module:
+//!   - macOS   → AVAudioEngine + VoiceProcessingIO AEC (mic) and
+//!               ScreenCaptureKit (system audio).
+//!   - Windows → `cpal` microphone + WASAPI loopback (system audio).
 //!
 use tauri::AppHandle;
 
@@ -24,33 +24,33 @@ pub async fn meeting_whisper_start(
     if !crate::config::feature_config(&app).whisper_model_enabled {
         return Err("whisper-model-disabled".into());
     }
-    #[cfg(target_os = "macos")]
+    #[cfg(any(target_os = "macos", target_os = "windows"))]
     {
-        macos::start(app, language, mic_device_id, mic_device_label).await
+        engine::start(app, language, mic_device_id, mic_device_label).await
     }
-    #[cfg(not(target_os = "macos"))]
+    #[cfg(not(any(target_os = "macos", target_os = "windows")))]
     {
         let _ = (app, language, mic_device_id, mic_device_label);
-        Err("Whisper meeting transcription is only supported on macOS.".into())
+        Err("Whisper meeting transcription is not supported on this platform.".into())
     }
 }
 
 #[tauri::command]
 pub async fn meeting_whisper_stop(app: AppHandle) -> Result<(), String> {
-    #[cfg(target_os = "macos")]
+    #[cfg(any(target_os = "macos", target_os = "windows"))]
     {
-        macos::stop(&app);
+        engine::stop(&app);
         Ok(())
     }
-    #[cfg(not(target_os = "macos"))]
+    #[cfg(not(any(target_os = "macos", target_os = "windows")))]
     {
         let _ = app;
         Ok(())
     }
 }
 
-#[cfg(target_os = "macos")]
-mod macos {
+#[cfg(any(target_os = "macos", target_os = "windows"))]
+mod engine {
     use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
     use std::sync::{Arc, Mutex, OnceLock};
     use std::time::{Duration, Instant};
@@ -59,8 +59,9 @@ mod macos {
     use tauri::{AppHandle, Emitter};
     use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
 
-    use crate::native_speech::macos::{start_raw_mic_capture, RawMicCapture};
-    use crate::system_audio::macos::{start_raw_system_capture, RawSystemCapture};
+    use crate::capture::{
+        start_raw_mic_capture, start_raw_system_capture, RawMicCapture, RawSystemCapture,
+    };
     use crate::whisper_model::{custom_model_override, ensure_model, model_file};
 
     /// One transcript segment with real timestamps from whisper, already
@@ -456,9 +457,10 @@ mod macos {
         sys: Arc<WhisperStream>,
     }
 
-    // SAFETY: the capture handles hold refcounted ObjC objects (already
-    // `Send`); the streams are `Arc` over `Send + Sync` interiors. We only move
-    // the session through the `Mutex`, never alias across threads.
+    // SAFETY: the capture handles wrap platform audio resources that are each
+    // `unsafe impl Send` (refcounted ObjC objects on macOS; cpal stream handles
+    // on Windows); the streams are `Arc` over `Send + Sync` interiors. We only
+    // move the session through the `Mutex`, never alias across threads.
     unsafe impl Send for Session {}
 
     fn session_slot() -> &'static Mutex<Option<Session>> {