diff --git a/docs/source/API/openapi.json b/docs/source/API/openapi.json
index 1865115ba..05aec8dea 100644
--- a/docs/source/API/openapi.json
+++ b/docs/source/API/openapi.json
@@ -1235,6 +1235,52 @@
         }
       }
     },
+    "/api/media/wobbling/enable": {
+      "post": {
+        "summary": "Enable Wobbling",
+        "description": "Enable audio-reactive head wobbling.\n\nWhen enabled, audio played on the daemon (sounds, incoming WebRTC\naudio) is analysed and converted into subtle head movements.",
+        "operationId": "enable_wobbling_api_media_wobbling_enable_post",
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "additionalProperties": {
+                    "type": "string"
+                  },
+                  "type": "object",
+                  "title": "Response Enable Wobbling Api Media Wobbling Enable Post"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/api/media/wobbling/disable": {
+      "post": {
+        "summary": "Disable Wobbling",
+        "description": "Disable audio-reactive head wobbling and reset offsets.",
+        "operationId": "disable_wobbling_api_media_wobbling_disable_post",
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "additionalProperties": {
+                    "type": "string"
+                  },
+                  "type": "object",
+                  "title": "Response Disable Wobbling Api Media Wobbling Disable Post"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
     "/api/media/sounds/upload": {
       "post": {
         "summary": "Upload Sound",
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index e7f41b4ef..bf5c80351 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -106,6 +106,8 @@
       title: Sound Playback
     - local: examples/sound_record
       title: Sound Recording
+    - local: examples/sound_tts
+      title: Sound TTS (with head wobbling)
     - local: examples/custom_media_manager
       title: Custom Media Manager
   title: Examples
diff --git a/docs/source/examples/sound_tts.md b/docs/source/examples/sound_tts.md
new file mode 100644
index 000000000..f9b7f08d2
--- /dev/null
+++ b/docs/source/examples/sound_tts.md
@@ -0,0 +1,46 @@
+# Sound TTS (with head wobbling)
+
+This example synthesizes speech from text via ResembleAI's
+[Chatterbox Multilingual TTS](https://huggingface.co/spaces/ResembleAI/Chatterbox-Multilingual-TTS)
+Hugging Face Space, plays the returned audio on Reachy Mini, and
+wobbles the head in sync with the speech.
+
+Chatterbox supports zero-shot voice cloning: pass a short reference
+audio file and the synthesis matches that voice. 23 languages are
+supported.
+
+**Usage:**
+
+```bash
+# Default English voice
+uv run python examples/sound_tts.py --text "Hello, I can wobble my head!"
+
+# Different language
+uv run python examples/sound_tts.py --text "Bonjour, je suis Reachy Mini" --lang fr
+
+# Clone a voice from a local sample
+uv run python examples/sound_tts.py \
+    --text "Hello world" \
+    --ref-audio ~/Downloads/my_voice.wav
+```
+
+**Options:**
+
+- `--text <str>`: Text to synthesize (max 300 chars per request).
+- `--lang <code>`: ISO 639-1 language code. Supported: `ar`, `da`,
+  `de`, `el`, `en`, `es`, `fi`, `fr`, `he`, `hi`, `it`, `ja`, `ko`,
+  `ms`, `nl`, `no`, `pl`, `pt`, `ru`, `sv`, `sw`, `tr`, `zh`.
+- `--ref-audio <path|url>`: Reference audio for zero-shot voice
+  cloning. Local paths and URLs both work; defaults to a Gradio
+  sample voice.
+
+Synthesis runs on the Space's shared GPU and typically takes
+60–90 s per sentence.
+
+<literalinclude>
+{"path": "../../../examples/sound_tts.py",
+"language": "python",
+"start-after": "START doc_example",
+"end-before": "END doc_example"
+}
+</literalinclude>
diff --git a/examples/sound_play.py b/examples/sound_play.py
index acc282359..e0155d220 100644
--- a/examples/sound_play.py
+++ b/examples/sound_play.py
@@ -51,9 +51,13 @@ def play_live_tone(mini: "ReachyMini", tone_hz: float) -> None:
         mini.media.stop_playing()
 
 
-def main(backend: str, wav_path: str | None, tone_hz: float) -> None:
+def main(
+    backend: str, wav_path: str | None, tone_hz: float, wobbling: bool = False
+) -> None:
     """Run the sound playback example."""
     with ReachyMini(log_level="DEBUG", media_backend=backend) as mini:
+        if wobbling:
+            mini.enable_wobbling()
         if wav_path:
             play_wav(mini, wav_path)
         else:
@@ -90,8 +94,18 @@ def main(backend: str, wav_path: str | None, tone_hz: float) -> None:
         type=float,
         help="Sine wave frequency in Hz (--live mode only).",
     )
+    parser.add_argument(
+        "--wobbling",
+        action="store_true",
+        help="Enable audio-reactive head wobbling.",
+    )
 
     args = parser.parse_args()
-    main(backend=args.backend, wav_path=args.wav, tone_hz=args.tone_hz)
+    main(
+        backend=args.backend,
+        wav_path=args.wav,
+        tone_hz=args.tone_hz,
+        wobbling=args.wobbling,
+    )
 
 # END doc_example
diff --git a/examples/sound_tts.py b/examples/sound_tts.py
new file mode 100644
index 000000000..98bb8837e
--- /dev/null
+++ b/examples/sound_tts.py
@@ -0,0 +1,108 @@
+"""TTS demo with head wobbling.
+
+Sends text to ResembleAI's Chatterbox Multilingual TTS Hugging Face
+Space (zero-shot voice cloning, 23 languages), plays the returned
+audio on Reachy Mini, and wobbles the head in sync.
+
+Usage::
+
+    uv run python examples/sound_tts.py --text "Hello world"
+    uv run python examples/sound_tts.py --text "Bonjour" --lang fr
+    uv run python examples/sound_tts.py --text "..." --ref-audio /path/to/voice.wav
+
+Browse the Space: https://huggingface.co/spaces/ResembleAI/Chatterbox-Multilingual-TTS
+"""
+
+# START doc_example
+
+import argparse
+import os
+import time
+
+import gi
+from gradio_client import Client, handle_file
+
+gi.require_version("Gst", "1.0")
+gi.require_version("GstPbutils", "1.0")
+from gi.repository import Gst, GstPbutils  # noqa: E402
+
+from reachy_mini import ReachyMini  # noqa: E402
+
+HF_SPACE = "ResembleAI/Chatterbox-Multilingual-TTS"
+LANGUAGES = [
+    "ar", "da", "de", "el", "en", "es", "fi", "fr", "he", "hi", "it",
+    "ja", "ko", "ms", "nl", "no", "pl", "pt", "ru", "sv", "sw", "tr", "zh",
+]
+DEFAULT_REF_AUDIO = (
+    "https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav"
+)
+
+
+def synthesize(text: str, lang: str, ref_audio: str) -> str:
+    """Submit *text* to Chatterbox; return a path to a local audio file."""
+    if not ref_audio.startswith(("http://", "https://")):
+        ref_audio = os.path.expanduser(ref_audio)
+    client = Client(HF_SPACE)
+    audio_path = client.predict(
+        text_input=text,
+        language_id=lang,
+        audio_prompt_path_input=handle_file(ref_audio),
+        api_name="/generate_tts_audio",
+    )
+    return str(audio_path)
+
+
+def probe_duration_s(path: str) -> float:
+    """Return the media duration of *path* in seconds via GStreamer."""
+    Gst.init([])
+    disc = GstPbutils.Discoverer.new(10 * Gst.SECOND)
+    info = disc.discover_uri(f"file://{path}")
+    return float(info.get_duration() / Gst.SECOND)
+
+
+def main(text: str, lang: str, ref_audio: str) -> None:
+    """Synthesize *text*, play it on Reachy Mini with wobbling enabled."""
+    print(f"Synthesizing {len(text)} chars ({lang}) with Chatterbox...")
+    audio_path = synthesize(text, lang, ref_audio)
+    duration = probe_duration_s(audio_path)
+    print(f"Got {audio_path} ({duration:.1f}s)")
+
+    with ReachyMini(log_level="INFO") as mini:
+        mini.enable_wobbling()
+        mini.media.play_sound(audio_path)
+        time.sleep(duration + 0.5)
+        mini.disable_wobbling()
+    print("Done.")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Chatterbox Multilingual TTS + head wobbler demo.",
+    )
+    parser.add_argument(
+        "--text",
+        type=str,
+        default="Hello, I am Reachy Mini. Let me wobble my head while I speak.",
+        help="Text to synthesize (max 300 chars per request).",
+    )
+    parser.add_argument(
+        "--lang",
+        type=str,
+        default="en",
+        choices=LANGUAGES,
+        help="Language code (ISO 639-1).",
+    )
+    parser.add_argument(
+        "--ref-audio",
+        type=str,
+        default=DEFAULT_REF_AUDIO,
+        help="Reference audio (URL or local path) for zero-shot voice cloning.",
+    )
+    args = parser.parse_args()
+    main(
+        text=args.text,
+        lang=args.lang,
+        ref_audio=args.ref_audio,
+    )
+
+# END doc_example
diff --git a/pyproject.toml b/pyproject.toml
index 916b37de2..d0968c382 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,7 @@ examples = [
     "soundfile",
     "opencv-python<=5.0",
     "cv2_enumerate_cameras>=1.2.1",
+    "gradio-client"
 ]
 mujoco = ["mujoco==3.3.0"]
 nn_kinematics = ["onnxruntime==1.22.1"]
@@ -73,7 +74,7 @@ all = [
     "reachy_mini[placo_kinematics]",
     "reachy_mini[rerun]",
     "reachy_mini[wireless-version]",
-    "reachy_mini[opencv]",
+    "reachy_mini[opencv]"
 ]
 
 [dependency-groups]
diff --git a/src/reachy_mini/daemon/app/routers/media.py b/src/reachy_mini/daemon/app/routers/media.py
index 79d59924c..a938ddd1e 100644
--- a/src/reachy_mini/daemon/app/routers/media.py
+++ b/src/reachy_mini/daemon/app/routers/media.py
@@ -95,6 +95,39 @@ async def stop_sound(
     return {"status": "ok"}
 
 
+@router.post("/wobbling/enable")
+async def enable_wobbling(
+    daemon: Daemon = Depends(get_daemon),
+) -> dict[str, str]:
+    """Enable audio-reactive head wobbling.
+
+    When enabled, audio played on the daemon (sounds, incoming WebRTC
+    audio) is analysed and converted into subtle head movements.
+    """
+    backend = daemon.backend
+    if backend is None or not backend.ready.is_set():
+        raise HTTPException(status_code=503, detail="Backend not running")
+
+    if backend._media_server is not None:
+        backend._media_server.enable_wobbling(backend.set_speech_offsets)
+    return {"status": "ok"}
+
+
+@router.post("/wobbling/disable")
+async def disable_wobbling(
+    daemon: Daemon = Depends(get_daemon),
+) -> dict[str, str]:
+    """Disable audio-reactive head wobbling and reset offsets."""
+    backend = daemon.backend
+    if backend is None or not backend.ready.is_set():
+        raise HTTPException(status_code=503, detail="Backend not running")
+
+    if backend._media_server is not None:
+        backend._media_server.disable_wobbling()
+    backend.set_speech_offsets((0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
+    return {"status": "ok"}
+
+
 @router.post("/sounds/upload")
 async def upload_sound(
     file: UploadFile = File(...),
diff --git a/src/reachy_mini/daemon/backend/abstract.py b/src/reachy_mini/daemon/backend/abstract.py
index 66ce7abd9..f8c68c9ac 100644
--- a/src/reachy_mini/daemon/backend/abstract.py
+++ b/src/reachy_mini/daemon/backend/abstract.py
@@ -56,9 +56,11 @@
     SetHeadJointsCmd,
     SetMicrophoneVolumeCmd,
     SetMotorModeCmd,
+    SetSpeechOffsetsCmd,
     SetTargetCmd,
     SetTorqueCmd,
     SetVolumeCmd,
+    SetWobblingCmd,
     StartRecordingCmd,
     StopRecordingCmd,
     SubscribeLogsCmd,
@@ -80,9 +82,11 @@
 from reachy_mini.media.audio_doa import AudioDoA
 from reachy_mini.motion.goto import GotoMove
 from reachy_mini.motion.move import Move
+from reachy_mini.utils import create_head_pose
 from reachy_mini.utils.constants import MODELS_ROOT_PATH, URDF_ROOT_PATH
 from reachy_mini.utils.interpolation import (
     InterpolationTechnique,
+    compose_world_offset,
     distance_between_poses,
     time_trajectory,
 )
@@ -286,6 +290,11 @@ def __init__(
             tempfile.gettempdir(), "reachy-mini-uploads", "audio"
         )
 
+        # Head wobbler speech offsets (x_m, y_m, z_m, roll_rad, pitch_rad, yaw_rad)
+        self._speech_offsets: tuple[float, float, float, float, float, float] = (
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+        )
+
         # WebRTC support
         self._send_message_to_webrtc: Optional[Callable[[Optional[str], str], None]] = (
             None
@@ -425,6 +434,16 @@ def update_target_head_joints_from_ik(
         if body_yaw is None:
             body_yaw = self.target_body_yaw if self.target_body_yaw is not None else 0.0
 
+        # Compose speech wobbler offsets (if any) before IK
+        if any(o != 0.0 for o in self._speech_offsets):
+            x_m, y_m, z_m, roll_r, pitch_r, yaw_r = self._speech_offsets
+            offset_pose = create_head_pose(
+                x=x_m, y=y_m, z=z_m,
+                roll=roll_r, pitch=pitch_r, yaw=yaw_r,
+                degrees=False,
+            )
+            pose = compose_world_offset(pose, offset_pose)
+
         # Compute the inverse kinematics to get the head joint positions
         joints = self.head_kinematics.ik(pose, body_yaw=body_yaw)
         if joints is None or np.any(np.isnan(joints)):
@@ -502,6 +521,20 @@ def set_target_antenna_joint_positions(
         """
         self.target_antenna_joint_positions = positions
 
+    def set_speech_offsets(
+        self,
+        offsets: tuple[float, float, float, float, float, float],
+    ) -> None:
+        """Set head wobbler speech offsets, composed with target pose before IK.
+
+        Args:
+            offsets: ``(x_m, y_m, z_m, roll_rad, pitch_rad, yaw_rad)`` in
+                world frame.  Zero tuple disables the offset.
+
+        """
+        self._speech_offsets = offsets
+        self.ik_required = True
+
     def set_target_head_joint_current(
         self,
         current: Annotated[NDArray[np.float64], (7,)],
@@ -926,6 +959,12 @@ async def goto_sleep(self) -> None:
             - If we are far from the initial position, we move there first.
             - If we are close to the initial position, we move directly to the sleep position.
         """
+        # Stop head wobbling so leftover speech offsets don't fight the
+        # sleep pose during the goto.
+        if self._media_server is not None:
+            self._media_server.disable_wobbling()
+        self.set_speech_offsets((0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
+
         # Magic units
         _, _, dist_to_sleep_pose = distance_between_poses(
             self.get_current_head_pose(), self.SLEEP_HEAD_POSE
@@ -1108,6 +1147,23 @@ def _maybe_ignore(field: str) -> bool:
             self.play_sound(cmd.file)
             send_response({"status": "ok", "command": "play_sound"})
 
+        elif isinstance(cmd, SetSpeechOffsetsCmd):
+            offsets = cmd.offsets
+            if len(offsets) == 6:
+                self.set_speech_offsets(
+                    (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5])
+                )
+            send_response({"status": "ok", "command": "set_speech_offsets"})
+
+        elif isinstance(cmd, SetWobblingCmd):
+            if self._media_server is not None:
+                if cmd.enabled:
+                    self._media_server.enable_wobbling(self.set_speech_offsets)
+                else:
+                    self._media_server.disable_wobbling()
+                    self.set_speech_offsets((0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
+            send_response({"status": "ok", "command": "set_wobbling"})
+
         elif isinstance(cmd, SetMotorModeCmd):
             self.set_motor_control_mode(MotorControlMode(cmd.mode))
             send_response({"motor_mode": cmd.mode, "status": "ok"})
diff --git a/src/reachy_mini/io/protocol.py b/src/reachy_mini/io/protocol.py
index d4ab0fce6..9b9dbc67c 100644
--- a/src/reachy_mini/io/protocol.py
+++ b/src/reachy_mini/io/protocol.py
@@ -279,6 +279,18 @@ class GetMicrophoneVolumeCmd(BaseModel):
 
     type: Literal["get_microphone_volume"] = "get_microphone_volume"
 
+class SetSpeechOffsetsCmd(BaseModel):
+    """Set head-wobbler speech offsets (composed with target pose before IK)."""
+
+    type: Literal["set_speech_offsets"] = "set_speech_offsets"
+    offsets: list[float]  # [x_m, y_m, z_m, roll_rad, pitch_rad, yaw_rad]
+
+
+class SetWobblingCmd(BaseModel):
+    """Enable or disable daemon-side audio-reactive head wobbling."""
+
+    type: Literal["set_wobbling"] = "set_wobbling"
+    enabled: bool
 
 # ------------------------------------------------------------------
 # Daemon log streaming over the DataChannel.
@@ -621,6 +633,8 @@ class CancelAudioCmd(BaseModel):
     | StartRecordingCmd
     | StopRecordingCmd
     | AppendRecordCmd
+    | SetSpeechOffsetsCmd
+    | SetWobblingCmd
     | SetVolumeCmd
     | GetVolumeCmd
     | SetMicrophoneVolumeCmd
diff --git a/src/reachy_mini/media/audio_base.py b/src/reachy_mini/media/audio_base.py
index 5a0880930..9c066401c 100644
--- a/src/reachy_mini/media/audio_base.py
+++ b/src/reachy_mini/media/audio_base.py
@@ -19,6 +19,7 @@
 from abc import ABC, abstractmethod
 from typing import Optional
 
+import gi
 import numpy as np
 import numpy.typing as npt
 
@@ -28,7 +29,10 @@
     init_respeaker_usb,
 )
 from reachy_mini.media.audio_doa import AudioDoA
-from reachy_mini.media.gstreamer_utils import get_sample
+from reachy_mini.media.gstreamer_utils import get_sample, handle_default_bus_message
+
+gi.require_version("Gst", "1.0")
+from gi.repository import Gst  # noqa: E402
 
 
 class AudioBase(ABC):
@@ -37,17 +41,57 @@ class AudioBase(ABC):
     Attributes:
         SAMPLE_RATE: Default sample rate (16 000 Hz — ReSpeaker hardware).
         CHANNELS: Number of audio channels (2 — stereo).
+        GAP_RESET_NS: PTS-continuity threshold for ``_compute_pts``.
+            If the gap between the next expected PTS and the appsrc's
+            current running-time exceeds this value, we treat it as a
+            new utterance and re-anchor to running-time.
 
     """
 
     SAMPLE_RATE = 16000
     CHANNELS = 2
+    GAP_RESET_NS = 200_000_000  # 200 ms
 
     def __init__(self, log_level: str = "INFO") -> None:
         """Initialize shared audio attributes (DoA helper)."""
         self.logger = logging.getLogger(type(self).__module__)
         self.logger.setLevel(log_level)
         self._doa = AudioDoA()
+        # Next expected PTS for the playback / send appsrc; -1 means
+        # "no previous buffer, anchor to running-time on next push".
+        self._appsrc_pts: int = -1
+
+    def _compute_pts(
+        self,
+        num_samples: int,
+        running_time_ns: int,
+        next_pts_ns: int,
+    ) -> tuple[int, int, int]:
+        """Return ``(pts_ns, duration_ns, next_pts_ns)`` for an appsrc buffer.
+
+        Anchors PTS to ``running_time_ns`` when ``next_pts_ns`` is
+        negative (sentinel for "no previous") or the gap is larger
+        than ``GAP_RESET_NS``; otherwise continues the previous
+        stream's PTS to keep audio contiguous across consecutive
+        push calls.
+        """
+        duration_ns = (num_samples * 1_000_000_000) // self.SAMPLE_RATE
+        if next_pts_ns < 0 or running_time_ns > next_pts_ns + self.GAP_RESET_NS:
+            pts_ns = running_time_ns
+        else:
+            pts_ns = next_pts_ns
+        return pts_ns, duration_ns, pts_ns + duration_ns
+
+    def _on_bus_message(
+        self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline
+    ) -> bool:
+        """Delegate to the shared default-bus-message helper.
+
+        Subclasses can override to add custom behaviour, then return
+        ``super()._on_bus_message(bus, msg, pipeline)`` to keep the
+        default handling.
+        """
+        return handle_default_bus_message(self.logger, msg, pipeline)
 
     def get_audio_sample(self) -> Optional[npt.NDArray[np.float32]]:
         """Pull the next recorded audio chunk.
diff --git a/src/reachy_mini/media/audio_gstreamer.py b/src/reachy_mini/media/audio_gstreamer.py
index 59789843b..6b9981cab 100644
--- a/src/reachy_mini/media/audio_gstreamer.py
+++ b/src/reachy_mini/media/audio_gstreamer.py
@@ -53,6 +53,8 @@
 
 import os
 import platform
+import time
+from collections.abc import Callable
 from threading import Thread
 from typing import Optional
 
@@ -62,6 +64,7 @@
 from reachy_mini.media.audio_base import AudioBase
 from reachy_mini.media.audio_utils import has_reachymini_asoundrc
 from reachy_mini.media.device_detection import get_audio_device
+from reachy_mini.motion.head_wobbler import HeadWobbler, SpeechOffsets
 from reachy_mini.utils.constants import ASSETS_ROOT_PATH
 
 try:
@@ -90,7 +93,6 @@ class GStreamerAudio(AudioBase):
 
     """
 
-    PLAYBACK_GAP_RESET_NS = 200 * Gst.MSECOND
     PLAYBACK_SINK_BUFFER_TIME_US = 50_000
     PLAYBACK_SINK_LATENCY_TIME_US = 5_000
 
@@ -105,6 +107,8 @@ def __init__(self, log_level: str = "INFO") -> None:
         """
         super().__init__(log_level=log_level)
 
+        self._head_wobbler: Optional[HeadWobbler] = None
+
         Gst.init([])
         self._loop = GLib.MainLoop()
         self._thread_bus_calls = Thread(target=lambda: self._loop.run(), daemon=True)
@@ -114,36 +118,17 @@ def __init__(self, log_level: str = "INFO") -> None:
         self._init_pipeline_record(self._pipeline_record)
         self._bus_record = self._pipeline_record.get_bus()
         self._bus_record.add_watch(
-            GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop
+            GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_record
         )
 
         self._playbin: Optional[Gst.Element] = None
         self._pipeline_playback = Gst.Pipeline.new("audio_player")
-        self._playback_next_pts_ns: int | None = None
         self._init_pipeline_playback(self._pipeline_playback)
         self._bus_playback = self._pipeline_playback.get_bus()
         self._bus_playback.add_watch(
-            GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop
+            GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_playback
         )
 
-    def _compute_playback_buffer_timing(
-        self,
-        num_samples: int,
-        sample_rate: int,
-        running_time_ns: int,
-        next_pts_ns: int | None,
-        gap_reset_ns: int | None = None,
-    ) -> tuple[int, int, int]:
-        """Return ``(pts_ns, duration_ns, next_pts_ns)`` for a playback buffer."""
-        if gap_reset_ns is None:
-            gap_reset_ns = self.PLAYBACK_GAP_RESET_NS
-        duration_ns = (num_samples * Gst.SECOND) // sample_rate
-        if next_pts_ns is None or running_time_ns > next_pts_ns + gap_reset_ns:
-            pts_ns = running_time_ns
-        else:
-            pts_ns = next_pts_ns
-        return pts_ns, duration_ns, pts_ns + duration_ns
-
     def _init_pipeline_record(self, pipeline: Gst.Pipeline) -> None:
         self._appsink_audio = Gst.ElementFactory.make("appsink")
         caps = Gst.Caps.from_string(
@@ -197,24 +182,11 @@ def _init_pipeline_record(self, pipeline: Gst.Pipeline) -> None:
         audioconvert.link(audioresample)
         audioresample.link(self._appsink_audio)
 
-    def _init_pipeline_playback(self, pipeline: Gst.Pipeline) -> None:
-        self._appsrc = Gst.ElementFactory.make("appsrc")
-        self._appsrc.set_property("do-timestamp", False)
-        self._appsrc.set_property("format", Gst.Format.TIME)
-        self._appsrc.set_property("is-live", True)
-        caps = Gst.Caps.from_string(
-            f"audio/x-raw,format=F32LE,channels={self.CHANNELS},rate={self.SAMPLE_RATE},layout=interleaved"
-        )
-        self._appsrc.set_property("caps", caps)
-
-        audioconvert = Gst.ElementFactory.make("audioconvert")
-        audioresample = Gst.ElementFactory.make("audioresample")
-
+    def _build_audiosink_element(self) -> Gst.Element:
+        """Create a platform-appropriate audio sink element."""
         audiosink: Optional[Gst.Element] = None
 
         if has_reachymini_asoundrc():
-            # Wireless CM4: use the preconfigured .asoundrc ALSA devices
-            # which route through the XMOS AEC loopback properly.
             audiosink = Gst.ElementFactory.make("alsasink")
             audiosink.set_property("device", "reachymini_audio_sink")
             self.logger.info("Using .asoundrc audio sink: reachymini_audio_sink")
@@ -225,9 +197,7 @@ def _init_pipeline_playback(self, pipeline: Gst.Pipeline) -> None:
                 self.logger.warning(
                     "No specific audio card found, using default audio sink."
                 )
-                audiosink = Gst.ElementFactory.make(
-                    "autoaudiosink"
-                )  # use default speaker
+                audiosink = Gst.ElementFactory.make("autoaudiosink")
             elif platform.system() == "Windows":
                 audiosink = Gst.ElementFactory.make("wasapi2sink")
                 audiosink.set_property("device", id_audio_card)
@@ -238,50 +208,173 @@ def _init_pipeline_playback(self, pipeline: Gst.Pipeline) -> None:
                 audiosink = Gst.ElementFactory.make("pulsesink")
                 audiosink.set_property("device", f"{id_audio_card}")
 
-        if audiosink is not None:
-            if audiosink.find_property("buffer-time") is not None:
-                audiosink.set_property("buffer-time", self.PLAYBACK_SINK_BUFFER_TIME_US)
-            if audiosink.find_property("latency-time") is not None:
-                audiosink.set_property("latency-time", self.PLAYBACK_SINK_LATENCY_TIME_US)
+        if audiosink is None:
+            raise RuntimeError("Failed to create audio sink element")
 
-        queue = Gst.ElementFactory.make("queue")
+        if audiosink.find_property("buffer-time") is not None:
+            audiosink.set_property("buffer-time", self.PLAYBACK_SINK_BUFFER_TIME_US)
+        if audiosink.find_property("latency-time") is not None:
+            audiosink.set_property("latency-time", self.PLAYBACK_SINK_LATENCY_TIME_US)
 
-        pipeline.add(audiosink)
-        pipeline.add(self._appsrc)
-        pipeline.add(audioconvert)
-        pipeline.add(audioresample)
-        pipeline.add(queue)
+        return audiosink
 
-        self._appsrc.link(audioconvert)
-        audioconvert.link(audioresample)
-        audioresample.link(queue)
-        queue.link(audiosink)
+    def _make_wobbler_appsink(self) -> Gst.Element:
+        """Create an appsink that feeds audio to the head wobbler.
 
-    def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool:  # type: ignore[no-untyped-def]
-        t = msg.type
-        if t == Gst.MessageType.EOS:
-            self.logger.warning("End-of-stream")
-            return False
+        ``sync=True`` so new-sample fires at the buffer's PTS on the
+        pipeline clock — i.e. when the audiosink outputs it. The local
+        pipeline has a deterministic clock and no network jitter, so
+        PTS-based sync gives correct A/V timing for both playbin
+        (``play_sound``) and push (``push_audio_sample``) paths.
+        """
+        appsink = Gst.ElementFactory.make("appsink")
+        # Force mono so the speech tapper receives a 1-D float32 array.
+        # The per-branch audioconvert in _build_audiosink_tee_bin /
+        # _init_pipeline_playback handles the downmix.
+        caps = Gst.Caps.from_string(
+            f"audio/x-raw,format=F32LE,channels=1,"
+            f"rate={self.SAMPLE_RATE},layout=interleaved"
+        )
+        appsink.set_property("caps", caps)
+        appsink.set_property("drop", True)
+        appsink.set_property("max-buffers", 5)
+        appsink.set_property("sync", True)
+        appsink.set_property("emit-signals", True)
+        appsink.connect("new-sample", self._on_wobbler_sample)
+        return appsink
+
+    def _on_wobbler_sample(self, appsink: Gst.Element) -> Gst.FlowReturn:
+        """GStreamer callback: forward audio buffer to the head wobbler.
+
+        The appsink is ``sync=True``, so this callback fires at the
+        buffer's PTS on the pipeline clock — audio is playing NOW.
+        """
+        sample = appsink.pull_sample()
+        if sample is None or self._head_wobbler is None:
+            return Gst.FlowReturn.OK
+        buf = sample.get_buffer()
+        data = buf.extract_dup(0, buf.get_size())
+        pcm = np.frombuffer(data, dtype=np.float32)
+        self._head_wobbler.feed(pcm, time.monotonic_ns())
+        return Gst.FlowReturn.OK
+
+    def _build_audiosink_tee_bin(self) -> Gst.Bin:
+        """Build a Gst.Bin with a tee splitting audio to speaker and wobbler.
+
+        Per-branch audioconvert+audioresample isolate each leaf's caps
+        from the other (the wobbler appsink demands F32LE/1/16000; the
+        audiosink wants whatever the device prefers — e.g. on the
+        wireless XMOS PCM, anything but its native rate triggers an
+        IEC958 fallback that fails to open).
+
+        The bin exposes a single ghost sink pad for use as a playbin audio-sink::
 
-        elif t == Gst.MessageType.ERROR:
-            err, debug = msg.parse_error()
-            self.logger.error(f"Error: {err} {debug}")
-            return False
+            ghost_sink → tee ─┬→ queue → audioconvert → audioresample → audiosink
+                               └→ queue → audioconvert → audioresample → appsink
+
+        """
+        audio_bin = Gst.Bin.new("audio_tee_bin")
+
+        tee = Gst.ElementFactory.make("tee")
+        queue_speaker = Gst.ElementFactory.make("queue")
+        ac_speaker = Gst.ElementFactory.make("audioconvert")
+        ar_speaker = Gst.ElementFactory.make("audioresample")
+        audiosink = self._build_audiosink_element()
+        queue_wobbler = Gst.ElementFactory.make("queue")
+        ac_wobbler = Gst.ElementFactory.make("audioconvert")
+        ar_wobbler = Gst.ElementFactory.make("audioresample")
+        appsink_wobbler = self._make_wobbler_appsink()
+
+        for el in (
+            tee,
+            queue_speaker,
+            ac_speaker,
+            ar_speaker,
+            audiosink,
+            queue_wobbler,
+            ac_wobbler,
+            ar_wobbler,
+            appsink_wobbler,
+        ):
+            audio_bin.add(el)
+
+        tee.link(queue_speaker)
+        queue_speaker.link(ac_speaker)
+        ac_speaker.link(ar_speaker)
+        ar_speaker.link(audiosink)
+
+        tee.link(queue_wobbler)
+        queue_wobbler.link(ac_wobbler)
+        ac_wobbler.link(ar_wobbler)
+        ar_wobbler.link(appsink_wobbler)
+
+        ghost_pad = Gst.GhostPad.new("sink", tee.get_static_pad("sink"))
+        audio_bin.add_pad(ghost_pad)
+
+        return audio_bin
+
+    def _init_pipeline_playback(self, pipeline: Gst.Pipeline) -> None:
+        self._appsrc = Gst.ElementFactory.make("appsrc")
+        self._appsrc.set_property("do-timestamp", False)
+        self._appsrc.set_property("format", Gst.Format.TIME)
+        self._appsrc.set_property("is-live", True)
+        caps = Gst.Caps.from_string(
+            f"audio/x-raw,format=F32LE,channels={self.CHANNELS},rate={self.SAMPLE_RATE},layout=interleaved"
+        )
+        self._appsrc.set_property("caps", caps)
 
-        return True
+        # Always build tee so wobbling can be enabled/disabled at runtime.
+        # Per-branch audioconvert+audioresample so the wobbler appsink's
+        # F32LE/1/16000 caps don't drag the audiosink branch into a rate
+        # the device can't accept (e.g. wireless XMOS PCM falls back to
+        # IEC958 at non-native rates). The appsink with drop=True has
+        # negligible overhead when no wobbler is connected.
+        tee = Gst.ElementFactory.make("tee")
+        queue_speaker = Gst.ElementFactory.make("queue")
+        ac_speaker = Gst.ElementFactory.make("audioconvert")
+        ar_speaker = Gst.ElementFactory.make("audioresample")
+        audiosink = self._build_audiosink_element()
+        queue_wobbler = Gst.ElementFactory.make("queue")
+        ac_wobbler = Gst.ElementFactory.make("audioconvert")
+        ar_wobbler = Gst.ElementFactory.make("audioresample")
+        appsink_wobbler = self._make_wobbler_appsink()
+
+        for el in (
+            self._appsrc,
+            tee,
+            queue_speaker,
+            ac_speaker,
+            ar_speaker,
+            audiosink,
+            queue_wobbler,
+            ac_wobbler,
+            ar_wobbler,
+            appsink_wobbler,
+        ):
+            pipeline.add(el)
+
+        self._appsrc.link(tee)
+        tee.link(queue_speaker)
+        queue_speaker.link(ac_speaker)
+        ac_speaker.link(ar_speaker)
+        ar_speaker.link(audiosink)
+        tee.link(queue_wobbler)
+        queue_wobbler.link(ac_wobbler)
+        ac_wobbler.link(ar_wobbler)
+        ar_wobbler.link(appsink_wobbler)
+
+    def _on_bus_message(
+        self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline
+    ) -> bool:
+        if msg.type == Gst.MessageType.EOS and self._head_wobbler is not None:
+            self._head_wobbler.stop()
+        return super()._on_bus_message(bus, msg, pipeline)
 
     def _dump_latency(self) -> None:
         query = Gst.Query.new_latency()
         self._pipeline_playback.query(query)
         self.logger.info(f"Audio pipeline latency {query.parse_latency()}")
 
-    def _get_playback_running_time_ns(self) -> int:
-        """Return the current playback running time in nanoseconds."""
-        clock = self._pipeline_playback.get_clock()
-        if clock is None:
-            return 0
-        return int(max(0, clock.get_time() - self._pipeline_playback.get_base_time()))
-
     def start_recording(self) -> None:
         """Start capturing audio from the microphone."""
         self._pipeline_record.set_state(Gst.State.PLAYING)
@@ -292,7 +385,9 @@ def stop_recording(self) -> None:
 
     def start_playing(self) -> None:
         """Start the playback pipeline so ``push_audio_sample`` can feed data."""
-        self._playback_next_pts_ns = None
+        if self._head_wobbler is not None:
+            self._head_wobbler.start()
+        self._appsrc_pts = -1
         self._pipeline_playback.set_state(Gst.State.PLAYING)
         GLib.timeout_add_seconds(5, self._dump_latency)
 
@@ -305,27 +400,30 @@ def push_audio_sample(self, data: npt.NDArray[np.float32]) -> None:
                 mono (the caller is responsible for channel adaptation).
 
         """
-        if self._appsrc is not None:
-            pts_ns, duration_ns, self._playback_next_pts_ns = (
-                self._compute_playback_buffer_timing(
-                    int(data.shape[0]),
-                    self.SAMPLE_RATE,
-                    self._get_playback_running_time_ns(),
-                    self._playback_next_pts_ns,
-                )
-            )
-            buf = Gst.Buffer.new_wrapped(data.tobytes())
-            buf.pts = pts_ns
-            buf.duration = duration_ns
-            self._appsrc.push_buffer(buf)
-        else:
+        if self._appsrc is None:
             self.logger.warning(
                 "AppSrc is not initialized. Call start_playing() first."
             )
+            return
+
+        pts_ns, duration_ns, self._appsrc_pts = self._compute_pts(
+            int(data.shape[0]),
+            self._appsrc.get_current_running_time(),
+            self._appsrc_pts,
+        )
+        buf = Gst.Buffer.new_wrapped(data.tobytes())
+        buf.pts = pts_ns
+        buf.dts = pts_ns
+        buf.duration = duration_ns
+        ret = self._appsrc.push_buffer(buf)
+        if ret != Gst.FlowReturn.OK:
+            self.logger.warning(f"push_buffer dropped: {ret}")
 
     def stop_playing(self) -> None:
         """Stop the playback pipeline."""
-        self._playback_next_pts_ns = None
+        if self._head_wobbler is not None:
+            self._head_wobbler.stop()
+        self._appsrc_pts = -1
         self._pipeline_playback.set_state(Gst.State.NULL)
         if self._playbin is not None:
             self._playbin.set_state(Gst.State.NULL)
@@ -342,8 +440,10 @@ def clear_output_buffer(self) -> None:
 
     def clear_player(self) -> None:
         """Flush the player's appsrc to drop any queued audio immediately."""
+        if self._head_wobbler is not None:
+            self._head_wobbler.reset()
         if self._appsrc is not None:
-            self._playback_next_pts_ns = None
+            self._appsrc_pts = -1
             self._pipeline_playback.set_state(Gst.State.PAUSED)
             self._appsrc.send_event(Gst.Event.new_flush_start())
             self._appsrc.send_event(Gst.Event.new_flush_stop(reset_time=True))
@@ -358,7 +458,8 @@ def play_sound(self, sound_file: str) -> None:
         """Play a sound file through the Reachy Mini Audio card.
 
         The file is played via a GStreamer ``playbin`` routed to the same
-        audio sink used by the push-based playback pipeline.
+        audio sink used by the push-based playback pipeline.  When the head
+        wobbler is enabled the audio is also forked to it via a tee.
 
         Args:
             sound_file: Absolute path **or** filename relative to the
@@ -377,33 +478,6 @@ def play_sound(self, sound_file: str) -> None:
         else:
             file_path = sound_file
 
-        audiosink: Optional[Gst.Element] = None
-
-        if has_reachymini_asoundrc():
-            # reachy mini wireless has a preconfigured asoundrc
-            audiosink = Gst.ElementFactory.make("alsasink")
-            audiosink.set_property("device", "reachymini_audio_sink")
-            self.logger.info("Using audio device reachymini_audio_sink for playback.")
-        elif platform.system() == "Windows":
-            id_audio_card = get_audio_device("Sink")
-            audiosink = Gst.ElementFactory.make("wasapi2sink")
-            audiosink.set_property("device", id_audio_card)
-            self.logger.info(
-                f"Using audio device {id_audio_card} for playback on Windows."
-            )
-        elif platform.system() == "Darwin":
-            id_audio_card = get_audio_device("Sink")
-            audiosink = Gst.ElementFactory.make("osxaudiosink")
-            audiosink.set_property("unique-id", id_audio_card)
-            self.logger.info(
-                f"Using audio device {id_audio_card} for playback on macOS."
-            )
-        else:
-            id_audio_card = get_audio_device("Sink")
-            audiosink = Gst.ElementFactory.make("pulsesink")
-            audiosink.set_property("device", f"{id_audio_card}")
-            self.logger.info(f"Using audio device {id_audio_card} for playback.")
-
         if self._playbin is not None:
             self._playbin.set_state(Gst.State.NULL)
 
@@ -423,8 +497,11 @@ def play_sound(self, sound_file: str) -> None:
         else:
             uri = f"file://{file_path}"
         playbin.set_property("uri", uri)
-        if audiosink is not None:
-            playbin.set_property("audio-sink", audiosink)
+
+        playbin.set_property("audio-sink", self._build_audiosink_tee_bin())
+        if self._head_wobbler is not None:
+            self._head_wobbler.reset()
+            self._head_wobbler.start()
 
         self._playbin = playbin
         playbin.set_state(Gst.State.PLAYING)
@@ -466,8 +543,30 @@ def get_DoA(self) -> tuple[float, bool] | None:
         """
         return self._doa.get_DoA()
 
+    def enable_wobbling(self, callback: Callable[[SpeechOffsets], None]) -> None:
+        """Enable head wobbling driven by audio playback.
+
+        Args:
+            callback: Called with ``(x_m, y_m, z_m, roll_rad, pitch_rad,
+                yaw_rad)`` for each movement hop.
+
+        """
+        if self._head_wobbler is not None:
+            self._head_wobbler.stop()
+        self._head_wobbler = HeadWobbler(callback, sample_rate=self.SAMPLE_RATE)
+        self.logger.info("Head wobbler enabled")
+
+    def disable_wobbling(self) -> None:
+        """Disable head wobbling."""
+        if self._head_wobbler is not None:
+            self._head_wobbler.stop()
+            self._head_wobbler = None
+            self.logger.info("Head wobbler disabled")
+
     def cleanup(self) -> None:
         """Release all resources (pipelines, USB devices)."""
+        if self._head_wobbler is not None:
+            self._head_wobbler.stop()
         self._doa.close()
 
     def __del__(self) -> None:
diff --git a/src/reachy_mini/media/camera_gstreamer.py b/src/reachy_mini/media/camera_gstreamer.py
index 3a0ee9db7..f6fe82dd5 100644
--- a/src/reachy_mini/media/camera_gstreamer.py
+++ b/src/reachy_mini/media/camera_gstreamer.py
@@ -57,7 +57,7 @@
     CameraSpecs,
     ReachyMiniLiteCamSpecs,
 )
-from reachy_mini.media.gstreamer_utils import get_sample
+from reachy_mini.media.gstreamer_utils import get_sample, handle_default_bus_message
 
 try:
     import gi
@@ -210,27 +210,25 @@ def _build_ipc_source(self) -> None:
         queue.link(convert)
         convert.link(self._appsink_video)
 
-    def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool:  # type: ignore[no-untyped-def]
-        t = msg.type
-        if t == Gst.MessageType.EOS:
-            self.logger.warning("End-of-stream")
-            return False
-        elif t == Gst.MessageType.ERROR:
+    def _on_bus_message(
+        self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline
+    ) -> bool:
+        # Some camera errors are transient and the pipeline can
+        # self-recover, so we log them but keep the bus watch alive.
+        # Default handler would tear it down.
+        if msg.type == Gst.MessageType.ERROR:
             err, debug = msg.parse_error()
             self.logger.warning(
                 f"GStreamer pipeline error (domain={err.domain}, code={err.code}): {err.message}"
             )
             self.logger.debug(f"GStreamer error debug info: {debug}")
-            # Keep the bus watch active — some errors are transient and the pipeline
-            # will self-recover. Fatal errors should be handled by inspecting
-            # err.domain and err.code.
             return True
-        return True
+        return handle_default_bus_message(self.logger, msg, pipeline)
 
     def _handle_bus_calls(self) -> None:
         self.logger.debug("starting bus message loop")
         bus = self.pipeline.get_bus()
-        bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop)
+        bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self.pipeline)
         self._loop.run()
         bus.remove_watch()
         self.logger.debug("bus message loop stopped")
diff --git a/src/reachy_mini/media/gstreamer_udp_camera.py b/src/reachy_mini/media/gstreamer_udp_camera.py
index 4d9a4f2ad..221e9c0f4 100644
--- a/src/reachy_mini/media/gstreamer_udp_camera.py
+++ b/src/reachy_mini/media/gstreamer_udp_camera.py
@@ -23,6 +23,8 @@
 
 from gi.repository import GLib, Gst, GstApp  # noqa: E402
 
+from reachy_mini.media.gstreamer_utils import handle_default_bus_message  # noqa: E402
+
 
 class GStreamerUDPCamera:
     """A class to send frames over UDP using GStreamer."""
@@ -62,7 +64,7 @@ def __init__(
         # Create pipeline
         self.pipeline = Gst.Pipeline.new("udp_sender")
         self._bus = self.pipeline.get_bus()
-        self._bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop)
+        self._bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self.pipeline)
 
         # Configure pipeline elements
         self._configure_pipeline()
@@ -138,32 +140,10 @@ def _configure_pipeline(self) -> None:
         self._logger.debug("UDP sender pipeline configured successfully")
 
     def _on_bus_message(
-        self, bus: Gst.Bus, msg: Gst.Message, loop: GLib.MainLoop
+        self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline
     ) -> bool:
-        """Handle GStreamer bus messages.
-
-        Args:
-            bus: GStreamer bus.
-            msg: GStreamer message.
-            loop: GLib main loop.
-
-        Returns:
-            bool: True to continue receiving messages, False to stop.
-
-        """
-        t = msg.type
-        if t == Gst.MessageType.EOS:
-            self._logger.warning("End-of-stream")
-            return False
-        elif t == Gst.MessageType.ERROR:
-            err, debug = msg.parse_error()
-            self._logger.error(f"Error: {err} {debug}")
-            return False
-        elif t == Gst.MessageType.WARNING:
-            err, debug = msg.parse_warning()
-            self._logger.warning(f"Warning: {err} {debug}")
-
-        return True
+        """Handle GStreamer bus messages via the shared helper."""
+        return handle_default_bus_message(self._logger, msg, pipeline)
 
     def _handle_bus_calls(self) -> None:
         """Run the GLib main loop for handling bus messages."""
diff --git a/src/reachy_mini/media/gstreamer_utils.py b/src/reachy_mini/media/gstreamer_utils.py
index 36d7636e2..23ace29a4 100644
--- a/src/reachy_mini/media/gstreamer_utils.py
+++ b/src/reachy_mini/media/gstreamer_utils.py
@@ -17,6 +17,41 @@
 from gi.repository import Gst, GstApp  # noqa: E402
 
 
+def handle_default_bus_message(
+    logger: logging.Logger,
+    msg: Gst.Message,
+    pipeline: Gst.Pipeline,
+) -> bool:
+    """Handle GStreamer bus messages with sensible defaults.
+
+    - ``EOS``: log a warning and return False (the bus watch is
+      removed).
+    - ``ERROR``: log the parsed error and return False.
+    - ``WARNING``: log the parsed warning and keep the watch alive.
+    - ``LATENCY``: call ``pipeline.recalculate_latency()`` and return
+      True.
+    - Anything else: return True (keep the watch alive).
+
+    Callers can wrap this in their own handler to inject extra logic
+    for a specific message type, then fall through to this helper for
+    the common cases.
+    """
+    if msg.type == Gst.MessageType.EOS:
+        logger.warning("End-of-stream")
+        return False
+    elif msg.type == Gst.MessageType.ERROR:
+        err, debug = msg.parse_error()
+        logger.error(f"Error: {err} {debug}")
+        return False
+    elif msg.type == Gst.MessageType.WARNING:
+        err, debug = msg.parse_warning()
+        logger.warning(f"Warning: {err} {debug}")
+    elif msg.type == Gst.MessageType.LATENCY:
+        pipeline.recalculate_latency()
+        logger.debug("Recalculate latency")
+    return True
+
+
 def get_sample(appsink: GstApp.AppSink, logger: logging.Logger) -> Optional[bytes]:
     """Pull a sample from a GStreamer AppSink with a 20 ms timeout.
 
diff --git a/src/reachy_mini/media/media_manager.py b/src/reachy_mini/media/media_manager.py
index b16c6a658..5969289ef 100644
--- a/src/reachy_mini/media/media_manager.py
+++ b/src/reachy_mini/media/media_manager.py
@@ -24,6 +24,7 @@
 
 import logging
 import warnings
+from collections.abc import Callable
 from enum import Enum
 from typing import TYPE_CHECKING, Optional, Union
 
@@ -31,6 +32,7 @@
 import numpy.typing as npt
 
 from reachy_mini.media.camera_constants import CameraSpecs
+from reachy_mini.motion.head_wobbler import SpeechOffsets
 
 
 class MediaBackend(Enum):
@@ -161,7 +163,10 @@ def __init__(
                 self.logger.info(
                     "Using LOCAL backend (GStreamer IPC camera + GStreamer audio)."
                 )
-                self._init_camera(log_level, camera_specs)
+                try:
+                    self._init_camera(log_level, camera_specs)
+                except Exception as e:
+                    self.logger.warning(f"Camera init failed, continuing without camera: {e}")
                 self._init_audio(log_level)
             case MediaBackend.WEBRTC:
                 self.logger.info("Using WebRTC streaming backend.")
@@ -372,6 +377,37 @@ def stop_playing(self) -> None:
             return
         self.audio.stop_playing()
 
+    def enable_wobbling(self, callback: Callable[[SpeechOffsets], None]) -> None:
+        """Enable head wobbling driven by audio playback.
+
+        Only supported with the LOCAL backend (GStreamerAudio).
+
+        Args:
+            callback: Called with ``(x_m, y_m, z_m, roll_rad, pitch_rad,
+                yaw_rad)`` for each movement hop.
+
+        """
+        if self.audio is None:
+            self.logger.warning("Audio system is not initialized.")
+            return
+
+        from reachy_mini.media.audio_gstreamer import GStreamerAudio
+
+        if not isinstance(self.audio, GStreamerAudio):
+            self.logger.warning("Head wobbling is only supported with the LOCAL audio backend.")
+            return
+        self.audio.enable_wobbling(callback)
+
+    def disable_wobbling(self) -> None:
+        """Disable head wobbling."""
+        if self.audio is None:
+            return
+
+        from reachy_mini.media.audio_gstreamer import GStreamerAudio
+
+        if isinstance(self.audio, GStreamerAudio):
+            self.audio.disable_wobbling()
+
     def get_DoA(self) -> tuple[float, bool] | None:
         """Get the Direction of Arrival (DoA) from the microphone array.
 
diff --git a/src/reachy_mini/media/media_server.py b/src/reachy_mini/media/media_server.py
index 36ff3ba72..03210d8c5 100644
--- a/src/reachy_mini/media/media_server.py
+++ b/src/reachy_mini/media/media_server.py
@@ -31,6 +31,7 @@
 from typing import Any, Callable, Dict, Optional
 
 import gi
+import numpy as np
 
 from reachy_mini.daemon.utils import (
     CAMERA_PIPE_NAME,
@@ -47,12 +48,14 @@
     ReachyMiniLiteCamSpecs,
 )
 from reachy_mini.media.device_detection import get_audio_device, get_video_device
+from reachy_mini.media.gstreamer_utils import handle_default_bus_message
+from reachy_mini.motion.head_wobbler import HeadWobbler, SpeechOffsets
 from reachy_mini.utils.constants import ASSETS_ROOT_PATH
 
 gi.require_version("Gst", "1.0")
 gi.require_version("GstApp", "1.0")
 
-from gi.repository import GLib, Gst  # noqa: E402
+from gi.repository import GLib, Gst, GstApp  # noqa: E402, F401
 
 # Hard cap on how long a freshly-added consumer is allowed to spend
 # before its `webrtcbin.connection-state` reaches "connected". In a
@@ -131,6 +134,10 @@ class GstMediaServer:
 
     """
 
+    # Sample rate the wobbler appsink demands; the per-branch audioresample
+    # converts whatever the source produces down to this rate before delivery.
+    WOBBLER_SAMPLE_RATE = 16_000
+
     def __init__(
         self,
         log_level: str = "INFO",
@@ -208,6 +215,8 @@ def __init__(
         self._peer_states_lock = Lock()
         self._incoming_audio: Dict[str, Dict[str, Any]] = {}
         self._playbin: Optional[Gst.Element] = None
+        self._head_wobbler: Optional[HeadWobbler] = None
+        self._pipeline_playback: Optional[Gst.Pipeline] = None
 
         self._build_pipeline()
 
@@ -216,7 +225,7 @@ def _build_pipeline(self) -> None:
         self._pipeline_sender = Gst.Pipeline.new("reachymini_webrtc_sender")
         self._bus_sender = self._pipeline_sender.get_bus()
         self._bus_sender.add_watch(
-            GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop
+            GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_sender
         )
 
         webrtcsink = self._configure_webrtc(self._pipeline_sender)
@@ -270,9 +279,9 @@ def _consumer_added(
     ) -> None:
         self._logger.info(f"consumer added with peer id: {peer_id}")
 
-        Gst.debug_bin_to_dot_file(
-            self._pipeline_sender, Gst.DebugGraphDetails.ALL, "pipeline_full"
-        )
+        # Gst.debug_bin_to_dot_file(
+        #     self._pipeline_sender, Gst.DebugGraphDetails.ALL, "pipeline_full"
+        # )
 
         GLib.timeout_add_seconds(5, self._dump_latency)
 
@@ -372,7 +381,11 @@ def _on_consumer_pad_added(
         self._logger.info(f"Setting up incoming audio playback for peer {peer_id}")
 
         # Build playback pipeline element-by-element
-        playback_pipe = Gst.Pipeline.new(f"audio_playback_{peer_id}")
+        self._pipeline_playback = Gst.Pipeline.new(f"audio_playback_{peer_id}")
+
+        sender_clock = self._pipeline_sender.get_pipeline_clock()
+        self._pipeline_playback.use_clock(sender_clock)
+        self._pipeline_playback.set_start_time(Gst.CLOCK_TIME_NONE)
 
         appsrc = Gst.ElementFactory.make("appsrc", "audio_in")
         appsrc.set_property("format", Gst.Format.TIME)
@@ -381,49 +394,78 @@ def _on_consumer_pad_added(
 
         rtpopusdepay = Gst.ElementFactory.make("rtpopusdepay")
         opusdec = Gst.ElementFactory.make("opusdec")
-        audioconvert = Gst.ElementFactory.make("audioconvert")
-        audioresample = Gst.ElementFactory.make("audioresample")
 
         audiosink = self._build_audiosink_element()
         if audiosink is None:
             self._logger.error("Failed to create audio sink element")
             return
-        audiosink.set_property("sync", False)
+        audiosink.set_property("sync", True)
+
+        # Per-branch audioconvert+audioresample so the wobbler appsink's
+        # mono F32LE/16000 caps don't drag the audiosink branch into a rate
+        # the device can't accept (e.g. wireless XMOS PCM falls back to
+        # IEC958 at non-native rates).
+        tee = Gst.ElementFactory.make("tee")
+        queue_speaker = Gst.ElementFactory.make("queue")
+        ac_speaker = Gst.ElementFactory.make("audioconvert")
+        ar_speaker = Gst.ElementFactory.make("audioresample")
+        queue_wobbler = Gst.ElementFactory.make("queue")
+        ac_wobbler = Gst.ElementFactory.make("audioconvert")
+        ar_wobbler = Gst.ElementFactory.make("audioresample")
+
+        appsink_wobbler = self._make_wobbler_appsink()
 
         for elem in [
             appsrc,
             rtpopusdepay,
             opusdec,
-            audioconvert,
-            audioresample,
+            tee,
+            queue_speaker,
+            ac_speaker,
+            ar_speaker,
             audiosink,
+            queue_wobbler,
+            ac_wobbler,
+            ar_wobbler,
+            appsink_wobbler,
         ]:
-            playback_pipe.add(elem)
+            self._pipeline_playback.add(elem)
         appsrc.link(rtpopusdepay)
         rtpopusdepay.link(opusdec)
-        opusdec.link(audioconvert)
-        audioconvert.link(audioresample)
-        audioresample.link(audiosink)
-
-        play_bus = playback_pipe.get_bus()
+        opusdec.link(tee)
+        tee.link(queue_speaker)
+        queue_speaker.link(ac_speaker)
+        ac_speaker.link(ar_speaker)
+        ar_speaker.link(audiosink)
+        tee.link(queue_wobbler)
+        queue_wobbler.link(ac_wobbler)
+        ac_wobbler.link(ar_wobbler)
+        ar_wobbler.link(appsink_wobbler)
+
+        play_bus = self._pipeline_playback.get_bus()
         play_bus.add_watch(
-            GLib.PRIORITY_DEFAULT, self._on_playback_bus_message, peer_id
+            GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_playback
         )
 
-        playback_pipe.set_state(Gst.State.PLAYING)
+        self._pipeline_playback.set_state(Gst.State.PAUSED)
+        self._pipeline_playback.set_base_time(self._pipeline_sender.get_base_time())
+        self._pipeline_playback.set_state(Gst.State.PLAYING)
 
         # Pad probe: intercept every RTP buffer, forward to the separate
         # playback pipeline, then DROP so webrtcsink's pipeline is unaffected.
         def _buffer_probe(pad: Gst.Pad, info: Gst.PadProbeInfo, _: None) -> int:
             buf = info.get_buffer()
-            if buf is not None:
-                appsrc.emit("push-buffer", buf.copy())
+            appsrc.push_buffer(buf)
             return int(Gst.PadProbeReturn.DROP)
 
         probe_id = pad.add_probe(Gst.PadProbeType.BUFFER, _buffer_probe, None)
 
+        if self._head_wobbler is not None:
+            self._head_wobbler.reset()
+            self._head_wobbler.start()
+
         self._incoming_audio[peer_id] = {
-            "playback_pipeline": playback_pipe,
+            "playback_pipeline": self._pipeline_playback,
             "probe_id": probe_id,
             "pad": pad,
         }
@@ -957,18 +999,10 @@ def _build_audio_source(self) -> Optional[Gst.Element]:
         )
         return Gst.ElementFactory.make("autoaudiosrc")
 
-    def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool:  # type: ignore[no-untyped-def]
-        t = msg.type
-        if t == Gst.MessageType.EOS:
-            self._logger.warning("End-of-stream")
-            return False
-
-        elif t == Gst.MessageType.ERROR:
-            err, debug = msg.parse_error()
-            self._logger.error(f"Error: {err} {debug}")
-            return False
-
-        return True
+    def _on_bus_message(
+        self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline
+    ) -> bool:
+        return handle_default_bus_message(self._logger, msg, pipeline)
 
     def start(self) -> None:
         """Rebuild the pipeline from scratch and start it.
@@ -1007,9 +1041,6 @@ def play_sound(self, sound_file: str) -> None:
         else:
             file_path = sound_file
 
-        # Build platform-aware audio sink element
-        audiosink = self._build_audiosink_element()
-
         if self._playbin is not None:
             self._playbin.set_state(Gst.State.NULL)
 
@@ -1029,8 +1060,11 @@ def play_sound(self, sound_file: str) -> None:
             uri = f"file://{file_path}"
 
         playbin.set_property("uri", uri)
-        if audiosink is not None:
-            playbin.set_property("audio-sink", audiosink)
+        playbin.set_property("audio-sink", self._build_audiosink_tee_bin())
+
+        if self._head_wobbler is not None:
+            self._head_wobbler.reset()
+            self._head_wobbler.start()
 
         self._playbin = playbin
         playbin.set_state(Gst.State.PLAYING)
@@ -1084,6 +1118,115 @@ def _build_audiosink_element(self) -> Optional[Gst.Element]:
 
         return Gst.ElementFactory.make("autoaudiosink")
 
+    def _make_wobbler_appsink(self) -> Gst.Element:
+        """Create a sync=True appsink that feeds audio to the head wobbler.
+
+        new-sample fires at the buffer's PTS on the pipeline clock —
+        the same instant the audiosink renders that audio.
+        """
+        appsink = Gst.ElementFactory.make("appsink")
+        # Force mono so the speech tapper receives a 1-D float32 array.
+        # The per-branch audioconvert handles the downmix.
+        caps = Gst.Caps.from_string(
+            f"audio/x-raw,format=F32LE,channels=1,rate={self.WOBBLER_SAMPLE_RATE},layout=interleaved"
+        )
+        appsink.set_property("caps", caps)
+        appsink.set_property("drop", True)
+        appsink.set_property("max-buffers", 5)
+        appsink.set_property("sync", True)
+        appsink.set_property("emit-signals", True)
+        appsink.connect("new-sample", self._on_wobbler_sample)
+        return appsink
+
+    def _on_wobbler_sample(self, appsink: Gst.Element) -> Gst.FlowReturn:
+        """GStreamer callback: forward audio buffer to the head wobbler.
+
+        The appsink is sync=True so the callback fires at the buffer's
+        PTS on the pipeline clock — audio is playing NOW.
+        """
+        sample = appsink.pull_sample()
+        if sample is None or self._head_wobbler is None:
+            return Gst.FlowReturn.OK
+        buf = sample.get_buffer()
+        data = buf.extract_dup(0, buf.get_size())
+        pcm = np.frombuffer(data, dtype=np.float32)
+        self._head_wobbler.feed(pcm, time.monotonic_ns())
+        return Gst.FlowReturn.OK
+
+    def _build_audiosink_tee_bin(self) -> Gst.Bin:
+        """Build a Gst.Bin splitting audio to speaker and wobbler appsink.
+
+        Per-branch audioconvert+audioresample isolate each leaf's caps
+        from the other (the wobbler appsink demands mono F32LE/16000; the
+        audiosink wants whatever the device prefers — e.g. on the
+        wireless XMOS PCM, anything but its native rate triggers an
+        IEC958 fallback that fails to open).
+
+        The bin exposes a single ghost sink pad for use as a playbin audio-sink::
+
+            ghost_sink → tee ─┬→ queue → audioconvert → audioresample → audiosink
+                               └→ queue → audioconvert → audioresample → appsink
+        """
+        audio_bin = Gst.Bin.new("audio_tee_bin")
+
+        tee = Gst.ElementFactory.make("tee")
+        queue_speaker = Gst.ElementFactory.make("queue")
+        ac_speaker = Gst.ElementFactory.make("audioconvert")
+        ar_speaker = Gst.ElementFactory.make("audioresample")
+        audiosink = self._build_audiosink_element()
+        queue_wobbler = Gst.ElementFactory.make("queue")
+        ac_wobbler = Gst.ElementFactory.make("audioconvert")
+        ar_wobbler = Gst.ElementFactory.make("audioresample")
+        appsink_wobbler = self._make_wobbler_appsink()
+
+        for el in (
+            tee,
+            queue_speaker,
+            ac_speaker,
+            ar_speaker,
+            audiosink,
+            queue_wobbler,
+            ac_wobbler,
+            ar_wobbler,
+            appsink_wobbler,
+        ):
+            audio_bin.add(el)
+
+        tee.link(queue_speaker)
+        queue_speaker.link(ac_speaker)
+        ac_speaker.link(ar_speaker)
+        ar_speaker.link(audiosink)
+
+        tee.link(queue_wobbler)
+        queue_wobbler.link(ac_wobbler)
+        ac_wobbler.link(ar_wobbler)
+        ar_wobbler.link(appsink_wobbler)
+
+        ghost_pad = Gst.GhostPad.new("sink", tee.get_static_pad("sink"))
+        audio_bin.add_pad(ghost_pad)
+
+        return audio_bin
+
+    def enable_wobbling(self, callback: Callable[[SpeechOffsets], None]) -> None:
+        """Enable head wobbling driven by audio playback.
+
+        Args:
+            callback: Called with ``(x_m, y_m, z_m, roll_rad, pitch_rad,
+                yaw_rad)`` for each movement hop.
+
+        """
+        if self._head_wobbler is not None:
+            self._head_wobbler.stop()
+        self._head_wobbler = HeadWobbler(callback, sample_rate=self.WOBBLER_SAMPLE_RATE)
+        self._logger.info("Head wobbler enabled (daemon-side)")
+
+    def disable_wobbling(self) -> None:
+        """Disable head wobbling."""
+        if self._head_wobbler is not None:
+            self._head_wobbler.stop()
+            self._head_wobbler = None
+            self._logger.info("Head wobbler disabled (daemon-side)")
+
     def set_message_handler(
         self,
         handler: Callable[[str, str], None],  # cb(peer_id, message)
diff --git a/src/reachy_mini/media/webrtc_client_gstreamer.py b/src/reachy_mini/media/webrtc_client_gstreamer.py
index a10d27bf4..869dab1ed 100644
--- a/src/reachy_mini/media/webrtc_client_gstreamer.py
+++ b/src/reachy_mini/media/webrtc_client_gstreamer.py
@@ -106,7 +106,7 @@ def __init__(
         self._pipeline_record = Gst.Pipeline.new("audio_recorder")
         self._bus_record = self._pipeline_record.get_bus()
         self._bus_record.add_watch(
-            GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop
+            GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_record
         )
 
         self._appsink_audio = Gst.ElementFactory.make("appsink")
@@ -144,7 +144,7 @@ def __init__(
         self._webrtcbin = None
         self._audio_send_ready = False
         self._appsrc = None
-        self._appsrc_pts = 0  # running PTS in nanoseconds for appsrc buffers
+        self._first_push_done = False
         self.daemon_url: str = ""  # set by MediaManager for remote sound ops
         self._webrtcsrc.connect("deep-element-added", self._on_deep_element_added)
         self.logger.info("GstWebRTCClient initialized (bidirectional audio support)")
@@ -300,21 +300,18 @@ def _webrtcsrc_pad_added_cb(self, webrtcsrc: Gst.Element, pad: Gst.Pad) -> None:
 
         GLib.timeout_add_seconds(5, self._dump_latency)
 
-    def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool:  # type: ignore[no-untyped-def]
-        t = msg.type
-        if t == Gst.MessageType.EOS:
-            self.logger.warning("End-of-stream")
-            return False
-        elif t == Gst.MessageType.ERROR:
-            err, debug = msg.parse_error()
+    def _on_bus_message(
+        self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline
+    ) -> bool:
+        # webrtcsrc may emit non-fatal errors from its internal
+        # elements (e.g. appsrc not-negotiated when a sendrecv
+        # transceiver has no data to send).  GStreamer wraps the
+        # actual reason as "Internal data stream error." in the
+        # GError, with "not-negotiated" only in the debug string.
+        # These should not tear down the whole pipeline.
+        if msg.type == Gst.MessageType.ERROR:
+            err, _ = msg.parse_error()
             src = msg.src
-
-            # webrtcsrc may emit non-fatal errors from its internal
-            # elements (e.g. appsrc not-negotiated when a sendrecv
-            # transceiver has no data to send).  GStreamer wraps the
-            # actual reason as "Internal data stream error." in the
-            # GError, with "not-negotiated" only in the debug string.
-            # These should not tear down the whole pipeline.
             if (
                 src is not None
                 and src.get_factory() is not None
@@ -326,10 +323,7 @@ def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool:  # type
             ):
                 self.logger.debug(f"Ignoring non-fatal webrtcsrc internal error: {err}")
                 return True
-
-            self.logger.error(f"Error: {err} {debug}")
-            return False
-        return True
+        return super()._on_bus_message(bus, msg, pipeline)
 
     def open(self) -> None:
         """Start the WebRTC pipeline (both video and audio)."""
@@ -404,6 +398,7 @@ def _setup_audio_send_chain(self) -> None:
         appsrc = Gst.ElementFactory.make("appsrc")
         appsrc.set_property("format", Gst.Format.TIME)
         appsrc.set_property("is-live", True)
+
         caps = Gst.Caps.from_string(
             f"audio/x-raw,format=F32LE,channels={self.CHANNELS},rate={self.SAMPLE_RATE},layout=interleaved"
         )
@@ -417,7 +412,13 @@ def _setup_audio_send_chain(self) -> None:
         rtpopuspay = Gst.ElementFactory.make("rtpopuspay")
         rtpopuspay.set_property("pt", pt)
 
-        elems = (appsrc, audioconvert, audioresample, opusenc, rtpopuspay)
+        elems = (
+            appsrc,
+            audioconvert,
+            audioresample,
+            opusenc,
+            rtpopuspay,
+        )
 
         target_bin = self._pipeline_record
         for elem in elems:
@@ -453,7 +454,7 @@ def start_playing(self) -> None:
 
     def stop_playing(self) -> None:
         """Reset the PTS counter for the send chain and stop daemon-side sound."""
-        self._appsrc_pts = 0
+        self._appsrc_pts = -1
         # Also stop any sound file playing on the daemon's speaker.
         if self.daemon_url:
             try:
@@ -468,34 +469,54 @@ def clear_output_buffer(self) -> None:
         """No-op (WebRTC send chain does not buffer significantly)."""
         pass
 
+    def _push_buffer(self, data: npt.NDArray[np.float32]) -> None:
+        """Timestamp one F32LE chunk via ``_compute_pts`` and push it to appsrc."""
+        appsrc = self._appsrc
+        if appsrc is None:
+            return
+
+        n_samples = int(data.shape[0])
+        running_ns = appsrc.get_current_running_time()
+        start_ns, span_ns, self._appsrc_pts = self._compute_pts(
+            n_samples, running_ns, self._appsrc_pts
+        )
+        chunk = Gst.Buffer.new_wrapped(data.tobytes())
+        chunk.pts = chunk.dts = start_ns
+        chunk.duration = span_ns
+
+        flow = appsrc.push_buffer(chunk)
+        if flow != Gst.FlowReturn.OK:
+            self.logger.warning("push_buffer dropped: %s", flow)
+
     def push_audio_sample(self, data: npt.NDArray[np.float32]) -> None:
         """Push audio data to the remote peer via WebRTC.
 
+        The very first call also primes the send chain with 0.5 s of
+        silence (one row per frame, ``CHANNELS`` columns) so the Opus
+        encoder and webrtcbin can warm up before the caller's real audio
+        arrives; without this the first word of an utterance gets swallowed.
+
         Args:
             data: Float32 audio samples.
 
         """
         if self._appsrc is None:
-            return  # send chain not ready yet, silently drop
-
-        num_samples = data.shape[0]
-        duration_ns = (num_samples * Gst.SECOND) // self.SAMPLE_RATE
-
-        buf = Gst.Buffer.new_wrapped(data.tobytes())
-        buf.pts = self._appsrc_pts
-        buf.duration = duration_ns
-        self._appsrc_pts += duration_ns
+            return  # send chain not ready yet: drop the chunk silently
 
-        self._appsrc.push_buffer(buf)
+        if not self._first_push_done:
+            self._first_push_done = True
+            warmup = np.zeros((self.SAMPLE_RATE // 2, self.CHANNELS), dtype=np.float32)
+            self._push_buffer(warmup)
+        self._push_buffer(data)
 
     def play_sound(self, sound_file: str) -> None:
         """Play a sound file on the robot's speaker via the daemon REST API.
 
         If *sound_file* is a local path that exists on this machine the
-        file is automatically uploaded to the daemon's temporary sound
-        directory (skipping the upload when a file with the same name is
-        already present).  Otherwise the filename is sent as-is and the
-        daemon resolves it from its built-in assets or filesystem.
+        file is uploaded to the daemon's temporary sound directory
+        (overwriting any previous upload with the same basename).
+        Otherwise the filename is sent as-is and the daemon resolves it
+        from its built-in assets or filesystem.
 
         Args:
             sound_file: Absolute local path **or** asset filename
@@ -506,17 +527,9 @@ def play_sound(self, sound_file: str) -> None:
             self.logger.error("No daemon URL configured — cannot play sound remotely.")
             return
 
-        # If the file exists on the client, ensure it is uploaded first.
         remote_file = sound_file
         if os.path.isfile(sound_file):
-            filename = os.path.basename(sound_file)
-            remote_files = self.list_sounds()
-            if filename not in remote_files:
-                remote_file = self.upload_sound(sound_file)
-            else:
-                # Already uploaded — ask the daemon to resolve by filename.
-                # The daemon's play_sound checks the temp dir, assets, etc.
-                remote_file = filename
+            remote_file = self.upload_sound(sound_file)
 
         try:
             resp = _requests.post(
diff --git a/src/reachy_mini/motion/head_wobbler.py b/src/reachy_mini/motion/head_wobbler.py
new file mode 100644
index 000000000..abe183305
--- /dev/null
+++ b/src/reachy_mini/motion/head_wobbler.py
@@ -0,0 +1,134 @@
+"""PTS-driven head wobbler.
+
+Drives 6-DOF head movement offsets from PCM audio analysed by
+:class:`SwayRollRT` (the speech tapper). Each call to :meth:`feed`
+turns one PCM chunk into a list of per-hop sway dicts and registers a
+``GLib.timeout_add`` for each, firing the offset callback at the
+audio's actual playback time (computed by the caller from buffer PTS +
+audiosink latency).
+
+There is no background thread: scheduling runs on whichever GLib main
+loop the caller's pipeline already uses for its bus watch.
+"""
+
+import logging
+import threading
+import time
+from collections.abc import Callable
+from typing import Any
+
+from gi.repository import GLib
+from numpy.typing import NDArray
+
+from reachy_mini.motion import speech_tapper
+
+logger = logging.getLogger(__name__)
+
+# Public type alias; re-exported by ``media/*`` modules.
+SpeechOffsets = tuple[float, float, float, float, float, float]
+
+
+class HeadWobbler:
+    """PTS-driven scheduler that turns audio into timed head offsets."""
+
+    _ZERO_OFFSETS: SpeechOffsets = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+
+    def __init__(
+        self,
+        set_speech_offsets: Callable[[SpeechOffsets], None],
+        sample_rate: int,
+    ) -> None:
+        """Initialize the wobbler with the offset callback and audio rate.
+
+        Args:
+            set_speech_offsets: Called with a 6-tuple of head offsets per hop.
+            sample_rate: Sample rate of the PCM that will be fed via
+                :meth:`feed` — must match the wobbler appsink's caps.
+
+        """
+        self._apply_offsets = set_speech_offsets
+
+        self._hop_ms = speech_tapper.HOP_MS
+        self._sample_rate = int(sample_rate)
+        self.sway = speech_tapper.SwayRollRT(sample_rate=self._sample_rate)
+
+        self._lock = threading.Lock()  # guards ``_generation``
+        self._sway_lock = threading.Lock()  # guards ``sway`` (DSP state)
+        # Bumped on start/stop/reset so in-flight GLib timeouts no-op when fired.
+        self._generation = 0
+
+    def start(self) -> None:
+        """Reset DSP and hop generation. Idempotent."""
+        with self._lock:
+            self._generation += 1
+        with self._sway_lock:
+            self.sway.reset()
+        logger.debug("Head wobbler started")
+
+    def stop(self) -> None:
+        """Cancel pending offsets and zero the head."""
+        with self._lock:
+            self._generation += 1
+        self._apply_offsets(self._ZERO_OFFSETS)
+        logger.debug("Head wobbler stopped")
+
+    def reset(self) -> None:
+        """Cancel pending offsets, recreate DSP state, zero the head."""
+        with self._lock:
+            self._generation += 1
+        with self._sway_lock:
+            self.sway = speech_tapper.SwayRollRT(sample_rate=self._sample_rate)
+        self._apply_offsets(self._ZERO_OFFSETS)
+
+    def feed(self, pcm: NDArray[Any], play_at_monotonic_ns: int) -> None:
+        """Schedule per-hop offsets for *pcm* against its playback time.
+
+        Args:
+            pcm: Float32 mono samples at this wobbler's ``sample_rate``.
+            play_at_monotonic_ns: ``time.monotonic_ns()``-comparable
+                instant at which the *first* sample of *pcm* will be
+                heard from the speaker. Subsequent hops are scheduled at
+                ``play_at_monotonic_ns + i * HOP_MS * 1_000_000``.
+
+        """
+        # Snapshot the generation *before* the DSP step: if stop()/reset()
+        # runs while this chunk is being analysed, its hops must carry the
+        # old generation so that ``_fire`` drops them instead of moving the
+        # head after it was zeroed.
+        with self._lock:
+            generation = self._generation
+
+        with self._sway_lock:
+            results = self.sway.feed(pcm)
+        if not results:
+            return
+
+        hop_ns = self._hop_ms * 1_000_000
+        now_ns = time.monotonic_ns()
+
+        # Skip hops more than one hop's worth in the past (genuinely
+        # stale); clamp small sub-hop negatives to 0 so they fire on
+        # the next main-loop iteration.
+        stale_threshold_ms = -self._hop_ms
+        for i, hop in enumerate(results):
+            target_ns = play_at_monotonic_ns + i * hop_ns
+            delay_ms = (target_ns - now_ns) // 1_000_000
+            if delay_ms < stale_threshold_ms:
+                continue
+            offsets: SpeechOffsets = (
+                hop["x_mm"] / 1000.0,
+                hop["y_mm"] / 1000.0,
+                hop["z_mm"] / 1000.0,
+                hop["roll_rad"],
+                hop["pitch_rad"],
+                hop["yaw_rad"],
+            )
+            GLib.timeout_add(max(0, int(delay_ms)), self._fire, offsets, generation)
+
+    def _fire(self, offsets: SpeechOffsets, generation: int) -> bool:
+        """GLib timeout callback. Returns False so the source is removed."""
+        with self._lock:
+            current = self._generation
+        if generation == current:
+            self._apply_offsets(offsets)
+        return False  # one-shot
diff --git a/src/reachy_mini/motion/speech_tapper.py b/src/reachy_mini/motion/speech_tapper.py
new file mode 100644
index 000000000..0340e916f
--- /dev/null
+++ b/src/reachy_mini/motion/speech_tapper.py
@@ -0,0 +1,228 @@
+"""Audio-reactive sway/roll generator for head wobbling.
+
+Analyses PCM audio in real time and produces per-hop movement parameters
+(pitch, yaw, roll, x, y, z) driven by voice activity and loudness.
+
+Ported from *reachy_mini_conversation_app*.
+"""
+
+from __future__ import annotations
+
+import math
+from collections import deque
+from itertools import islice
+
+import numpy as np
+from numpy.typing import NDArray
+
+# ---------------------------------------------------------------------------
+# Tunables
+# ---------------------------------------------------------------------------
+FRAME_MS = 20  # length of the RMS analysis window, in milliseconds
+HOP_MS = 50  # audio consumed per sway output, in milliseconds
+
+SWAY_MASTER = 1.5  # global multiplier applied to the loudness gain
+SENS_DB_OFFSET = +4.0  # dB added to the measured level before normalisation
+VAD_DB_ON = -35.0  # level (dB) at or above which a hop counts towards "voice on"
+VAD_DB_OFF = -45.0  # level (dB) at or below which a hop counts towards "voice off"
+VAD_ATTACK_MS = 40
+VAD_RELEASE_MS = 250
+ENV_FOLLOW_GAIN = 0.65  # per-hop step of the envelope towards its target
+
+SWAY_F_PITCH = 2.2  # SWAY_F_*: oscillator frequency, used as sin(2*pi*f*t) with t in seconds
+SWAY_A_PITCH_DEG = 4.5  # SWAY_A_*_DEG: amplitude in degrees (converted with math.radians)
+SWAY_F_YAW = 0.6
+SWAY_A_YAW_DEG = 7.5
+SWAY_F_ROLL = 1.3
+SWAY_A_ROLL_DEG = 2.25
+SWAY_F_X = 0.35
+SWAY_A_X_MM = 4.5  # SWAY_A_*_MM: amplitude of the ``*_mm`` outputs
+SWAY_F_Y = 0.45
+SWAY_A_Y_MM = 3.75
+SWAY_F_Z = 0.25
+SWAY_A_Z_MM = 2.25
+
+SWAY_DB_LOW = -46.0  # level mapped to loudness gain 0
+SWAY_DB_HIGH = -18.0  # level mapped to loudness gain 1
+LOUDNESS_GAMMA = 0.9  # exponent applied to the normalised loudness
+SWAY_ATTACK_MS = 50
+SWAY_RELEASE_MS = 250
+
+# ---------------------------------------------------------------------------
+# Derived constants (rate-independent — FRAME/HOP are per-instance)
+# ---------------------------------------------------------------------------
+ATTACK_FR = max(1, int(VAD_ATTACK_MS / HOP_MS))  # counted in hops, at least 1
+RELEASE_FR = max(1, int(VAD_RELEASE_MS / HOP_MS))  # counted in hops, at least 1
+SWAY_ATTACK_FR = max(1, int(SWAY_ATTACK_MS / HOP_MS))  # counted in hops, at least 1
+SWAY_RELEASE_FR = max(1, int(SWAY_RELEASE_MS / HOP_MS))  # counted in hops, at least 1
+
+
+def _rms_dbfs(x: NDArray[np.float32]) -> float:
+    """Return the RMS level of *x* in dBFS (small epsilons keep silence finite)."""
+    samples = x.astype(np.float32, copy=False)
+    power = np.mean(np.square(samples), dtype=np.float32) + 1e-12
+    return float(20.0 * math.log10(float(np.sqrt(power, dtype=np.float32)) + 1e-12))
+
+
+def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
+    """Map ``db + offset`` linearly onto [0, 1], clamp, then apply the gamma."""
+    span_db = SWAY_DB_HIGH - SWAY_DB_LOW
+    level = (db + offset - SWAY_DB_LOW) / span_db
+    level = 0.0 if level < 0.0 else 1.0 if level > 1.0 else level
+    if LOUDNESS_GAMMA == 1.0:
+        return level
+    return level**LOUDNESS_GAMMA
+
+
+class SwayRollRT:
+    """Feed audio chunks and get per-hop sway outputs.
+
+    Usage::
+
+        rt = SwayRollRT(sample_rate=16_000)
+        results = rt.feed(pcm_float32_mono)
+        # results is a list of dicts, one per HOP_MS
+
+    """
+
+    def __init__(self, rng_seed: int = 7, sample_rate: int = 16_000) -> None:
+        """Initialize state with random oscillator phases."""
+        self._seed = int(rng_seed)
+        self.sample_rate = int(sample_rate)
+        self.frame = int(self.sample_rate * FRAME_MS / 1000)
+        self.hop = int(self.sample_rate * HOP_MS / 1000)
+        self.samples: deque[float] = deque(maxlen=10 * self.sample_rate)
+        self.carry: NDArray[np.float32] = np.zeros(0, dtype=np.float32)
+
+        self.vad_on = False
+        self.vad_above = 0
+        self.vad_below = 0
+
+        self.sway_env = 0.0
+        self.sway_up = 0
+        self.sway_down = 0
+
+        rng = np.random.default_rng(self._seed)
+        self.phase_pitch = float(rng.random() * 2 * math.pi)
+        self.phase_yaw = float(rng.random() * 2 * math.pi)
+        self.phase_roll = float(rng.random() * 2 * math.pi)
+        self.phase_x = float(rng.random() * 2 * math.pi)
+        self.phase_y = float(rng.random() * 2 * math.pi)
+        self.phase_z = float(rng.random() * 2 * math.pi)
+        self.t = 0.0
+
+    def reset(self) -> None:
+        """Reset state (VAD/env/buffers/time) but keep initial phases/seed."""
+        self.samples.clear()
+        self.carry = np.zeros(0, dtype=np.float32)
+        self.vad_on = False
+        self.vad_above = 0
+        self.vad_below = 0
+        self.sway_env = 0.0
+        self.sway_up = 0
+        self.sway_down = 0
+        self.t = 0.0
+
+    def feed(self, pcm: NDArray[np.float32]) -> list[dict[str, float]]:
+        """Stream in a float32 mono PCM chunk; returns sway dicts (one per hop).
+
+        *pcm* must already match this instance's ``sample_rate`` — the
+        upstream GStreamer audioresample handles rate conversion.
+
+        Args:
+            pcm: Float32 mono samples ``(N,)`` in ``[-1, 1]``.
+
+        """
+        if pcm.size == 0:
+            return []
+
+        # Always copy into ``carry``: the leftover tail outlives this call, so
+        # it must not alias the caller's buffer (which may be reused/mutated).
+        # Concatenating with the (possibly empty) carry yields a fresh array.
+        self.carry = np.concatenate([self.carry, pcm])
+
+        out: list[dict[str, float]] = []
+
+        while self.carry.size >= self.hop:
+            hop = self.carry[:self.hop]
+            self.carry = self.carry[self.hop:]
+
+            self.samples.extend(hop.tolist())
+            if len(self.samples) < self.frame:
+                self.t += HOP_MS / 1000.0
+                continue
+
+            # Walk the deque from its tail: O(frame) instead of skipping the
+            # whole (up to 10 s) history on every hop.  Restore time order.
+            newest_first = islice(reversed(self.samples), self.frame)
+            tail = np.fromiter(newest_first, dtype=np.float32, count=self.frame)
+            frame = np.ascontiguousarray(tail[::-1])
+            db = _rms_dbfs(frame)
+
+            # VAD with hysteresis + attack/release
+            if db >= VAD_DB_ON:
+                self.vad_above += 1
+                self.vad_below = 0
+                if not self.vad_on and self.vad_above >= ATTACK_FR:
+                    self.vad_on = True
+            elif db <= VAD_DB_OFF:
+                self.vad_below += 1
+                self.vad_above = 0
+                if self.vad_on and self.vad_below >= RELEASE_FR:
+                    self.vad_on = False
+
+            if self.vad_on:
+                self.sway_up = min(SWAY_ATTACK_FR, self.sway_up + 1)
+                self.sway_down = 0
+            else:
+                self.sway_down = min(SWAY_RELEASE_FR, self.sway_down + 1)
+                self.sway_up = 0
+
+            up = self.sway_up / SWAY_ATTACK_FR
+            down = 1.0 - (self.sway_down / SWAY_RELEASE_FR)
+            target = up if self.vad_on else down
+            self.sway_env += ENV_FOLLOW_GAIN * (target - self.sway_env)
+            if self.sway_env < 0.0:
+                self.sway_env = 0.0
+            elif self.sway_env > 1.0:
+                self.sway_env = 1.0
+
+            loud = _loudness_gain(db) * SWAY_MASTER
+            env = self.sway_env
+            self.t += HOP_MS / 1000.0
+
+            # Oscillators
+            pitch = (
+                math.radians(SWAY_A_PITCH_DEG)
+                * loud
+                * env
+                * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
+            )
+            yaw = (
+                math.radians(SWAY_A_YAW_DEG)
+                * loud
+                * env
+                * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
+            )
+            roll = (
+                math.radians(SWAY_A_ROLL_DEG)
+                * loud
+                * env
+                * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
+            )
+            x_mm = SWAY_A_X_MM * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
+            y_mm = SWAY_A_Y_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
+            z_mm = SWAY_A_Z_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
+
+            out.append(
+                {
+                    "pitch_rad": pitch,
+                    "yaw_rad": yaw,
+                    "roll_rad": roll,
+                    "x_mm": x_mm,
+                    "y_mm": y_mm,
+                    "z_mm": z_mm,
+                },
+            )
+
+        return out
diff --git a/src/reachy_mini/reachy_mini.py b/src/reachy_mini/reachy_mini.py
index 6e7c62508..1b441ee4a 100644
--- a/src/reachy_mini/reachy_mini.py
+++ b/src/reachy_mini/reachy_mini.py
@@ -29,8 +29,10 @@
     SetFullTargetCmd,
     SetGravityCompensationCmd,
     SetHeadJointsCmd,
+    SetSpeechOffsetsCmd,
     SetTargetCmd,
     SetTorqueCmd,
+    SetWobblingCmd,
     StartRecordingCmd,
     StopRecordingCmd,
 )
@@ -232,6 +234,35 @@ def acquire_media(self) -> None:
         self._media_released = False
         self.logger.info("Media re-acquired by daemon.")
 
+    def enable_wobbling(self) -> None:
+        """Turn on audio-reactive head wobbling.
+
+        Once enabled, audio played through ``media.play_sound()`` or
+        ``media.push_audio_sample()`` is analysed and converted into
+        subtle head movements that are composed with the current target
+        pose on the daemon side.
+
+        SDK side (LOCAL backend; a no-op for WEBRTC): offsets are computed
+        here and sent over WebSocket.  Daemon side (all backends): the
+        daemon is told to wobble too, so that its own sounds (wake-up,
+        sleep, etc.) and incoming WebRTC audio also move the head.
+
+        """
+        def _forward(offsets: tuple[float, float, float, float, float, float]) -> None:
+            cmd = SetSpeechOffsetsCmd(offsets=list(offsets))
+            self.client.send_command(cmd)
+
+        # SDK-side wobbling first, then the daemon-side switch.
+        self.media_manager.enable_wobbling(_forward)
+        self.client.send_command(SetWobblingCmd(enabled=True))
+        self.logger.info("Head wobbling enabled")
+
+    def disable_wobbling(self) -> None:
+        """Disable SDK-side wobbling, then send ``SetWobblingCmd(enabled=False)`` to the daemon."""
+        self.media_manager.disable_wobbling()
+        self.client.send_command(SetWobblingCmd(enabled=False))
+        self.logger.info("Head wobbling disabled")
+
     @property
     def imu(self) -> Dict[str, List[float] | float] | None:
         """Get the current IMU data from the backend.
diff --git a/tests/unit_tests/test_audio_gstreamer.py b/tests/unit_tests/test_audio_gstreamer.py
index 37cd4592c..493786a60 100644
--- a/tests/unit_tests/test_audio_gstreamer.py
+++ b/tests/unit_tests/test_audio_gstreamer.py
@@ -1,19 +1,30 @@
-"""Unit tests for GStreamer audio playback timestamp helpers."""
+"""Unit tests for the shared appsrc PTS helper."""
 
+from types import SimpleNamespace
 from typing import cast
 
+from reachy_mini.media.audio_base import AudioBase
 from reachy_mini.media.audio_gstreamer import GStreamerAudio
 
 
-def test_compute_playback_buffer_timing_starts_at_running_time() -> None:
+def _fake_self() -> AudioBase:
+    """Build a minimal ``self`` substitute for calling ``_compute_pts`` unbound.
+
+    Only ``SAMPLE_RATE`` and ``GAP_RESET_NS`` are provided, copied from
+    :class:`GStreamerAudio`.
+    """
+    constants = {name: getattr(GStreamerAudio, name) for name in ("SAMPLE_RATE", "GAP_RESET_NS")}
+    stand_in = SimpleNamespace(**constants)
+    return cast(AudioBase, stand_in)
+
+
+def test_compute_pts_starts_at_running_time() -> None:
     """Start the first buffer at the current playback running time."""
-    pts_ns, duration_ns, next_pts_ns = GStreamerAudio._compute_playback_buffer_timing(
-        cast(GStreamerAudio, object()),
+    pts_ns, duration_ns, next_pts_ns = GStreamerAudio._compute_pts(
+        _fake_self(),
         1600,
-        16000,
         2_000_000_000,
-        None,
-        GStreamerAudio.PLAYBACK_GAP_RESET_NS,
+        -1,
     )
 
     assert pts_ns == 2_000_000_000
@@ -21,15 +32,13 @@ def test_compute_playback_buffer_timing_starts_at_running_time() -> None:
     assert next_pts_ns == 2_100_000_000
 
 
-def test_compute_playback_buffer_timing_continues_without_gap() -> None:
+def test_compute_pts_continues_without_gap() -> None:
     """Keep appending buffers when the running time has not drifted ahead."""
-    pts_ns, duration_ns, next_pts_ns = GStreamerAudio._compute_playback_buffer_timing(
-        cast(GStreamerAudio, object()),
+    pts_ns, duration_ns, next_pts_ns = GStreamerAudio._compute_pts(
+        _fake_self(),
         800,
-        16000,
         1_050_000_000,
         1_100_000_000,
-        GStreamerAudio.PLAYBACK_GAP_RESET_NS,
     )
 
     assert pts_ns == 1_100_000_000
@@ -37,15 +46,13 @@ def test_compute_playback_buffer_timing_continues_without_gap() -> None:
     assert next_pts_ns == 1_150_000_000
 
 
-def test_compute_playback_buffer_timing_resets_after_large_gap() -> None:
+def test_compute_pts_resets_after_large_gap() -> None:
     """Realign buffer timing after a long idle gap in sparse realtime audio."""
-    pts_ns, duration_ns, next_pts_ns = GStreamerAudio._compute_playback_buffer_timing(
-        cast(GStreamerAudio, object()),
+    pts_ns, duration_ns, next_pts_ns = GStreamerAudio._compute_pts(
+        _fake_self(),
         800,
-        16000,
         1_400_000_000,
         1_100_000_000,
-        GStreamerAudio.PLAYBACK_GAP_RESET_NS,
     )
 
     assert pts_ns == 1_400_000_000
diff --git a/tests/unit_tests/test_head_wobbler.py b/tests/unit_tests/test_head_wobbler.py
new file mode 100644
index 000000000..fff31cbea
--- /dev/null
+++ b/tests/unit_tests/test_head_wobbler.py
@@ -0,0 +1,318 @@
+"""Unit tests for speech_tapper and head_wobbler modules."""  # noqa: D100
+
+import time
+
+import numpy as np
+import pytest
+
+from reachy_mini.motion.speech_tapper import (
+    HOP_MS,
+    SwayRollRT,
+    _loudness_gain,
+    _rms_dbfs,
+)
+
+SR = 16_000  # sample rate used for tone generation in tests
+
+
+def _patch_glib_timeout(monkeypatch):
+    """Swap ``GLib.timeout_add`` for a recording stub and return its log.
+
+    The log holds one ``(delay_ms, fn, args)`` tuple per call; a test
+    simulates the main loop firing a timeout by calling ``fn(*args)``.
+    """
+    recorded: list[tuple[int, object, tuple]] = []
+
+    def _record(delay_ms, fn, *args):
+        entry = (delay_ms, fn, args)
+        recorded.append(entry)
+        return len(recorded)  # 1-based position doubles as the fake source id
+
+    target = "reachy_mini.motion.head_wobbler.GLib.timeout_add"
+    monkeypatch.setattr(target, _record)
+    return recorded
+
+# ---------------------------------------------------------------------------
+# speech_tapper: helper functions
+# ---------------------------------------------------------------------------
+
+
+def test_rms_silence_is_very_negative():  # noqa: D103
+    level_db = _rms_dbfs(np.zeros(320, dtype=np.float32))
+    assert level_db < -100
+
+
+def test_rms_full_scale_sine_near_zero():  # noqa: D103
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    full_scale = np.sin(2 * np.pi * 440 * seconds).astype(np.float32)
+    # A full-scale sine has an RMS of about -3 dBFS.
+    assert -5 < _rms_dbfs(full_scale) < 0
+
+
+def test_rms_quiet_signal_is_negative():  # noqa: D103
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    faint = (np.sin(2 * np.pi * 440 * seconds) * 0.01).astype(np.float32)
+    assert _rms_dbfs(faint) < -35
+
+
+def test_loudness_below_low_threshold_is_zero():  # noqa: D103
+    assert _loudness_gain(-100.0) == 0.0  # far below SWAY_DB_LOW even after the offset → clamped to 0
+
+
+def test_loudness_above_high_threshold_clamped():  # noqa: D103
+    # 0 dB is well above SWAY_DB_HIGH, so the gain must saturate near 1.
+    saturated = _loudness_gain(0.0)
+    assert 0.9 < saturated <= 1.0
+
+
+def test_loudness_monotonically_increasing():  # noqa: D103
+    levels_db = (-50, -40, -30, -20, -10)
+    gains = [_loudness_gain(level) for level in levels_db]
+    # Non-strict: equal neighbours are allowed (clamped regions).
+    assert all(lower <= higher for lower, higher in zip(gains, gains[1:]))
+
+
+# ---------------------------------------------------------------------------
+# speech_tapper: SwayRollRT
+# ---------------------------------------------------------------------------
+
+
+def test_sway_empty_input():  # noqa: D103
+    nothing = np.zeros(0, dtype=np.float32)
+    assert SwayRollRT().feed(nothing) == []
+
+
+def test_sway_short_input_no_output():  # noqa: D103
+    """Fewer samples than one hop yield no hops."""
+    tapper = SwayRollRT()
+    hops = tapper.feed(np.zeros(100, dtype=np.float32))
+    assert hops == []
+
+
+def test_sway_one_second_produces_hops():  # noqa: D103
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    tapper = SwayRollRT()
+    hops = tapper.feed(tone)
+    # One second of audio at the default rate holds 1000 / HOP_MS hops.
+    assert len(hops) == 1000 // HOP_MS
+
+
+def test_sway_output_keys():  # noqa: D103
+    required = {"pitch_rad", "yaw_rad", "roll_rad", "x_mm", "y_mm", "z_mm"}
+    seconds = np.linspace(0, 0.1, int(SR * 0.1), dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    hops = SwayRollRT().feed(tone)
+    assert len(hops) >= 1
+    # Subset check: extra keys in the hop dict are tolerated.
+    assert required.issubset(hops[0])
+
+
+def test_sway_silence_produces_near_zero():  # noqa: D103
+    # Per-key magnitude ceilings that silence must stay under.
+    limits = {"pitch_rad": 0.01, "yaw_rad": 0.01, "x_mm": 0.1}
+    hops = SwayRollRT().feed(np.zeros(SR, dtype=np.float32))
+    for hop in hops:
+        # Checked in the order pitch, yaw, x for every hop.
+        for key, bound in limits.items():
+            assert abs(hop[key]) < bound
+
+
+def test_sway_loud_signal_produces_nonzero():  # noqa: D103
+    seconds = np.linspace(0, 3, SR * 3, dtype=np.float32)
+    loud_tone = (np.sin(2 * np.pi * 300 * seconds) * 0.8).astype(np.float32)
+    hops = SwayRollRT().feed(loud_tone)
+    yaw_magnitudes = [abs(hop["yaw_rad"]) for hop in hops]
+    # Three seconds of loud tone must swing the yaw visibly at least once.
+    assert max(yaw_magnitudes) > 0.01
+
+
+def test_sway_custom_sample_rate():  # noqa: D103
+    """Frame and hop sizes follow the instance's own sample_rate."""
+    rate = 48_000
+    tapper = SwayRollRT(sample_rate=rate)
+    assert tapper.sample_rate == rate
+    assert (tapper.frame, tapper.hop) == (int(rate * 20 / 1000), int(rate * 50 / 1000))
+    # One second at 48 kHz still yields 1000 / HOP_MS hops.
+    seconds = np.linspace(0, 1, rate, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    hops = tapper.feed(tone)
+    assert len(hops) == 1000 // HOP_MS
+
+
+def test_sway_reset_clears_state():  # noqa: D103
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    tapper = SwayRollRT()
+    tapper.feed(tone)  # run some audio through before resetting
+    tapper.reset()
+    assert tapper.t == 0.0
+    assert tapper.vad_on is False
+    assert tapper.carry.size == 0
+
+
+def test_sway_deterministic_with_same_seed():  # noqa: D103
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+
+    # Two independent instances, same seed, each fed its own copy.
+    first_run, second_run = (
+        SwayRollRT(rng_seed=42).feed(tone.copy()) for _ in range(2)
+    )
+
+    assert len(first_run) == len(second_run)
+    # Compare hop by hop with a float tolerance.
+    for hop_a, hop_b in zip(first_run, second_run):
+        assert hop_a == pytest.approx(hop_b)
+
+
+def test_sway_incremental_feeding():  # noqa: D103
+    """Chunked feeding yields as many hops in total as a single big feed."""
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    batch_hops = SwayRollRT(rng_seed=7).feed(tone)
+
+    chunk_size = 1600  # 100 ms at 16 kHz
+    streamer = SwayRollRT(rng_seed=7)
+    streamed_hops = [
+        hop
+        for start in range(0, len(tone), chunk_size)
+        for hop in streamer.feed(tone[start : start + chunk_size])
+    ]
+    assert len(streamed_hops) == len(batch_hops)
+
+
+# ---------------------------------------------------------------------------
+# head_wobbler: HeadWobbler (PTS-driven scheduler, no thread)
+# ---------------------------------------------------------------------------
+
+
+def test_wobbler_schedules_offsets_for_a_tone(monkeypatch):  # noqa: D103
+    from reachy_mini.motion.head_wobbler import HeadWobbler
+
+    timeouts = _patch_glib_timeout(monkeypatch)
+    applied: list[tuple[float, ...]] = []
+    wobbler = HeadWobbler(applied.append, sample_rate=SR)
+    wobbler.start()
+
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    # Play 5 s in the future so that every hop deadline is still ahead.
+    wobbler.feed(tone, time.monotonic_ns() + 5_000_000_000)
+
+    assert len(timeouts) > 0
+    # Stand in for the GLib main loop: run every recorded callback once.
+    for _delay_ms, callback, cb_args in timeouts:
+        callback(*cb_args)
+    assert len(applied) == len(timeouts)
+
+
+def test_wobbler_offsets_are_6_tuples(monkeypatch):  # noqa: D103
+    from reachy_mini.motion.head_wobbler import HeadWobbler
+
+    timeouts = _patch_glib_timeout(monkeypatch)
+    applied: list[tuple[float, ...]] = []
+    wobbler = HeadWobbler(applied.append, sample_rate=SR)
+    wobbler.start()
+
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    future_ns = time.monotonic_ns() + 5_000_000_000
+    wobbler.feed(tone, future_ns)
+    for _delay_ms, callback, cb_args in timeouts:
+        callback(*cb_args)
+
+    assert len(applied) > 0
+    for offsets in applied:
+        assert len(offsets) == 6
+        assert all(isinstance(component, float) for component in offsets)
+
+
+def test_wobbler_stop_zeros_offsets(monkeypatch):  # noqa: D103
+    from reachy_mini.motion.head_wobbler import HeadWobbler
+
+    _patch_glib_timeout(monkeypatch)
+    applied: list[tuple[float, ...]] = []
+    HeadWobbler(applied.append, sample_rate=SR).stop()
+
+    # The last thing stop() hands to the callback is the all-zero tuple.
+    assert applied[-1] == (0.0,) * 6
+
+
+def test_wobbler_reset_zeros_offsets(monkeypatch):  # noqa: D103
+    from reachy_mini.motion.head_wobbler import HeadWobbler
+
+    _patch_glib_timeout(monkeypatch)
+    applied: list[tuple[float, ...]] = []
+    HeadWobbler(applied.append, sample_rate=SR).reset()
+
+    # The last thing reset() hands to the callback is the all-zero tuple.
+    assert applied[-1] == (0.0,) * 6
+
+
+def test_wobbler_stop_cancels_pending(monkeypatch):  # noqa: D103
+    """Timeouts scheduled before stop() do nothing when they fire later."""
+    from reachy_mini.motion.head_wobbler import HeadWobbler
+
+    timeouts = _patch_glib_timeout(monkeypatch)
+    applied: list[tuple[float, ...]] = []
+    wobbler = HeadWobbler(applied.append, sample_rate=SR)
+    wobbler.start()
+
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    future_ns = time.monotonic_ns() + 5_000_000_000
+    wobbler.feed(tone, future_ns)
+    in_flight = tuple(timeouts)
+    assert in_flight  # sanity: something was scheduled
+
+    wobbler.stop()
+    del applied[:]  # drop the zero-offsets call made by stop() itself
+
+    for _delay_ms, callback, cb_args in in_flight:
+        callback(*cb_args)
+    assert not applied  # every in-flight hop was ignored
+
+
+def test_wobbler_start_is_idempotent(monkeypatch):  # noqa: D103
+    from reachy_mini.motion.head_wobbler import HeadWobbler
+
+    _patch_glib_timeout(monkeypatch)
+    wobbler = HeadWobbler(lambda _offsets: None, sample_rate=SR)
+    for _attempt in range(2):
+        wobbler.start()  # a repeated start must not raise
+
+
+def test_wobbler_schedules_hops_at_hop_intervals(monkeypatch):  # noqa: D103
+    """Neighbouring scheduled delays differ by HOP_MS (within 1 ms)."""
+    from reachy_mini.motion.head_wobbler import HeadWobbler
+
+    timeouts = _patch_glib_timeout(monkeypatch)
+    wobbler = HeadWobbler(lambda _offsets: None, sample_rate=SR)
+    wobbler.start()
+
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    future_ns = time.monotonic_ns() + 5_000_000_000
+    wobbler.feed(tone, future_ns)
+
+    delays_ms = [entry[0] for entry in timeouts]
+    assert len(delays_ms) >= 2
+    gaps_ms = [later - earlier for earlier, later in zip(delays_ms, delays_ms[1:])]
+    assert all(abs(gap - HOP_MS) <= 1 for gap in gaps_ms)
+
+
+def test_wobbler_drops_past_deadlines(monkeypatch):  # noqa: D103
+    """No timeout is registered for hops that are already long overdue."""
+    from reachy_mini.motion.head_wobbler import HeadWobbler
+
+    timeouts = _patch_glib_timeout(monkeypatch)
+    wobbler = HeadWobbler(lambda _offsets: None, sample_rate=SR)
+    wobbler.start()
+
+    seconds = np.linspace(0, 1, SR, dtype=np.float32)
+    tone = (np.sin(2 * np.pi * 440 * seconds) * 0.5).astype(np.float32)
+    ten_seconds_ago_ns = time.monotonic_ns() - 10_000_000_000
+    wobbler.feed(tone, ten_seconds_ago_ns)
+
+    assert not timeouts
diff --git a/uv.lock b/uv.lock
index 013979b64..e2e732d3f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1063,6 +1063,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/02/eb/6518a1b00488d48995034226846653c382d676cf5f04be62b3c3fae2c6a1/gpiozero-2.0.1-py3-none-any.whl", hash = "sha256:8f621de357171d574c0b7ea0e358cb66e560818a47b0eeedf41ce1cdbd20c70b", size = 150818, upload-time = "2024-02-15T11:07:00.451Z" },
 ]
 
+[[package]]
+name = "gradio-client"
+version = "2.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "fsspec" },
+    { name = "httpx" },
+    { name = "huggingface-hub" },
+    { name = "packaging" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e8/e6/6b6029f5fe2ad7f1211105d530e34d991014c2cae463f9223033031cfc4f/gradio_client-2.5.0.tar.gz", hash = "sha256:4cde99bad62149595c30c90876ca2e405e3a13687ecf895474f3412cb476673d", size = 59013, upload-time = "2026-04-20T23:16:21.518Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/78/81/0a861b8e1ff42960139c6cd4c7dd591292fa09ea1ae2d87677441cba4c00/gradio_client-2.5.0-py3-none-any.whl", hash = "sha256:d43e2179c29076292a76485ad7ed2e6eaa19d14ac58283bd7f5beabfe4ca958c", size = 59952, upload-time = "2026-04-20T23:16:20.186Z" },
+]
+
 [[package]]
 name = "gstreamer-bundle"
 version = "1.28.3"
@@ -3311,6 +3327,7 @@ dependencies = [
 all = [
     { name = "cv2-enumerate-cameras" },
     { name = "gpiozero", marker = "sys_platform == 'linux'" },
+    { name = "gradio-client" },
     { name = "lgpio", marker = "sys_platform == 'linux'" },
     { name = "mujoco" },
     { name = "nmcli", marker = "sys_platform == 'linux'" },
@@ -3325,6 +3342,7 @@ all = [
 ]
 examples = [
     { name = "cv2-enumerate-cameras" },
+    { name = "gradio-client" },
     { name = "opencv-python" },
     { name = "pynput" },
     { name = "soundfile" },
@@ -3374,6 +3392,7 @@ requires-dist = [
     { name = "cv2-enumerate-cameras", marker = "extra == 'opencv'", specifier = ">=1.2.1" },
     { name = "fastapi" },
     { name = "gpiozero", marker = "sys_platform == 'linux' and extra == 'wireless-version'", specifier = ">=2.0.0" },
+    { name = "gradio-client", marker = "extra == 'examples'" },
     { name = "gstreamer-bundle", marker = "sys_platform != 'linux'", specifier = "==1.28.3" },
     { name = "huggingface-hub", specifier = "==1.3.0" },
     { name = "jinja2" },