Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9e8600a
Add audio-reactive head wobbling feature
FabienDanieau Mar 27, 2026
20a21c6
Add daemon-side head wobbling for wireless version and REST API
FabienDanieau Mar 27, 2026
b7c10b0
temporary fix to be able to test this in simulation, allegedly this i…
RemiFabre Apr 14, 2026
535f8be
Add wobbler versions v1/v2/v3 with CLI selection via --wobbler-version
RemiFabre Apr 14, 2026
2354dbd
Merge branch 'main' into feature/head-wobbler
FabienDanieau Apr 22, 2026
8c6bdf2
fix: clean up merge of main into feature/head-wobbler
FabienDanieau Apr 22, 2026
8ec9569
refactor: drive head wobbler from buffer PTS, drop background thread
FabienDanieau Apr 22, 2026
e158da5
chore: clean up ruff and mypy in motion/
FabienDanieau Apr 22, 2026
6e34964
chore: regenerate openapi.json for wobbling endpoints
FabienDanieau Apr 22, 2026
6a4fffd
refactor: simplify speech tapper, fix wireless audio playback
FabienDanieau Apr 22, 2026
ce84d67
refactor: cache wobbler pipeline, drop dead sink block in play_sound
FabienDanieau Apr 23, 2026
c125be7
fix: disable wobbling at goto_sleep so offsets don't fight sleep pose
FabienDanieau Apr 23, 2026
cb328c5
fix: sync wobbler to audio play-time, split live vs file paths
FabienDanieau Apr 23, 2026
2c66d6a
feat: add TTS + wobbler demo via Qwen3-TTS HF Space
FabienDanieau Apr 23, 2026
0641078
fix: cast untyped gradio-client / GStreamer return values in sound_tts
FabienDanieau Apr 23, 2026
3f8495e
refactor(wobbler): remove unused experimental speech-tapper versions
RemiFabre Apr 29, 2026
fca9f7b
fix: sync wobbler to audio play-time, split live vs file paths
FabienDanieau Apr 29, 2026
c88ddd1
remove buffer copy
FabienDanieau May 4, 2026
7dc92ad
fix(sdk): always re-upload local sound files in play_sound
FabienDanieau May 4, 2026
7274b97
refactor(examples): switch sound_tts demo from Qwen3-TTS to Chatterbox
FabienDanieau May 4, 2026
d45f8f7
fix: share clock+base_time across pipelines and factor PTS helper
FabienDanieau May 19, 2026
2b83f8b
refactor(media): factor a shared bus-message handler
FabienDanieau May 19, 2026
ce02431
chore(media): comment + log-level cleanup
FabienDanieau May 19, 2026
633877d
Merge branch 'main' into feature/head-wobbler
FabienDanieau May 19, 2026
b041ef6
update uv.lock
FabienDanieau May 19, 2026
5dc151f
test: update audio PTS-helper tests for the AudioBase refactor
FabienDanieau May 19, 2026
d1b0552
fix(webrtc): prime audio send chain with 0.5s of silence
FabienDanieau May 20, 2026
cdcb89b
Merge branch 'main' into feature/head-wobbler
FabienDanieau May 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions docs/source/API/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,52 @@
}
}
},
"/api/media/wobbling/enable": {
"post": {
"summary": "Enable Wobbling",
"description": "Enable audio-reactive head wobbling.\n\nWhen enabled, audio played on the daemon (sounds, incoming WebRTC\naudio) is analysed and converted into subtle head movements.",
"operationId": "enable_wobbling_api_media_wobbling_enable_post",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"additionalProperties": {
"type": "string"
},
"type": "object",
"title": "Response Enable Wobbling Api Media Wobbling Enable Post"
}
}
}
}
}
}
},
"/api/media/wobbling/disable": {
"post": {
"summary": "Disable Wobbling",
"description": "Disable audio-reactive head wobbling and reset offsets.",
"operationId": "disable_wobbling_api_media_wobbling_disable_post",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"additionalProperties": {
"type": "string"
},
"type": "object",
"title": "Response Disable Wobbling Api Media Wobbling Disable Post"
}
}
}
}
}
}
},
"/api/media/sounds/upload": {
"post": {
"summary": "Upload Sound",
Expand Down
2 changes: 2 additions & 0 deletions docs/source/_toctree.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@
title: Sound Playback
- local: examples/sound_record
title: Sound Recording
- local: examples/sound_tts
title: Sound TTS (with head wobbling)
- local: examples/custom_media_manager
title: Custom Media Manager
title: Examples
Expand Down
46 changes: 46 additions & 0 deletions docs/source/examples/sound_tts.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Sound TTS (with head wobbling)

This example synthesizes speech from text via ResembleAI's
[Chatterbox Multilingual TTS](https://huggingface.co/spaces/ResembleAI/Chatterbox-Multilingual-TTS)
Hugging Face Space, plays the returned audio on Reachy Mini, and
wobbles the head in sync with the speech.

Chatterbox supports zero-shot voice cloning: pass a short reference
audio file and the synthesis matches that voice. 23 languages are
supported.

**Usage:**

```bash
# Default English voice
uv run python examples/sound_tts.py --text "Hello, I can wobble my head!"

# Different language
uv run python examples/sound_tts.py --text "Bonjour, je suis Reachy Mini" --lang fr

# Clone a voice from a local sample
uv run python examples/sound_tts.py \
--text "Hello world" \
--ref-audio ~/Downloads/my_voice.wav
```

**Options:**

- `--text <str>`: Text to synthesize (max 300 chars per request).
- `--lang <code>`: ISO 639-1 language code. Supported: `ar`, `da`,
`de`, `el`, `en`, `es`, `fi`, `fr`, `he`, `hi`, `it`, `ja`, `ko`,
`ms`, `nl`, `no`, `pl`, `pt`, `ru`, `sv`, `sw`, `tr`, `zh`.
- `--ref-audio <path|url>`: Reference audio for zero-shot voice
cloning. Local paths and URLs both work; defaults to a Gradio
sample voice.

Synthesis runs on the Space's shared GPU and typically takes
60–90 s per sentence.

<literalinclude>
{"path": "../../../examples/sound_tts.py",
"language": "python",
"start-after": "START doc_example",
"end-before": "END doc_example"
}
</literalinclude>
18 changes: 16 additions & 2 deletions examples/sound_play.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,13 @@ def play_live_tone(mini: "ReachyMini", tone_hz: float) -> None:
mini.media.stop_playing()


def main(backend: str, wav_path: str | None, tone_hz: float) -> None:
def main(
backend: str, wav_path: str | None, tone_hz: float, wobbling: bool = False
) -> None:
"""Run the sound playback example."""
with ReachyMini(log_level="DEBUG", media_backend=backend) as mini:
if wobbling:
mini.enable_wobbling()
if wav_path:
play_wav(mini, wav_path)
else:
Expand Down Expand Up @@ -90,8 +94,18 @@ def main(backend: str, wav_path: str | None, tone_hz: float) -> None:
type=float,
help="Sine wave frequency in Hz (--live mode only).",
)
parser.add_argument(
"--wobbling",
action="store_true",
help="Enable audio-reactive head wobbling.",
)

args = parser.parse_args()
main(backend=args.backend, wav_path=args.wav, tone_hz=args.tone_hz)
main(
backend=args.backend,
wav_path=args.wav,
tone_hz=args.tone_hz,
wobbling=args.wobbling,
)

# END doc_example
108 changes: 108 additions & 0 deletions examples/sound_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""TTS demo with head wobbling.

Sends text to ResembleAI's Chatterbox Multilingual TTS Hugging Face
Space (zero-shot voice cloning, 23 languages), plays the returned
audio on Reachy Mini, and wobbles the head in sync.

Usage::

uv run python examples/sound_tts.py --text "Hello world"
uv run python examples/sound_tts.py --text "Bonjour" --lang fr
uv run python examples/sound_tts.py --text "..." --ref-audio /path/to/voice.wav

Browse the Space: https://huggingface.co/spaces/ResembleAI/Chatterbox-Multilingual-TTS
"""

# START doc_example

import argparse
import os
import time

import gi
from gradio_client import Client, handle_file

gi.require_version("Gst", "1.0")
gi.require_version("GstPbutils", "1.0")
from gi.repository import Gst, GstPbutils # noqa: E402

from reachy_mini import ReachyMini # noqa: E402

# Identifier of the Hugging Face Space that the gradio client connects to.
HF_SPACE = "ResembleAI/Chatterbox-Multilingual-TTS"
# ISO 639-1 codes offered as the valid choices of the --lang CLI option.
LANGUAGES = [
"ar", "da", "de", "el", "en", "es", "fi", "fr", "he", "hi", "it",
"ja", "ko", "ms", "nl", "no", "pl", "pt", "ru", "sv", "sw", "tr", "zh",
]
# Reference voice sample used when --ref-audio is not given on the command line.
DEFAULT_REF_AUDIO = (
"https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav"
)


def synthesize(text: str, lang: str, ref_audio: str) -> str:
    """Generate speech for *text* with Chatterbox and return the audio file path.

    Args:
        text: Text to synthesize.
        lang: Language code forwarded to the Space as ``language_id``.
        ref_audio: Reference voice sample, either an ``http(s)`` URL or a
            local path (a leading ``~`` is expanded for local paths).

    Returns:
        The value returned by the Space's ``/generate_tts_audio`` endpoint,
        converted to ``str`` (a path to a local audio file).

    """
    # URLs are forwarded untouched; anything else is treated as a local path.
    is_url = ref_audio.startswith(("http://", "https://"))
    voice_sample = ref_audio if is_url else os.path.expanduser(ref_audio)

    result = Client(HF_SPACE).predict(
        text_input=text,
        language_id=lang,
        audio_prompt_path_input=handle_file(voice_sample),
        api_name="/generate_tts_audio",
    )
    return str(result)


def probe_duration_s(path: str) -> float:
    """Return the media duration of *path* in seconds via GStreamer.

    Args:
        path: Filesystem path of the media file to inspect. May be relative
            and may contain characters that need escaping in a URI.

    Returns:
        The duration reported by the GStreamer discoverer, in seconds.

    """
    Gst.init([])
    # The argument is the discovery timeout: 10 seconds.
    disc = GstPbutils.Discoverer.new(10 * Gst.SECOND)
    # Build the URI with GStreamer's helper instead of "file://" + path:
    # it percent-escapes special characters (spaces, '#', '%', ...) and
    # resolves relative paths, both of which plain concatenation gets wrong.
    uri = Gst.filename_to_uri(path)
    info = disc.discover_uri(uri)
    return float(info.get_duration() / Gst.SECOND)


def main(text: str, lang: str, ref_audio: str) -> None:
    """Synthesize *text*, play it on Reachy Mini with wobbling enabled.

    Args:
        text: Text to synthesize.
        lang: Language code passed to the TTS backend.
        ref_audio: Reference voice sample (URL or local path).

    """
    print(f"Synthesizing {len(text)} chars ({lang}) with Chatterbox...")
    audio_path = synthesize(text, lang, ref_audio)
    duration = probe_duration_s(audio_path)
    print(f"Got {audio_path} ({duration:.1f}s)")

    with ReachyMini(log_level="INFO") as mini:
        mini.enable_wobbling()
        try:
            mini.media.play_sound(audio_path)
            # Wait for the probed clip duration plus a 0.5 s margin before
            # switching wobbling off again.
            time.sleep(duration + 0.5)
        finally:
            # Always switch wobbling off, even if playback fails or the user
            # interrupts the wait with Ctrl-C; otherwise it stays enabled
            # after this script exits.
            mini.disable_wobbling()
    print("Done.")


if __name__ == "__main__":
    # Command-line entry point: declare the options in one table, register
    # them in order, then hand the parsed values to main().
    cli = argparse.ArgumentParser(
        description="Chatterbox Multilingual TTS + head wobbler demo.",
    )
    option_specs = {
        "--text": {
            "type": str,
            "default": "Hello, I am Reachy Mini. Let me wobble my head while I speak.",
            "help": "Text to synthesize (max 300 chars per request).",
        },
        "--lang": {
            "type": str,
            "default": "en",
            "choices": LANGUAGES,
            "help": "Language code (ISO 639-1).",
        },
        "--ref-audio": {
            "type": str,
            "default": DEFAULT_REF_AUDIO,
            "help": "Reference audio (URL or local path) for zero-shot voice cloning.",
        },
    }
    for flag, spec in option_specs.items():
        cli.add_argument(flag, **spec)

    opts = cli.parse_args()
    main(text=opts.text, lang=opts.lang, ref_audio=opts.ref_audio)

# END doc_example
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ examples = [
"soundfile",
"opencv-python<=5.0",
"cv2_enumerate_cameras>=1.2.1",
"gradio-client"
]
mujoco = ["mujoco==3.3.0"]
nn_kinematics = ["onnxruntime==1.22.1"]
Expand All @@ -73,7 +74,7 @@ all = [
"reachy_mini[placo_kinematics]",
"reachy_mini[rerun]",
"reachy_mini[wireless-version]",
"reachy_mini[opencv]",
"reachy_mini[opencv]"
]

[dependency-groups]
Expand Down
33 changes: 33 additions & 0 deletions src/reachy_mini/daemon/app/routers/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,39 @@ async def stop_sound(
return {"status": "ok"}


@router.post("/wobbling/enable")
async def enable_wobbling(
    daemon: Daemon = Depends(get_daemon),
) -> dict[str, str]:
    """Enable audio-reactive head wobbling.

    When enabled, audio played on the daemon (sounds, incoming WebRTC
    audio) is analysed and converted into subtle head movements.
    """
    # NOTE: the docstring above matches the endpoint description in
    # docs/source/API/openapi.json; keep the two in sync when editing it.
    robot_backend = daemon.backend
    # Answer 503 while there is no backend or it has not signalled readiness.
    if robot_backend is None or not robot_backend.ready.is_set():
        raise HTTPException(status_code=503, detail="Backend not running")

    # The media server may be absent; in that case the call is a no-op that
    # still reports success.
    media_server = robot_backend._media_server
    if media_server is not None:
        media_server.enable_wobbling(robot_backend.set_speech_offsets)
    return {"status": "ok"}


@router.post("/wobbling/disable")
async def disable_wobbling(
daemon: Daemon = Depends(get_daemon),
) -> dict[str, str]:
"""Disable audio-reactive head wobbling and reset offsets."""
# NOTE: the docstring above matches the endpoint description in
# docs/source/API/openapi.json; keep the two in sync when editing it.
backend = daemon.backend
# Answer 503 while there is no backend or it has not signalled readiness.
if backend is None or not backend.ready.is_set():
raise HTTPException(status_code=503, detail="Backend not running")

# The media server may be absent; only call into it when one is attached.
if backend._media_server is not None:
backend._media_server.disable_wobbling()
# Write an all-zero, six-component speech offset back to the backend.
# NOTE(review): presumably this returns the head to its un-wobbled pose — confirm.
backend.set_speech_offsets((0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
return {"status": "ok"}


@router.post("/sounds/upload")
async def upload_sound(
file: UploadFile = File(...),
Expand Down
Loading
Loading