diff --git a/docs/source/API/openapi.json b/docs/source/API/openapi.json index 1865115ba..05aec8dea 100644 --- a/docs/source/API/openapi.json +++ b/docs/source/API/openapi.json @@ -1235,6 +1235,52 @@ } } }, + "/api/media/wobbling/enable": { + "post": { + "summary": "Enable Wobbling", + "description": "Enable audio-reactive head wobbling.\n\nWhen enabled, audio played on the daemon (sounds, incoming WebRTC\naudio) is analysed and converted into subtle head movements.", + "operationId": "enable_wobbling_api_media_wobbling_enable_post", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Response Enable Wobbling Api Media Wobbling Enable Post" + } + } + } + } + } + } + }, + "/api/media/wobbling/disable": { + "post": { + "summary": "Disable Wobbling", + "description": "Disable audio-reactive head wobbling and reset offsets.", + "operationId": "disable_wobbling_api_media_wobbling_disable_post", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Response Disable Wobbling Api Media Wobbling Disable Post" + } + } + } + } + } + } + }, "/api/media/sounds/upload": { "post": { "summary": "Upload Sound", diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index e7f41b4ef..bf5c80351 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -106,6 +106,8 @@ title: Sound Playback - local: examples/sound_record title: Sound Recording + - local: examples/sound_tts + title: Sound TTS (with head wobbling) - local: examples/custom_media_manager title: Custom Media Manager title: Examples diff --git a/docs/source/examples/sound_tts.md b/docs/source/examples/sound_tts.md new file mode 100644 index 000000000..f9b7f08d2 --- /dev/null +++ 
b/docs/source/examples/sound_tts.md @@ -0,0 +1,46 @@ +# Sound TTS (with head wobbling) + +This example synthesizes speech from text via ResembleAI's +[Chatterbox Multilingual TTS](https://huggingface.co/spaces/ResembleAI/Chatterbox-Multilingual-TTS) +Hugging Face Space, plays the returned audio on Reachy Mini, and +wobbles the head in sync with the speech. + +Chatterbox supports zero-shot voice cloning: pass a short reference +audio file and the synthesis matches that voice. 23 languages are +supported. + +**Usage:** + +```bash +# Default English voice +uv run python examples/sound_tts.py --text "Hello, I can wobble my head!" + +# Different language +uv run python examples/sound_tts.py --text "Bonjour, je suis Reachy Mini" --lang fr + +# Clone a voice from a local sample +uv run python examples/sound_tts.py \ + --text "Hello world" \ + --ref-audio ~/Downloads/my_voice.wav +``` + +**Options:** + +- `--text <TEXT>`: Text to synthesize (max 300 chars per request). +- `--lang <LANG>`: ISO 639-1 language code. Supported: `ar`, `da`, + `de`, `el`, `en`, `es`, `fi`, `fr`, `he`, `hi`, `it`, `ja`, `ko`, + `ms`, `nl`, `no`, `pl`, `pt`, `ru`, `sv`, `sw`, `tr`, `zh`. +- `--ref-audio <PATH_OR_URL>`: Reference audio for zero-shot voice + cloning. Local paths and URLs both work; defaults to a Gradio + sample voice. + +Synthesis runs on the Space's shared GPU and typically takes +60–90 s per sentence. 
+ +<literalinclude> +{"path": "../../../examples/sound_tts.py", +"language": "python", +"start-after": "START doc_example", +"end-before": "END doc_example" +} +</literalinclude> diff --git a/examples/sound_play.py b/examples/sound_play.py index acc282359..e0155d220 100644 --- a/examples/sound_play.py +++ b/examples/sound_play.py @@ -51,9 +51,13 @@ def play_live_tone(mini: "ReachyMini", tone_hz: float) -> None: mini.media.stop_playing() -def main(backend: str, wav_path: str | None, tone_hz: float) -> None: +def main( + backend: str, wav_path: str | None, tone_hz: float, wobbling: bool = False +) -> None: """Run the sound playback example.""" with ReachyMini(log_level="DEBUG", media_backend=backend) as mini: + if wobbling: + mini.enable_wobbling() if wav_path: play_wav(mini, wav_path) else: @@ -90,8 +94,18 @@ def main(backend: str, wav_path: str | None, tone_hz: float) -> None: type=float, help="Sine wave frequency in Hz (--live mode only).", ) + parser.add_argument( + "--wobbling", + action="store_true", + help="Enable audio-reactive head wobbling.", + ) args = parser.parse_args() - main(backend=args.backend, wav_path=args.wav, tone_hz=args.tone_hz) + main( + backend=args.backend, + wav_path=args.wav, + tone_hz=args.tone_hz, + wobbling=args.wobbling, + ) # END doc_example diff --git a/examples/sound_tts.py b/examples/sound_tts.py new file mode 100644 index 000000000..98bb8837e --- /dev/null +++ b/examples/sound_tts.py @@ -0,0 +1,108 @@ +"""TTS demo with head wobbling. + +Sends text to ResembleAI's Chatterbox Multilingual TTS Hugging Face +Space (zero-shot voice cloning, 23 languages), plays the returned +audio on Reachy Mini, and wobbles the head in sync. + +Usage:: + + uv run python examples/sound_tts.py --text "Hello world" + uv run python examples/sound_tts.py --text "Bonjour" --lang fr + uv run python examples/sound_tts.py --text "..." 
--ref-audio /path/to/voice.wav + +Browse the Space: https://huggingface.co/spaces/ResembleAI/Chatterbox-Multilingual-TTS +""" + +# START doc_example + +import argparse +import os +import time + +import gi +from gradio_client import Client, handle_file + +gi.require_version("Gst", "1.0") +gi.require_version("GstPbutils", "1.0") +from gi.repository import Gst, GstPbutils # noqa: E402 + +from reachy_mini import ReachyMini # noqa: E402 + +HF_SPACE = "ResembleAI/Chatterbox-Multilingual-TTS" +LANGUAGES = [ + "ar", "da", "de", "el", "en", "es", "fi", "fr", "he", "hi", "it", + "ja", "ko", "ms", "nl", "no", "pl", "pt", "ru", "sv", "sw", "tr", "zh", +] +DEFAULT_REF_AUDIO = ( + "https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav" +) + + +def synthesize(text: str, lang: str, ref_audio: str) -> str: + """Submit *text* to Chatterbox; return a path to a local audio file.""" + if not ref_audio.startswith(("http://", "https://")): + ref_audio = os.path.expanduser(ref_audio) + client = Client(HF_SPACE) + audio_path = client.predict( + text_input=text, + language_id=lang, + audio_prompt_path_input=handle_file(ref_audio), + api_name="/generate_tts_audio", + ) + return str(audio_path) + + +def probe_duration_s(path: str) -> float: + """Return the media duration of *path* in seconds via GStreamer.""" + Gst.init([]) + disc = GstPbutils.Discoverer.new(10 * Gst.SECOND) + info = disc.discover_uri(f"file://{path}") + return float(info.get_duration() / Gst.SECOND) + + +def main(text: str, lang: str, ref_audio: str) -> None: + """Synthesize *text*, play it on Reachy Mini with wobbling enabled.""" + print(f"Synthesizing {len(text)} chars ({lang}) with Chatterbox...") + audio_path = synthesize(text, lang, ref_audio) + duration = probe_duration_s(audio_path) + print(f"Got {audio_path} ({duration:.1f}s)") + + with ReachyMini(log_level="INFO") as mini: + mini.enable_wobbling() + mini.media.play_sound(audio_path) + time.sleep(duration + 0.5) + mini.disable_wobbling() + 
print("Done.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Chatterbox Multilingual TTS + head wobbler demo.", + ) + parser.add_argument( + "--text", + type=str, + default="Hello, I am Reachy Mini. Let me wobble my head while I speak.", + help="Text to synthesize (max 300 chars per request).", + ) + parser.add_argument( + "--lang", + type=str, + default="en", + choices=LANGUAGES, + help="Language code (ISO 639-1).", + ) + parser.add_argument( + "--ref-audio", + type=str, + default=DEFAULT_REF_AUDIO, + help="Reference audio (URL or local path) for zero-shot voice cloning.", + ) + args = parser.parse_args() + main( + text=args.text, + lang=args.lang, + ref_audio=args.ref_audio, + ) + +# END doc_example diff --git a/pyproject.toml b/pyproject.toml index 916b37de2..d0968c382 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ examples = [ "soundfile", "opencv-python<=5.0", "cv2_enumerate_cameras>=1.2.1", + "gradio-client" ] mujoco = ["mujoco==3.3.0"] nn_kinematics = ["onnxruntime==1.22.1"] @@ -73,7 +74,7 @@ all = [ "reachy_mini[placo_kinematics]", "reachy_mini[rerun]", "reachy_mini[wireless-version]", - "reachy_mini[opencv]", + "reachy_mini[opencv]" ] [dependency-groups] diff --git a/src/reachy_mini/daemon/app/routers/media.py b/src/reachy_mini/daemon/app/routers/media.py index 79d59924c..a938ddd1e 100644 --- a/src/reachy_mini/daemon/app/routers/media.py +++ b/src/reachy_mini/daemon/app/routers/media.py @@ -95,6 +95,39 @@ async def stop_sound( return {"status": "ok"} +@router.post("/wobbling/enable") +async def enable_wobbling( + daemon: Daemon = Depends(get_daemon), +) -> dict[str, str]: + """Enable audio-reactive head wobbling. + + When enabled, audio played on the daemon (sounds, incoming WebRTC + audio) is analysed and converted into subtle head movements. 
+ """ + backend = daemon.backend + if backend is None or not backend.ready.is_set(): + raise HTTPException(status_code=503, detail="Backend not running") + + if backend._media_server is not None: + backend._media_server.enable_wobbling(backend.set_speech_offsets) + return {"status": "ok"} + + +@router.post("/wobbling/disable") +async def disable_wobbling( + daemon: Daemon = Depends(get_daemon), +) -> dict[str, str]: + """Disable audio-reactive head wobbling and reset offsets.""" + backend = daemon.backend + if backend is None or not backend.ready.is_set(): + raise HTTPException(status_code=503, detail="Backend not running") + + if backend._media_server is not None: + backend._media_server.disable_wobbling() + backend.set_speech_offsets((0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + return {"status": "ok"} + + @router.post("/sounds/upload") async def upload_sound( file: UploadFile = File(...), diff --git a/src/reachy_mini/daemon/backend/abstract.py b/src/reachy_mini/daemon/backend/abstract.py index 66ce7abd9..f8c68c9ac 100644 --- a/src/reachy_mini/daemon/backend/abstract.py +++ b/src/reachy_mini/daemon/backend/abstract.py @@ -56,9 +56,11 @@ SetHeadJointsCmd, SetMicrophoneVolumeCmd, SetMotorModeCmd, + SetSpeechOffsetsCmd, SetTargetCmd, SetTorqueCmd, SetVolumeCmd, + SetWobblingCmd, StartRecordingCmd, StopRecordingCmd, SubscribeLogsCmd, @@ -80,9 +82,11 @@ from reachy_mini.media.audio_doa import AudioDoA from reachy_mini.motion.goto import GotoMove from reachy_mini.motion.move import Move +from reachy_mini.utils import create_head_pose from reachy_mini.utils.constants import MODELS_ROOT_PATH, URDF_ROOT_PATH from reachy_mini.utils.interpolation import ( InterpolationTechnique, + compose_world_offset, distance_between_poses, time_trajectory, ) @@ -286,6 +290,11 @@ def __init__( tempfile.gettempdir(), "reachy-mini-uploads", "audio" ) + # Head wobbler speech offsets (x_m, y_m, z_m, roll_rad, pitch_rad, yaw_rad) + self._speech_offsets: tuple[float, float, float, float, float, float] = ( 
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + ) + # WebRTC support self._send_message_to_webrtc: Optional[Callable[[Optional[str], str], None]] = ( None @@ -425,6 +434,16 @@ def update_target_head_joints_from_ik( if body_yaw is None: body_yaw = self.target_body_yaw if self.target_body_yaw is not None else 0.0 + # Compose speech wobbler offsets (if any) before IK + if any(o != 0.0 for o in self._speech_offsets): + x_m, y_m, z_m, roll_r, pitch_r, yaw_r = self._speech_offsets + offset_pose = create_head_pose( + x=x_m, y=y_m, z=z_m, + roll=roll_r, pitch=pitch_r, yaw=yaw_r, + degrees=False, + ) + pose = compose_world_offset(pose, offset_pose) + # Compute the inverse kinematics to get the head joint positions joints = self.head_kinematics.ik(pose, body_yaw=body_yaw) if joints is None or np.any(np.isnan(joints)): @@ -502,6 +521,20 @@ def set_target_antenna_joint_positions( """ self.target_antenna_joint_positions = positions + def set_speech_offsets( + self, + offsets: tuple[float, float, float, float, float, float], + ) -> None: + """Set head wobbler speech offsets, composed with target pose before IK. + + Args: + offsets: ``(x_m, y_m, z_m, roll_rad, pitch_rad, yaw_rad)`` in + world frame. Zero tuple disables the offset. + + """ + self._speech_offsets = offsets + self.ik_required = True + def set_target_head_joint_current( self, current: Annotated[NDArray[np.float64], (7,)], @@ -926,6 +959,12 @@ async def goto_sleep(self) -> None: - If we are far from the initial position, we move there first. - If we are close to the initial position, we move directly to the sleep position. """ + # Stop head wobbling so leftover speech offsets don't fight the + # sleep pose during the goto. 
+ if self._media_server is not None: + self._media_server.disable_wobbling() + self.set_speech_offsets((0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + # Magic units _, _, dist_to_sleep_pose = distance_between_poses( self.get_current_head_pose(), self.SLEEP_HEAD_POSE @@ -1108,6 +1147,23 @@ def _maybe_ignore(field: str) -> bool: self.play_sound(cmd.file) send_response({"status": "ok", "command": "play_sound"}) + elif isinstance(cmd, SetSpeechOffsetsCmd): + offsets = cmd.offsets + if len(offsets) == 6: + self.set_speech_offsets( + (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5]) + ) + send_response({"status": "ok", "command": "set_speech_offsets"}) + + elif isinstance(cmd, SetWobblingCmd): + if self._media_server is not None: + if cmd.enabled: + self._media_server.enable_wobbling(self.set_speech_offsets) + else: + self._media_server.disable_wobbling() + self.set_speech_offsets((0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + send_response({"status": "ok", "command": "set_wobbling"}) + elif isinstance(cmd, SetMotorModeCmd): self.set_motor_control_mode(MotorControlMode(cmd.mode)) send_response({"motor_mode": cmd.mode, "status": "ok"}) diff --git a/src/reachy_mini/io/protocol.py b/src/reachy_mini/io/protocol.py index d4ab0fce6..9b9dbc67c 100644 --- a/src/reachy_mini/io/protocol.py +++ b/src/reachy_mini/io/protocol.py @@ -279,6 +279,18 @@ class GetMicrophoneVolumeCmd(BaseModel): type: Literal["get_microphone_volume"] = "get_microphone_volume" +class SetSpeechOffsetsCmd(BaseModel): + """Set head-wobbler speech offsets (composed with target pose before IK).""" + + type: Literal["set_speech_offsets"] = "set_speech_offsets" + offsets: list[float] # [x_m, y_m, z_m, roll_rad, pitch_rad, yaw_rad] + + +class SetWobblingCmd(BaseModel): + """Enable or disable daemon-side audio-reactive head wobbling.""" + + type: Literal["set_wobbling"] = "set_wobbling" + enabled: bool # ------------------------------------------------------------------ # Daemon log streaming over the DataChannel. 
@@ -621,6 +633,8 @@ class CancelAudioCmd(BaseModel): | StartRecordingCmd | StopRecordingCmd | AppendRecordCmd + | SetSpeechOffsetsCmd + | SetWobblingCmd | SetVolumeCmd | GetVolumeCmd | SetMicrophoneVolumeCmd diff --git a/src/reachy_mini/media/audio_base.py b/src/reachy_mini/media/audio_base.py index 5a0880930..9c066401c 100644 --- a/src/reachy_mini/media/audio_base.py +++ b/src/reachy_mini/media/audio_base.py @@ -19,6 +19,7 @@ from abc import ABC, abstractmethod from typing import Optional +import gi import numpy as np import numpy.typing as npt @@ -28,7 +29,10 @@ init_respeaker_usb, ) from reachy_mini.media.audio_doa import AudioDoA -from reachy_mini.media.gstreamer_utils import get_sample +from reachy_mini.media.gstreamer_utils import get_sample, handle_default_bus_message + +gi.require_version("Gst", "1.0") +from gi.repository import Gst # noqa: E402 class AudioBase(ABC): @@ -37,17 +41,57 @@ class AudioBase(ABC): Attributes: SAMPLE_RATE: Default sample rate (16 000 Hz — ReSpeaker hardware). CHANNELS: Number of audio channels (2 — stereo). + GAP_RESET_NS: PTS-continuity threshold for ``_compute_pts``. + If the gap between the next expected PTS and the appsrc's + current running-time exceeds this value, we treat it as a + new utterance and re-anchor to running-time. """ SAMPLE_RATE = 16000 CHANNELS = 2 + GAP_RESET_NS = 200_000_000 # 200 ms def __init__(self, log_level: str = "INFO") -> None: """Initialize shared audio attributes (DoA helper).""" self.logger = logging.getLogger(type(self).__module__) self.logger.setLevel(log_level) self._doa = AudioDoA() + # Next expected PTS for the playback / send appsrc; -1 means + # "no previous buffer, anchor to running-time on next push". + self._appsrc_pts: int = -1 + + def _compute_pts( + self, + num_samples: int, + running_time_ns: int, + next_pts_ns: int, + ) -> tuple[int, int, int]: + """Return ``(pts_ns, duration_ns, next_pts_ns)`` for an appsrc buffer. 
+ + Anchors PTS to ``running_time_ns`` when ``next_pts_ns`` is + negative (sentinel for "no previous") or the gap is larger + than ``GAP_RESET_NS``; otherwise continues the previous + stream's PTS to keep audio contiguous across consecutive + push calls. + """ + duration_ns = (num_samples * 1_000_000_000) // self.SAMPLE_RATE + if next_pts_ns < 0 or running_time_ns > next_pts_ns + self.GAP_RESET_NS: + pts_ns = running_time_ns + else: + pts_ns = next_pts_ns + return pts_ns, duration_ns, pts_ns + duration_ns + + def _on_bus_message( + self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline + ) -> bool: + """Delegate to the shared default-bus-message helper. + + Subclasses can override to add custom behaviour, then return + ``super()._on_bus_message(bus, msg, pipeline)`` to keep the + default handling. + """ + return handle_default_bus_message(self.logger, msg, pipeline) def get_audio_sample(self) -> Optional[npt.NDArray[np.float32]]: """Pull the next recorded audio chunk. diff --git a/src/reachy_mini/media/audio_gstreamer.py b/src/reachy_mini/media/audio_gstreamer.py index 59789843b..6b9981cab 100644 --- a/src/reachy_mini/media/audio_gstreamer.py +++ b/src/reachy_mini/media/audio_gstreamer.py @@ -53,6 +53,8 @@ import os import platform +import time +from collections.abc import Callable from threading import Thread from typing import Optional @@ -62,6 +64,7 @@ from reachy_mini.media.audio_base import AudioBase from reachy_mini.media.audio_utils import has_reachymini_asoundrc from reachy_mini.media.device_detection import get_audio_device +from reachy_mini.motion.head_wobbler import HeadWobbler, SpeechOffsets from reachy_mini.utils.constants import ASSETS_ROOT_PATH try: @@ -90,7 +93,6 @@ class GStreamerAudio(AudioBase): """ - PLAYBACK_GAP_RESET_NS = 200 * Gst.MSECOND PLAYBACK_SINK_BUFFER_TIME_US = 50_000 PLAYBACK_SINK_LATENCY_TIME_US = 5_000 @@ -105,6 +107,8 @@ def __init__(self, log_level: str = "INFO") -> None: """ super().__init__(log_level=log_level) + 
self._head_wobbler: Optional[HeadWobbler] = None + Gst.init([]) self._loop = GLib.MainLoop() self._thread_bus_calls = Thread(target=lambda: self._loop.run(), daemon=True) @@ -114,36 +118,17 @@ def __init__(self, log_level: str = "INFO") -> None: self._init_pipeline_record(self._pipeline_record) self._bus_record = self._pipeline_record.get_bus() self._bus_record.add_watch( - GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop + GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_record ) self._playbin: Optional[Gst.Element] = None self._pipeline_playback = Gst.Pipeline.new("audio_player") - self._playback_next_pts_ns: int | None = None self._init_pipeline_playback(self._pipeline_playback) self._bus_playback = self._pipeline_playback.get_bus() self._bus_playback.add_watch( - GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop + GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_playback ) - def _compute_playback_buffer_timing( - self, - num_samples: int, - sample_rate: int, - running_time_ns: int, - next_pts_ns: int | None, - gap_reset_ns: int | None = None, - ) -> tuple[int, int, int]: - """Return ``(pts_ns, duration_ns, next_pts_ns)`` for a playback buffer.""" - if gap_reset_ns is None: - gap_reset_ns = self.PLAYBACK_GAP_RESET_NS - duration_ns = (num_samples * Gst.SECOND) // sample_rate - if next_pts_ns is None or running_time_ns > next_pts_ns + gap_reset_ns: - pts_ns = running_time_ns - else: - pts_ns = next_pts_ns - return pts_ns, duration_ns, pts_ns + duration_ns - def _init_pipeline_record(self, pipeline: Gst.Pipeline) -> None: self._appsink_audio = Gst.ElementFactory.make("appsink") caps = Gst.Caps.from_string( @@ -197,24 +182,11 @@ def _init_pipeline_record(self, pipeline: Gst.Pipeline) -> None: audioconvert.link(audioresample) audioresample.link(self._appsink_audio) - def _init_pipeline_playback(self, pipeline: Gst.Pipeline) -> None: - self._appsrc = Gst.ElementFactory.make("appsrc") - self._appsrc.set_property("do-timestamp", False) 
- self._appsrc.set_property("format", Gst.Format.TIME) - self._appsrc.set_property("is-live", True) - caps = Gst.Caps.from_string( - f"audio/x-raw,format=F32LE,channels={self.CHANNELS},rate={self.SAMPLE_RATE},layout=interleaved" - ) - self._appsrc.set_property("caps", caps) - - audioconvert = Gst.ElementFactory.make("audioconvert") - audioresample = Gst.ElementFactory.make("audioresample") - + def _build_audiosink_element(self) -> Gst.Element: + """Create a platform-appropriate audio sink element.""" audiosink: Optional[Gst.Element] = None if has_reachymini_asoundrc(): - # Wireless CM4: use the preconfigured .asoundrc ALSA devices - # which route through the XMOS AEC loopback properly. audiosink = Gst.ElementFactory.make("alsasink") audiosink.set_property("device", "reachymini_audio_sink") self.logger.info("Using .asoundrc audio sink: reachymini_audio_sink") @@ -225,9 +197,7 @@ def _init_pipeline_playback(self, pipeline: Gst.Pipeline) -> None: self.logger.warning( "No specific audio card found, using default audio sink." 
) - audiosink = Gst.ElementFactory.make( - "autoaudiosink" - ) # use default speaker + audiosink = Gst.ElementFactory.make("autoaudiosink") elif platform.system() == "Windows": audiosink = Gst.ElementFactory.make("wasapi2sink") audiosink.set_property("device", id_audio_card) @@ -238,50 +208,173 @@ def _init_pipeline_playback(self, pipeline: Gst.Pipeline) -> None: audiosink = Gst.ElementFactory.make("pulsesink") audiosink.set_property("device", f"{id_audio_card}") - if audiosink is not None: - if audiosink.find_property("buffer-time") is not None: - audiosink.set_property("buffer-time", self.PLAYBACK_SINK_BUFFER_TIME_US) - if audiosink.find_property("latency-time") is not None: - audiosink.set_property("latency-time", self.PLAYBACK_SINK_LATENCY_TIME_US) + if audiosink is None: + raise RuntimeError("Failed to create audio sink element") - queue = Gst.ElementFactory.make("queue") + if audiosink.find_property("buffer-time") is not None: + audiosink.set_property("buffer-time", self.PLAYBACK_SINK_BUFFER_TIME_US) + if audiosink.find_property("latency-time") is not None: + audiosink.set_property("latency-time", self.PLAYBACK_SINK_LATENCY_TIME_US) - pipeline.add(audiosink) - pipeline.add(self._appsrc) - pipeline.add(audioconvert) - pipeline.add(audioresample) - pipeline.add(queue) + return audiosink - self._appsrc.link(audioconvert) - audioconvert.link(audioresample) - audioresample.link(queue) - queue.link(audiosink) + def _make_wobbler_appsink(self) -> Gst.Element: + """Create an appsink that feeds audio to the head wobbler. - def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool: # type: ignore[no-untyped-def] - t = msg.type - if t == Gst.MessageType.EOS: - self.logger.warning("End-of-stream") - return False + ``sync=True`` so new-sample fires at the buffer's PTS on the + pipeline clock — i.e. when the audiosink outputs it. 
The local + pipeline has a deterministic clock and no network jitter, so + PTS-based sync gives correct A/V timing for both playbin + (``play_sound``) and push (``push_audio_sample``) paths. + """ + appsink = Gst.ElementFactory.make("appsink") + # Force mono so the speech tapper receives a 1-D float32 array. + # The per-branch audioconvert in _build_audiosink_tee_bin / + # _init_pipeline_playback handles the downmix. + caps = Gst.Caps.from_string( + f"audio/x-raw,format=F32LE,channels=1," + f"rate={self.SAMPLE_RATE},layout=interleaved" + ) + appsink.set_property("caps", caps) + appsink.set_property("drop", True) + appsink.set_property("max-buffers", 5) + appsink.set_property("sync", True) + appsink.set_property("emit-signals", True) + appsink.connect("new-sample", self._on_wobbler_sample) + return appsink + + def _on_wobbler_sample(self, appsink: Gst.Element) -> Gst.FlowReturn: + """GStreamer callback: forward audio buffer to the head wobbler. + + The appsink is ``sync=True``, so this callback fires at the + buffer's PTS on the pipeline clock — audio is playing NOW. + """ + sample = appsink.pull_sample() + if sample is None or self._head_wobbler is None: + return Gst.FlowReturn.OK + buf = sample.get_buffer() + data = buf.extract_dup(0, buf.get_size()) + pcm = np.frombuffer(data, dtype=np.float32) + self._head_wobbler.feed(pcm, time.monotonic_ns()) + return Gst.FlowReturn.OK + + def _build_audiosink_tee_bin(self) -> Gst.Bin: + """Build a Gst.Bin with a tee splitting audio to speaker and wobbler. + + Per-branch audioconvert+audioresample isolate each leaf's caps + from the other (the wobbler appsink demands F32LE/1/16000; the + audiosink wants whatever the device prefers — e.g. on the + wireless XMOS PCM, anything but its native rate triggers an + IEC958 fallback that fails to open). 
+ + The bin exposes a single ghost sink pad for use as a playbin audio-sink:: - elif t == Gst.MessageType.ERROR: - err, debug = msg.parse_error() - self.logger.error(f"Error: {err} {debug}") - return False + ghost_sink → tee ─┬→ queue → audioconvert → audioresample → audiosink + └→ queue → audioconvert → audioresample → appsink + + """ + audio_bin = Gst.Bin.new("audio_tee_bin") + + tee = Gst.ElementFactory.make("tee") + queue_speaker = Gst.ElementFactory.make("queue") + ac_speaker = Gst.ElementFactory.make("audioconvert") + ar_speaker = Gst.ElementFactory.make("audioresample") + audiosink = self._build_audiosink_element() + queue_wobbler = Gst.ElementFactory.make("queue") + ac_wobbler = Gst.ElementFactory.make("audioconvert") + ar_wobbler = Gst.ElementFactory.make("audioresample") + appsink_wobbler = self._make_wobbler_appsink() + + for el in ( + tee, + queue_speaker, + ac_speaker, + ar_speaker, + audiosink, + queue_wobbler, + ac_wobbler, + ar_wobbler, + appsink_wobbler, + ): + audio_bin.add(el) + + tee.link(queue_speaker) + queue_speaker.link(ac_speaker) + ac_speaker.link(ar_speaker) + ar_speaker.link(audiosink) + + tee.link(queue_wobbler) + queue_wobbler.link(ac_wobbler) + ac_wobbler.link(ar_wobbler) + ar_wobbler.link(appsink_wobbler) + + ghost_pad = Gst.GhostPad.new("sink", tee.get_static_pad("sink")) + audio_bin.add_pad(ghost_pad) + + return audio_bin + + def _init_pipeline_playback(self, pipeline: Gst.Pipeline) -> None: + self._appsrc = Gst.ElementFactory.make("appsrc") + self._appsrc.set_property("do-timestamp", False) + self._appsrc.set_property("format", Gst.Format.TIME) + self._appsrc.set_property("is-live", True) + caps = Gst.Caps.from_string( + f"audio/x-raw,format=F32LE,channels={self.CHANNELS},rate={self.SAMPLE_RATE},layout=interleaved" + ) + self._appsrc.set_property("caps", caps) - return True + # Always build tee so wobbling can be enabled/disabled at runtime. 
+ # Per-branch audioconvert+audioresample so the wobbler appsink's + # F32LE/1/16000 caps don't drag the audiosink branch into a rate + # the device can't accept (e.g. wireless XMOS PCM falls back to + # IEC958 at non-native rates). The appsink with drop=True has + # negligible overhead when no wobbler is connected. + tee = Gst.ElementFactory.make("tee") + queue_speaker = Gst.ElementFactory.make("queue") + ac_speaker = Gst.ElementFactory.make("audioconvert") + ar_speaker = Gst.ElementFactory.make("audioresample") + audiosink = self._build_audiosink_element() + queue_wobbler = Gst.ElementFactory.make("queue") + ac_wobbler = Gst.ElementFactory.make("audioconvert") + ar_wobbler = Gst.ElementFactory.make("audioresample") + appsink_wobbler = self._make_wobbler_appsink() + + for el in ( + self._appsrc, + tee, + queue_speaker, + ac_speaker, + ar_speaker, + audiosink, + queue_wobbler, + ac_wobbler, + ar_wobbler, + appsink_wobbler, + ): + pipeline.add(el) + + self._appsrc.link(tee) + tee.link(queue_speaker) + queue_speaker.link(ac_speaker) + ac_speaker.link(ar_speaker) + ar_speaker.link(audiosink) + tee.link(queue_wobbler) + queue_wobbler.link(ac_wobbler) + ac_wobbler.link(ar_wobbler) + ar_wobbler.link(appsink_wobbler) + + def _on_bus_message( + self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline + ) -> bool: + if msg.type == Gst.MessageType.EOS and self._head_wobbler is not None: + self._head_wobbler.stop() + return super()._on_bus_message(bus, msg, pipeline) def _dump_latency(self) -> None: query = Gst.Query.new_latency() self._pipeline_playback.query(query) self.logger.info(f"Audio pipeline latency {query.parse_latency()}") - def _get_playback_running_time_ns(self) -> int: - """Return the current playback running time in nanoseconds.""" - clock = self._pipeline_playback.get_clock() - if clock is None: - return 0 - return int(max(0, clock.get_time() - self._pipeline_playback.get_base_time())) - def start_recording(self) -> None: """Start capturing audio from the 
microphone.""" self._pipeline_record.set_state(Gst.State.PLAYING) @@ -292,7 +385,9 @@ def stop_recording(self) -> None: def start_playing(self) -> None: """Start the playback pipeline so ``push_audio_sample`` can feed data.""" - self._playback_next_pts_ns = None + if self._head_wobbler is not None: + self._head_wobbler.start() + self._appsrc_pts = -1 self._pipeline_playback.set_state(Gst.State.PLAYING) GLib.timeout_add_seconds(5, self._dump_latency) @@ -305,27 +400,30 @@ def push_audio_sample(self, data: npt.NDArray[np.float32]) -> None: mono (the caller is responsible for channel adaptation). """ - if self._appsrc is not None: - pts_ns, duration_ns, self._playback_next_pts_ns = ( - self._compute_playback_buffer_timing( - int(data.shape[0]), - self.SAMPLE_RATE, - self._get_playback_running_time_ns(), - self._playback_next_pts_ns, - ) - ) - buf = Gst.Buffer.new_wrapped(data.tobytes()) - buf.pts = pts_ns - buf.duration = duration_ns - self._appsrc.push_buffer(buf) - else: + if self._appsrc is None: self.logger.warning( "AppSrc is not initialized. Call start_playing() first." 
) + return + + pts_ns, duration_ns, self._appsrc_pts = self._compute_pts( + int(data.shape[0]), + self._appsrc.get_current_running_time(), + self._appsrc_pts, + ) + buf = Gst.Buffer.new_wrapped(data.tobytes()) + buf.pts = pts_ns + buf.dts = pts_ns + buf.duration = duration_ns + ret = self._appsrc.push_buffer(buf) + if ret != Gst.FlowReturn.OK: + self.logger.warning(f"push_buffer dropped: {ret}") def stop_playing(self) -> None: """Stop the playback pipeline.""" - self._playback_next_pts_ns = None + if self._head_wobbler is not None: + self._head_wobbler.stop() + self._appsrc_pts = -1 self._pipeline_playback.set_state(Gst.State.NULL) if self._playbin is not None: self._playbin.set_state(Gst.State.NULL) @@ -342,8 +440,10 @@ def clear_output_buffer(self) -> None: def clear_player(self) -> None: """Flush the player's appsrc to drop any queued audio immediately.""" + if self._head_wobbler is not None: + self._head_wobbler.reset() if self._appsrc is not None: - self._playback_next_pts_ns = None + self._appsrc_pts = -1 self._pipeline_playback.set_state(Gst.State.PAUSED) self._appsrc.send_event(Gst.Event.new_flush_start()) self._appsrc.send_event(Gst.Event.new_flush_stop(reset_time=True)) @@ -358,7 +458,8 @@ def play_sound(self, sound_file: str) -> None: """Play a sound file through the Reachy Mini Audio card. The file is played via a GStreamer ``playbin`` routed to the same - audio sink used by the push-based playback pipeline. + audio sink used by the push-based playback pipeline. When the head + wobbler is enabled the audio is also forked to it via a tee. 
Args: sound_file: Absolute path **or** filename relative to the @@ -377,33 +478,6 @@ def play_sound(self, sound_file: str) -> None: else: file_path = sound_file - audiosink: Optional[Gst.Element] = None - - if has_reachymini_asoundrc(): - # reachy mini wireless has a preconfigured asoundrc - audiosink = Gst.ElementFactory.make("alsasink") - audiosink.set_property("device", "reachymini_audio_sink") - self.logger.info("Using audio device reachymini_audio_sink for playback.") - elif platform.system() == "Windows": - id_audio_card = get_audio_device("Sink") - audiosink = Gst.ElementFactory.make("wasapi2sink") - audiosink.set_property("device", id_audio_card) - self.logger.info( - f"Using audio device {id_audio_card} for playback on Windows." - ) - elif platform.system() == "Darwin": - id_audio_card = get_audio_device("Sink") - audiosink = Gst.ElementFactory.make("osxaudiosink") - audiosink.set_property("unique-id", id_audio_card) - self.logger.info( - f"Using audio device {id_audio_card} for playback on macOS." - ) - else: - id_audio_card = get_audio_device("Sink") - audiosink = Gst.ElementFactory.make("pulsesink") - audiosink.set_property("device", f"{id_audio_card}") - self.logger.info(f"Using audio device {id_audio_card} for playback.") - if self._playbin is not None: self._playbin.set_state(Gst.State.NULL) @@ -423,8 +497,11 @@ def play_sound(self, sound_file: str) -> None: else: uri = f"file://{file_path}" playbin.set_property("uri", uri) - if audiosink is not None: - playbin.set_property("audio-sink", audiosink) + + playbin.set_property("audio-sink", self._build_audiosink_tee_bin()) + if self._head_wobbler is not None: + self._head_wobbler.reset() + self._head_wobbler.start() self._playbin = playbin playbin.set_state(Gst.State.PLAYING) @@ -466,8 +543,30 @@ def get_DoA(self) -> tuple[float, bool] | None: """ return self._doa.get_DoA() + def enable_wobbling(self, callback: Callable[[SpeechOffsets], None]) -> None: + """Enable head wobbling driven by audio playback. 
+ + Args: + callback: Called with ``(x_m, y_m, z_m, roll_rad, pitch_rad, + yaw_rad)`` for each movement hop. + + """ + if self._head_wobbler is not None: + self._head_wobbler.stop() + self._head_wobbler = HeadWobbler(callback, sample_rate=self.SAMPLE_RATE) + self.logger.info("Head wobbler enabled") + + def disable_wobbling(self) -> None: + """Disable head wobbling.""" + if self._head_wobbler is not None: + self._head_wobbler.stop() + self._head_wobbler = None + self.logger.info("Head wobbler disabled") + def cleanup(self) -> None: """Release all resources (pipelines, USB devices).""" + if self._head_wobbler is not None: + self._head_wobbler.stop() self._doa.close() def __del__(self) -> None: diff --git a/src/reachy_mini/media/camera_gstreamer.py b/src/reachy_mini/media/camera_gstreamer.py index 3a0ee9db7..f6fe82dd5 100644 --- a/src/reachy_mini/media/camera_gstreamer.py +++ b/src/reachy_mini/media/camera_gstreamer.py @@ -57,7 +57,7 @@ CameraSpecs, ReachyMiniLiteCamSpecs, ) -from reachy_mini.media.gstreamer_utils import get_sample +from reachy_mini.media.gstreamer_utils import get_sample, handle_default_bus_message try: import gi @@ -210,27 +210,25 @@ def _build_ipc_source(self) -> None: queue.link(convert) convert.link(self._appsink_video) - def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool: # type: ignore[no-untyped-def] - t = msg.type - if t == Gst.MessageType.EOS: - self.logger.warning("End-of-stream") - return False - elif t == Gst.MessageType.ERROR: + def _on_bus_message( + self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline + ) -> bool: + # Some camera errors are transient and the pipeline can + # self-recover, so we log them but keep the bus watch alive. + # Default handler would tear it down. 
+ if msg.type == Gst.MessageType.ERROR: err, debug = msg.parse_error() self.logger.warning( f"GStreamer pipeline error (domain={err.domain}, code={err.code}): {err.message}" ) self.logger.debug(f"GStreamer error debug info: {debug}") - # Keep the bus watch active — some errors are transient and the pipeline - # will self-recover. Fatal errors should be handled by inspecting - # err.domain and err.code. return True - return True + return handle_default_bus_message(self.logger, msg, pipeline) def _handle_bus_calls(self) -> None: self.logger.debug("starting bus message loop") bus = self.pipeline.get_bus() - bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop) + bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self.pipeline) self._loop.run() bus.remove_watch() self.logger.debug("bus message loop stopped") diff --git a/src/reachy_mini/media/gstreamer_udp_camera.py b/src/reachy_mini/media/gstreamer_udp_camera.py index 4d9a4f2ad..221e9c0f4 100644 --- a/src/reachy_mini/media/gstreamer_udp_camera.py +++ b/src/reachy_mini/media/gstreamer_udp_camera.py @@ -23,6 +23,8 @@ from gi.repository import GLib, Gst, GstApp # noqa: E402 +from reachy_mini.media.gstreamer_utils import handle_default_bus_message # noqa: E402 + class GStreamerUDPCamera: """A class to send frames over UDP using GStreamer.""" @@ -62,7 +64,7 @@ def __init__( # Create pipeline self.pipeline = Gst.Pipeline.new("udp_sender") self._bus = self.pipeline.get_bus() - self._bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop) + self._bus.add_watch(GLib.PRIORITY_DEFAULT, self._on_bus_message, self.pipeline) # Configure pipeline elements self._configure_pipeline() @@ -138,32 +140,10 @@ def _configure_pipeline(self) -> None: self._logger.debug("UDP sender pipeline configured successfully") def _on_bus_message( - self, bus: Gst.Bus, msg: Gst.Message, loop: GLib.MainLoop + self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline ) -> bool: - """Handle GStreamer bus messages. 
- - Args: - bus: GStreamer bus. - msg: GStreamer message. - loop: GLib main loop. - - Returns: - bool: True to continue receiving messages, False to stop. - - """ - t = msg.type - if t == Gst.MessageType.EOS: - self._logger.warning("End-of-stream") - return False - elif t == Gst.MessageType.ERROR: - err, debug = msg.parse_error() - self._logger.error(f"Error: {err} {debug}") - return False - elif t == Gst.MessageType.WARNING: - err, debug = msg.parse_warning() - self._logger.warning(f"Warning: {err} {debug}") - - return True + """Handle GStreamer bus messages via the shared helper.""" + return handle_default_bus_message(self._logger, msg, pipeline) def _handle_bus_calls(self) -> None: """Run the GLib main loop for handling bus messages.""" diff --git a/src/reachy_mini/media/gstreamer_utils.py b/src/reachy_mini/media/gstreamer_utils.py index 36d7636e2..23ace29a4 100644 --- a/src/reachy_mini/media/gstreamer_utils.py +++ b/src/reachy_mini/media/gstreamer_utils.py @@ -17,6 +17,41 @@ from gi.repository import Gst, GstApp # noqa: E402 +def handle_default_bus_message( + logger: logging.Logger, + msg: Gst.Message, + pipeline: Gst.Pipeline, +) -> bool: + """Handle GStreamer bus messages with sensible defaults. + + - ``EOS``: log a warning and return False (the bus watch is + removed). + - ``ERROR``: log the parsed error and return False. + - ``WARNING``: log the parsed warning and keep the watch alive. + - ``LATENCY``: call ``pipeline.recalculate_latency()`` and return + True. + - Anything else: return True (keep the watch alive). + + Callers can wrap this in their own handler to inject extra logic + for a specific message type, then fall through to this helper for + the common cases. 
+ """ + if msg.type == Gst.MessageType.EOS: + logger.warning("End-of-stream") + return False + elif msg.type == Gst.MessageType.ERROR: + err, debug = msg.parse_error() + logger.error(f"Error: {err} {debug}") + return False + elif msg.type == Gst.MessageType.WARNING: + err, debug = msg.parse_warning() + logger.warning(f"Warning: {err} {debug}") + elif msg.type == Gst.MessageType.LATENCY: + pipeline.recalculate_latency() + logger.debug("Recalculate latency") + return True + + def get_sample(appsink: GstApp.AppSink, logger: logging.Logger) -> Optional[bytes]: """Pull a sample from a GStreamer AppSink with a 20 ms timeout. diff --git a/src/reachy_mini/media/media_manager.py b/src/reachy_mini/media/media_manager.py index b16c6a658..5969289ef 100644 --- a/src/reachy_mini/media/media_manager.py +++ b/src/reachy_mini/media/media_manager.py @@ -24,6 +24,7 @@ import logging import warnings +from collections.abc import Callable from enum import Enum from typing import TYPE_CHECKING, Optional, Union @@ -31,6 +32,7 @@ import numpy.typing as npt from reachy_mini.media.camera_constants import CameraSpecs +from reachy_mini.motion.head_wobbler import SpeechOffsets class MediaBackend(Enum): @@ -161,7 +163,10 @@ def __init__( self.logger.info( "Using LOCAL backend (GStreamer IPC camera + GStreamer audio)." ) - self._init_camera(log_level, camera_specs) + try: + self._init_camera(log_level, camera_specs) + except Exception as e: + self.logger.warning(f"Camera init failed, continuing without camera: {e}") self._init_audio(log_level) case MediaBackend.WEBRTC: self.logger.info("Using WebRTC streaming backend.") @@ -372,6 +377,37 @@ def stop_playing(self) -> None: return self.audio.stop_playing() + def enable_wobbling(self, callback: Callable[[SpeechOffsets], None]) -> None: + """Enable head wobbling driven by audio playback. + + Only supported with the LOCAL backend (GStreamerAudio). 
+ + Args: + callback: Called with ``(x_m, y_m, z_m, roll_rad, pitch_rad, + yaw_rad)`` for each movement hop. + + """ + if self.audio is None: + self.logger.warning("Audio system is not initialized.") + return + + from reachy_mini.media.audio_gstreamer import GStreamerAudio + + if not isinstance(self.audio, GStreamerAudio): + self.logger.warning("Head wobbling is only supported with the LOCAL audio backend.") + return + self.audio.enable_wobbling(callback) + + def disable_wobbling(self) -> None: + """Disable head wobbling.""" + if self.audio is None: + return + + from reachy_mini.media.audio_gstreamer import GStreamerAudio + + if isinstance(self.audio, GStreamerAudio): + self.audio.disable_wobbling() + def get_DoA(self) -> tuple[float, bool] | None: """Get the Direction of Arrival (DoA) from the microphone array. diff --git a/src/reachy_mini/media/media_server.py b/src/reachy_mini/media/media_server.py index 36ff3ba72..03210d8c5 100644 --- a/src/reachy_mini/media/media_server.py +++ b/src/reachy_mini/media/media_server.py @@ -31,6 +31,7 @@ from typing import Any, Callable, Dict, Optional import gi +import numpy as np from reachy_mini.daemon.utils import ( CAMERA_PIPE_NAME, @@ -47,12 +48,14 @@ ReachyMiniLiteCamSpecs, ) from reachy_mini.media.device_detection import get_audio_device, get_video_device +from reachy_mini.media.gstreamer_utils import handle_default_bus_message +from reachy_mini.motion.head_wobbler import HeadWobbler, SpeechOffsets from reachy_mini.utils.constants import ASSETS_ROOT_PATH gi.require_version("Gst", "1.0") gi.require_version("GstApp", "1.0") -from gi.repository import GLib, Gst # noqa: E402 +from gi.repository import GLib, Gst, GstApp # noqa: E402, F401 # Hard cap on how long a freshly-added consumer is allowed to spend # before its `webrtcbin.connection-state` reaches "connected". 
In a @@ -131,6 +134,10 @@ class GstMediaServer: """ + # Sample rate the wobbler appsink demands; the per-branch audioresample + # converts whatever the source produces down to this rate before delivery. + WOBBLER_SAMPLE_RATE = 16_000 + def __init__( self, log_level: str = "INFO", @@ -208,6 +215,8 @@ def __init__( self._peer_states_lock = Lock() self._incoming_audio: Dict[str, Dict[str, Any]] = {} self._playbin: Optional[Gst.Element] = None + self._head_wobbler: Optional[HeadWobbler] = None + self._pipeline_playback: Optional[Gst.Pipeline] = None self._build_pipeline() @@ -216,7 +225,7 @@ def _build_pipeline(self) -> None: self._pipeline_sender = Gst.Pipeline.new("reachymini_webrtc_sender") self._bus_sender = self._pipeline_sender.get_bus() self._bus_sender.add_watch( - GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop + GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_sender ) webrtcsink = self._configure_webrtc(self._pipeline_sender) @@ -270,9 +279,9 @@ def _consumer_added( ) -> None: self._logger.info(f"consumer added with peer id: {peer_id}") - Gst.debug_bin_to_dot_file( - self._pipeline_sender, Gst.DebugGraphDetails.ALL, "pipeline_full" - ) + # Gst.debug_bin_to_dot_file( + # self._pipeline_sender, Gst.DebugGraphDetails.ALL, "pipeline_full" + # ) GLib.timeout_add_seconds(5, self._dump_latency) @@ -372,7 +381,11 @@ def _on_consumer_pad_added( self._logger.info(f"Setting up incoming audio playback for peer {peer_id}") # Build playback pipeline element-by-element - playback_pipe = Gst.Pipeline.new(f"audio_playback_{peer_id}") + self._pipeline_playback = Gst.Pipeline.new(f"audio_playback_{peer_id}") + + sender_clock = self._pipeline_sender.get_pipeline_clock() + self._pipeline_playback.use_clock(sender_clock) + self._pipeline_playback.set_start_time(Gst.CLOCK_TIME_NONE) appsrc = Gst.ElementFactory.make("appsrc", "audio_in") appsrc.set_property("format", Gst.Format.TIME) @@ -381,49 +394,78 @@ def _on_consumer_pad_added( rtpopusdepay = 
Gst.ElementFactory.make("rtpopusdepay") opusdec = Gst.ElementFactory.make("opusdec") - audioconvert = Gst.ElementFactory.make("audioconvert") - audioresample = Gst.ElementFactory.make("audioresample") audiosink = self._build_audiosink_element() if audiosink is None: self._logger.error("Failed to create audio sink element") return - audiosink.set_property("sync", False) + audiosink.set_property("sync", True) + + # Per-branch audioconvert+audioresample so the wobbler appsink's + # F32LE/1/16000 caps don't drag the audiosink branch into a rate + # the device can't accept (e.g. wireless XMOS PCM falls back to + # IEC958 at non-native rates). + tee = Gst.ElementFactory.make("tee") + queue_speaker = Gst.ElementFactory.make("queue") + ac_speaker = Gst.ElementFactory.make("audioconvert") + ar_speaker = Gst.ElementFactory.make("audioresample") + queue_wobbler = Gst.ElementFactory.make("queue") + ac_wobbler = Gst.ElementFactory.make("audioconvert") + ar_wobbler = Gst.ElementFactory.make("audioresample") + + appsink_wobbler = self._make_wobbler_appsink() for elem in [ appsrc, rtpopusdepay, opusdec, - audioconvert, - audioresample, + tee, + queue_speaker, + ac_speaker, + ar_speaker, audiosink, + queue_wobbler, + ac_wobbler, + ar_wobbler, + appsink_wobbler, ]: - playback_pipe.add(elem) + self._pipeline_playback.add(elem) appsrc.link(rtpopusdepay) rtpopusdepay.link(opusdec) - opusdec.link(audioconvert) - audioconvert.link(audioresample) - audioresample.link(audiosink) - - play_bus = playback_pipe.get_bus() + opusdec.link(tee) + tee.link(queue_speaker) + queue_speaker.link(ac_speaker) + ac_speaker.link(ar_speaker) + ar_speaker.link(audiosink) + tee.link(queue_wobbler) + queue_wobbler.link(ac_wobbler) + ac_wobbler.link(ar_wobbler) + ar_wobbler.link(appsink_wobbler) + + play_bus = self._pipeline_playback.get_bus() play_bus.add_watch( - GLib.PRIORITY_DEFAULT, self._on_playback_bus_message, peer_id + GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_playback ) - 
playback_pipe.set_state(Gst.State.PLAYING) + self._pipeline_playback.set_state(Gst.State.PAUSED) + self._pipeline_playback.set_base_time(self._pipeline_sender.get_base_time()) + self._pipeline_playback.set_state(Gst.State.PLAYING) # Pad probe: intercept every RTP buffer, forward to the separate # playback pipeline, then DROP so webrtcsink's pipeline is unaffected. def _buffer_probe(pad: Gst.Pad, info: Gst.PadProbeInfo, _: None) -> int: buf = info.get_buffer() - if buf is not None: - appsrc.emit("push-buffer", buf.copy()) + appsrc.push_buffer(buf) return int(Gst.PadProbeReturn.DROP) probe_id = pad.add_probe(Gst.PadProbeType.BUFFER, _buffer_probe, None) + if self._head_wobbler is not None: + self._head_wobbler.reset() + self._head_wobbler.start() + self._incoming_audio[peer_id] = { - "playback_pipeline": playback_pipe, + "playback_pipeline": self._pipeline_playback, "probe_id": probe_id, "pad": pad, } @@ -957,18 +999,10 @@ def _build_audio_source(self) -> Optional[Gst.Element]: ) return Gst.ElementFactory.make("autoaudiosrc") - def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool: # type: ignore[no-untyped-def] - t = msg.type - if t == Gst.MessageType.EOS: - self._logger.warning("End-of-stream") - return False - - elif t == Gst.MessageType.ERROR: - err, debug = msg.parse_error() - self._logger.error(f"Error: {err} {debug}") - return False - - return True + def _on_bus_message( + self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline + ) -> bool: + return handle_default_bus_message(self._logger, msg, pipeline) def start(self) -> None: """Rebuild the pipeline from scratch and start it. 
@@ -1007,9 +1041,6 @@ def play_sound(self, sound_file: str) -> None: else: file_path = sound_file - # Build platform-aware audio sink element - audiosink = self._build_audiosink_element() - if self._playbin is not None: self._playbin.set_state(Gst.State.NULL) @@ -1029,8 +1060,11 @@ def play_sound(self, sound_file: str) -> None: uri = f"file://{file_path}" playbin.set_property("uri", uri) - if audiosink is not None: - playbin.set_property("audio-sink", audiosink) + playbin.set_property("audio-sink", self._build_audiosink_tee_bin()) + + if self._head_wobbler is not None: + self._head_wobbler.reset() + self._head_wobbler.start() self._playbin = playbin playbin.set_state(Gst.State.PLAYING) @@ -1084,6 +1118,115 @@ def _build_audiosink_element(self) -> Optional[Gst.Element]: return Gst.ElementFactory.make("autoaudiosink") + def _make_wobbler_appsink(self) -> Gst.Element: + """Create a sync=True appsink that feeds audio to the head wobbler. + + new-sample fires at the buffer's PTS on the pipeline clock — + the same instant the audiosink renders that audio. + """ + appsink = Gst.ElementFactory.make("appsink") + # Force mono so the speech tapper receives a 1-D float32 array. + # The per-branch audioconvert handles the downmix. + caps = Gst.Caps.from_string( + f"audio/x-raw,format=F32LE,channels=1,rate={self.WOBBLER_SAMPLE_RATE},layout=interleaved" + ) + appsink.set_property("caps", caps) + appsink.set_property("drop", True) + appsink.set_property("max-buffers", 5) + appsink.set_property("sync", True) + appsink.set_property("emit-signals", True) + appsink.connect("new-sample", self._on_wobbler_sample) + return appsink + + def _on_wobbler_sample(self, appsink: Gst.Element) -> Gst.FlowReturn: + """GStreamer callback: forward audio buffer to the head wobbler. + + The appsink is sync=True so the callback fires at the buffer's + PTS on the pipeline clock — audio is playing NOW. 
+ """ + sample = appsink.pull_sample() + if sample is None or self._head_wobbler is None: + return Gst.FlowReturn.OK + buf = sample.get_buffer() + data = buf.extract_dup(0, buf.get_size()) + pcm = np.frombuffer(data, dtype=np.float32) + self._head_wobbler.feed(pcm, time.monotonic_ns()) + return Gst.FlowReturn.OK + + def _build_audiosink_tee_bin(self) -> Gst.Bin: + """Build a Gst.Bin splitting audio to speaker and wobbler appsink. + + Per-branch audioconvert+audioresample isolate each leaf's caps + from the other (the wobbler appsink demands F32LE/1/16000; the + audiosink wants whatever the device prefers — e.g. on the + wireless XMOS PCM, anything but its native rate triggers an + IEC958 fallback that fails to open). + + The bin exposes a single ghost sink pad for use as a playbin audio-sink:: + + ghost_sink → tee ─┬→ queue → audioconvert → audioresample → audiosink + └→ queue → audioconvert → audioresample → appsink + """ + audio_bin = Gst.Bin.new("audio_tee_bin") + + tee = Gst.ElementFactory.make("tee") + queue_speaker = Gst.ElementFactory.make("queue") + ac_speaker = Gst.ElementFactory.make("audioconvert") + ar_speaker = Gst.ElementFactory.make("audioresample") + audiosink = self._build_audiosink_element() + queue_wobbler = Gst.ElementFactory.make("queue") + ac_wobbler = Gst.ElementFactory.make("audioconvert") + ar_wobbler = Gst.ElementFactory.make("audioresample") + appsink_wobbler = self._make_wobbler_appsink() + + for el in ( + tee, + queue_speaker, + ac_speaker, + ar_speaker, + audiosink, + queue_wobbler, + ac_wobbler, + ar_wobbler, + appsink_wobbler, + ): + audio_bin.add(el) + + tee.link(queue_speaker) + queue_speaker.link(ac_speaker) + ac_speaker.link(ar_speaker) + ar_speaker.link(audiosink) + + tee.link(queue_wobbler) + queue_wobbler.link(ac_wobbler) + ac_wobbler.link(ar_wobbler) + ar_wobbler.link(appsink_wobbler) + + ghost_pad = Gst.GhostPad.new("sink", tee.get_static_pad("sink")) + audio_bin.add_pad(ghost_pad) + + return audio_bin + + def 
enable_wobbling(self, callback: Callable[[SpeechOffsets], None]) -> None: + """Enable head wobbling driven by audio playback. + + Args: + callback: Called with ``(x_m, y_m, z_m, roll_rad, pitch_rad, + yaw_rad)`` for each movement hop. + + """ + if self._head_wobbler is not None: + self._head_wobbler.stop() + self._head_wobbler = HeadWobbler(callback, sample_rate=self.WOBBLER_SAMPLE_RATE) + self._logger.info("Head wobbler enabled (daemon-side)") + + def disable_wobbling(self) -> None: + """Disable head wobbling.""" + if self._head_wobbler is not None: + self._head_wobbler.stop() + self._head_wobbler = None + self._logger.info("Head wobbler disabled (daemon-side)") + def set_message_handler( self, handler: Callable[[str, str], None], # cb(peer_id, message) diff --git a/src/reachy_mini/media/webrtc_client_gstreamer.py b/src/reachy_mini/media/webrtc_client_gstreamer.py index a10d27bf4..869dab1ed 100644 --- a/src/reachy_mini/media/webrtc_client_gstreamer.py +++ b/src/reachy_mini/media/webrtc_client_gstreamer.py @@ -106,7 +106,7 @@ def __init__( self._pipeline_record = Gst.Pipeline.new("audio_recorder") self._bus_record = self._pipeline_record.get_bus() self._bus_record.add_watch( - GLib.PRIORITY_DEFAULT, self._on_bus_message, self._loop + GLib.PRIORITY_DEFAULT, self._on_bus_message, self._pipeline_record ) self._appsink_audio = Gst.ElementFactory.make("appsink") @@ -144,7 +144,7 @@ def __init__( self._webrtcbin = None self._audio_send_ready = False self._appsrc = None - self._appsrc_pts = 0 # running PTS in nanoseconds for appsrc buffers + self._first_push_done = False self.daemon_url: str = "" # set by MediaManager for remote sound ops self._webrtcsrc.connect("deep-element-added", self._on_deep_element_added) self.logger.info("GstWebRTCClient initialized (bidirectional audio support)") @@ -300,21 +300,18 @@ def _webrtcsrc_pad_added_cb(self, webrtcsrc: Gst.Element, pad: Gst.Pad) -> None: GLib.timeout_add_seconds(5, self._dump_latency) - def _on_bus_message(self, bus: 
Gst.Bus, msg: Gst.Message, loop) -> bool: # type: ignore[no-untyped-def] - t = msg.type - if t == Gst.MessageType.EOS: - self.logger.warning("End-of-stream") - return False - elif t == Gst.MessageType.ERROR: - err, debug = msg.parse_error() + def _on_bus_message( + self, bus: Gst.Bus, msg: Gst.Message, pipeline: Gst.Pipeline + ) -> bool: + # webrtcsrc may emit non-fatal errors from its internal + # elements (e.g. appsrc not-negotiated when a sendrecv + # transceiver has no data to send). GStreamer wraps the + # actual reason as "Internal data stream error." in the + # GError, with "not-negotiated" only in the debug string. + # These should not tear down the whole pipeline. + if msg.type == Gst.MessageType.ERROR: + err, _ = msg.parse_error() src = msg.src - - # webrtcsrc may emit non-fatal errors from its internal - # elements (e.g. appsrc not-negotiated when a sendrecv - # transceiver has no data to send). GStreamer wraps the - # actual reason as "Internal data stream error." in the - # GError, with "not-negotiated" only in the debug string. - # These should not tear down the whole pipeline. 
if ( src is not None and src.get_factory() is not None @@ -326,10 +323,7 @@ def _on_bus_message(self, bus: Gst.Bus, msg: Gst.Message, loop) -> bool: # type ): self.logger.debug(f"Ignoring non-fatal webrtcsrc internal error: {err}") return True - - self.logger.error(f"Error: {err} {debug}") - return False - return True + return super()._on_bus_message(bus, msg, pipeline) def open(self) -> None: """Start the WebRTC pipeline (both video and audio).""" @@ -404,6 +398,7 @@ def _setup_audio_send_chain(self) -> None: appsrc = Gst.ElementFactory.make("appsrc") appsrc.set_property("format", Gst.Format.TIME) appsrc.set_property("is-live", True) + caps = Gst.Caps.from_string( f"audio/x-raw,format=F32LE,channels={self.CHANNELS},rate={self.SAMPLE_RATE},layout=interleaved" ) @@ -417,7 +412,13 @@ def _setup_audio_send_chain(self) -> None: rtpopuspay = Gst.ElementFactory.make("rtpopuspay") rtpopuspay.set_property("pt", pt) - elems = (appsrc, audioconvert, audioresample, opusenc, rtpopuspay) + elems = ( + appsrc, + audioconvert, + audioresample, + opusenc, + rtpopuspay, + ) target_bin = self._pipeline_record for elem in elems: @@ -453,7 +454,7 @@ def start_playing(self) -> None: def stop_playing(self) -> None: """Reset the PTS counter for the send chain and stop daemon-side sound.""" - self._appsrc_pts = 0 + self._appsrc_pts = -1 # Also stop any sound file playing on the daemon's speaker. 
if self.daemon_url: try: @@ -468,34 +469,54 @@ def clear_output_buffer(self) -> None: """No-op (WebRTC send chain does not buffer significantly).""" pass + def _push_buffer(self, data: npt.NDArray[np.float32]) -> None: + """Single push of one F32LE chunk with gap-aware PTS.""" + if self._appsrc is None: + return + + pts_ns, duration_ns, self._appsrc_pts = self._compute_pts( + int(data.shape[0]), + self._appsrc.get_current_running_time(), + self._appsrc_pts, + ) + buf = Gst.Buffer.new_wrapped(data.tobytes()) + buf.pts = pts_ns + buf.dts = pts_ns + buf.duration = duration_ns + + ret = self._appsrc.push_buffer(buf) + if ret != Gst.FlowReturn.OK: + self.logger.warning("push_buffer dropped: %s", ret) + def push_audio_sample(self, data: npt.NDArray[np.float32]) -> None: """Push audio data to the remote peer via WebRTC. + The very first call also primes the send chain with 0.5 s of + silence so the Opus encoder and webrtcbin can warm up before + the caller's real audio arrives; without this the first word + of an utterance gets swallowed. + Args: data: Float32 audio samples. """ if self._appsrc is None: - return # send chain not ready yet, silently drop - - num_samples = data.shape[0] - duration_ns = (num_samples * Gst.SECOND) // self.SAMPLE_RATE - - buf = Gst.Buffer.new_wrapped(data.tobytes()) - buf.pts = self._appsrc_pts - buf.duration = duration_ns - self._appsrc_pts += duration_ns + return - self._appsrc.push_buffer(buf) + if not self._first_push_done: + self._first_push_done = True + warmup = np.zeros(self.SAMPLE_RATE // 2, dtype=np.float32) + self._push_buffer(warmup) + self._push_buffer(data) def play_sound(self, sound_file: str) -> None: """Play a sound file on the robot's speaker via the daemon REST API. If *sound_file* is a local path that exists on this machine the - file is automatically uploaded to the daemon's temporary sound - directory (skipping the upload when a file with the same name is - already present). 
Otherwise the filename is sent as-is and the - daemon resolves it from its built-in assets or filesystem. + file is uploaded to the daemon's temporary sound directory + (overwriting any previous upload with the same basename). + Otherwise the filename is sent as-is and the daemon resolves it + from its built-in assets or filesystem. Args: sound_file: Absolute local path **or** asset filename @@ -506,17 +527,9 @@ def play_sound(self, sound_file: str) -> None: self.logger.error("No daemon URL configured — cannot play sound remotely.") return - # If the file exists on the client, ensure it is uploaded first. remote_file = sound_file if os.path.isfile(sound_file): - filename = os.path.basename(sound_file) - remote_files = self.list_sounds() - if filename not in remote_files: - remote_file = self.upload_sound(sound_file) - else: - # Already uploaded — ask the daemon to resolve by filename. - # The daemon's play_sound checks the temp dir, assets, etc. - remote_file = filename + remote_file = self.upload_sound(sound_file) try: resp = _requests.post( diff --git a/src/reachy_mini/motion/head_wobbler.py b/src/reachy_mini/motion/head_wobbler.py new file mode 100644 index 000000000..abe183305 --- /dev/null +++ b/src/reachy_mini/motion/head_wobbler.py @@ -0,0 +1,134 @@ +"""PTS-driven head wobbler. + +Drives 6-DOF head movement offsets from PCM audio analysed by +:class:`SwayRollRT` (the speech tapper). Each call to :meth:`feed` +turns one PCM chunk into a list of per-hop sway dicts and registers a +``GLib.timeout_add`` for each, firing the offset callback at the +audio's actual playback time (computed by the caller from buffer PTS + +audiosink latency). + +There is no background thread: scheduling runs on whichever GLib main +loop the caller's pipeline already uses for its bus watch. 
+""" + +import logging +import threading +import time +from collections.abc import Callable +from typing import Any + +from gi.repository import GLib +from numpy.typing import NDArray + +from reachy_mini.motion import speech_tapper + +logger = logging.getLogger(__name__) + +# Public type alias; re-exported by ``media/*`` modules. +SpeechOffsets = tuple[float, float, float, float, float, float] + + +class HeadWobbler: + """PTS-driven scheduler that turns audio into timed head offsets.""" + + _ZERO_OFFSETS: SpeechOffsets = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0) + + def __init__( + self, + set_speech_offsets: Callable[[SpeechOffsets], None], + sample_rate: int, + ) -> None: + """Initialize the wobbler with the offset callback and audio rate. + + Args: + set_speech_offsets: Called with a 6-tuple of head offsets per hop. + sample_rate: Sample rate of the PCM that will be fed via + :meth:`feed` — must match the wobbler appsink's caps. + + """ + self._apply_offsets = set_speech_offsets + + self._hop_ms = speech_tapper.HOP_MS + self._sample_rate = int(sample_rate) + self.sway = speech_tapper.SwayRollRT(sample_rate=self._sample_rate) + + self._lock = threading.Lock() + self._sway_lock = threading.Lock() + # Bumped on stop/reset so in-flight GLib timeouts no-op when fired. + self._generation = 0 + + def start(self) -> None: + """Reset DSP and hop generation. 
Idempotent.""" + with self._lock: + self._generation += 1 + with self._sway_lock: + self.sway.reset() + logger.debug("Head wobbler started") + + def stop(self) -> None: + """Cancel pending offsets and zero the head.""" + with self._lock: + self._generation += 1 + self._apply_offsets(self._ZERO_OFFSETS) + logger.debug("Head wobbler stopped") + + def reset(self) -> None: + """Cancel pending offsets, recreate DSP state, zero the head.""" + with self._lock: + self._generation += 1 + with self._sway_lock: + self.sway = speech_tapper.SwayRollRT(sample_rate=self._sample_rate) + self._apply_offsets(self._ZERO_OFFSETS) + + def feed( + self, + pcm: NDArray[Any], + play_at_monotonic_ns: int, + ) -> None: + """Schedule per-hop offsets for *pcm* against its playback time. + + Args: + pcm: Float32 mono samples at this wobbler's ``sample_rate``. + play_at_monotonic_ns: ``time.monotonic_ns()``-comparable + instant at which the *first* sample of *pcm* will be + heard from the speaker. Subsequent hops are scheduled at + ``play_at_monotonic_ns + i * HOP_MS * 1_000_000``. + + """ + with self._sway_lock: + results = self.sway.feed(pcm) + if not results: + return + + with self._lock: + generation = self._generation + + hop_ns = self._hop_ms * 1_000_000 + now_ns = time.monotonic_ns() + + # Skip hops more than one hop's worth in the past (genuinely + # stale); clamp small sub-hop negatives to 0 so they fire on + # the next main-loop iteration. + stale_threshold_ms = -self._hop_ms + for i, hop in enumerate(results): + target_ns = play_at_monotonic_ns + i * hop_ns + delay_ms = (target_ns - now_ns) // 1_000_000 + if delay_ms < stale_threshold_ms: + continue + offsets: SpeechOffsets = ( + hop["x_mm"] / 1000.0, + hop["y_mm"] / 1000.0, + hop["z_mm"] / 1000.0, + hop["roll_rad"], + hop["pitch_rad"], + hop["yaw_rad"], + ) + GLib.timeout_add(max(0, int(delay_ms)), self._fire, offsets, generation) + + def _fire(self, offsets: SpeechOffsets, generation: int) -> bool: + """GLib timeout callback. 
# ---------------------------------------------------------------------------
# Tunables
# ---------------------------------------------------------------------------
FRAME_MS = 20  # length of the RMS analysis window
HOP_MS = 50  # one output dict is produced per hop

SWAY_MASTER = 1.5  # multiplier applied to the normalised loudness gain
SENS_DB_OFFSET = +4.0  # dB added to the measured level before normalisation
VAD_DB_ON = -35.0  # a hop at or above this level counts towards "voice on"
VAD_DB_OFF = -45.0  # a hop at or below this level counts towards "voice off"
VAD_ATTACK_MS = 40
VAD_RELEASE_MS = 250
ENV_FOLLOW_GAIN = 0.65  # per-hop fraction of the gap to the envelope target

# SWAY_F_*: oscillator frequency in cycles per second of fed audio.
# SWAY_A_*: peak amplitude of that oscillator (degrees or millimetres).
SWAY_F_PITCH = 2.2
SWAY_A_PITCH_DEG = 4.5
SWAY_F_YAW = 0.6
SWAY_A_YAW_DEG = 7.5
SWAY_F_ROLL = 1.3
SWAY_A_ROLL_DEG = 2.25
SWAY_F_X = 0.35
SWAY_A_X_MM = 4.5
SWAY_F_Y = 0.45
SWAY_A_Y_MM = 3.75
SWAY_F_Z = 0.25
SWAY_A_Z_MM = 2.25

SWAY_DB_LOW = -46.0  # level mapped to loudness gain 0
SWAY_DB_HIGH = -18.0  # level mapped to loudness gain 1
LOUDNESS_GAMMA = 0.9
SWAY_ATTACK_MS = 50
SWAY_RELEASE_MS = 250

# ---------------------------------------------------------------------------
# Derived constants (rate-independent — FRAME/HOP are per-instance)
# ---------------------------------------------------------------------------
ATTACK_FR = max(1, int(VAD_ATTACK_MS / HOP_MS))
RELEASE_FR = max(1, int(VAD_RELEASE_MS / HOP_MS))
SWAY_ATTACK_FR = max(1, int(SWAY_ATTACK_MS / HOP_MS))
SWAY_RELEASE_FR = max(1, int(SWAY_RELEASE_MS / HOP_MS))


def _rms_dbfs(x: NDArray[np.float32]) -> float:
    """Root-mean-square in dBFS for float32 mono array in [-1,1].

    The two ``1e-12`` terms keep the logarithm finite for digital silence.
    """
    x = x.astype(np.float32, copy=False)
    rms = np.sqrt(np.mean(x * x, dtype=np.float32) + 1e-12, dtype=np.float32)
    return float(20.0 * math.log10(float(rms) + 1e-12))


def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
    """Normalize dB into [0,1] with gamma; clipped to [0,1].

    Args:
        db: Measured level in dBFS.
        offset: Sensitivity offset added to *db* before normalisation.

    Returns:
        ``0.0`` at or below ``SWAY_DB_LOW``, ``1.0`` at or above
        ``SWAY_DB_HIGH``, and a gamma-shaped ramp in between.

    """
    t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
    if t < 0.0:
        t = 0.0
    elif t > 1.0:
        t = 1.0
    return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t


class SwayRollRT:
    """Feed audio chunks and get per-hop sway outputs.

    Usage::

        rt = SwayRollRT(sample_rate=16_000)
        results = rt.feed(pcm_float32_mono)
        # results is a list of dicts, one per HOP_MS

    """

    def __init__(self, rng_seed: int = 7, sample_rate: int = 16_000) -> None:
        """Initialize state with random oscillator phases.

        Args:
            rng_seed: Seed for the generator that draws the six oscillator
                phases; equal seeds give equal phases.
            sample_rate: Sample rate, in Hz, of the PCM passed to ``feed``.

        Raises:
            ValueError: If *sample_rate* is too low to hold at least one
                sample per frame and per hop.

        """
        self._seed = int(rng_seed)
        self.sample_rate = int(sample_rate)
        self.frame = int(self.sample_rate * FRAME_MS / 1000)
        self.hop = int(self.sample_rate * HOP_MS / 1000)
        # A zero-length hop would make the loop in feed() spin forever and a
        # zero-length frame would turn the RMS into NaN, so reject both here.
        if self.frame < 1 or self.hop < 1:
            raise ValueError(
                f"sample_rate={self.sample_rate} is too low: need at least one "
                f"sample per {FRAME_MS} ms frame and per {HOP_MS} ms hop"
            )
        self.samples: deque[float] = deque(maxlen=10 * self.sample_rate)
        self.carry: NDArray[np.float32] = np.zeros(0, dtype=np.float32)

        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0

        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0

        # Draw order matters: it fixes which random number each axis gets.
        rng = np.random.default_rng(self._seed)
        self.phase_pitch = float(rng.random() * 2 * math.pi)
        self.phase_yaw = float(rng.random() * 2 * math.pi)
        self.phase_roll = float(rng.random() * 2 * math.pi)
        self.phase_x = float(rng.random() * 2 * math.pi)
        self.phase_y = float(rng.random() * 2 * math.pi)
        self.phase_z = float(rng.random() * 2 * math.pi)
        self.t = 0.0

    def reset(self) -> None:
        """Reset state (VAD/env/buffers/time) but keep initial phases/seed."""
        self.samples.clear()
        self.carry = np.zeros(0, dtype=np.float32)
        self.vad_on = False
        self.vad_above = 0
        self.vad_below = 0
        self.sway_env = 0.0
        self.sway_up = 0
        self.sway_down = 0
        self.t = 0.0

    def feed(self, pcm: NDArray[np.float32]) -> list[dict[str, float]]:
        """Stream in a float32 mono PCM chunk; returns sway dicts (one per hop).

        *pcm* must already match this instance's ``sample_rate``; no
        resampling is done here.

        Args:
            pcm: Float32 mono samples ``(N,)`` in ``[-1, 1]``. Other shapes
                are flattened and other dtypes are cast to float32 (without
                rescaling).

        Returns:
            One dict per completed hop with the keys ``pitch_rad``,
            ``yaw_rad``, ``roll_rad``, ``x_mm``, ``y_mm`` and ``z_mm``.
            Samples that do not fill a whole hop are kept for the next call.

        """
        chunk = np.asarray(pcm, dtype=np.float32).reshape(-1)
        if chunk.size == 0:
            return []

        # With no leftover we read straight from the caller's array; only
        # the unconsumed remainder is copied at the end (see below).
        buf = np.concatenate([self.carry, chunk]) if self.carry.size else chunk
        n_hops = buf.size // self.hop
        hop_seconds = HOP_MS / 1000.0

        out: list[dict[str, float]] = []

        for k in range(n_hops):
            hop = buf[k * self.hop : (k + 1) * self.hop]

            self.samples.extend(hop.tolist())
            if len(self.samples) < self.frame:
                self.t += hop_seconds
                continue

            # Read the newest `frame` samples from the right-hand end of the
            # history. Walking from the left would step over the whole
            # (up to 10 s) buffer on every hop. The reversed copy restores
            # chronological order so the RMS sum is evaluated exactly as
            # before.
            frame = np.fromiter(
                islice(reversed(self.samples), self.frame),
                dtype=np.float32,
                count=self.frame,
            )[::-1].copy()
            db = _rms_dbfs(frame)

            # VAD with hysteresis + attack/release
            if db >= VAD_DB_ON:
                self.vad_above += 1
                self.vad_below = 0
                if not self.vad_on and self.vad_above >= ATTACK_FR:
                    self.vad_on = True
            elif db <= VAD_DB_OFF:
                self.vad_below += 1
                self.vad_above = 0
                if self.vad_on and self.vad_below >= RELEASE_FR:
                    self.vad_on = False

            if self.vad_on:
                self.sway_up = min(SWAY_ATTACK_FR, self.sway_up + 1)
                self.sway_down = 0
            else:
                self.sway_down = min(SWAY_RELEASE_FR, self.sway_down + 1)
                self.sway_up = 0

            up = self.sway_up / SWAY_ATTACK_FR
            down = 1.0 - (self.sway_down / SWAY_RELEASE_FR)
            target = up if self.vad_on else down
            self.sway_env += ENV_FOLLOW_GAIN * (target - self.sway_env)
            if self.sway_env < 0.0:
                self.sway_env = 0.0
            elif self.sway_env > 1.0:
                self.sway_env = 1.0

            loud = _loudness_gain(db) * SWAY_MASTER
            env = self.sway_env
            self.t += hop_seconds

            # Oscillators
            pitch = (
                math.radians(SWAY_A_PITCH_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
            )
            yaw = (
                math.radians(SWAY_A_YAW_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
            )
            roll = (
                math.radians(SWAY_A_ROLL_DEG)
                * loud
                * env
                * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
            )
            x_mm = SWAY_A_X_MM * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
            y_mm = SWAY_A_Y_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
            z_mm = SWAY_A_Z_MM * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)

            out.append(
                {
                    "pitch_rad": pitch,
                    "yaw_rad": yaw,
                    "roll_rad": roll,
                    "x_mm": x_mm,
                    "y_mm": y_mm,
                    "z_mm": z_mm,
                },
            )

        # Own the remainder: the caller may reuse or overwrite its buffer
        # before the next call, and the copy also lets the large chunk be
        # freed instead of being pinned by a small view.
        self.carry = buf[n_hops * self.hop :].copy()

        return out
def enable_wobbling(self) -> None:
    """Turn on audio-reactive head wobbling.

    Once enabled, audio played through ``media.play_sound()`` or
    ``media.push_audio_sample()`` is analysed and turned into subtle head
    movements that the daemon composes with the current target pose.

    Two things are switched on:

    * SDK-side wobbling through the media manager (LOCAL backend only,
      a no-op for WEBRTC); its offsets are forwarded to the daemon as
      ``SetSpeechOffsetsCmd`` messages.
    * Daemon-side wobbling, so that sounds played by the daemon itself
      (wake-up, sleep, etc.) and incoming WebRTC audio also move the head.

    """
    client = self.client

    def _forward(offsets: tuple[float, float, float, float, float, float]) -> None:
        # The command carries the six offsets as a plain list.
        command = SetSpeechOffsetsCmd(offsets=list(offsets))
        client.send_command(command)

    # SDK side first, then the daemon.
    self.media_manager.enable_wobbling(_forward)
    client.send_command(SetWobblingCmd(enabled=True))
    self.logger.info("Head wobbling enabled")

def disable_wobbling(self) -> None:
    """Turn off audio-reactive head wobbling and reset offsets to zero.

    Stops the SDK-side wobbler first, then tells the daemon to disable its
    own wobbling.
    """
    self.media_manager.disable_wobbling()
    switch_off = SetWobblingCmd(enabled=False)
    self.client.send_command(switch_off)
    self.logger.info("Head wobbling disabled")
SR = 16_000  # sample rate used for tone generation in tests

_AHEAD_NS = 5_000_000_000  # 5 s ahead → all deadlines positive
_BEHIND_NS = 10_000_000_000  # 10 s in the past
_ZERO_OFFSETS = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)


def _sine(amplitude=0.5, *, freq_hz=440, seconds=1, rate=SR):
    """Return a float32 sine wave lasting *seconds* sampled at *rate*."""
    timeline = np.linspace(0, seconds, int(rate * seconds), dtype=np.float32)
    return (np.sin(2 * np.pi * freq_hz * timeline) * amplitude).astype(np.float32)


def _patch_glib_timeout(monkeypatch):
    """Swap ``GLib.timeout_add`` for a recorder and return its log.

    Every call is stored as ``(delay_ms, fn, args)``; a test simulates the
    GLib main loop by invoking ``fn(*args)`` itself.
    """
    recorded: list[tuple[int, object, tuple]] = []

    def _record(delay_ms, fn, *args):
        recorded.append((delay_ms, fn, args))
        return len(recorded)  # source id

    monkeypatch.setattr("reachy_mini.motion.head_wobbler.GLib.timeout_add", _record)
    return recorded


def _new_wobbler(monkeypatch):
    """Return ``(wobbler, schedule, received)`` wired to a fake GLib.

    ``schedule`` is the recorder list from ``_patch_glib_timeout`` and
    ``received`` collects every offsets tuple passed to the callback.
    """
    from reachy_mini.motion.head_wobbler import HeadWobbler

    schedule = _patch_glib_timeout(monkeypatch)
    received: list[tuple[float, ...]] = []
    return HeadWobbler(received.append, sample_rate=SR), schedule, received


def _run_all(schedule):
    """Invoke every recorded timeout callback once, in scheduling order."""
    for _delay, callback, args in schedule:
        callback(*args)


# ---------------------------------------------------------------------------
# speech_tapper: helper functions
# ---------------------------------------------------------------------------


def test_rms_silence_is_very_negative():
    """Digital silence sits far below -100 dBFS."""
    assert _rms_dbfs(np.zeros(320, dtype=np.float32)) < -100


def test_rms_full_scale_sine_near_zero():
    """A full-scale sine measures roughly -3 dBFS."""
    level = _rms_dbfs(_sine(1.0))
    assert -5 < level < 0  # RMS of sine ≈ -3 dBFS


def test_rms_quiet_signal_is_negative():
    """A sine at 1 % amplitude falls below -35 dBFS."""
    assert _rms_dbfs(_sine(0.01)) < -35


def test_loudness_below_low_threshold_is_zero():
    """Levels far under the low bound map to zero gain."""
    assert _loudness_gain(-100.0) == 0.0


def test_loudness_above_high_threshold_clamped():
    """Levels over the high bound saturate near, and never above, 1."""
    gain = _loudness_gain(0.0)
    assert gain <= 1.0
    assert gain > 0.9


def test_loudness_monotonically_increasing():
    """Gain never decreases as the level rises."""
    gains = [_loudness_gain(level) for level in (-50, -40, -30, -20, -10)]
    for lower, higher in zip(gains, gains[1:]):
        assert lower <= higher


# ---------------------------------------------------------------------------
# speech_tapper: SwayRollRT
# ---------------------------------------------------------------------------


def test_sway_empty_input():
    """An empty chunk yields no hops."""
    assert SwayRollRT().feed(np.zeros(0, dtype=np.float32)) == []


def test_sway_short_input_no_output():
    """Input shorter than one hop produces no output."""
    assert SwayRollRT().feed(np.zeros(100, dtype=np.float32)) == []


def test_sway_one_second_produces_hops():
    """One second of audio yields one result per hop."""
    hops = SwayRollRT().feed(_sine())
    assert len(hops) == 1000 // HOP_MS


def test_sway_output_keys():
    """Each hop dict carries all six motion keys."""
    hops = SwayRollRT().feed(_sine(seconds=0.1))
    assert len(hops) >= 1
    wanted = {"pitch_rad", "yaw_rad", "roll_rad", "x_mm", "y_mm", "z_mm"}
    assert wanted <= set(hops[0].keys())


def test_sway_silence_produces_near_zero():
    """Silence produces (almost) no movement."""
    for hop in SwayRollRT().feed(np.zeros(SR, dtype=np.float32)):
        assert abs(hop["pitch_rad"]) < 0.01
        assert abs(hop["yaw_rad"]) < 0.01
        assert abs(hop["x_mm"]) < 0.1


def test_sway_loud_signal_produces_nonzero():
    """A loud three-second tone produces a visible yaw swing."""
    hops = SwayRollRT().feed(_sine(0.8, freq_hz=300, seconds=3))
    peak_yaw = max(abs(hop["yaw_rad"]) for hop in hops)
    assert peak_yaw > 0.01


def test_sway_custom_sample_rate():
    """Frame/hop derive from the per-instance sample_rate."""
    analyser = SwayRollRT(sample_rate=48_000)
    assert analyser.sample_rate == 48_000
    assert analyser.frame == int(48_000 * 20 / 1000)
    assert analyser.hop == int(48_000 * 50 / 1000)
    # 1s of 48kHz audio still yields ~20 hops (1000ms / HOP_MS).
    hops = analyser.feed(_sine(rate=48_000))
    assert len(hops) == 1000 // HOP_MS


def test_sway_reset_clears_state():
    """reset() rewinds time, VAD state and the carry buffer."""
    analyser = SwayRollRT()
    analyser.feed(_sine())
    analyser.reset()
    assert analyser.t == 0.0
    assert analyser.vad_on is False
    assert analyser.carry.size == 0


def test_sway_deterministic_with_same_seed():
    """Two analysers with the same seed agree hop for hop."""
    tone = _sine()

    first = SwayRollRT(rng_seed=42).feed(tone.copy())
    second = SwayRollRT(rng_seed=42).feed(tone.copy())

    assert len(first) == len(second)
    for left, right in zip(first, second):
        assert left == pytest.approx(right)


def test_sway_incremental_feeding():
    """Feeding small chunks should produce same total hops as one big chunk."""
    tone = _sine()
    whole = SwayRollRT(rng_seed=7).feed(tone)

    streaming = SwayRollRT(rng_seed=7)
    pieces = []
    step = 1600  # 100ms chunks
    for start in range(0, len(tone), step):
        pieces.extend(streaming.feed(tone[start : start + step]))

    assert len(pieces) == len(whole)


# ---------------------------------------------------------------------------
# head_wobbler: HeadWobbler (PTS-driven scheduler, no thread)
# ---------------------------------------------------------------------------


def test_wobbler_schedules_offsets_for_a_tone(monkeypatch):
    """Every scheduled timeout delivers one offsets tuple when fired."""
    wobbler, schedule, received = _new_wobbler(monkeypatch)
    wobbler.start()

    wobbler.feed(_sine(), time.monotonic_ns() + _AHEAD_NS)

    assert len(schedule) > 0
    # Fire each scheduled timeout.
    _run_all(schedule)
    assert len(received) == len(schedule)


def test_wobbler_offsets_are_6_tuples(monkeypatch):
    """Delivered offsets are six floats."""
    wobbler, schedule, received = _new_wobbler(monkeypatch)
    wobbler.start()

    wobbler.feed(_sine(), time.monotonic_ns() + _AHEAD_NS)
    _run_all(schedule)

    assert len(received) > 0
    for offsets in received:
        assert len(offsets) == 6
        assert all(isinstance(value, float) for value in offsets)


def test_wobbler_stop_zeros_offsets(monkeypatch):
    """stop() pushes an all-zero offsets tuple."""
    wobbler, _schedule, received = _new_wobbler(monkeypatch)
    wobbler.stop()

    assert received[-1] == _ZERO_OFFSETS


def test_wobbler_reset_zeros_offsets(monkeypatch):
    """reset() pushes an all-zero offsets tuple."""
    wobbler, _schedule, received = _new_wobbler(monkeypatch)
    wobbler.reset()

    assert received[-1] == _ZERO_OFFSETS


def test_wobbler_stop_cancels_pending(monkeypatch):
    """After stop(), pending GLib timeouts no-op when fired."""
    wobbler, schedule, received = _new_wobbler(monkeypatch)
    wobbler.start()

    wobbler.feed(_sine(), time.monotonic_ns() + _AHEAD_NS)
    pending = list(schedule)
    assert pending  # sanity: we did schedule something

    wobbler.stop()
    received.clear()  # discard the zero-offsets call from stop()

    _run_all(pending)
    assert received == []  # all canceled


def test_wobbler_start_is_idempotent(monkeypatch):
    """Calling start() twice in a row does not raise."""
    wobbler, _schedule, _received = _new_wobbler(monkeypatch)
    wobbler.start()
    wobbler.start()  # should not crash


def test_wobbler_schedules_hops_at_hop_intervals(monkeypatch):
    """Consecutive scheduled delays are spaced by HOP_MS."""
    wobbler, schedule, _received = _new_wobbler(monkeypatch)
    wobbler.start()

    wobbler.feed(_sine(), time.monotonic_ns() + _AHEAD_NS)

    delays = [entry[0] for entry in schedule]
    assert len(delays) >= 2
    gaps = [later - earlier for earlier, later in zip(delays, delays[1:])]
    assert all(abs(gap - HOP_MS) <= 1 for gap in gaps)


def test_wobbler_drops_past_deadlines(monkeypatch):
    """Hops whose deadline has already passed are not scheduled."""
    wobbler, schedule, _received = _new_wobbler(monkeypatch)
    wobbler.start()

    wobbler.feed(_sine(), time.monotonic_ns() - _BEHIND_NS)

    assert schedule == []
'opencv'", specifier = ">=1.2.1" }, { name = "fastapi" }, { name = "gpiozero", marker = "sys_platform == 'linux' and extra == 'wireless-version'", specifier = ">=2.0.0" }, + { name = "gradio-client", marker = "extra == 'examples'" }, { name = "gstreamer-bundle", marker = "sys_platform != 'linux'", specifier = "==1.28.3" }, { name = "huggingface-hub", specifier = "==1.3.0" }, { name = "jinja2" },