From fa134e85c1ea48601ff6d9505d2ef7803aaaed41 Mon Sep 17 00:00:00 2001 From: Kanak Agrawal Date: Fri, 17 Apr 2026 16:50:01 +0530 Subject: [PATCH] Added support for noise cancellation model for daily --- Dockerfile | 4 +- .../agents/breeze_buddy/agent/transport.py | 42 +++++++++++++++---- app/core/config/static.py | 4 ++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7339bd66c..030b3860d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,6 +8,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ PORT=8000 \ NLTK_DATA=/usr/local/nltk_data\ AIC_MODEL_PATH=/app/models/voice/aic/quail_l_8khz.aicmodel \ + AIC_MODEL_PATH_16KHZ=/app/models/voice/aic/quail_l_16khz.aicmodel \ UV_CACHE_DIR=/app/.uv-cache # Install system dependencies required for audio processing and compilation + curl for GCP CLI @@ -51,7 +52,8 @@ RUN --mount=type=secret,id=gcp_token \ curl -sSL https://sdk.cloud.google.com | bash && \ export PATH=$PATH:/root/google-cloud-sdk/bin && \ echo "=== Downloading AIC assets ===" && \ - gcloud storage cp --access-token-file=/run/secrets/gcp_token ${AIC_BUCKET_PATH}/quail_l_8khz.aicmodel /app/models/voice/aic/ || echo "Warning: AIC model not found"; \ + gcloud storage cp --access-token-file=/run/secrets/gcp_token ${AIC_BUCKET_PATH}/quail_l_8khz.aicmodel /app/models/voice/aic/ && \ + gcloud storage cp --access-token-file=/run/secrets/gcp_token ${AIC_BUCKET_PATH}/quail_l_16khz.aicmodel /app/models/voice/aic/ || echo "Warning: AIC model not found"; \ else \ echo "Warning: GCP token secret not provided, skipping AIC installation (AWS deployment)"; \ fi diff --git a/app/ai/voice/agents/breeze_buddy/agent/transport.py b/app/ai/voice/agents/breeze_buddy/agent/transport.py index 1806a5b17..66d4a96b3 100644 --- a/app/ai/voice/agents/breeze_buddy/agent/transport.py +++ b/app/ai/voice/agents/breeze_buddy/agent/transport.py @@ -29,8 +29,27 @@ TRANSPORT_TYPE_DAILY = "daily" +def _get_aic_model_path(transport_type: str) -> Path: + """Select appropriate AIC model based on transport. + + Auto-selects model based on transport: + - Daily (web): 16kHz + - Telephony (Twilio/Plivo/Exotel): 8kHz + + Args: + transport_type: The transport type (daily, twilio, etc.). + + Returns: + Path to the selected AIC model file. + """ + if transport_type == TRANSPORT_TYPE_DAILY: + return Path(static.AIC_MODEL_PATH_16KHZ) + return Path(static.AIC_MODEL_PATH) + + def _create_audio_input_filter( configurations: Optional[ConfigurationModel] = None, + transport_type: str = TRANSPORT_TYPE_DAILY, ) -> Optional[BaseAudioFilter]: """Create audio input filter based on configuration. @@ -41,6 +60,7 @@ def _create_audio_input_filter( Args: configurations: The configuration model containing noise filter settings. + transport_type: The transport type to determine model selection. Returns: Audio filter instance if enabled and successfully created, None otherwise. @@ -58,11 +78,16 @@ def _create_audio_input_filter( if not static.BREEZE_BUDDY_AICOUSTICS_LICENSE_KEY: logger.warning("AIC filter enabled but license key not configured") return None + + model_path = _get_aic_model_path(transport_type) + try: - return AICFilter( + aic_filter = AICFilter( license_key=static.BREEZE_BUDDY_AICOUSTICS_LICENSE_KEY, - model_path=Path(static.AIC_MODEL_PATH), + model_path=model_path, ) + logger.info(f"AIC filter initialized successfully with model: {model_path}") + return aic_filter except Exception as e: logger.warning( f"Failed to initialize AIC filter, proceeding without it: {e}" @@ -93,14 +118,15 @@ def get_transport_params( telephony_mixer = create_background_sound_mixer( template, sample_rate=TELEPHONY_SAMPLE_RATE ) - audio_in_filter = _create_audio_input_filter(configurations) return { "daily": lambda: DailyParams( audio_in_enabled=True, audio_out_enabled=True, - audio_in_filter=audio_in_filter, audio_out_mixer=daily_mixer, + audio_in_filter=_create_audio_input_filter( + configurations, TRANSPORT_TYPE_DAILY + ), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, @@ -108,7 +134,7 @@ def get_transport_params( audio_in_sample_rate=TELEPHONY_SAMPLE_RATE, audio_out_sample_rate=TELEPHONY_SAMPLE_RATE, audio_out_mixer=telephony_mixer, - audio_in_filter=audio_in_filter, + audio_in_filter=_create_audio_input_filter(configurations, "telephony"), ), "exotel": lambda: FastAPIWebsocketParams( audio_in_enabled=True, @@ -116,7 +142,7 @@ def get_transport_params( audio_in_sample_rate=TELEPHONY_SAMPLE_RATE, audio_out_sample_rate=TELEPHONY_SAMPLE_RATE, audio_out_mixer=telephony_mixer, - audio_in_filter=audio_in_filter, + audio_in_filter=_create_audio_input_filter(configurations, "telephony"), ), "telnyx": lambda: FastAPIWebsocketParams( audio_in_enabled=True, @@ -124,7 +150,7 @@ def get_transport_params( audio_in_sample_rate=TELEPHONY_SAMPLE_RATE, audio_out_sample_rate=TELEPHONY_SAMPLE_RATE, audio_out_mixer=telephony_mixer, - audio_in_filter=audio_in_filter, + audio_in_filter=_create_audio_input_filter(configurations, "telephony"), ), "plivo": lambda: FastAPIWebsocketParams( audio_in_enabled=True, @@ -132,6 +158,6 @@ def get_transport_params( audio_in_sample_rate=TELEPHONY_SAMPLE_RATE, audio_out_sample_rate=TELEPHONY_SAMPLE_RATE, audio_out_mixer=telephony_mixer, - audio_in_filter=audio_in_filter, + audio_in_filter=_create_audio_input_filter(configurations, "telephony"), ), } diff --git a/app/core/config/static.py b/app/core/config/static.py index 6541630ea..7dd826d30 100644 --- a/app/core/config/static.py +++ b/app/core/config/static.py @@ -59,6 +59,10 @@ AIC_MODEL_PATH = os.environ.get( "AIC_MODEL_PATH", "/app/models/voice/aic/quail_l_8khz.aicmodel" ) +AIC_MODEL_PATH_16KHZ = os.environ.get( + "AIC_MODEL_PATH_16KHZ", + "/app/models/voice/aic/quail_l_16khz.aicmodel", +) # TTS Configuration ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")