diff --git a/Docs/audio.md b/Docs/audio.md index 385ae7af4..0f0b50b79 100644 --- a/Docs/audio.md +++ b/Docs/audio.md @@ -48,23 +48,38 @@ When set to `false`, the audio session remains active after the LiveKit call end ## Disabling Voice Processing -Apple's voice processing is enabled by default, such as echo cancellation and auto-gain control. +Platform voice processing, such as echo cancellation and auto-gain control, is enabled by default. +On Apple platforms this is Apple's Voice-Processing I/O. -If your app doesn't require voice processing at all, you can disable it entirely: +Use `AudioProcessingMode` to choose the processing backend before publishing or starting local recording: + +```swift +try AudioManager.shared.setAudioProcessingMode(.automatic) // default +try AudioManager.shared.setAudioProcessingMode(.platform) // require platform processing +try AudioManager.shared.setAudioProcessingMode(.software) // use WebRTC APM +try AudioManager.shared.setAudioProcessingMode(.disabled) // no voice processing +``` + +Mode changes are only supported by audio device modules that implement this API, and only +while audio is idle. To switch during a call, unpublish or stop local recording first, set +the mode, then publish or start recording again. + +If your app doesn't require voice processing at all, you can also use the compatibility API: ```swift try AudioManager.shared.setVoiceProcessingEnabled(false) ``` -This restarts the internal `AVAudioEngine` to apply the change. It can cause a short audio glitch, so it is recommended to set it once before connecting to a Room. Disabling voice processing also disables muted speaker detection. +This is equivalent to `try AudioManager.shared.setAudioProcessingMode(.disabled)`. +Disabling platform voice processing also disables muted speaker detection. 
-If your app requires toggling voice processing at run-time, it is recommended to use: +If your app only needs to bypass Apple's platform processing at run-time, use: ```swift AudioManager.shared.isVoiceProcessingBypassed = true ``` -Set it back to `false` to re-enable processing. This uses `AVAudioEngine`'s [isVoiceProcessingBypassed](https://developer.apple.com/documentation/avfaudio/avaudioinputnode/isvoiceprocessingbypassed) and works seamlessly at run-time. +Set it back to `false` to re-enable Apple's processing. This uses `AVAudioEngine`'s [isVoiceProcessingBypassed](https://developer.apple.com/documentation/avfaudio/avaudioinputnode/isvoiceprocessingbypassed) and works seamlessly at run-time, but it does not switch to WebRTC software processing. ## Other audio ducking diff --git a/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift b/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift new file mode 100644 index 000000000..77ab58c74 --- /dev/null +++ b/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift @@ -0,0 +1,78 @@ +/* + * Copyright 2026 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +internal import LiveKitWebRTC + +/// Voice-processing backend selection applied through ``AudioManager/setAudioProcessingMode(_:)``. +public enum AudioProcessingMode: Sendable { + /// Prefer platform voice processing when available, otherwise use WebRTC software processing. + case automatic + /// Require platform voice processing. 
+ case platform + /// Use WebRTC software processing and disable platform voice processing. + case software + /// Disable both platform voice processing and WebRTC software processing. + case disabled + /// A mode reported by the audio device module that this SDK version does not recognize. It is never valid as input: ``AudioManager/setAudioProcessingMode(_:)`` throws when given this case. + case unknown +} + +public extension AudioManager { + /// The audio processing mode currently reported by the audio device module. + var audioProcessingMode: AudioProcessingMode { + RTC.audioDeviceModule.audioProcessingMode.toLKType() + } + + /// Asks the audio device module to switch to `mode`. + /// - Parameter mode: The backend to select; `.unknown` is rejected before reaching the audio device module. + /// - Throws: ``LiveKitError`` with `.invalidState` for `.unknown`, or the error `checkAdmResult(code:)` derives from the module's result code. + func setAudioProcessingMode(_ mode: AudioProcessingMode) throws { + guard mode != .unknown else { + throw LiveKitError(.invalidState, message: "Unsupported audio processing mode specified") + } + + let result = RTC.audioDeviceModule.setAudioProcessingMode(mode.toRTCType()) + try checkAdmResult(code: result) + } +} + +// MARK: - Internal + +extension LKRTCAudioProcessingMode { + /// Converts the WebRTC mode to the SDK type; values this SDK does not know map to `.unknown`. + func toLKType() -> AudioProcessingMode { + switch self { + case .automatic: return .automatic + case .platform: return .platform + case .software: return .software + case .disabled: return .disabled + @unknown default: return .unknown + } + } +} + +extension AudioProcessingMode { + /// Converts the SDK mode to the WebRTC type. `.unknown` has no WebRTC counterpart and maps to `.automatic`; ``AudioManager/setAudioProcessingMode(_:)`` rejects it before converting. + func toRTCType() -> LKRTCAudioProcessingMode { + switch self { + case .automatic: .automatic + case .platform: .platform + case .software: .software + case .disabled: .disabled + case .unknown: .automatic + } + } +} diff --git a/Sources/LiveKit/Audio/Manager/AudioManager.swift b/Sources/LiveKit/Audio/Manager/AudioManager.swift index 672a64c51..a70bd2a05 100644 --- a/Sources/LiveKit/Audio/Manager/AudioManager.swift +++ b/Sources/LiveKit/Audio/Manager/AudioManager.swift @@ -320,12 +320,13 @@ public class AudioManager: Loggable { set { RTC.audioDeviceModule.duckingLevel = newValue.toRTCType() } } - /// The main flag that determines whether to enable Voice-Processing I/O of the internal AVAudioEngine. Toggling this requires restarting the AudioEngine. - /// Setting this to `false` prevents any voice-processing-related initialization, and muted talker detection will not work. 
- /// Typically, it is recommended to keep this set to `true` and toggle ``isVoiceProcessingBypassed`` when possible. + /// Whether the current audio processing mode uses Voice-Processing I/O of the internal AVAudioEngine. + /// Use ``setAudioProcessingMode(_:)`` for explicit platform/software/disabled selection. /// Defaults to `true`. public var isVoiceProcessingEnabled: Bool { RTC.audioDeviceModule.isVoiceProcessingEnabled } + /// Compatibility API: `true` selects automatic processing and `false` disables voice processing entirely. + /// Prefer ``setAudioProcessingMode(_:)`` for new code. public func setVoiceProcessingEnabled(_ enabled: Bool) throws { let result = RTC.audioDeviceModule.setVoiceProcessingEnabled(enabled) try checkAdmResult(code: result) @@ -544,6 +545,7 @@ let kAudioEngineErrorFailedToConfigureAudioSession = -4100 let kAudioEngineErrorAudioSessionCategoryRecordingRequired = -4102 let kAudioEngineErrorInsufficientDevicePermission = -4101 +let kAudioEngineInvalidStateError = -5000 extension AudioManager { func checkAdmResult(code: Int) throws { @@ -553,6 +555,8 @@ extension AudioManager { throw LiveKitError(.deviceAccessDenied, message: "Device permissions are not granted") } else if code == kAudioEngineErrorAudioSessionCategoryRecordingRequired { throw LiveKitError(.audioSession, message: "Recording category required for audio session") + } else if code == kAudioEngineInvalidStateError { + throw LiveKitError(.invalidState, message: "Audio engine returned invalid state") } else if code != 0 { throw LiveKitError(.audioEngine, message: "Audio engine returned error code: \(code)") } diff --git a/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift b/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift index 99c8ecc21..38516e1d7 100644 --- a/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift +++ b/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift @@ -20,20 +20,23 @@ internal import LiveKitWebRTC @objcMembers public final class AudioCaptureOptions: 
NSObject, CaptureOptions, Sendable { - // Defaults are `true` on all platforms. In practice these options only affect - // software (WebRTC) APM on iOS Simulator. On iOS device or macOS, Apple's VPIO - // handles AEC/AGC/NS and software APM is always off regardless of these flags. + // Defaults are `true` on all platforms. These options affect WebRTC's + // software APM. In the default audio processing mode, platform processing + // handles AEC/AGC/NS on iOS device and macOS, so software APM is off. Use + // `AudioManager.shared.setAudioProcessingMode(.software)` to explicitly + // select WebRTC software processing on supported WebRTC builds. // // Platform behavior: - // - iOS device or macOS: VPIO is active. Software APM is always off. These - // flags are effectively ignored for runtime processing, but still reported - // to the server as audio track features for telemetry. + // - iOS device or macOS with `.automatic`: platform processing is active. + // Software APM is off. These flags are still reported to the server as + // audio track features for telemetry. + // - iOS device or macOS with `.software`: Software APM is active and these + // flags are respected. // - iOS Simulator: VPIO is not reliably available. Software APM is used and // these flags are respected. // - // To control VPIO on device, see ``AudioManager/isVoiceProcessingEnabled``, - // ``AudioManager/isVoiceProcessingBypassed``, and - // ``AudioManager/isVoiceProcessingAGCEnabled``. + // To control the processing backend, see + // ``AudioManager/setAudioProcessingMode(_:)``. public static let defaultEchoCancellation = true public static let defaultAutoGainControl = true public static let defaultNoiseSuppression = true @@ -47,20 +50,17 @@ public final class AudioCaptureOptions: NSObject, CaptureOptions, Sendable { ) /// Whether to enable software (WebRTC's) echo cancellation. - /// Only takes effect on iOS Simulator. 
On iOS device or macOS, Apple's VPIO - /// handles AEC and this flag is ignored for runtime processing. - /// See ``AudioManager/isVoiceProcessingBypassed`` for device-side VPIO controls. + /// Takes effect when WebRTC software processing is active. + /// See ``AudioManager/setAudioProcessingMode(_:)`` for backend selection. public let echoCancellation: Bool /// Whether to enable software (WebRTC's) gain control. - /// Only takes effect on iOS Simulator. On iOS device or macOS, Apple's VPIO - /// handles AGC and this flag is ignored for runtime processing. - /// See ``AudioManager/isVoiceProcessingAGCEnabled`` for device-side VPIO controls. + /// Takes effect when WebRTC software processing is active. + /// See ``AudioManager/setAudioProcessingMode(_:)`` for backend selection. public let autoGainControl: Bool /// Whether to enable software (WebRTC's) noise suppression. - /// Only takes effect on iOS Simulator. On iOS device or macOS, Apple's VPIO - /// handles NS and this flag is ignored for runtime processing. + /// Takes effect when WebRTC software processing is active; see ``AudioManager/setAudioProcessingMode(_:)`` for backend selection. public let noiseSuppression: Bool public let highpassFilter: Bool diff --git a/Tests/LiveKitAudioTests/AudioProcessingTests.swift b/Tests/LiveKitAudioTests/AudioProcessingTests.swift index 4552ba193..c6f781e9f 100644 --- a/Tests/LiveKitAudioTests/AudioProcessingTests.swift +++ b/Tests/LiveKitAudioTests/AudioProcessingTests.swift @@ -65,8 +65,8 @@ import LiveKitTestSupport } @Test func optionsAppliedToAudioProcessingModule() async throws { - // Disable Apple VPIO. - AudioManager.shared.isVoiceProcessingBypassed = true + try AudioManager.shared.setAudioProcessingMode(.software) + defer { try? AudioManager.shared.setAudioProcessingMode(.automatic) } try await TestEnvironment.withRoom(RoomTestingOptions(canPublish: true)) { room in let allOnOptions = AudioCaptureOptions(