Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ dependencies {
implementation(libs.androidx.hilt.navigation.compose)
implementation(libs.androidx.documentfile)
implementation(libs.timber)
implementation(libs.coil.compose)
implementation(libs.coil.network.okhttp)
implementation(libs.taskerpluginlibrary)
ksp(libs.hilt.android.compiler)
testImplementation(libs.junit)
Expand Down
32 changes: 32 additions & 0 deletions app/src/main/java/com/example/ava/MainActivity.kt
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
package com.example.ava

import android.app.KeyguardManager
import android.content.pm.ActivityInfo
import android.os.Build
import android.os.Bundle
import android.view.WindowManager
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.activity.enableEdgeToEdge
import androidx.activity.viewModels
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.runtime.Composable
import androidx.compose.runtime.DisposableEffect
import androidx.lifecycle.lifecycleScope
import com.example.ava.permissions.VOICE_SATELLITE_PERMISSIONS
import com.example.ava.ui.MainNavHost
import com.example.ava.ui.services.ServiceViewModel
import com.example.ava.ui.services.rememberLaunchWithMultiplePermissions
import com.example.ava.ui.theme.AvaTheme
import dagger.hilt.android.AndroidEntryPoint
import kotlinx.coroutines.launch

@AndroidEntryPoint
class MainActivity : ComponentActivity() {
Expand All @@ -23,6 +29,32 @@ class MainActivity : ComponentActivity() {
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
enableEdgeToEdge()
// Allow the activity to show over the lock screen and turn the screen on.
// Combined with the service's ScreenWakeLock (ACQUIRE_CAUSES_WAKEUP), this
// ensures the app is visible when a wake word fires with the screen off.
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O_MR1) {
setShowWhenLocked(true)
setTurnScreenOn(true)
} else {
@Suppress("DEPRECATION")
window.addFlags(
WindowManager.LayoutParams.FLAG_SHOW_WHEN_LOCKED or
WindowManager.LayoutParams.FLAG_TURN_SCREEN_ON
)
}
// Dismiss the keyguard on devices with no lock screen security.
// On password-protected devices this is a no-op (no auth UI is shown from here).
(getSystemService(KEYGUARD_SERVICE) as KeyguardManager)
.requestDismissKeyguard(this, null)
lifecycleScope.launch {
serviceViewModel.allowRotation.collect { allow ->
requestedOrientation = if (allow) {
ActivityInfo.SCREEN_ORIENTATION_FULL_SENSOR
} else {
ActivityInfo.SCREEN_ORIENTATION_PORTRAIT
}
}
}
setContent {
AvaTheme {
OnCreate()
Expand Down
33 changes: 31 additions & 2 deletions app/src/main/java/com/example/ava/audio/MicrophoneInput.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import android.Manifest
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.media.audiofx.AcousticEchoCanceler
import android.media.audiofx.AutomaticGainControl
import android.media.audiofx.NoiseSuppressor
import androidx.annotation.RequiresPermission
import timber.log.Timber
import java.nio.ByteBuffer
Expand All @@ -12,12 +15,19 @@ class MicrophoneInput(
val audioSource: Int = DEFAULT_AUDIO_SOURCE,
val sampleRateInHz: Int = DEFAULT_SAMPLE_RATE_IN_HZ,
val channelConfig: Int = DEFAULT_CHANNEL_CONFIG,
val audioFormat: Int = DEFAULT_AUDIO_FORMAT
val audioFormat: Int = DEFAULT_AUDIO_FORMAT,
val enableNoiseSuppressor: Boolean = true,
val enableAutomaticGainControl: Boolean = true,
val enableAcousticEchoCanceler: Boolean = true
) : AutoCloseable {
private val bufferSize =
val bufferSize =
AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
private val buffer = ByteBuffer.allocateDirect(bufferSize)
private var audioRecord: AudioRecord? = null
private var noiseSuppressor: NoiseSuppressor? = null
private var automaticGainControl: AutomaticGainControl? = null
private var acousticEchoCanceler: AcousticEchoCanceler? = null

val isRecording get() = audioRecord?.recordingState == AudioRecord.RECORDSTATE_RECORDING

@RequiresPermission(Manifest.permission.RECORD_AUDIO)
Expand Down Expand Up @@ -56,10 +66,29 @@ class MicrophoneInput(
check(audioRecord.state == AudioRecord.STATE_INITIALIZED) {
"Failed to initialize AudioRecord"
}
val sessionId = audioRecord.audioSessionId
if (enableNoiseSuppressor && NoiseSuppressor.isAvailable()) {
noiseSuppressor = NoiseSuppressor.create(sessionId)?.also { it.enabled = true }
Timber.d("NoiseSuppressor enabled: ${noiseSuppressor != null}")
}
if (enableAutomaticGainControl && AutomaticGainControl.isAvailable()) {
automaticGainControl = AutomaticGainControl.create(sessionId)?.also { it.enabled = true }
Timber.d("AutomaticGainControl enabled: ${automaticGainControl != null}")
}
if (enableAcousticEchoCanceler && AcousticEchoCanceler.isAvailable()) {
acousticEchoCanceler = AcousticEchoCanceler.create(sessionId)?.also { it.enabled = true }
Timber.d("AcousticEchoCanceler enabled: ${acousticEchoCanceler != null}")
}
Comment on lines +69 to +81
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you confirmed that this actually works on any devices? I tried implementing something similar, but it had no effect on the two devices (Samsung and Lenovo) I tested it on. I could only get the effects to work by changing the microphone's audio source to VOICE_COMMUNICATION and setting the AudioManager mode to MODE_IN_COMMUNICATION; however, this had the caveat of reducing audio playback quality, a known issue on Android devices.

return audioRecord
}

override fun close() {
noiseSuppressor?.release()
noiseSuppressor = null
automaticGainControl?.release()
automaticGainControl = null
acousticEchoCanceler?.release()
acousticEchoCanceler = null
audioRecord?.let {
if (isRecording) {
it.stop()
Expand Down
2 changes: 2 additions & 0 deletions app/src/main/java/com/example/ava/esphome/EspHomeDevice.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package com.example.ava.esphome
import android.Manifest
import androidx.annotation.RequiresPermission
import com.example.ava.esphome.entities.Entity
import com.example.ava.esphome.entities.MediaPlayer
import com.example.ava.esphome.logger.Logger
import com.example.ava.esphome.voiceassistant.VoiceAssistant
import com.example.ava.server.DEFAULT_SERVER_PORT
Expand Down Expand Up @@ -60,6 +61,7 @@ class EspHomeDevice(
private val server: Server = ServerImpl(),
private val deviceInfo: DeviceInfoResponse,
val voiceAssistant: VoiceAssistant,
val mediaPlayer: MediaPlayer? = null,
private val logger: Logger? = null,
entities: Iterable<Entity> = emptyList()
) : AutoCloseable {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,36 @@ interface MediaPlayer {
*/
val mediaState: Flow<MediaPlayerState>

/**
* The title of the currently playing media, or null if unavailable.
*/
val mediaTitle: StateFlow<String?>

/**
* The artist of the currently playing media, or null if unavailable.
*/
val mediaArtist: StateFlow<String?>

/**
* The current playback position in milliseconds, or 0 if not playing.
*/
val currentPosition: Long

/**
* The duration of the currently playing media in milliseconds, or 0 if unknown.
*/
val duration: Long

/**
* Raw artwork image bytes, or null if unavailable.
*/
val artworkData: StateFlow<ByteArray?>

/**
* URL of the artwork image, or null if unavailable.
*/
val artworkUri: StateFlow<String?>

/**
* Starts playback of the specified media.
*/
Expand All @@ -35,6 +65,21 @@ interface MediaPlayer {
*/
fun stopMedia()

/**
* Toggles between play and pause.
*/
fun togglePlayback()

/**
* Skips to the next track.
*/
fun skipToNext()

/**
* Returns to the previous track.
*/
fun skipToPrevious()

/**
* Gets the playback volume.
*/
Expand Down Expand Up @@ -71,6 +116,16 @@ class MediaPlayerEntity(
name = this@MediaPlayerEntity.name
objectId = this@MediaPlayerEntity.objectId
supportsPause = true
// Explicit feature flag bitmask so HA knows exactly what we support.
// Bits: PAUSE=1, VOLUME_SET=4, VOLUME_MUTE=8, PLAY_MEDIA=512, STOP=4096,
// PLAY=16384, SHUFFLE_SET=32768
// SHUFFLE_SET is advertised so HA accepts shuffle_set calls from Music Assistant
// LLM scripts (which always emit shuffle:false). The ESPHome proto has no shuffle
// command so the call is silently accepted and ignored — correct, since
// shuffle:false is a no-op on a device that doesn't have a queue.
// NEXT_TRACK(32) and PREVIOUS_TRACK(16) are absent: no ESPHome proto commands
// exist for them, so HA could never send them; skip buttons are UI-only.
featureFlags = 1 or 4 or 8 or 512 or 4096 or 16384 or 32768
})

is MediaPlayerCommandRequest -> {
Expand All @@ -94,6 +149,9 @@ class MediaPlayerEntity(
MediaPlayerCommand.MEDIA_PLAYER_COMMAND_UNMUTE ->
mediaPlayer.setMuted(false)

MediaPlayerCommand.MEDIA_PLAYER_COMMAND_TOGGLE ->
mediaPlayer.togglePlayback()

else -> {}
}
} else if (message.hasVolume) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@ import com.example.esphomeproto.api.VoiceAssistantSetConfiguration
import com.example.esphomeproto.api.VoiceAssistantTimerEvent
import com.example.esphomeproto.api.VoiceAssistantTimerEventResponse
import com.example.esphomeproto.api.voiceAssistantConfigurationResponse
import com.example.esphomeproto.api.voiceAssistantTimerEventResponse
import com.example.esphomeproto.api.voiceAssistantWakeWord
import com.google.protobuf.MessageLite
import kotlinx.coroutines.CoroutineName
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Job
import kotlinx.coroutines.flow.update
import kotlinx.coroutines.cancel
import kotlinx.coroutines.delay
import kotlinx.coroutines.flow.MutableSharedFlow
Expand All @@ -35,13 +37,19 @@ import kotlinx.coroutines.launch
import timber.log.Timber
import kotlin.coroutines.CoroutineContext
import kotlin.time.Clock
import kotlin.time.Duration.Companion.seconds

data object Listening : EspHomeState
data object Responding : EspHomeState
data object Processing : EspHomeState

data class VoiceError(val message: String) : EspHomeState

/**
 * Immutable snapshot of the text associated with a voice interaction.
 *
 * Both fields are optional and default to `null`, so an empty transcript can be
 * created with `Transcript()` and filled in incrementally via [copy].
 *
 * @property sttText text reported for the user's input (presumably the speech-to-text
 * result), or `null` if none has been received.
 * @property ttsText text reported for the assistant's reply (presumably the text being
 * spoken via text-to-speech), or `null` if none has been received.
 */
data class Transcript(
    val sttText: String? = null,
    val ttsText: String? = null,
)

class VoiceAssistant(
coroutineContext: CoroutineContext,
val voiceInput: VoiceInput,
Expand All @@ -54,11 +62,16 @@ class VoiceAssistant(
private val subscription = MutableSharedFlow<MessageLite>()
protected val _state = MutableStateFlow<EspHomeState>(Disconnected)
val state = _state.asStateFlow()
private val _transcript = MutableStateFlow<Transcript?>(null)
val transcript = _transcript.asStateFlow()

private var pipeline: VoicePipeline? = null
private var announcement: Announcement? = null
private val _pendingTimers = MutableStateFlow<Map<String, VoiceTimer>>(emptyMap())
private val _ringingTimer = MutableStateFlow<VoiceTimer?>(null)
// IDs of timers cancelled locally via cancelTimer() before HA confirmed cancellation.
// Used to suppress spurious VOICE_ASSISTANT_TIMER_FINISHED events that arrive in-flight.
private val _cancelledTimerIds = mutableSetOf<String>()

val allTimers = combine(_pendingTimers, _ringingTimer) { pending, ringing ->
listOfNotNull(ringing) + pending.values.sorted()
Expand Down Expand Up @@ -86,6 +99,49 @@ class VoiceAssistant(
doStopTimer()
}

/**
 * Cancels the timer identified by [timerId]. The work runs asynchronously on [scope].
 *
 * - If the timer is the one currently ringing, this delegates to `doStopTimer()`.
 * - Otherwise, if the timer is pending, it is removed from the pending timers, its id is
 *   recorded in the locally-cancelled set, and a `VOICE_ASSISTANT_TIMER_CANCELLED` event
 *   is emitted on the subscription flow.
 * - If no timer with that id is ringing or pending, nothing happens.
 */
fun cancelTimer(timerId: String) {
    scope.launch {
        // A ringing timer is stopped rather than cancelled.
        if (_ringingTimer.value?.id == timerId) {
            doStopTimer()
            return@launch
        }

        val pending = _pendingTimers.value[timerId] ?: return@launch

        // Remember the id before removing the timer so a later event for the same timer
        // can be recognised as belonging to a locally-cancelled timer.
        _cancelledTimerIds.add(timerId)
        _pendingTimers.update { timers -> timers - timerId }

        val cancelledEvent = voiceAssistantTimerEventResponse {
            eventType = VoiceAssistantTimerEvent.VOICE_ASSISTANT_TIMER_CANCELLED
            this.timerId = timerId
            name = pending.name
            totalSeconds = pending.totalDuration.inWholeSeconds.toInt()
            secondsLeft = 0
            isActive = false
        }
        subscription.emit(cancelledEvent)
    }
}

/**
 * Extends the pending timer identified by [timerId] by [seconds] seconds. The work runs
 * asynchronously on [scope].
 *
 * Both the timer's total duration and its remaining time grow by the same amount. The
 * updated timer replaces the old entry in the pending timers and a
 * `VOICE_ASSISTANT_TIMER_UPDATED` event is emitted on the subscription flow.
 *
 * Nothing happens if no pending timer has that id, or if the timer is in the ringing state.
 *
 * @param timerId id of the pending timer to extend.
 * @param seconds number of seconds to add to the timer.
 */
fun addTimeToTimer(timerId: String, seconds: Int) {
    scope.launch {
        val existing = _pendingTimers.value[timerId] ?: return@launch

        val extension = seconds.seconds
        val timestamp = Clock.System.now()
        val remainingAfter = existing.remainingDuration(timestamp) + extension
        val totalAfter = existing.totalDuration + extension

        val extended = when (existing) {
            // A running timer is tracked by its end instant, so shift that instant.
            is VoiceTimer.Running ->
                existing.copy(totalDuration = totalAfter, endsAt = timestamp + remainingAfter)

            // A paused timer stores its remaining time directly.
            is VoiceTimer.Paused ->
                existing.copy(totalDuration = totalAfter, remainingDuration = remainingAfter)

            // A ringing timer is left untouched.
            is VoiceTimer.Ringing -> return@launch
        }
        _pendingTimers.update { timers -> timers + (timerId to extended) }

        val updatedEvent = voiceAssistantTimerEventResponse {
            eventType = VoiceAssistantTimerEvent.VOICE_ASSISTANT_TIMER_UPDATED
            this.timerId = timerId
            name = existing.name
            totalSeconds = totalAfter.inWholeSeconds.toInt()
            secondsLeft = remainingAfter.inWholeSeconds.toInt()
            isActive = existing is VoiceTimer.Running
        }
        subscription.emit(updatedEvent)
    }
}

@RequiresPermission(Manifest.permission.RECORD_AUDIO)
private fun startVoiceInput() = isConnected
.flatMapLatest { isConnected ->
Expand All @@ -99,11 +155,13 @@ class VoiceAssistant(
/**
 * Handles a new connection: marks the assistant as connected, calls `resetState()`
 * with no argument, and then asks [voiceOutput] to start wake word listening.
 */
suspend fun onConnected() {
// Publish the connected flag first; the state reset and wake word start follow it.
isConnected.value = true
resetState()
voiceOutput.startWakeWordListening()
}

/**
 * Handles a lost connection: marks the assistant as disconnected, resets its state to
 * [Disconnected], and then asks [voiceOutput] to stop wake word listening.
 */
suspend fun onDisconnected() {
// Clear the connected flag first; the state reset and wake word stop follow it.
isConnected.value = false
resetState(Disconnected)
voiceOutput.stopWakeWordListening()
}

suspend fun handleMessage(message: MessageLite) {
Expand Down Expand Up @@ -157,10 +215,14 @@ class VoiceAssistant(
}

VoiceAssistantTimerEvent.VOICE_ASSISTANT_TIMER_CANCELLED -> {
_cancelledTimerIds -= timer.id
_pendingTimers.update { it - timer.id }
}

VoiceAssistantTimerEvent.VOICE_ASSISTANT_TIMER_FINISHED -> {
// If we locally cancelled this timer, ignore the stale FINISHED event.
if (_cancelledTimerIds.remove(timer.id)) return

// Remove the timer now and stash it into _ringingTimer to avoid
// race conditions if several timers finish at the same time.
val wasNotRinging = !isRinging
Expand Down Expand Up @@ -260,7 +322,10 @@ class VoiceAssistant(
voiceInput.isStreaming = it
},
stateChanged = { _state.value = it },
ended = { onTtsFinished(it) }
ended = { onTtsFinished(it) },
onTranscriptReset = { _transcript.value = null },
onSttText = { text -> _transcript.update { (it ?: Transcript()).copy(sttText = text) } },
onTtsText = { text -> _transcript.update { (it ?: Transcript()).copy(ttsText = text) } }
)

private suspend fun doStopAssistant() {
Expand Down Expand Up @@ -317,6 +382,10 @@ class VoiceAssistant(
voiceInput.isStreaming = false
voiceOutput.stopTTS()
_state.value = newState
if (newState == Disconnected) {
_cancelledTimerIds.clear()
_pendingTimers.value = emptyMap()
}
}

override fun close() {
Expand Down
Loading
Loading