Fixed TTS queuing mechanism and volume override resets (#4480)

Fixed the TTS queuing mechanism and volume override resets. A new TextToSpeechEngine abstraction has been introduced, along with a default Android TTS implementation and a TextToSpeechClient as the entry point. The client now features an independent queue, allowing for better control over the start and finish of each utterance and separating it from the engine, which focuses solely on playback. This resolves issues with interrupting utterances and with volume overrides not resetting correctly when utterances are queued or force-stopped.
home-assistant · Jul 12, 2024 · 2281e64 · 2281e64
1 parent 851b1c7
commit 2281e64
Show file tree

Hide file tree

Showing 8 changed files with 369 additions and 127 deletions.
diff --git a/app/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt b/app/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt
@@ -65,11 +65,10 @@ import io.homeassistant.companion.android.common.notifications.handleText
 import io.homeassistant.companion.android.common.notifications.parseColor
 import io.homeassistant.companion.android.common.notifications.parseVibrationPattern
 import io.homeassistant.companion.android.common.notifications.prepareText
-import io.homeassistant.companion.android.common.util.TextToSpeechData
 import io.homeassistant.companion.android.common.util.cancelGroupIfNeeded
 import io.homeassistant.companion.android.common.util.getActiveNotification
-import io.homeassistant.companion.android.common.util.speakText
-import io.homeassistant.companion.android.common.util.stopTTS
+import io.homeassistant.companion.android.common.util.tts.TextToSpeechClient
+import io.homeassistant.companion.android.common.util.tts.TextToSpeechData
 import io.homeassistant.companion.android.database.notification.NotificationDao
 import io.homeassistant.companion.android.database.notification.NotificationItem
 import io.homeassistant.companion.android.database.sensor.SensorDao
@@ -111,7 +110,8 @@ class MessagingManager @Inject constructor(
     private val prefsRepository: PrefsRepository,
     private val notificationDao: NotificationDao,
     private val sensorDao: SensorDao,
-    private val settingsDao: SettingsDao
+    private val settingsDao: SettingsDao,
+    private val textToSpeechClient: TextToSpeechClient
 ) {
     companion object {
         const val TAG = "MessagingService"
@@ -323,9 +323,9 @@ class MessagingManager @Inject constructor(
                     removeNotificationChannel(jsonData[NotificationData.CHANNEL]!!)
                 }
                 jsonData[NotificationData.MESSAGE] == TextToSpeechData.TTS -> {
-                    speakText(context, jsonData)
+                    textToSpeechClient.speakText(jsonData)
                 }
-                jsonData[NotificationData.MESSAGE] == TextToSpeechData.COMMAND_STOP_TTS -> stopTTS()
+                jsonData[NotificationData.MESSAGE] == TextToSpeechData.COMMAND_STOP_TTS -> textToSpeechClient.stopTTS()
                 jsonData[NotificationData.MESSAGE] in DEVICE_COMMANDS && allowCommands -> {
                     Log.d(TAG, "Processing device command")
                     when (jsonData[NotificationData.MESSAGE]) {

diff --git a/common/src/main/java/io/homeassistant/companion/android/common/data/DataModule.kt b/common/src/main/java/io/homeassistant/companion/android/common/data/DataModule.kt
@@ -27,6 +27,8 @@ import io.homeassistant.companion.android.common.data.servers.ServerManager
 import io.homeassistant.companion.android.common.data.servers.ServerManagerImpl
 import io.homeassistant.companion.android.common.data.wifi.WifiHelper
 import io.homeassistant.companion.android.common.data.wifi.WifiHelperImpl
+import io.homeassistant.companion.android.common.util.tts.AndroidTextToSpeechEngine
+import io.homeassistant.companion.android.common.util.tts.TextToSpeechClient
 import java.util.UUID
 import javax.inject.Named
 import javax.inject.Singleton
@@ -144,6 +146,12 @@ abstract class DataModule {
         @Provides
         @Singleton
         fun packageManager(@ApplicationContext appContext: Context) = appContext.packageManager
+
+        /**
+         * Provides the application-wide [TextToSpeechClient], backed by an [AndroidTextToSpeechEngine]
+         * built from the same application [Context].
+         */
+        @Provides
+        @Singleton
+        fun providesTextToSpeechClient(
+            @ApplicationContext appContext: Context
+        ): TextToSpeechClient {
+            val engine = AndroidTextToSpeechEngine(appContext)
+            return TextToSpeechClient(appContext, engine)
+        }
     }
 
     @Binds

diff --git a/common/src/main/java/io/homeassistant/companion/android/common/util/TextToSpeech.kt b/common/src/main/java/io/homeassistant/companion/android/common/util/TextToSpeech.kt
diff --git a/...main/java/io/homeassistant/companion/android/common/util/tts/AndroidTextToSpeechEngine.kt b/...main/java/io/homeassistant/companion/android/common/util/tts/AndroidTextToSpeechEngine.kt
@@ -0,0 +1,89 @@
+package io.homeassistant.companion.android.common.util.tts
+
+import android.content.Context
+import android.speech.tts.TextToSpeech
+import android.speech.tts.UtteranceProgressListener
+import android.util.Log
+import kotlin.coroutines.resume
+import kotlinx.coroutines.suspendCancellableCoroutine
+import kotlinx.coroutines.sync.Mutex
+import kotlinx.coroutines.sync.withLock
+
+private const val TAG = "AndroidTTSEngine"
+
+/**
+ * Implementation of [TextToSpeechEngine] that uses the default [TextToSpeech] engine found on the device.
+ *
+ * The platform engine is created lazily by [initialize] and torn down by [release]. [play] suspends until
+ * the platform reports the utterance as done or failed through an [UtteranceProgressListener].
+ */
+class AndroidTextToSpeechEngine(private val applicationContext: Context) : TextToSpeechEngine {
+
+    // Serializes creation of the platform engine and reads of the current instance in play().
+    private val initMutex = Mutex()
+    private var textToSpeech: TextToSpeech? = null
+
+    // Last utterance whose onStart callback applied a volume override; used by release() to undo it.
+    private var lastVolumeOverridingUtterance: Utterance? = null
+
+    /**
+     * Creates the platform [TextToSpeech] instance and suspends until its init listener reports a status.
+     *
+     * @return success if an instance already exists or the listener reported [TextToSpeech.SUCCESS];
+     * otherwise a failure wrapping a [RuntimeException] that carries the reported status code
+     */
+    override suspend fun initialize(): Result<Unit> = initMutex.withLock {
+        if (textToSpeech != null) {
+            Result.success(Unit)
+        } else {
+            suspendCancellableCoroutine { continuation ->
+                // The init listener may run synchronously inside the TextToSpeech constructor (before the
+                // instance can be stored), so failures are tracked with a flag instead of relying only on
+                // the `textToSpeech` property being set at callback time.
+                var initFailed = false
+                val engine = TextToSpeech(applicationContext) { code ->
+                    if (code == TextToSpeech.SUCCESS) {
+                        continuation.resume(Result.success(Unit))
+                    } else {
+                        initFailed = true
+                        textToSpeech?.shutdown()
+                        textToSpeech = null
+                        continuation.resume(
+                            Result.failure(RuntimeException("Failed to initialize TTS client. Code: $code."))
+                        )
+                    }
+                }
+                if (initFailed) {
+                    // Failure was reported during construction: never store the unusable instance,
+                    // otherwise the next initialize() call would wrongly report success.
+                    engine.shutdown()
+                } else {
+                    textToSpeech = engine
+                }
+            }
+        }
+    }
+
+    /**
+     * Speaks [utterance] with [TextToSpeech.QUEUE_FLUSH] and suspends until playback finishes or fails.
+     *
+     * The utterance's volume adjustment is applied when playback starts and reset when it is done or errors.
+     *
+     * @return success when the utterance is done; a failure if the engine is not initialized, the request
+     * could not be enqueued, or the platform reported a playback error
+     */
+    override suspend fun play(utterance: Utterance): Result<Unit> {
+        val engine = initMutex.withLock { textToSpeech }
+            ?: return Result.failure(IllegalStateException("TextToSpeechEngine not initialized."))
+        return suspendCancellableCoroutine { continuation ->
+            val resumeLock = Any()
+
+            // Resets the volume and resumes the caller at most once. A failed speak() call may both
+            // dispatch onError and return ERROR, and resuming a continuation twice would throw.
+            fun finish(result: Result<Unit>) {
+                utterance.streamVolumeAdjustment.resetVolume()
+                synchronized(resumeLock) {
+                    if (continuation.isActive) {
+                        continuation.resume(result)
+                    }
+                }
+            }
+
+            engine.setAudioAttributes(utterance.audioAttributes)
+            engine.setOnUtteranceProgressListener(
+                object : UtteranceProgressListener() {
+                    override fun onStart(p0: String?) {
+                        utterance.streamVolumeAdjustment.overrideVolume()
+                        lastVolumeOverridingUtterance = utterance
+                    }
+
+                    override fun onDone(p0: String?) {
+                        Log.d(TAG, "Done speaking; utterance ID: $p0")
+                        finish(Result.success(Unit))
+                    }
+
+                    @Deprecated("Deprecated in Java")
+                    override fun onError(utteranceId: String?) {
+                        finish(Result.failure(RuntimeException("Playback error; utterance ID: $utteranceId")))
+                    }
+
+                    override fun onError(utteranceId: String?, errorCode: Int) {
+                        finish(Result.failure(RuntimeException("Playback error; utterance ID: $utteranceId; error code: $errorCode")))
+                    }
+                }
+            )
+            val status = engine.speak(utterance.text, TextToSpeech.QUEUE_FLUSH, null, utterance.id)
+            if (status == TextToSpeech.SUCCESS) {
+                Log.d(TAG, "Speaking; utterance ID: ${utterance.id}")
+            } else {
+                // The request was rejected, so no completion callback is guaranteed to arrive.
+                // Fail right away instead of leaving the caller suspended indefinitely.
+                finish(Result.failure(RuntimeException("Failed to enqueue utterance; utterance ID: ${utterance.id}")))
+            }
+        }
+    }
+
+    /**
+     * Stops any playback, shuts the platform engine down and forgets it, so that a later [initialize]
+     * creates a fresh instance.
+     */
+    override fun release() {
+        if (textToSpeech?.isSpeaking == true) {
+            // resets the volume back if the playback was interrupted
+            lastVolumeOverridingUtterance?.streamVolumeAdjustment?.resetVolume()
+        }
+        // Drop the reference so the released engine does not keep the utterance alive.
+        lastVolumeOverridingUtterance = null
+        textToSpeech?.stop()
+        textToSpeech?.shutdown()
+        textToSpeech = null
+    }
+}
diff --git a/common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeech.kt b/common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeech.kt
@@ -0,0 +1,106 @@
+package io.homeassistant.companion.android.common.util.tts
+
+import android.media.AudioAttributes
+import android.media.AudioManager
+
+/**
+ * String constants used to recognize text-to-speech requests in notification data.
+ */
+object TextToSpeechData {
+    // Message value that requests an utterance to be spoken.
+    const val TTS = "TTS"
+    // NOTE(review): presumably the data key holding the text to speak; its usage is not visible here.
+    const val TTS_TEXT = "tts_text"
+
+    // Message value that requests the current text-to-speech playback to stop.
+    const val COMMAND_STOP_TTS = "command_stop_tts"
+}
+
+/**
+ * Interface for a text to speech engine.
+ *
+ * An engine is prepared with [initialize], plays one [Utterance] at a time via [play], and is torn down
+ * with [release].
+ */
+interface TextToSpeechEngine {
+
+    /**
+     * Suspends until the engine is initialized.
+     *
+     * If already initialized, a successful [Result] returns immediately.
+     *
+     * @return success or initialization error [Throwable]
+     */
+    suspend fun initialize(): Result<Unit>
+
+    /**
+     * Suspends until the engine finishes the playback.
+     *
+     * @param utterance the utterance to be played
+     * @return success or playback error [Throwable]
+     */
+    suspend fun play(utterance: Utterance): Result<Unit>
+
+    /**
+     * Stops all playback and releases the engine's resources.
+     */
+    fun release()
+}
+
+/**
+ * Data model for an utterance to be played by a [TextToSpeechEngine].
+ *
+ * @param id a unique identifier
+ * @param text message to be synthesized
+ * @param streamVolumeAdjustment utility object to adjust the volume ahead of this utterance's playback,
+ * and reset it back after it's finished
+ * @param audioAttributes attributes to be set for the media player responsible for the audio playback
+ */
+data class Utterance(
+    val id: String,
+    val text: String,
+    val streamVolumeAdjustment: StreamVolumeAdjustment,
+    val audioAttributes: AudioAttributes
+)
+
+/**
+ * Utility object to adjust the volume ahead of an utterance's playback, and reset it back after it's finished.
+ */
+sealed class StreamVolumeAdjustment {
+
+    /**
+     * Applies volume adjustment.
+     */
+    abstract fun overrideVolume()
+
+    /**
+     * Resets the volume back to pre-adjustment levels. Does nothing if [overrideVolume] wasn't called before.
+     */
+    abstract fun resetVolume()
+
+    /**
+     * Object that does no adjustments to audio stream's volume level.
+     */
+    data object None : StreamVolumeAdjustment() {
+        override fun overrideVolume() {
+            // no-op
+        }
+
+        override fun resetVolume() {
+            // no-op
+        }
+    }
+
+    /**
+     * Object that maximizes the volume of a specific [streamId].
+     *
+     * The maximum volume is read from [audioManager] once, when the object is created.
+     */
+    class Maximize(
+        private val audioManager: AudioManager,
+        private val streamId: Int
+    ) : StreamVolumeAdjustment() {
+        private val maxVolume: Int = audioManager.getStreamMaxVolume(streamId)
+
+        // Stream volume captured before the override was applied; null while no override is pending.
+        private var volumeBeforeOverride: Int? = null
+
+        /**
+         * Remembers the current stream volume and raises the stream to its maximum volume.
+         */
+        override fun overrideVolume() {
+            // Capture the volume only for the first override. A repeated call without a reset in between
+            // would otherwise store the already maximized level and lose the real pre-adjustment volume.
+            if (volumeBeforeOverride == null) {
+                volumeBeforeOverride = audioManager.getStreamVolume(streamId)
+            }
+            audioManager.setStreamVolume(streamId, maxVolume, 0)
+        }
+
+        /**
+         * Restores the volume remembered by [overrideVolume], if any, and clears it so that further calls
+         * do nothing until the next override.
+         */
+        override fun resetVolume() {
+            volumeBeforeOverride?.let { volume ->
+                audioManager.setStreamVolume(streamId, volume, 0)
+            }
+            volumeBeforeOverride = null
+        }
+    }
+}