From 2281e64f8112ac9d8cf64882e8d3d09952809455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Paczos?= Date: Fri, 12 Jul 2024 18:32:27 +0200 Subject: [PATCH] Fixed TTS queuing mechanism and volume override resets (#4480) fixed TTS queuing mechanism and volume override resets A new TextToSpeechEngine abstraction has been introduced, along with a default Android TTS implementation and a TextToSpeechClient as the entry point. The client now features an independent queue, allowing for better control over the start and finish of each utterance and separating it from the engine, which focuses solely on playback. This resolves issues with interrupting utterances and volume overrides not resetting correctly when utterances are queued or force-stopped. --- .../android/notifications/MessagingManager.kt | 12 +- .../android/common/data/DataModule.kt | 8 + .../android/common/util/TextToSpeech.kt | 114 ------------- .../util/tts/AndroidTextToSpeechEngine.kt | 89 ++++++++++ .../android/common/util/tts/TextToSpeech.kt | 106 ++++++++++++ .../common/util/tts/TextToSpeechClient.kt | 152 ++++++++++++++++++ common/src/main/res/values/strings.xml | 3 +- .../android/notifications/MessagingManager.kt | 12 +- 8 files changed, 369 insertions(+), 127 deletions(-) delete mode 100755 common/src/main/java/io/homeassistant/companion/android/common/util/TextToSpeech.kt create mode 100644 common/src/main/java/io/homeassistant/companion/android/common/util/tts/AndroidTextToSpeechEngine.kt create mode 100644 common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeech.kt create mode 100644 common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeechClient.kt diff --git a/app/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt b/app/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt index 49644eb9d99..99b0de2ef53 100644 --- 
a/app/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt +++ b/app/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt @@ -65,11 +65,10 @@ import io.homeassistant.companion.android.common.notifications.handleText import io.homeassistant.companion.android.common.notifications.parseColor import io.homeassistant.companion.android.common.notifications.parseVibrationPattern import io.homeassistant.companion.android.common.notifications.prepareText -import io.homeassistant.companion.android.common.util.TextToSpeechData import io.homeassistant.companion.android.common.util.cancelGroupIfNeeded import io.homeassistant.companion.android.common.util.getActiveNotification -import io.homeassistant.companion.android.common.util.speakText -import io.homeassistant.companion.android.common.util.stopTTS +import io.homeassistant.companion.android.common.util.tts.TextToSpeechClient +import io.homeassistant.companion.android.common.util.tts.TextToSpeechData import io.homeassistant.companion.android.database.notification.NotificationDao import io.homeassistant.companion.android.database.notification.NotificationItem import io.homeassistant.companion.android.database.sensor.SensorDao @@ -111,7 +110,8 @@ class MessagingManager @Inject constructor( private val prefsRepository: PrefsRepository, private val notificationDao: NotificationDao, private val sensorDao: SensorDao, - private val settingsDao: SettingsDao + private val settingsDao: SettingsDao, + private val textToSpeechClient: TextToSpeechClient ) { companion object { const val TAG = "MessagingService" @@ -323,9 +323,9 @@ class MessagingManager @Inject constructor( removeNotificationChannel(jsonData[NotificationData.CHANNEL]!!) 
} jsonData[NotificationData.MESSAGE] == TextToSpeechData.TTS -> { - speakText(context, jsonData) + textToSpeechClient.speakText(jsonData) } - jsonData[NotificationData.MESSAGE] == TextToSpeechData.COMMAND_STOP_TTS -> stopTTS() + jsonData[NotificationData.MESSAGE] == TextToSpeechData.COMMAND_STOP_TTS -> textToSpeechClient.stopTTS() jsonData[NotificationData.MESSAGE] in DEVICE_COMMANDS && allowCommands -> { Log.d(TAG, "Processing device command") when (jsonData[NotificationData.MESSAGE]) { diff --git a/common/src/main/java/io/homeassistant/companion/android/common/data/DataModule.kt b/common/src/main/java/io/homeassistant/companion/android/common/data/DataModule.kt index 2514b62a18b..7bc7cfe3896 100644 --- a/common/src/main/java/io/homeassistant/companion/android/common/data/DataModule.kt +++ b/common/src/main/java/io/homeassistant/companion/android/common/data/DataModule.kt @@ -27,6 +27,8 @@ import io.homeassistant.companion.android.common.data.servers.ServerManager import io.homeassistant.companion.android.common.data.servers.ServerManagerImpl import io.homeassistant.companion.android.common.data.wifi.WifiHelper import io.homeassistant.companion.android.common.data.wifi.WifiHelperImpl +import io.homeassistant.companion.android.common.util.tts.AndroidTextToSpeechEngine +import io.homeassistant.companion.android.common.util.tts.TextToSpeechClient import java.util.UUID import javax.inject.Named import javax.inject.Singleton @@ -144,6 +146,12 @@ abstract class DataModule { @Provides @Singleton fun packageManager(@ApplicationContext appContext: Context) = appContext.packageManager + + @Provides + @Singleton + fun providesTextToSpeechClient( + @ApplicationContext appContext: Context + ): TextToSpeechClient = TextToSpeechClient(appContext, AndroidTextToSpeechEngine(appContext)) } @Binds diff --git a/common/src/main/java/io/homeassistant/companion/android/common/util/TextToSpeech.kt b/common/src/main/java/io/homeassistant/companion/android/common/util/TextToSpeech.kt 
deleted file mode 100755 index 12023ff52f0..00000000000 --- a/common/src/main/java/io/homeassistant/companion/android/common/util/TextToSpeech.kt +++ /dev/null @@ -1,114 +0,0 @@ -package io.homeassistant.companion.android.common.util - -import android.content.Context -import android.media.AudioAttributes -import android.media.AudioManager -import android.os.Handler -import android.os.Looper -import android.speech.tts.TextToSpeech -import android.speech.tts.UtteranceProgressListener -import android.util.Log -import android.widget.Toast -import androidx.core.content.getSystemService -import io.homeassistant.companion.android.common.R -import io.homeassistant.companion.android.common.notifications.NotificationData - -object TextToSpeechData { - const val TTS = "TTS" - const val TTS_TEXT = "tts_text" - - const val COMMAND_STOP_TTS = "command_stop_tts" -} - -private const val TAG = "TextToSpeech" -private var textToSpeech: TextToSpeech? = null - -fun speakText( - context: Context, - data: Map -) { - Log.d(TAG, "Sending text to TTS") - var tts = data[TextToSpeechData.TTS_TEXT] - val audioManager = context.getSystemService() - val currentAlarmVolume = audioManager?.getStreamVolume(AudioManager.STREAM_ALARM) - val maxAlarmVolume = audioManager?.getStreamMaxVolume(AudioManager.STREAM_ALARM) - if (tts.isNullOrEmpty()) { - tts = context.getString(R.string.tts_no_text) - } - textToSpeech = TextToSpeech( - context - ) { - if (it == TextToSpeech.SUCCESS) { - val listener = object : UtteranceProgressListener() { - override fun onStart(p0: String?) { - if (data[NotificationData.MEDIA_STREAM] == NotificationData.ALARM_STREAM_MAX) { - audioManager?.setStreamVolume( - AudioManager.STREAM_ALARM, - maxAlarmVolume!!, - 0 - ) - } - } - - override fun onDone(p0: String?) 
{ - textToSpeech?.stop() - textToSpeech?.shutdown() - if (data[NotificationData.MEDIA_STREAM] == NotificationData.ALARM_STREAM_MAX) { - audioManager?.setStreamVolume( - AudioManager.STREAM_ALARM, - currentAlarmVolume!!, - 0 - ) - } - } - - @Deprecated("Deprecated in Java") - override fun onError(p0: String?) { - textToSpeech?.stop() - textToSpeech?.shutdown() - if (data[NotificationData.MEDIA_STREAM] == NotificationData.ALARM_STREAM_MAX) { - audioManager?.setStreamVolume( - AudioManager.STREAM_ALARM, - currentAlarmVolume!!, - 0 - ) - } - } - - override fun onStop(utteranceId: String?, interrupted: Boolean) { - if (data[NotificationData.MEDIA_STREAM] == NotificationData.ALARM_STREAM_MAX) { - audioManager?.setStreamVolume( - AudioManager.STREAM_ALARM, - currentAlarmVolume!!, - 0 - ) - } - } - } - textToSpeech?.setOnUtteranceProgressListener(listener) - if (data[NotificationData.MEDIA_STREAM] in NotificationData.ALARM_STREAMS) { - val audioAttributes = AudioAttributes.Builder() - .setContentType(AudioAttributes.CONTENT_TYPE_SONIFICATION) - .setUsage(AudioAttributes.USAGE_ALARM) - .build() - textToSpeech?.setAudioAttributes(audioAttributes) - } - textToSpeech?.speak(tts, TextToSpeech.QUEUE_ADD, null, "") - Log.d(TAG, "speaking text") - } else { - Handler(Looper.getMainLooper()).post { - Toast.makeText( - context, - context.getString(R.string.tts_error, tts), - Toast.LENGTH_LONG - ).show() - } - } - } -} - -fun stopTTS() { - Log.d(TAG, "Stopping TTS") - textToSpeech?.stop() - textToSpeech?.shutdown() -} diff --git a/common/src/main/java/io/homeassistant/companion/android/common/util/tts/AndroidTextToSpeechEngine.kt b/common/src/main/java/io/homeassistant/companion/android/common/util/tts/AndroidTextToSpeechEngine.kt new file mode 100644 index 00000000000..8fa7f0ea006 --- /dev/null +++ b/common/src/main/java/io/homeassistant/companion/android/common/util/tts/AndroidTextToSpeechEngine.kt @@ -0,0 +1,89 @@ +package io.homeassistant.companion.android.common.util.tts + 
+import android.content.Context +import android.speech.tts.TextToSpeech +import android.speech.tts.UtteranceProgressListener +import android.util.Log +import kotlin.coroutines.resume +import kotlinx.coroutines.suspendCancellableCoroutine +import kotlinx.coroutines.sync.Mutex +import kotlinx.coroutines.sync.withLock + +private const val TAG = "AndroidTTSEngine" + +/** + * Implementation of [TextToSpeechEngine] that uses the default [TextToSpeech] engine found on the device. + */ +class AndroidTextToSpeechEngine(private val applicationContext: Context) : TextToSpeechEngine { + + private val initMutex = Mutex() + private var textToSpeech: TextToSpeech? = null + private var lastVolumeOverridingUtterance: Utterance? = null + + override suspend fun initialize(): Result = initMutex.withLock { + if (textToSpeech != null) { + Result.success(Unit) + } else { + suspendCancellableCoroutine { continuation -> + textToSpeech = TextToSpeech(applicationContext) { code -> + if (code == TextToSpeech.SUCCESS) { + continuation.resume(Result.success(Unit)) + } else { + textToSpeech?.shutdown() + textToSpeech = null + continuation.resume( + Result.failure(RuntimeException("Failed to initialize TTS client. Code: $code.")) + ) + } + } + } + } + } + + override suspend fun play(utterance: Utterance): Result { + val textToSpeech = initMutex.withLock { textToSpeech } + return suspendCancellableCoroutine { continuation -> + if (textToSpeech == null) { + continuation.resume(Result.failure(IllegalStateException("TextToSpeechEngine not initialized."))) + } else { + textToSpeech.setAudioAttributes(utterance.audioAttributes) + val listener = object : UtteranceProgressListener() { + override fun onStart(p0: String?) { + utterance.streamVolumeAdjustment.overrideVolume() + lastVolumeOverridingUtterance = utterance + } + + override fun onDone(p0: String?) 
{ + Log.d(TAG, "Done speaking; utterance ID: $p0") + utterance.streamVolumeAdjustment.resetVolume() + continuation.resume(Result.success(Unit)) + } + + @Deprecated("Deprecated in Java") + override fun onError(utteranceId: String?) { + utterance.streamVolumeAdjustment.resetVolume() + continuation.resume(Result.failure(RuntimeException("Playback error; utterance ID: $utteranceId"))) + } + + override fun onError(utteranceId: String?, errorCode: Int) { + utterance.streamVolumeAdjustment.resetVolume() + continuation.resume(Result.failure(RuntimeException("Playback error; utterance ID: $utteranceId; error code: $errorCode"))) + } + } + textToSpeech.setOnUtteranceProgressListener(listener) + textToSpeech.speak(utterance.text, TextToSpeech.QUEUE_FLUSH, null, utterance.id) + Log.d(TAG, "Speaking; utterance ID: ${utterance.id}") + } + } + } + + override fun release() { + if (textToSpeech?.isSpeaking == true) { + // resets the volume back if the playback was interrupted + lastVolumeOverridingUtterance?.streamVolumeAdjustment?.resetVolume() + } + textToSpeech?.stop() + textToSpeech?.shutdown() + textToSpeech = null + } +} diff --git a/common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeech.kt b/common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeech.kt new file mode 100644 index 00000000000..2ecf96a7584 --- /dev/null +++ b/common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeech.kt @@ -0,0 +1,106 @@ +package io.homeassistant.companion.android.common.util.tts + +import android.media.AudioAttributes +import android.media.AudioManager + +object TextToSpeechData { + const val TTS = "TTS" + const val TTS_TEXT = "tts_text" + + const val COMMAND_STOP_TTS = "command_stop_tts" +} + +/** + * Interface for a text to speech engine. + */ +interface TextToSpeechEngine { + + /** + * Suspends until the engine is initialized. + * + * If already initialized, a successful [Result] returns immediately. 
+ * + * @return success or initialization error [Throwable] + */ + suspend fun initialize(): Result + + /** + * Suspends until the engine finishes the playback. + * + * @return success or playback error [Throwable] + */ + suspend fun play(utterance: Utterance): Result + + /** + * Stops all playback and releases the engine's resources. + */ + fun release() +} + +/** + * Data model for an utterance to be played. + * + * @param id a unique identifier + * @param text message to be synthesized + * @param streamVolumeAdjustment utility object to adjust the volume ahead of this utterance's playback, + * and reset it back after it's finished + * @param audioAttributes attributes to be set for the media player responsible for the audio playback + */ +data class Utterance( + val id: String, + val text: String, + val streamVolumeAdjustment: StreamVolumeAdjustment, + val audioAttributes: AudioAttributes +) + +/** + * Utility object to adjust the volume ahead of this utterance's playback, and reset it back after it's finished. + */ +sealed class StreamVolumeAdjustment { + + /** + * Applies volume adjustment. + */ + abstract fun overrideVolume() + + /** + * Resets the volume back to pre-adjustment levels. Does nothing if [overrideVolume] wasn't called before. + */ + abstract fun resetVolume() + + /** + * Object that does no adjustments to audio stream's volume level. + */ + data object None : StreamVolumeAdjustment() { + override fun overrideVolume() { + // no-op + } + + override fun resetVolume() { + // no-op + } + } + + /** + * Object that maximizes the volume of a specific [streamId]. + */ + class Maximize( + private val audioManager: AudioManager, + private val streamId: Int + ) : StreamVolumeAdjustment() { + private val maxVolume: Int = audioManager.getStreamMaxVolume(streamId) + private var resetVolume: Int? 
= null + + override fun overrideVolume() { + resetVolume = audioManager.getStreamVolume(streamId) + audioManager.setStreamVolume(streamId, maxVolume, 0) + } + + override fun resetVolume() { + resetVolume?.let { volume -> + audioManager.setStreamVolume(streamId, volume, 0) + } + resetVolume = null + } + } +} diff --git a/common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeechClient.kt b/common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeechClient.kt new file mode 100644 index 00000000000..f0dd5dcadf8 --- /dev/null +++ b/common/src/main/java/io/homeassistant/companion/android/common/util/tts/TextToSpeechClient.kt @@ -0,0 +1,152 @@ +package io.homeassistant.companion.android.common.util.tts + +import android.content.Context +import android.media.AudioAttributes +import android.media.AudioManager +import android.util.Log +import android.widget.Toast +import androidx.core.content.getSystemService +import io.homeassistant.companion.android.common.R +import io.homeassistant.companion.android.common.notifications.NotificationData +import java.util.UUID +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.Job +import kotlinx.coroutines.cancelChildren +import kotlinx.coroutines.launch + +private const val TAG = "TextToSpeechClient" + +/** + * Entry point for speech synthesis and playback. + * + * Maintains a FIFO queue of utterances. To initiate playback or further add messages to the queue use [speakText]. + * The queue can be cleared and playback immediately interrupted with [stopTTS]. 
+ * + * @param textToSpeechEngine [TextToSpeechEngine] implementation to synthesize and play back a single message + */ +class TextToSpeechClient( + private val applicationContext: Context, + private val textToSpeechEngine: TextToSpeechEngine +) { + private val utteranceQueue: ArrayDeque = ArrayDeque() + + private val mainJob = Job() + private val mainScope: CoroutineScope = CoroutineScope(Dispatchers.Main + mainJob) + + private var isPlaying = false + + /** + * Queues a text message to be played back if [data] with a [TextToSpeechData.TTS_TEXT] key is provided. + * + * If [data] also contains [NotificationData.MEDIA_STREAM] key and it's one of [NotificationData.ALARM_STREAMS], an [AudioManager.STREAM_ALARM] will be used for playback. + * Additionally, if it's specifically [NotificationData.ALARM_STREAM_MAX], the channel's volume will be maximized during playback. + */ + fun speakText(data: Map) { + mainScope.launch { + val utteranceId = UUID.randomUUID().toString() + var tts = data[TextToSpeechData.TTS_TEXT] + if (tts.isNullOrEmpty()) { + tts = applicationContext.getString(R.string.tts_no_text) + } + Log.d(TAG, "processing utterance ID: $utteranceId; msg: $tts") + + val streamVolumeAdjustment = getStreamVolumeAdjustment(applicationContext, data) + val audioAttributes = getAudioAttributes(data) + utteranceQueue.add( + Utterance( + id = utteranceId, + text = tts, + streamVolumeAdjustment = streamVolumeAdjustment, + audioAttributes = audioAttributes + ) + ) + if (!isPlaying) { + play() + } + } + } + + /** + * Interrupts any playback and clears the queue. + */ + fun stopTTS() { + Log.d(TAG, "stopped TTS") + mainJob.cancelChildren() + utteranceQueue.clear() + textToSpeechEngine.release() + isPlaying = false + } + + /** + * Plays each queued [Utterance] in sequence until [utteranceQueue] is empty. + * There can be further additions to the queue while a message is playing which will be picked up in the running playback loop. 
+ */ + private suspend fun play() { + isPlaying = true + textToSpeechEngine.initialize().onFailure { throwable -> + Log.e( + TAG, + "Failed to initialize engine.", + throwable + ) + handleError(applicationContext.getString(R.string.tts_error_init)) + utteranceQueue.clear() + }.onSuccess { + while (utteranceQueue.isNotEmpty()) { + utteranceQueue.removeFirst().let { utterance -> + textToSpeechEngine.play(utterance).onFailure { throwable -> + Log.e(TAG, "Failed to play utterance '${utterance.id}'", throwable) + handleError( + applicationContext.getString(R.string.tts_error_utterance, utterance.text) + ) + } + } + } + textToSpeechEngine.release() + } + isPlaying = false + } + + private fun handleError(msg: String) { + mainScope.launch { + Toast.makeText( + applicationContext, + msg, + Toast.LENGTH_LONG + ).show() + } + } + + private companion object { + private fun getStreamVolumeAdjustment( + context: Context, + data: Map + ): StreamVolumeAdjustment { + val audioManager = context.getSystemService() + return if ( + audioManager != null && + data[NotificationData.MEDIA_STREAM] in NotificationData.ALARM_STREAMS && + data[NotificationData.MEDIA_STREAM] == NotificationData.ALARM_STREAM_MAX + ) { + StreamVolumeAdjustment.Maximize( + audioManager = audioManager, + streamId = AudioManager.STREAM_ALARM + ) + } else { + StreamVolumeAdjustment.None + } + } + + private fun getAudioAttributes(data: Map): AudioAttributes { + return if (data[NotificationData.MEDIA_STREAM] in NotificationData.ALARM_STREAMS) { + AudioAttributes.Builder() + .setContentType(AudioAttributes.CONTENT_TYPE_SONIFICATION) + .setUsage(AudioAttributes.USAGE_ALARM) + .build() + } else { + AudioAttributes.Builder().build() + } + } + } +} diff --git a/common/src/main/res/values/strings.xml b/common/src/main/res/values/strings.xml index 48c08adc7dd..c1f0d2fe8e9 100644 --- a/common/src/main/res/values/strings.xml +++ b/common/src/main/res/values/strings.xml @@ -938,7 +938,8 @@ Toggle flashlight Remotely control app 
& device Manage enabled sensors and use notification commands from this server - Unable to process notification \"%1$s\" as text to speech. + Unable to process notification \"%1$s\" as text to speech. + Failed to initialize a text to speech engine. Please set the text for text to speech to process Unable to register application Unknown address diff --git a/wear/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt b/wear/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt index 328057a8c7b..84533d5938d 100755 --- a/wear/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt +++ b/wear/src/main/java/io/homeassistant/companion/android/notifications/MessagingManager.kt @@ -17,11 +17,10 @@ import io.homeassistant.companion.android.common.notifications.handleChannel import io.homeassistant.companion.android.common.notifications.handleDeleteIntent import io.homeassistant.companion.android.common.notifications.handleSmallIcon import io.homeassistant.companion.android.common.notifications.handleText -import io.homeassistant.companion.android.common.util.TextToSpeechData import io.homeassistant.companion.android.common.util.cancelGroupIfNeeded import io.homeassistant.companion.android.common.util.getActiveNotification -import io.homeassistant.companion.android.common.util.speakText -import io.homeassistant.companion.android.common.util.stopTTS +import io.homeassistant.companion.android.common.util.tts.TextToSpeechClient +import io.homeassistant.companion.android.common.util.tts.TextToSpeechData import io.homeassistant.companion.android.database.AppDatabase import io.homeassistant.companion.android.database.notification.NotificationItem import io.homeassistant.companion.android.database.sensor.SensorDao @@ -36,7 +35,8 @@ import org.json.JSONObject class MessagingManager @Inject constructor( @ApplicationContext val context: Context, private val serverManager: ServerManager, - private val 
sensorDao: SensorDao + private val sensorDao: SensorDao, + private val textToSpeechClient: TextToSpeechClient ) { companion object { @@ -79,8 +79,8 @@ class MessagingManager @Inject constructor( sendNotification(notificationData) } } - message == TextToSpeechData.TTS -> speakText(context, notificationData) - message == TextToSpeechData.COMMAND_STOP_TTS -> stopTTS() + message == TextToSpeechData.TTS -> textToSpeechClient.speakText(notificationData) + message == TextToSpeechData.COMMAND_STOP_TTS -> textToSpeechClient.stopTTS() message == DeviceCommandData.COMMAND_UPDATE_SENSORS -> SensorReceiver.updateAllSensors(context) else -> sendNotification(notificationData, now) }