Skip to content

Commit

Permalink
Fixed TTS queuing mechanism and volume override resets (#4480)
Browse files Browse the repository at this point in the history
fixed TTS queuing mechanism and volume override resets

A new TextToSpeechEngine abstraction has been introduced, along with a default Android TTS implementation and a TextToSpeechClient as the entry point. The client now features an independent queue, allowing for better control over the start and finish of each utterance and separating it from the engine, which focuses solely on playback. This resolves issues with interrupting utterances and volume overrides not resetting correctly when utterances are queued or force-stopped.
  • Loading branch information
LukasPaczos authored Jul 12, 2024
1 parent 851b1c7 commit 2281e64
Show file tree
Hide file tree
Showing 8 changed files with 369 additions and 127 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,10 @@ import io.homeassistant.companion.android.common.notifications.handleText
import io.homeassistant.companion.android.common.notifications.parseColor
import io.homeassistant.companion.android.common.notifications.parseVibrationPattern
import io.homeassistant.companion.android.common.notifications.prepareText
import io.homeassistant.companion.android.common.util.TextToSpeechData
import io.homeassistant.companion.android.common.util.cancelGroupIfNeeded
import io.homeassistant.companion.android.common.util.getActiveNotification
import io.homeassistant.companion.android.common.util.speakText
import io.homeassistant.companion.android.common.util.stopTTS
import io.homeassistant.companion.android.common.util.tts.TextToSpeechClient
import io.homeassistant.companion.android.common.util.tts.TextToSpeechData
import io.homeassistant.companion.android.database.notification.NotificationDao
import io.homeassistant.companion.android.database.notification.NotificationItem
import io.homeassistant.companion.android.database.sensor.SensorDao
Expand Down Expand Up @@ -111,7 +110,8 @@ class MessagingManager @Inject constructor(
private val prefsRepository: PrefsRepository,
private val notificationDao: NotificationDao,
private val sensorDao: SensorDao,
private val settingsDao: SettingsDao
private val settingsDao: SettingsDao,
private val textToSpeechClient: TextToSpeechClient
) {
companion object {
const val TAG = "MessagingService"
Expand Down Expand Up @@ -323,9 +323,9 @@ class MessagingManager @Inject constructor(
removeNotificationChannel(jsonData[NotificationData.CHANNEL]!!)
}
jsonData[NotificationData.MESSAGE] == TextToSpeechData.TTS -> {
speakText(context, jsonData)
textToSpeechClient.speakText(jsonData)
}
jsonData[NotificationData.MESSAGE] == TextToSpeechData.COMMAND_STOP_TTS -> stopTTS()
jsonData[NotificationData.MESSAGE] == TextToSpeechData.COMMAND_STOP_TTS -> textToSpeechClient.stopTTS()
jsonData[NotificationData.MESSAGE] in DEVICE_COMMANDS && allowCommands -> {
Log.d(TAG, "Processing device command")
when (jsonData[NotificationData.MESSAGE]) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import io.homeassistant.companion.android.common.data.servers.ServerManager
import io.homeassistant.companion.android.common.data.servers.ServerManagerImpl
import io.homeassistant.companion.android.common.data.wifi.WifiHelper
import io.homeassistant.companion.android.common.data.wifi.WifiHelperImpl
import io.homeassistant.companion.android.common.util.tts.AndroidTextToSpeechEngine
import io.homeassistant.companion.android.common.util.tts.TextToSpeechClient
import java.util.UUID
import javax.inject.Named
import javax.inject.Singleton
Expand Down Expand Up @@ -144,6 +146,12 @@ abstract class DataModule {
/**
 * Provides the package manager obtained from the application [Context] as a singleton binding.
 */
@Provides
@Singleton
fun packageManager(@ApplicationContext appContext: Context) = appContext.packageManager

/**
 * Provides the singleton [TextToSpeechClient], backed by an [AndroidTextToSpeechEngine].
 *
 * Both the client and the engine receive the application [Context].
 */
@Provides
@Singleton
fun providesTextToSpeechClient(
    @ApplicationContext appContext: Context
): TextToSpeechClient {
    val engine = AndroidTextToSpeechEngine(appContext)
    return TextToSpeechClient(appContext, engine)
}
}

@Binds
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package io.homeassistant.companion.android.common.util.tts

import android.content.Context
import android.speech.tts.TextToSpeech
import android.speech.tts.UtteranceProgressListener
import android.util.Log
import java.util.concurrent.atomic.AtomicBoolean
import kotlin.coroutines.resume
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock

private const val TAG = "AndroidTTSEngine"

/**
 * Implementation of [TextToSpeechEngine] that uses the default [TextToSpeech] engine found on the device.
 *
 * The underlying [TextToSpeech] instance is created by [initialize], used by [play] and torn down by [release].
 */
class AndroidTextToSpeechEngine(private val applicationContext: Context) : TextToSpeechEngine {

    /** Serializes initialization so that only one [TextToSpeech] instance is created at a time. */
    private val initMutex = Mutex()

    /** The initialized engine, or `null` before a successful [initialize] / after [release]. */
    private var textToSpeech: TextToSpeech? = null

    /** The last utterance whose playback started; used by [release] to restore the volume. */
    private var lastVolumeOverridingUtterance: Utterance? = null

    /**
     * Creates the [TextToSpeech] instance and suspends until its init listener reports a status.
     *
     * The instance is stored only once initialization succeeded. It is kept in a local until then because the
     * init listener may fire before the constructor returns, in which case a failed instance must not end up
     * stored as if it was usable.
     *
     * @return success, or a failure wrapping a [RuntimeException] that carries the reported status code
     */
    override suspend fun initialize(): Result<Unit> = initMutex.withLock {
        if (textToSpeech != null) {
            Result.success(Unit)
        } else {
            var candidate: TextToSpeech? = null
            val status = suspendCancellableCoroutine<Int> { continuation ->
                val created = TextToSpeech(applicationContext) { code ->
                    // guards against the listener being invoked more than once
                    if (continuation.isActive) {
                        continuation.resume(code)
                    }
                }
                candidate = created
                // do not leak the instance if the caller gives up before the engine reports back
                continuation.invokeOnCancellation { created.shutdown() }
            }
            if (status == TextToSpeech.SUCCESS) {
                textToSpeech = candidate
                Result.success(Unit)
            } else {
                candidate?.shutdown()
                Result.failure(RuntimeException("Failed to initialize TTS client. Code: $status."))
            }
        }
    }

    /**
     * Speaks [utterance], flushing whatever the engine had queued, and suspends until the engine reports
     * that the playback is done or failed.
     *
     * The utterance's volume adjustment is applied when the playback starts and reset when it is done or fails.
     *
     * @return success, or a failure if the engine is not initialized, the utterance could not be queued,
     * or the playback reported an error
     */
    override suspend fun play(utterance: Utterance): Result<Unit> {
        val engine = initMutex.withLock { textToSpeech }
            ?: return Result.failure(IllegalStateException("TextToSpeechEngine not initialized."))

        return suspendCancellableCoroutine { continuation ->
            // Listener callbacks and the queuing result below may both report a failure;
            // a continuation must be resumed at most once.
            val resumed = AtomicBoolean(false)

            fun finish(result: Result<Unit>) {
                if (resumed.compareAndSet(false, true)) {
                    continuation.resume(result)
                }
            }

            engine.setAudioAttributes(utterance.audioAttributes)
            engine.setOnUtteranceProgressListener(
                object : UtteranceProgressListener() {
                    override fun onStart(utteranceId: String?) {
                        utterance.streamVolumeAdjustment.overrideVolume()
                        lastVolumeOverridingUtterance = utterance
                    }

                    override fun onDone(utteranceId: String?) {
                        Log.d(TAG, "Done speaking; utterance ID: $utteranceId")
                        utterance.streamVolumeAdjustment.resetVolume()
                        finish(Result.success(Unit))
                    }

                    @Deprecated("Deprecated in Java")
                    override fun onError(utteranceId: String?) {
                        utterance.streamVolumeAdjustment.resetVolume()
                        finish(Result.failure(RuntimeException("Playback error; utterance ID: $utteranceId")))
                    }

                    override fun onError(utteranceId: String?, errorCode: Int) {
                        utterance.streamVolumeAdjustment.resetVolume()
                        finish(
                            Result.failure(
                                RuntimeException("Playback error; utterance ID: $utteranceId; error code: $errorCode")
                            )
                        )
                    }
                }
            )

            val queueResult = engine.speak(utterance.text, TextToSpeech.QUEUE_FLUSH, null, utterance.id)
            if (queueResult == TextToSpeech.ERROR) {
                // the utterance was not accepted, so no progress callback can be relied on to resume us
                finish(Result.failure(RuntimeException("Failed to queue utterance; utterance ID: ${utterance.id}")))
            } else {
                Log.d(TAG, "Speaking; utterance ID: ${utterance.id}")
            }
        }
    }

    /**
     * Stops the playback, shuts the engine down and forgets the instance, so that [initialize] has to be
     * called again before the next [play].
     */
    override fun release() {
        if (textToSpeech?.isSpeaking == true) {
            // resets the volume back if the playback was interrupted
            lastVolumeOverridingUtterance?.streamVolumeAdjustment?.resetVolume()
        }
        textToSpeech?.stop()
        textToSpeech?.shutdown()
        textToSpeech = null
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package io.homeassistant.companion.android.common.util.tts

import android.media.AudioAttributes
import android.media.AudioManager

/**
 * String constants used for text to speech notification payloads.
 */
object TextToSpeechData {
    /** Message value that marks a notification as a text to speech request. */
    const val TTS = "TTS"

    /** Key named after the text to be spoken. */
    const val TTS_TEXT = "tts_text"

    /** Message value that requests the ongoing text to speech playback to stop. */
    const val COMMAND_STOP_TTS = "command_stop_tts"
}

/**
 * Contract of an engine that synthesizes and plays back [Utterance]s.
 */
interface TextToSpeechEngine {

    /**
     * Prepares the engine for playback, suspending until it is ready.
     *
     * Calling this on an already initialized engine returns a successful [Result] right away.
     *
     * @return success, or a failure carrying the initialization error [Throwable]
     */
    suspend fun initialize(): Result<Unit>

    /**
     * Plays [utterance], suspending until the playback has finished.
     *
     * @return success, or a failure carrying the playback error [Throwable]
     */
    suspend fun play(utterance: Utterance): Result<Unit>

    /**
     * Stops all playback and releases the engine's resources.
     */
    fun release()
}

/**
 * A single message to be spoken, together with the audio settings to use for it.
 *
 * @param id unique identifier of this utterance
 * @param text the message that gets synthesized
 * @param streamVolumeAdjustment adjusts the volume before this utterance plays and restores it once it's finished
 * @param audioAttributes attributes set on the media player that plays the synthesized audio
 */
data class Utterance(
    val id: String,
    val text: String,
    val streamVolumeAdjustment: StreamVolumeAdjustment,
    val audioAttributes: AudioAttributes
)

/**
 * Utility object to adjust the volume ahead of an utterance's playback, and reset it back after it's finished.
 */
sealed class StreamVolumeAdjustment {

    /**
     * Applies volume adjustment.
     */
    abstract fun overrideVolume()

    /**
     * Resets the volume back to pre-adjustment levels. Does nothing if [overrideVolume] wasn't called before.
     */
    abstract fun resetVolume()

    /**
     * Object that does no adjustments to audio stream's volume level.
     */
    data object None : StreamVolumeAdjustment() {
        override fun overrideVolume() {
            // no-op
        }

        override fun resetVolume() {
            // no-op
        }
    }

    /**
     * Object that maximizes the volume of a specific [streamId].
     *
     * @param audioManager used to read and set the stream's volume
     * @param streamId identifier of the audio stream to adjust
     */
    class Maximize(
        private val audioManager: AudioManager,
        private val streamId: Int
    ) : StreamVolumeAdjustment() {
        private val maxVolume: Int = audioManager.getStreamMaxVolume(streamId)

        /** Volume to restore in [resetVolume]; `null` while no override is in effect. */
        private var volumeBeforeOverride: Int? = null

        /**
         * Remembers the current volume of the stream and sets it to the maximum.
         *
         * The current volume is captured only if no override is in effect yet. A repeated call before
         * [resetVolume] would otherwise read back the maximized level and lose the original one.
         */
        override fun overrideVolume() {
            if (volumeBeforeOverride == null) {
                volumeBeforeOverride = audioManager.getStreamVolume(streamId)
            }
            audioManager.setStreamVolume(streamId, maxVolume, 0)
        }

        /**
         * Restores the volume remembered by [overrideVolume], if any, and clears it.
         */
        override fun resetVolume() {
            volumeBeforeOverride?.let { volume ->
                audioManager.setStreamVolume(streamId, volume, 0)
            }
            volumeBeforeOverride = null
        }
    }
}
Loading

0 comments on commit 2281e64

Please sign in to comment.