From 1da78196b3d990a9ab519479286e253258cb5b2a Mon Sep 17 00:00:00 2001
From: jamsch <12927717+jamsch@users.noreply.github.com>
Date: Fri, 11 Oct 2024 14:21:30 +1300
Subject: [PATCH] fix(ios): nomatch event firing on iOS 18+ (#43)

* fix(ios): nomatch event firing on iOS 18+

* fix typos
---
 README.md                                |  4 ++-
 ios/ExpoSpeechRecognitionModule.swift    | 35 ++++++++++++++++++++----
 src/ExpoSpeechRecognitionModule.types.ts |  5 +++-
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 08e7d3a..69ff92f 100644
--- a/README.md
+++ b/README.md
@@ -237,7 +237,9 @@ ExpoSpeechRecognitionModule.start({
   // The maximum number of alternative transcriptions to return.
   maxAlternatives: 1,
   // [Default: false] Continuous recognition.
-  // If false on iOS, recognition will run until no speech is detected for 3 seconds.
+  // If false:
+  // - on iOS 17-, recognition will run until no speech is detected for 3 seconds.
+  // - on iOS 18+ and Android, recognition will run until a final result is received.
   // Not supported on Android 12 and below.
   continuous: true,
   // [Default: false] Prevent device from sending audio over the network. Only enabled if the device supports it.
diff --git a/ios/ExpoSpeechRecognitionModule.swift b/ios/ExpoSpeechRecognitionModule.swift
index b77bd7d..80438d8 100644
--- a/ios/ExpoSpeechRecognitionModule.swift
+++ b/ios/ExpoSpeechRecognitionModule.swift
@@ -41,6 +41,14 @@ public class ExpoSpeechRecognitionModule: Module {
   // This is a temporary workaround until the issue is fixed in a future iOS release
   var hasSeenFinalResult: Bool = false
 
+  // Hack for iOS 18 to avoid sending a "nomatch" event after the final-final result
+  // Example event order emitted in iOS 18:
+  // [
+  //   { isFinal: false, transcripts: ["actually", "final", "results"], metadata: { duration: 1500 } },
+  //   { isFinal: true, transcripts: [] }
+  // ]
+  var previousResult: SFSpeechRecognitionResult?
+
   public func definition() -> ModuleDefinition {
     // Sets the name of the module that JavaScript code will use to refer to the module. Takes a string as an argument.
     // Can be inferred from module's class name, but it's recommended to set it explicitly for clarity.
@@ -130,6 +138,9 @@ public class ExpoSpeechRecognitionModule: Module {
         do {
           let currentLocale = await speechRecognizer?.getLocale()
 
+          // Reset the previous result
+          self.previousResult = nil
+
           // Re-create the speech recognizer when locales change
           if self.speechRecognizer == nil || currentLocale != options.lang {
             guard let locale = resolveLocale(localeIdentifier: options.lang) else {
@@ -358,12 +369,14 @@ public class ExpoSpeechRecognitionModule: Module {
 
   func sendErrorAndStop(error: String, message: String) {
     hasSeenFinalResult = false
+    previousResult = nil
     sendEvent("error", ["error": error, "message": message])
     sendEvent("end")
   }
 
   func handleEnd() {
     hasSeenFinalResult = false
+    previousResult = nil
     sendEvent("end")
   }
 
@@ -422,11 +435,21 @@ public class ExpoSpeechRecognitionModule: Module {
     }
 
     if isFinal && results.isEmpty {
-      // https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition/nomatch_event
-      // The nomatch event of the Web Speech API is fired
-      // when the speech recognition service returns a final result with no significant recognition.
-      sendEvent("nomatch")
-      return
+      // Hack for iOS 18 to avoid sending a "nomatch" event after the final-final result
+      var previousResultWasFinal = false
+      var previousResultHadTranscriptions = false
+      if #available(iOS 18.0, *), let previousResult = previousResult {
+        previousResultWasFinal = previousResult.speechRecognitionMetadata?.speechDuration ?? 0 > 0
+        previousResultHadTranscriptions = !previousResult.transcriptions.isEmpty
+      }
+
+      if !previousResultWasFinal || !previousResultHadTranscriptions {
+        // https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition/nomatch_event
+        // The nomatch event of the Web Speech API is fired
+        // when the speech recognition service returns a final result with no significant recognition.
+        sendEvent("nomatch")
+        return
+      }
     }
 
     sendEvent(
@@ -436,6 +459,8 @@ public class ExpoSpeechRecognitionModule: Module {
         "results": results.map { $0.toDictionary() },
       ]
     )
+
+    previousResult = result
   }
 
   func handleRecognitionError(_ error: Error) {
diff --git a/src/ExpoSpeechRecognitionModule.types.ts b/src/ExpoSpeechRecognitionModule.types.ts
index 5d841aa..a83e839 100644
--- a/src/ExpoSpeechRecognitionModule.types.ts
+++ b/src/ExpoSpeechRecognitionModule.types.ts
@@ -149,7 +149,10 @@ export type ExpoSpeechRecognitionOptions = {
    *
    * Not supported on Android 12 and below.
    *
-   * If false on iOS, recognition will run until no speech is detected for 3 seconds.
+   * If false, the behaviors are the following:
+   *
+   * - on iOS 17-, recognition will run until no speech is detected for 3 seconds.
+   * - on iOS 18+ and Android, recognition will run until a result with `isFinal: true` is received.
    */
   continuous?: boolean;
   /** [Default: false] Prevent device from sending audio over the network. Only enabled if the device supports it.