Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ios): non-continuous recognition timers #40

Merged
merged 2 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 29 additions & 10 deletions ios/ExpoSpeechRecognizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,8 @@ actor ExpoSpeechRecognizer: ObservableObject {
)
}

// Don't run any timers if the audio source is from a file
let continuous = options.continuous || isSourcedFromFile
// Run timers only in non-continuous mode, and only when the audio source is the mic (not a file)
let shouldRunTimers = !options.continuous && !isSourcedFromFile
let audioEngine = self.audioEngine

self.task = recognizer.recognitionTask(
Expand All @@ -300,18 +300,20 @@ actor ExpoSpeechRecognizer: ObservableObject {
}
}

// Result handler
// Handle the result
self?.recognitionHandler(
audioEngine: audioEngine,
result: result,
error: error,
resultHandler: resultHandler,
errorHandler: errorHandler,
continuous: continuous
continuous: options.continuous,
shouldRunTimers: shouldRunTimers,
canEmitInterimResults: options.interimResults
)
})

if !continuous {
if shouldRunTimers {
invalidateAndScheduleTimer()
}

Expand Down Expand Up @@ -449,7 +451,10 @@ actor ExpoSpeechRecognizer: ObservableObject {
request = SFSpeechAudioBufferRecognitionRequest()
}

request.shouldReportPartialResults = options.interimResults
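// We also force-enable partial results in non-continuous mode,
// which allows us to re-schedule timers when text is detected.
// These won't get emitted to the user, however.
// NOTE(review): the condition below tests `options.continuous` rather than `!options.continuous` — confirm it matches the intent described above.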
request.shouldReportPartialResults = options.interimResults || options.continuous

if recognizer.supportsOnDeviceRecognition {
request.requiresOnDeviceRecognition = options.requiresOnDeviceRecognition
Expand Down Expand Up @@ -613,12 +618,25 @@ actor ExpoSpeechRecognizer: ObservableObject {
error: Error?,
resultHandler: @escaping (SFSpeechRecognitionResult) -> Void,
errorHandler: @escaping (Error) -> Void,
continuous: Bool
continuous: Bool,
shouldRunTimers: Bool,
canEmitInterimResults: Bool
) {
// When a final result is returned, we should expect the task to be idle or stopping
let receivedFinalResult = result?.isFinal ?? false
let receivedError = error != nil

if let result: SFSpeechRecognitionResult {
// Hack for iOS 18 to detect final results
// See: https://forums.developer.apple.com/forums/thread/762952 for more info
// Unlike `result.isFinal`, which is only emitted once, this final-like signal can be emitted multiple times during a continuous session
var receivedFinalLikeResult: Bool = receivedFinalResult
if #available(iOS 18.0, *), !receivedFinalLikeResult {
receivedFinalLikeResult = result?.speechRecognitionMetadata?.speechDuration ?? 0 > 0
}

let shouldEmitResult = receivedFinalResult || canEmitInterimResults || receivedFinalLikeResult

if let result: SFSpeechRecognitionResult, shouldEmitResult {
Task { @MainActor in
let taskState = await task?.state
// Make sure the task is running before emitting the result
Expand All @@ -638,15 +656,16 @@ actor ExpoSpeechRecognizer: ObservableObject {
}
}

if receivedFinalResult || receivedError {
if (receivedFinalLikeResult && !continuous) || receivedError || receivedFinalResult {
Task { @MainActor in
await reset()
}
return
}

// Non-continuous speech recognition
// Stop the speech recognizer if the timer fires after not receiving a result for 3 seconds
if !continuous && !receivedError {
if shouldRunTimers && !receivedError {
invalidateAndScheduleTimer()
}
}
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "expo-speech-recognition",
"version": "0.2.22",
"version": "0.2.23",
"description": "Speech Recognition for React Native Expo projects",
"main": "build/index.js",
"types": "build/index.d.ts",
Expand Down