From 2d2e73d4a6f8c87f1207844c8215197241803830 Mon Sep 17 00:00:00 2001
From: jamsch <12927717+jamsch@users.noreply.github.com>
Date: Fri, 11 Oct 2024 15:16:48 +1300
Subject: [PATCH] feat: volume change event support (#37)

* feat: volume change event support
* chore: update readme
* update events doc
* refactor: rmsDB -> value, use max power in buffer
* chore: autoformat
* chore: update VolumeMeteringAvatar and docs
* update volume metering link
---
 README.md                                    |  69 +-
 .../ExpoSpeechRecognitionModule.kt           |  32 +-
 .../ExpoSpeechRecognitionOptions.kt          |  11 +
 .../speechrecognition/ExpoSpeechService.kt   |  20 +
 example/App.tsx                              |  29 +-
 example/assets/avatar.png                    | Bin 0 -> 19293 bytes
 example/babel.config.js                      |   3 +-
 example/components/VolumeMeteringAvatar.tsx  | 131 ++++
 example/ios/Podfile.lock                     |  37 +-
 example/package-lock.json                    | 602 +++++++++++++-----
 example/package.json                         |   7 +-
 images/volume-metering.gif                   | Bin 0 -> 363662 bytes
 ios/ExpoSpeechRecognitionModule.swift        |   9 +-
 ios/ExpoSpeechRecognizer.swift               |  90 ++-
 ios/SpeechRecognitionOptions.swift           |  11 +
 src/ExpoSpeechRecognitionModule.types.ts     |  28 +
 16 files changed, 865 insertions(+), 214 deletions(-)
 create mode 100644 example/assets/avatar.png
 create mode 100644 example/components/VolumeMeteringAvatar.tsx
 create mode 100644 images/volume-metering.gif

diff --git a/README.md b/README.md
index 69ff92f..c0308f6 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,8 @@ expo-speech-recognition implements the iOS [`SFSpeechRecognizer`](https://develo
 - [Transcribing audio files](#transcribing-audio-files)
   - [Supported input audio formats](#supported-input-audio-formats)
   - [File transcription example](#file-transcription-example)
+- [Volume metering](#volume-metering)
+  - [Volume metering example](#volume-metering-example)
 - [Polyfilling the Web SpeechRecognition API](#polyfilling-the-web-speechrecognition-api)
 - [Muting the beep sound on Android](#muting-the-beep-sound-on-android)
 - [Improving accuracy of single-word prompts](#improving-accuracy-of-single-word-prompts)
@@ -299,6 +301,13 @@ ExpoSpeechRecognitionModule.start({
     // Default: 50ms for network-based recognition, 15ms for on-device recognition
     chunkDelayMillis: undefined,
   },
+  // Settings for volume change events.
+  volumeChangeEventOptions: {
+    // [Default: false] Whether to emit `volumechange` events when the input volume changes.
+    enabled: false,
+    // [Default: 100ms on iOS] The interval (in milliseconds) at which to emit `volumechange` events.
+    intervalMillis: 300,
+  },
 });
 
 // Stop capturing audio (and emit a final result if there is one)
@@ -312,17 +321,18 @@ ExpoSpeechRecognitionModule.abort();
 
 Events are largely based on the [Web Speech API](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition). The following events are supported:
 
-| Event Name | Description | Notes |
-| ------------- | ----------- | ----- |
-| `audiostart` | Audio capturing has started | Includes the `uri` if `recordingOptions.persist` is enabled. |
-| `audioend` | Audio capturing has ended | Includes the `uri` if `recordingOptions.persist` is enabled. |
-| `end` | Speech recognition service has disconnected. | This should always be the last event dispatched, including after errors. |
-| `error` | Fired when a speech recognition error occurs. | You'll also receive an `error` event (with code "aborted") when calling `.abort()` |
-| `nomatch` | Speech recognition service returns a final result with no significant recognition. | You may have non-final results recognized. This may get emitted after cancellation. |
-| `result` | Speech recognition service returns a word or phrase has been positively recognized. | On Android, continous mode runs as a segmented session, meaning when a final result is reached, additional partial and final results will cover a new segment separate from the previous final result. On iOS, you should expect one final result before speech recognition has stopped. |
-| `speechstart` | Fired when any sound — recognizable speech or not — has been detected | On iOS, this will fire once in the session after a result has occurred |
-| `speechend` | Fired when speech recognized by the speech recognition service has stopped being detected. | Not supported yet on iOS |
-| `start` | Speech recognition has started | Use this event to indicate to the user when to speak. |
+| Event Name | Description | Notes |
+| -------------- | ----------- | ----- |
+| `audiostart` | Audio capturing has started | Includes the `uri` if `recordingOptions.persist` is enabled. |
+| `audioend` | Audio capturing has ended | Includes the `uri` if `recordingOptions.persist` is enabled. |
+| `end` | Speech recognition service has disconnected. | This should always be the last event dispatched, including after errors. |
+| `error` | Fired when a speech recognition error occurs. | You'll also receive an `error` event (with code "aborted") when calling `.abort()` |
+| `nomatch` | Speech recognition service returns a final result with no significant recognition. | You may have non-final results recognized. This may get emitted after cancellation. |
+| `result` | Speech recognition service returns a word or phrase that has been positively recognized. | On Android, continuous mode runs as a segmented session, meaning that when a final result is reached, additional partial and final results will cover a new segment separate from the previous final result. On iOS, you should expect one final result before speech recognition has stopped. |
+| `speechstart` | Fired when any sound — recognizable speech or not — has been detected | On iOS, this will fire once in the session after a result has occurred |
+| `speechend` | Fired when speech recognized by the speech recognition service has stopped being detected. | Not supported yet on iOS |
+| `start` | Speech recognition has started | Use this event to indicate to the user when to speak. |
+| `volumechange` | Fired when the input volume changes. | Returns a value between -2 and 10 indicating the volume of the input audio. Consider anything below 0 to be inaudible. |
 
 ## Handling Errors
 
@@ -530,6 +540,43 @@ function TranscribeAudioFile() {
 }
 ```
 
+## Volume metering
+
+You can use the `volumeChangeEventOptions.enabled` option to enable volume metering. This will emit `volumechange` events with the current volume level (a value between -2 and 10), which you can use to animate the user's voice volume or to provide feedback about the input level.
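+
+For example, you can normalize the reported value onto a 0-1 range before driving an animation. The following is a minimal sketch; the `VolumeIndicator` component and the normalization math are illustrative (derived from the -2 to 10 range described above), not part of the library:
+
+```tsx
+import { useSpeechRecognitionEvent } from "expo-speech-recognition";
+
+function VolumeIndicator() {
+  useSpeechRecognitionEvent("volumechange", (event) => {
+    // Clamp the raw value (-2 to 10), then map it onto a 0-1 scale
+    const clamped = Math.min(Math.max(event.value, -2), 10);
+    const normalized = (clamped + 2) / 12;
+    console.log("Normalized input volume:", normalized);
+  });
+
+  // Render a meter (e.g. an animated bar) driven by the normalized value
+  return null;
+}
+```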
+
+### Volume metering example
+
+![Volume metering example](./images/volume-metering.gif)
+
+See: [VolumeMeteringAvatar.tsx](https://github.com/jamsch/expo-speech-recognition/tree/main/example/components/VolumeMeteringAvatar.tsx) for a complete example that uses `react-native-reanimated` to animate the volume level.
+
+```tsx
+import { ExpoSpeechRecognitionModule, useSpeechRecognitionEvent } from "expo-speech-recognition";
+
+function VolumeMeteringAvatar() {
+  useSpeechRecognitionEvent("volumechange", (event) => {
+    console.log("Volume changed to:", event.value);
+  });
+
+  const handleStart = () => {
+    ExpoSpeechRecognitionModule.start({
+      lang: "en-US",
+      volumeChangeEventOptions: {
+        enabled: true,
+        intervalMillis: 300,
+      },
+    });
+  };
+
+  return (
+    <