From bd7a0f741b8c9d39a51254d19e74a7a519134380 Mon Sep 17 00:00:00 2001 From: Albert Ho <82767499+albho@users.noreply.github.com> Date: Fri, 16 Aug 2024 13:37:36 -0700 Subject: [PATCH] Nodejs demo pvspeaker (#37) --- .github/workflows/nodejs-demos.yml | 2 +- demo/nodejs/README.md | 13 +--- demo/nodejs/package.json | 8 +- demo/nodejs/streaming.js | 98 +++++++++++++----------- demo/nodejs/yarn.lock | 108 +-------------------------- resources/.lint/spell-check/dict.txt | 1 + 6 files changed, 65 insertions(+), 165 deletions(-) diff --git a/.github/workflows/nodejs-demos.yml b/.github/workflows/nodejs-demos.yml index f91fdb3d..d86b55ac 100644 --- a/.github/workflows/nodejs-demos.yml +++ b/.github/workflows/nodejs-demos.yml @@ -59,7 +59,7 @@ jobs: strategy: matrix: - machine: [ rpi3-32, rpi3-64, rpi4-32, rpi4-64, rpi5-64, jetson ] + machine: [ rpi3-32, rpi3-64, rpi4-32, rpi4-64, rpi5-64 ] steps: - uses: actions/checkout@v3 diff --git a/demo/nodejs/README.md b/demo/nodejs/README.md index 34413d33..84f973b5 100644 --- a/demo/nodejs/README.md +++ b/demo/nodejs/README.md @@ -12,12 +12,12 @@ voice assistants. Orca is: - Linux (x86_64), macOS (x86_64, arm64), Windows (x86_64) - Android and iOS - Chrome, Safari, Firefox, and Edge - - Raspberry Pi (3, 4, 5) and NVIDIA Jetson Nano + - Raspberry Pi (3, 4, 5) ## Compatibility - Node.js 16+ -- Runs on Linux (x86_64), macOS (x86_64, arm64), Windows (x86_64), Raspberry Pi (3, 4, 5), and NVIDIA Jetson Nano. +- Runs on Linux (x86_64), macOS (x86_64, arm64), Windows (x86_64), and Raspberry Pi (3, 4, 5). ## Installation @@ -45,13 +45,8 @@ In the single synthesis mode, the text is synthesized in a single call to the Or In this demo, we simulate a response from a language model by creating a text stream from a user-defined text. We stream that text to Orca and play the synthesized audio as soon as it gets generated. -[node-speaker](https://github.com/TooTallNate/node-speaker) is used to play pcm audio generated by Orca to your -device's speakers. On Debian/Ubuntu, `node-speaker` uses the [ALSA](http://www.alsa-project.org/) backend, so you'll -need to install `libasound2-dev` before starting the demo: - -```console -sudo apt-get install libasound2-dev -``` +[@picovoice/pvspeaker-node](https://www.npmjs.com/package/@picovoice/pvspeaker-node) is used to play pcm audio generated +by Orca to your device's speakers. Execute the following: diff --git a/demo/nodejs/package.json b/demo/nodejs/package.json index 12b346c9..13862281 100644 --- a/demo/nodejs/package.json +++ b/demo/nodejs/package.json @@ -1,6 +1,6 @@ { "name": "@picovoice/orca-node-demo", - "version": "0.2.0", + "version": "0.2.1", "description": "Picovoice Orca Node.js file-based and streaming demos", "scripts": { "file": "node file.js", @@ -22,18 +22,14 @@ "license": "Apache-2.0", "dependencies": { "@picovoice/orca-node": "=0.2.0", + "@picovoice/pvspeaker-node": "^1.0.0", "commander": "^6.1.0", - "pcm-convert": "^1.6.5", "prettier": "^2.6.2", "readline": "^1.3.0", - "systeminformation": "^5.22.11", "tiktoken": "^1.0.15", "wavefile": "^11.0.0" }, "devDependencies": {}, - "optionalDependencies": { - "speaker": "^0.5.5" - }, "homepage": "https://picovoice.ai/platform/orca/", "repository": { "type": "git", diff --git a/demo/nodejs/streaming.js b/demo/nodejs/streaming.js index 26941bf7..35cfc588 100644 --- a/demo/nodejs/streaming.js +++ b/demo/nodejs/streaming.js @@ -12,13 +12,12 @@ 'use strict'; const os = require('os'); -const si = require('systeminformation'); const { program } = require('commander'); const { performance } = require('perf_hooks'); const { execSync } = require('child_process'); const tiktoken = require('tiktoken'); -const convert = require('pcm-convert'); +const { PvSpeaker } = require('@picovoice/pvspeaker-node'); const { Orca, OrcaActivationLimitReachedError } = require('@picovoice/orca-node'); program @@ -46,7 +45,20 @@ program .option( '--audio_wait_chunks ', 'Number of PCM chunks to wait before starting to play audio', - '0', + ) + .option( + '--buffer_size_secs ', + 'The size in seconds of the internal buffer used by PvSpeaker to play audio', + '20', + ) + .option( + '--audio_device_index ', + 'Index of input audio device', + '-1', + ) + .option( + '--show_audio_devices', + 'Only list available audio output devices and exit', ); if (process.argv.length < 2) { @@ -142,6 +154,17 @@ async function streamingDemo() { let text = program['text_to_stream']; let tokensPerSeconds = program['tokens_per_second']; let audioWaitChunks = program['audio_wait_chunks']; + let bufferSizeSecs = Number(program['buffer_size_secs']); + let deviceIndex = Number(program['audio_device_index']); + let showAudioDevices = program['show_audio_devices']; + + if (showAudioDevices) { + const devices = PvSpeaker.getAvailableDevices(); + for (let i = 0; i < devices.length; i++) { + console.log(`index: ${i}, device name: ${devices[i]}`); + } + return; + } if (audioWaitChunks === undefined || audioWaitChunks === null) { audioWaitChunks = 0; @@ -154,7 +177,7 @@ async function streamingDemo() { } try { - let orca = new Orca( + const orca = new Orca( accessKey, { 'modelPath': modelFilePath, @@ -167,42 +190,17 @@ async function streamingDemo() { let speaker = null; try { - require.resolve('speaker'); - await si.audio((devices) => { - if (devices.length > 0 && devices[0].driver !== null) { - console.log(`Playing from device: ${devices[0].name}`); - const Speaker = require('speaker'); - speaker = new Speaker({ - channels: 1, - bitDepth: 8, - sampleRate: orca.sampleRate, - }); - } else { - console.error('Note: No sound card(s) detected. Orca will generate the pcm, but no audio will be played.'); - } - }); + const bitsPerSample = 16; + speaker = new PvSpeaker(orca.sampleRate, bitsPerSample, { bufferSizeSecs, deviceIndex }); + speaker.start(); } catch (e) { - console.error('\nNote: External package \'node-speaker\' was not installed successfully. This package may not be compatible with your machine. ' + - 'Orca will generate the pcm, but it will not be played to your speakers.'); + console.error('\nNote: External package \'@picovoice/pvspeaker-node\' failed to initialize.' + + ' Orca will generate the pcm, but it will not be played to your speakers.'); } - const pcmBuffer = []; - - function playStream() { - if (pcmBuffer.length === 0) return; - - const pcmInt16 = pcmBuffer.shift(); - - // for some reason, "speaker" does not accept Int16Array - const pcmUint8 = convert(pcmInt16, 'int16', 'uint8'); - try { - speaker?.write(pcmUint8); - } catch (e) { - console.log(`'node-speaker' unable to play audio: ${e}`); - } - - playStream(); - } + let pcmBuffer = []; + let numAudioChunks = 0; + let isStartedPlaying = false; process.stdout.write('\nSimulated text stream: '); @@ -217,11 +215,19 @@ async function streamingDemo() { if (timeFirstAudioAvailable === null) { timeFirstAudioAvailable = ((performance.now() - startTime) / 1000).toFixed(2); } - pcmBuffer.push(pcm); - if (pcmBuffer.length >= audioWaitChunks) { - playStream(); + pcmBuffer.push(...pcm); + numAudioChunks++; + } + + if (pcmBuffer.length > 0 && speaker !== null && (isStartedPlaying || numAudioChunks >= audioWaitChunks)) { + const arrayBuffer = new Int16Array(pcmBuffer).buffer; + const written = speaker.write(arrayBuffer); + if (written < arrayBuffer.byteLength) { + pcmBuffer = pcmBuffer.slice(written); } + isStartedPlaying = true; } + await sleepSecs(1 / tokensPerSeconds); } @@ -230,8 +236,7 @@ async function streamingDemo() { if (timeFirstAudioAvailable === null) { timeFirstAudioAvailable = ((performance.now() - startTime) / 1000).toFixed(2); } - pcmBuffer.push(flushedPcm); - playStream(); + pcmBuffer.push(...flushedPcm); } const elapsedTime = ((performance.now() - startTime) / 1000).toFixed(2); @@ -239,9 +244,14 @@ async function streamingDemo() { console.log(`Time to receive first audio: ${timeFirstAudioAvailable} seconds after text stream started`); console.log('\nWaiting for audio to finish...'); - speaker?.end(); + if (speaker !== null) { + const arrayBuffer = new Int16Array(pcmBuffer).buffer; + speaker.flush(arrayBuffer); + speaker.stop(); + speaker.release(); + } stream.close(); - orca?.release(); + orca.release(); } catch (err) { if (err instanceof OrcaActivationLimitReachedError) { console.error(`AccessKey '${accessKey}' has reached it's processing limit.`); diff --git a/demo/nodejs/yarn.lock b/demo/nodejs/yarn.lock index 3f91a022..1603470d 100644 --- a/demo/nodejs/yarn.lock +++ b/demo/nodejs/yarn.lock @@ -7,99 +7,16 @@ resolved "https://registry.yarnpkg.com/@picovoice/orca-node/-/orca-node-0.2.0.tgz#928eab700374581acbacfe9b54c368a0a5714b27" integrity sha512-eOdWXCKUp1YOI7w8ExAfzeqyyr9GheA0x3adzpFw2vb/5xqVExct6wklESOzJ2dQ0EkSSqzWWnXzLqMd02lfRg== -audio-format@^2.3.2: - version "2.3.2" - resolved "https://registry.yarnpkg.com/audio-format/-/audio-format-2.3.2.tgz#f23fc73b308c6dfba1904da6d4cd9c7621731abd" - integrity sha512-5IA2grZhaVhpGxX6lbJm8VVh/SKQULMXXrFxuiodi0zhzDPRB8BJfieo89AclEQv4bDxZRH4lv06qNnxqkFhKQ== - dependencies: - is-audio-buffer "^1.0.11" - is-buffer "^1.1.5" - is-plain-obj "^1.1.0" - pick-by-alias "^1.2.0" - sample-rate "^2.0.0" - -bindings@^1.3.0: - version "1.5.0" - resolved "https://registry.yarnpkg.com/bindings/-/bindings-1.5.0.tgz#10353c9e945334bc0511a6d90b38fbc7c9c504df" - integrity sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ== - dependencies: - file-uri-to-path "1.0.0" - -buffer-alloc-unsafe@^1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/buffer-alloc-unsafe/-/buffer-alloc-unsafe-1.1.0.tgz#bd7dc26ae2972d0eda253be061dba992349c19f0" - integrity sha512-TEM2iMIEQdJ2yjPJoSIsldnleVaAk1oW3DBVUykyOLsEsFmEc9kn+SFFPz+gl54KQNxlDnAwCXosOS9Okx2xAg== - -buffer-alloc@^1.1.0: - version "1.2.0" - resolved "https://registry.yarnpkg.com/buffer-alloc/-/buffer-alloc-1.2.0.tgz#890dd90d923a873e08e10e5fd51a57e5b7cce0ec" - integrity sha512-CFsHQgjtW1UChdXgbyJGtnm+O/uLQeZdtbDo8mfUgYXCHSM1wgrVxXm6bSyrUuErEb+4sYVGCzASBRot7zyrow== - dependencies: - buffer-alloc-unsafe "^1.1.0" - buffer-fill "^1.0.0" - -buffer-fill@^1.0.0: +"@picovoice/pvspeaker-node@^1.0.0": version "1.0.0" - resolved "https://registry.yarnpkg.com/buffer-fill/-/buffer-fill-1.0.0.tgz#f8f78b76789888ef39f205cd637f68e702122b2c" - integrity sha512-T7zexNBwiiaCOGDg9xNX9PBmjrubblRkENuptryuI64URkXDFum9il/JGL8Lm8wYfAXpredVXXZz7eMHilimiQ== + resolved "https://registry.yarnpkg.com/@picovoice/pvspeaker-node/-/pvspeaker-node-1.0.0.tgz#6b87b86138a8bb256e46ea41cb7d258ff8534337" + integrity sha512-qhH4ktObGj/Dd7iiptPXqf89U3l7FbCiaB1xY439YE3EFLsfsvZvjAzEwsF5A01h47AbXJTCdAjvkXrcsdt0/w== commander@^6.1.0: version "6.2.1" resolved "https://registry.yarnpkg.com/commander/-/commander-6.2.1.tgz#0792eb682dfbc325999bb2b84fddddba110ac73c" integrity sha512-U7VdrJFnJgo4xjrHpTzu0yrHPGImdsmD95ZlgYSEajAn2JKzDhDTPG9kBTefmObL2w/ngeZnilk+OV9CG3d7UA== -debug@^4.0.0: - version "4.3.5" - resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.5.tgz#e83444eceb9fedd4a1da56d671ae2446a01a6e1e" - integrity sha512-pt0bNEmneDIvdL1Xsd9oDQ/wrQRkXDT4AUWlNZNPKvW5x/jyO9VFXkJUP07vQ2upmw5PlaITaPKc31jK13V+jg== - dependencies: - ms "2.1.2" - -file-uri-to-path@1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz#553a7b8446ff6f684359c445f1e37a05dacc33dd" - integrity sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw== - -is-audio-buffer@^1.0.11: - version "1.1.0" - resolved "https://registry.yarnpkg.com/is-audio-buffer/-/is-audio-buffer-1.1.0.tgz#f4c7f274d20180f993f9b1650768cb77bedd4470" - integrity sha512-fmPC/dizJmP4ITCsW5oTQGMJ9wZVE+A/zAe6FQo3XwgERxmXHmm3ON5XkWDAxmyxvsrDmWx3NArpSgamp/59AA== - -is-buffer@^1.1.5: - version "1.1.6" - resolved "https://registry.yarnpkg.com/is-buffer/-/is-buffer-1.1.6.tgz#efaa2ea9daa0d7ab2ea13a97b2b8ad51fefbe8be" - integrity sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w== - -is-plain-obj@^1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-1.1.0.tgz#71a50c8429dfca773c92a390a4a03b39fcd51d3e" - integrity sha512-yvkRyxmFKEOQ4pNXCmJG5AEQNlXJS5LaONXo5/cLdTZdWvsZ1ioJEonLGAosKlMWE8lwUy/bJzMjcw8az73+Fg== - -ms@2.1.2: - version "2.1.2" - resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" - integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== - -object-assign@^4.1.1: - version "4.1.1" - resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863" - integrity sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg== - -pcm-convert@^1.6.5: - version "1.6.5" - resolved "https://registry.yarnpkg.com/pcm-convert/-/pcm-convert-1.6.5.tgz#81b5f2d0c2fb386edb8b9f69ce2374e093aeb7cc" - integrity sha512-5CEspU4j8aEQ80AhNbcLfpT0apc93E6endFxahWd4sV70I6PN7LPdz8GoYm/1qr400K9bUVsVA+KxNgbFROZPw== - dependencies: - audio-format "^2.3.2" - is-audio-buffer "^1.0.11" - is-buffer "^1.1.5" - object-assign "^4.1.1" - -pick-by-alias@^1.2.0: - version "1.2.0" - resolved "https://registry.yarnpkg.com/pick-by-alias/-/pick-by-alias-1.2.0.tgz#5f7cb2b1f21a6e1e884a0c87855aa4a37361107b" - integrity sha512-ESj2+eBxhGrcA1azgHs7lARG5+5iLakc/6nlfbpjcLl00HuuUOIuORhYXN4D1HfvMSKuVtFQjAlnwi1JHEeDIw== - prettier@^2.6.2: version "2.8.8" resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.8.8.tgz#e8c5d7e98a4305ffe3de2e1fc4aca1a71c28b1da" @@ -110,25 +27,6 @@ readline@^1.3.0: resolved "https://registry.yarnpkg.com/readline/-/readline-1.3.0.tgz#c580d77ef2cfc8752b132498060dc9793a7ac01c" integrity sha512-k2d6ACCkiNYz222Fs/iNze30rRJ1iIicW7JuX/7/cozvih6YCkFZH+J6mAFDVgv0dRBaAyr4jDqC95R2y4IADg== -sample-rate@^2.0.0: - version "2.0.1" - resolved "https://registry.yarnpkg.com/sample-rate/-/sample-rate-2.0.1.tgz#3f508969262b2c729a95d4d42b795a5b33610bd0" - integrity sha512-AIK0vVBiAEObmpJOxQu/WCyklnWGqzTSDII4O7nBo+SJHmfgBUiYhgV/Y3Ohz76gfSlU6R5CIAKggj+nAOLSvg== - -speaker@^0.5.5: - version "0.5.5" - resolved "https://registry.yarnpkg.com/speaker/-/speaker-0.5.5.tgz#8c5fa2aaee2f272889a8778dcad1fc30a13d42a3" - integrity sha512-IBeMZUITigYBO139h0+1MAgBHNZF55GFJN4U/Box35Sg49cfqYkbCO92TXoCUy22Ast08zfqKuXLvPxq9CWwLw== - dependencies: - bindings "^1.3.0" - buffer-alloc "^1.1.0" - debug "^4.0.0" - -systeminformation@^5.22.11: - version "5.22.11" - resolved "https://registry.yarnpkg.com/systeminformation/-/systeminformation-5.22.11.tgz#42be7b650ce0a8b940c06219a6647f6ab3f7a349" - integrity sha512-aLws5yi4KCHTb0BVvbodQY5bY8eW4asMRDTxTW46hqw9lGjACX6TlLdJrkdoHYRB0qs+MekqEq1zG7WDnWE8Ug== - tiktoken@^1.0.15: version "1.0.15" resolved "https://registry.yarnpkg.com/tiktoken/-/tiktoken-1.0.15.tgz#a1e11681fa51b50c81bb7eaaee53b7a66e844a23" diff --git a/resources/.lint/spell-check/dict.txt b/resources/.lint/spell-check/dict.txt index 4649d037..469917ff 100644 --- a/resources/.lint/spell-check/dict.txt +++ b/resources/.lint/spell-check/dict.txt @@ -54,6 +54,7 @@ frombuffer drwav wchars pvrecorder +pvspeaker pvcheetah itok numpy