From edfbd57d41eef5e74f45adc0d012dff986cd39bb Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Thu, 5 Dec 2024 14:13:49 -0800 Subject: [PATCH 1/2] v2.1 node --- .github/workflows/nodejs-demos.yml | 12 +++++ binding/nodejs/README.md | 8 +++ binding/nodejs/package.json | 2 +- binding/nodejs/test/index.test.ts | 87 ++++++++++++++++-------------- binding/nodejs/test/test_utils.ts | 61 ++++++++++++++++----- demo/nodejs/package.json | 2 +- demo/nodejs/yarn.lock | 6 +-- 7 files changed, 121 insertions(+), 57 deletions(-) diff --git a/.github/workflows/nodejs-demos.yml b/.github/workflows/nodejs-demos.yml index 919efae4..4150e6f4 100644 --- a/.github/workflows/nodejs-demos.yml +++ b/.github/workflows/nodejs-demos.yml @@ -39,6 +39,12 @@ jobs: with: node-version: ${{ matrix.node-version }} + # ************** REMOVE AFTER RELEASE ******************** + - name: Build Local Packages + run: yarn && yarn build + working-directory: binding/nodejs + # ******************************************************** + - name: Install dependencies run: yarn install @@ -55,6 +61,12 @@ jobs: steps: - uses: actions/checkout@v3 + # ************** REMOVE AFTER RELEASE ******************** + - name: Build Local Packages + run: yarn && yarn build + working-directory: binding/nodejs + # ******************************************************** + - name: Install dependencies run: yarn install diff --git a/binding/nodejs/README.md b/binding/nodejs/README.md index 11332c60..6de1e8de 100644 --- a/binding/nodejs/README.md +++ b/binding/nodejs/README.md @@ -60,6 +60,14 @@ while (true) { Replace `${ACCESS_KEY}` with yours obtained from [Picovoice Console](https://console.picovoice.ai/). Finally, when done be sure to explicitly release the resources using `handle.release()`. +### Language Model + +The Cheetah Node.js SDK comes preloaded with a default English language model (`.pv` file). +Default models for other supported languages can be found in [lib/common](../../lib/common). + +Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train +language models with custom vocabulary and boost words in the existing vocabulary. + ## Demos [Cheetah Node.js demo package](https://www.npmjs.com/package/@picovoice/cheetah-node-demo) provides command-line utilities for processing audio using cheetah. diff --git a/binding/nodejs/package.json b/binding/nodejs/package.json index 32c4949c..850e82f3 100644 --- a/binding/nodejs/package.json +++ b/binding/nodejs/package.json @@ -1,6 +1,6 @@ { "name": "@picovoice/cheetah-node", - "version": "2.0.3", + "version": "2.1.0", "description": "Picovoice Cheetah Node.js binding", "main": "dist/index.js", "types": "dist/types/index.d.ts", diff --git a/binding/nodejs/test/index.test.ts b/binding/nodejs/test/index.test.ts index aaa15be7..2d72ae83 100644 --- a/binding/nodejs/test/index.test.ts +++ b/binding/nodejs/test/index.test.ts @@ -1,5 +1,5 @@ // -// Copyright 2022-2023 Picovoice Inc. +// Copyright 2022-2024 Picovoice Inc. // // You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" // file accompanying this source. @@ -20,17 +20,12 @@ import { WaveFile } from 'wavefile'; import { getSystemLibraryPath } from '../src/platforms'; import { - TRANSCRIPT, + getModelPathByLanguage, getAudioFile, - getModelPath, - getTestParameters, + getLanguageTestParameters, } from './test_utils'; -const MODEL_PATH = getModelPath(); -const TEST_PARAMETERS = getTestParameters(); -const WAV_PATH = "test.wav"; - -const libraryPath = getSystemLibraryPath(); +const LANGUAGE_TEST_PARAMETERS = getLanguageTestParameters(); const ACCESS_KEY = process.argv .filter(x => x.startsWith('--access_key='))[0] @@ -76,8 +71,7 @@ const loadPcm = (audioFile: string): Int16Array => { const waveBuffer = fs.readFileSync(waveFilePath); const waveAudioFile = new WaveFile(waveBuffer); - const pcm: any = waveAudioFile.getSamples(false, Int16Array); - return pcm; + return waveAudioFile.getSamples(false, Int16Array) as any; }; const cheetahProcessWaveFile = ( @@ -101,60 +95,73 @@ const cheetahProcessWaveFile = ( const testCheetahProcess = ( - _: string, - transcript: string, - testPunctuation: boolean, + language: string, + audioFile: string, + referenceTranscript: string, + punctuations: string[], + enableAutomaticPunctuation: boolean, errorRate: number, - audioFile: string ) => { + const modelPath = getModelPathByLanguage(language); + let cheetahEngine = new Cheetah(ACCESS_KEY, { - enableAutomaticPunctuation: testPunctuation, + modelPath, + enableAutomaticPunctuation, }); - let [res, __] = cheetahProcessWaveFile(cheetahEngine, audioFile); + let [transcript] = cheetahProcessWaveFile(cheetahEngine, audioFile); + + let normalizedTranscript = referenceTranscript; + if (!enableAutomaticPunctuation) { + for (const punctuation of punctuations) { + normalizedTranscript = normalizedTranscript.replace(punctuation, ""); + } + } expect( - characterErrorRate(res, transcript) < errorRate + characterErrorRate(transcript, normalizedTranscript) < errorRate ).toBeTruthy(); cheetahEngine.release(); }; describe('successful processes', () => { - it.each(TEST_PARAMETERS)( + it.each(LANGUAGE_TEST_PARAMETERS)( 'testing process `%p`', ( language: string, + audioFile: string, transcript: string, - _: string, + punctuations: string[], errorRate: number, - audioFile: string ) => { testCheetahProcess( language, + audioFile, transcript, + punctuations, false, errorRate, - audioFile ); } ); - it.each(TEST_PARAMETERS)( - 'testing process `%p` with punctuation', + it.each(LANGUAGE_TEST_PARAMETERS)( + 'testing process `%p` with punctuation', ( language: string, - _: string, - transcriptWithPunctuation: string, + audioFile: string, + transcript: string, + punctuations: string[], errorRate: number, - audioFile: string ) => { testCheetahProcess( language, - transcriptWithPunctuation, + audioFile, + transcript, + punctuations, true, errorRate, - audioFile ); } ); @@ -170,30 +177,32 @@ describe('Defaults', () => { describe('manual paths', () => { test('manual model path', () => { - let cheetahEngine = new Cheetah(ACCESS_KEY, { modelPath: MODEL_PATH }); + let cheetahEngine = new Cheetah(ACCESS_KEY, { modelPath: getModelPathByLanguage("en") }); - let [transcript, _] = cheetahProcessWaveFile( + let [transcript] = cheetahProcessWaveFile( cheetahEngine, - WAV_PATH + "test.wav" ); - expect(transcript).toBe(TRANSCRIPT); + expect(transcript.length).toBeGreaterThan(0); cheetahEngine.release(); }); test('manual model and library path', () => { + const libraryPath = getSystemLibraryPath(); + let cheetahEngine = new Cheetah(ACCESS_KEY, { - modelPath: MODEL_PATH, + modelPath: getModelPathByLanguage("en"), libraryPath: libraryPath, endpointDurationSec: 0.2, }); - let [transcript, _] = cheetahProcessWaveFile( + let [transcript] = cheetahProcessWaveFile( cheetahEngine, - WAV_PATH + "test.wav" ); - expect(transcript).toBe(TRANSCRIPT); + expect(transcript.length).toBeGreaterThan(0); cheetahEngine.release(); }); }); @@ -202,7 +211,7 @@ describe("error message stack", () => { test("message stack cleared after read", () => { let error: string[] = []; try { - new Cheetah('invalid', { modelPath: MODEL_PATH }); + new Cheetah('invalid', { modelPath: getModelPathByLanguage("en") }); } catch (e: any) { error = e.messageStack; } @@ -211,7 +220,7 @@ describe("error message stack", () => { expect(error.length).toBeLessThanOrEqual(8); try { - new Cheetah('invalid', { modelPath: MODEL_PATH }); + new Cheetah('invalid', { modelPath: getModelPathByLanguage("en") }); } catch (e: any) { for (let i = 0; i < error.length; i++) { expect(error[i]).toEqual(e.messageStack[i]); diff --git a/binding/nodejs/test/test_utils.ts b/binding/nodejs/test/test_utils.ts index 93c00575..f63d8b90 100644 --- a/binding/nodejs/test/test_utils.ts +++ b/binding/nodejs/test/test_utils.ts @@ -1,5 +1,5 @@ // -// Copyright 2023 Picovoice Inc. +// Copyright 2024 Picovoice Inc. // // You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" // file accompanying this source. @@ -8,19 +8,27 @@ // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. // +import * as fs from 'fs'; import * as path from 'path'; const ROOT_DIR = path.join(__dirname, '../../..'); +const TEST_DATA_JSON = require(path.join( + ROOT_DIR, + 'resources/.test/test_data.json' +)); +const MB_50 = 1024 * 1024 * 50; -export const TRANSCRIPT = - 'Mr quilter is the apostle of the middle classes and we are glad to welcome his gospel'; -export const TRANSCRIPT_WITH_PUNCTUATION = - 'Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.'; +function appendLanguage(s: string, language: string): string { + if (language === 'en') { + return s; + } + return s + '_' + language; +} -export function getModelPath(): string { +export function getModelPathByLanguage(language: string): string { return path.join( ROOT_DIR, - `lib/common/cheetah_params.pv` + `${appendLanguage('lib/common/cheetah_params', language)}.pv` ); } @@ -28,14 +36,43 @@ export function getAudioFile(audioFile: string): string { return path.join(ROOT_DIR, 'resources/audio_samples', audioFile); } -export function getTestParameters(): [ +function getCpuPart(): string { + if (!fs.existsSync('/proc/cpuinfo')) { + return ""; + } + const cpuInfo = fs.readFileSync('/proc/cpuinfo', 'ascii'); + for (const infoLine of cpuInfo.split('\n')) { + if (infoLine.includes('CPU part')) { + const infoLineSplit = infoLine.split(' '); + return infoLineSplit[infoLineSplit.length - 1].toLowerCase(); + } + } + return ""; +} + +function getModelSize(language: string): number { + const modelPath = getModelPathByLanguage(language); + const stats = fs.statSync(modelPath); + return stats.size; +} + +export function getLanguageTestParameters(): [ string, string, string, + string[], number, - string ][] { - return [ - ["en", TRANSCRIPT, TRANSCRIPT_WITH_PUNCTUATION, 0.025, "test.wav"] - ]; + const cpuPart = getCpuPart(); + let parametersJson = TEST_DATA_JSON.tests.language_tests; + if (cpuPart === "0xd03") { + parametersJson = parametersJson.filter((x: any) => (getModelSize(x.language) < MB_50)); + } + return parametersJson.map((x: any) => [ + x.language, + x.audio_file, + x.transcript, + x.punctuations, + x.error_rate, + ]); } diff --git a/demo/nodejs/package.json b/demo/nodejs/package.json index abdba311..d9640b74 100644 --- a/demo/nodejs/package.json +++ b/demo/nodejs/package.json @@ -16,7 +16,7 @@ "author": "Picovoice Inc.", "license": "Apache-2.0", "dependencies": { - "@picovoice/cheetah-node": "=2.0.3", + "@picovoice/cheetah-node": "../../binding/nodejs", "@picovoice/pvrecorder-node": "^1.2.4", "commander": "^6.1.0", "readline": "^1.3.0", diff --git a/demo/nodejs/yarn.lock b/demo/nodejs/yarn.lock index 03b1ed68..3484ebea 100644 --- a/demo/nodejs/yarn.lock +++ b/demo/nodejs/yarn.lock @@ -2,10 +2,8 @@ # yarn lockfile v1 -"@picovoice/cheetah-node@=2.0.3": - version "2.0.3" - resolved "https://registry.yarnpkg.com/@picovoice/cheetah-node/-/cheetah-node-2.0.3.tgz#6b426ded58c2cf21e82a3282582f46698f3ddc32" - integrity sha512-BqcDV72PhjE41GQohlnfu/1xr52QTSMlpo504tTY+JgUHcoHnwT0jEp0AbpZgdXLIexYgH/dzUt8Ls12yXyCgQ== +"@picovoice/cheetah-node@../../binding/nodejs": + version "2.1.0" "@picovoice/pvrecorder-node@^1.2.4": version "1.2.4" From d7993fb672752ac9d596cb71a2f39c7737484258 Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Fri, 6 Dec 2024 10:55:43 -0800 Subject: [PATCH 2/2] trim numbers --- binding/nodejs/test/test_utils.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/binding/nodejs/test/test_utils.ts b/binding/nodejs/test/test_utils.ts index f63d8b90..c4806bd1 100644 --- a/binding/nodejs/test/test_utils.ts +++ b/binding/nodejs/test/test_utils.ts @@ -16,7 +16,7 @@ const TEST_DATA_JSON = require(path.join( ROOT_DIR, 'resources/.test/test_data.json' )); -const MB_50 = 1024 * 1024 * 50; +const MB_40 = 1024 * 1024 * 40; function appendLanguage(s: string, language: string): string { if (language === 'en') { @@ -66,7 +66,7 @@ export function getLanguageTestParameters(): [ const cpuPart = getCpuPart(); let parametersJson = TEST_DATA_JSON.tests.language_tests; if (cpuPart === "0xd03") { - parametersJson = parametersJson.filter((x: any) => (getModelSize(x.language) < MB_50)); + parametersJson = parametersJson.filter((x: any) => (getModelSize(x.language) < MB_40)); } return parametersJson.map((x: any) => [ x.language,