Skip to content

Commit

Permalink
v2.1 node
Browse files Browse the repository at this point in the history
  • Loading branch information
ksyeo1010 committed Dec 5, 2024
1 parent 2e6c2d4 commit ab77f71
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 57 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/nodejs-demos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ jobs:
with:
node-version: ${{ matrix.node-version }}

# ************** REMOVE AFTER RELEASE ********************
- name: Build Local Packages
run: yarn && yarn build
working-directory: binding/nodejs
# ********************************************************

- name: Install dependencies
run: yarn install

Expand All @@ -55,6 +61,12 @@ jobs:
steps:
- uses: actions/checkout@v3

# ************** REMOVE AFTER RELEASE ********************
- name: Build Local Packages
run: yarn && yarn build
working-directory: binding/nodejs
# ********************************************************

- name: Install dependencies
run: yarn install

Expand Down
8 changes: 8 additions & 0 deletions binding/nodejs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ while (true) {
Replace `${ACCESS_KEY}` with your AccessKey obtained from [Picovoice Console](https://console.picovoice.ai/). Finally, when done, be sure to explicitly release the resources using
`handle.release()`.

### Language Model

The Cheetah Node.js SDK comes preloaded with a default English language model (`.pv` file).
Default models for other supported languages can be found in [lib/common](../../lib/common).

Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train
language models with custom vocabulary and boost words in the existing vocabulary.

## Demos

The [Cheetah Node.js demo package](https://www.npmjs.com/package/@picovoice/cheetah-node-demo) provides command-line utilities for processing audio using Cheetah.
2 changes: 1 addition & 1 deletion binding/nodejs/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@picovoice/cheetah-node",
"version": "2.0.3",
"version": "2.1.0",
"description": "Picovoice Cheetah Node.js binding",
"main": "dist/index.js",
"types": "dist/types/index.d.ts",
Expand Down
87 changes: 48 additions & 39 deletions binding/nodejs/test/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright 2022-2023 Picovoice Inc.
// Copyright 2022-2024 Picovoice Inc.
//
// You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
// file accompanying this source.
Expand All @@ -20,17 +20,12 @@ import { WaveFile } from 'wavefile';
import { getSystemLibraryPath } from '../src/platforms';

import {
TRANSCRIPT,
getModelPathByLanguage,
getAudioFile,
getModelPath,
getTestParameters,
getLanguageTestParameters,
} from './test_utils';

const MODEL_PATH = getModelPath();
const TEST_PARAMETERS = getTestParameters();
const WAV_PATH = "test.wav";

const libraryPath = getSystemLibraryPath();
const LANGUAGE_TEST_PARAMETERS = getLanguageTestParameters();

const ACCESS_KEY = process.argv
.filter(x => x.startsWith('--access_key='))[0]
Expand Down Expand Up @@ -76,8 +71,7 @@ const loadPcm = (audioFile: string): Int16Array => {
const waveBuffer = fs.readFileSync(waveFilePath);
const waveAudioFile = new WaveFile(waveBuffer);

const pcm: any = waveAudioFile.getSamples(false, Int16Array);
return pcm;
return waveAudioFile.getSamples(false, Int16Array) as any;
};

const cheetahProcessWaveFile = (
Expand All @@ -101,60 +95,73 @@ const cheetahProcessWaveFile = (


const testCheetahProcess = (
_: string,
transcript: string,
testPunctuation: boolean,
language: string,
audioFile: string,
referenceTranscript: string,
punctuations: string[],
enableAutomaticPunctuation: boolean,
errorRate: number,
audioFile: string
) => {
const modelPath = getModelPathByLanguage(language);

let cheetahEngine = new Cheetah(ACCESS_KEY, {
enableAutomaticPunctuation: testPunctuation,
modelPath,
enableAutomaticPunctuation,
});

let [res, __] = cheetahProcessWaveFile(cheetahEngine, audioFile);
let [transcript] = cheetahProcessWaveFile(cheetahEngine, audioFile);

// Build the transcript the engine output is compared against. When automatic
// punctuation is disabled, every listed punctuation mark is removed from the
// reference before the character error rate is computed.
let normalizedTranscript = referenceTranscript;
if (!enableAutomaticPunctuation) {
  for (const punctuation of punctuations) {
    // `String.prototype.replace` with a string pattern only removes the FIRST
    // match, which would leave repeated marks (e.g. several commas) in place.
    // split/join removes every occurrence and needs no regex escaping.
    normalizedTranscript = normalizedTranscript.split(punctuation).join("");
  }
}

expect(
characterErrorRate(res, transcript) < errorRate
characterErrorRate(transcript, normalizedTranscript) < errorRate
).toBeTruthy();

cheetahEngine.release();
};

describe('successful processes', () => {
it.each(TEST_PARAMETERS)(
it.each(LANGUAGE_TEST_PARAMETERS)(
'testing process `%p`',
(
language: string,
audioFile: string,
transcript: string,
_: string,
punctuations: string[],
errorRate: number,
audioFile: string
) => {
testCheetahProcess(
language,
audioFile,
transcript,
punctuations,
false,
errorRate,
audioFile
);
}
);

it.each(TEST_PARAMETERS)(
'testing process `%p` with punctuation',
it.each(LANGUAGE_TEST_PARAMETERS)(
'testing process `%p` with punctuation',
(
language: string,
_: string,
transcriptWithPunctuation: string,
audioFile: string,
transcript: string,
punctuations: string[],
errorRate: number,
audioFile: string
) => {
testCheetahProcess(
language,
transcriptWithPunctuation,
audioFile,
transcript,
punctuations,
true,
errorRate,
audioFile
);
}
);
Expand All @@ -170,30 +177,32 @@ describe('Defaults', () => {

describe('manual paths', () => {
test('manual model path', () => {
let cheetahEngine = new Cheetah(ACCESS_KEY, { modelPath: MODEL_PATH });
let cheetahEngine = new Cheetah(ACCESS_KEY, { modelPath: getModelPathByLanguage("en") });

let [transcript, _] = cheetahProcessWaveFile(
let [transcript] = cheetahProcessWaveFile(
cheetahEngine,
WAV_PATH
"test.wav"
);

expect(transcript).toBe(TRANSCRIPT);
expect(transcript.length).toBeGreaterThan(0);
cheetahEngine.release();
});

test('manual model and library path', () => {
const libraryPath = getSystemLibraryPath();

let cheetahEngine = new Cheetah(ACCESS_KEY, {
modelPath: MODEL_PATH,
modelPath: getModelPathByLanguage("en"),
libraryPath: libraryPath,
endpointDurationSec: 0.2,
});

let [transcript, _] = cheetahProcessWaveFile(
let [transcript] = cheetahProcessWaveFile(
cheetahEngine,
WAV_PATH
"test.wav"
);

expect(transcript).toBe(TRANSCRIPT);
expect(transcript.length).toBeGreaterThan(0);
cheetahEngine.release();
});
});
Expand All @@ -202,7 +211,7 @@ describe("error message stack", () => {
test("message stack cleared after read", () => {
let error: string[] = [];
try {
new Cheetah('invalid', { modelPath: MODEL_PATH });
new Cheetah('invalid', { modelPath: getModelPathByLanguage("en") });
} catch (e: any) {
error = e.messageStack;
}
Expand All @@ -211,7 +220,7 @@ describe("error message stack", () => {
expect(error.length).toBeLessThanOrEqual(8);

try {
new Cheetah('invalid', { modelPath: MODEL_PATH });
new Cheetah('invalid', { modelPath: getModelPathByLanguage("en") });
} catch (e: any) {
for (let i = 0; i < error.length; i++) {
expect(error[i]).toEqual(e.messageStack[i]);
Expand Down
61 changes: 49 additions & 12 deletions binding/nodejs/test/test_utils.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Copyright 2023 Picovoice Inc.
// Copyright 2024 Picovoice Inc.
//
// You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
// file accompanying this source.
Expand All @@ -8,34 +8,71 @@
// an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
import * as fs from 'fs';
import * as path from 'path';

// Three directory levels above the directory containing this file (the repository
// root, given that this file lives in binding/nodejs/test).
const ROOT_DIR = path.join(__dirname, '../../..');
// Test fixture data, loaded once when this module is first imported. Its
// `tests.language_tests` entry is read when building the language test parameters.
const TEST_DATA_JSON = require(path.join(
ROOT_DIR,
'resources/.test/test_data.json'
));
// 50 MiB expressed in bytes; used as an upper bound on model file size when
// filtering the language tests.
const MB_50 = 1024 * 1024 * 50;

export const TRANSCRIPT =
'Mr quilter is the apostle of the middle classes and we are glad to welcome his gospel';
export const TRANSCRIPT_WITH_PUNCTUATION =
'Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.';
/**
 * Adds a language suffix to a base name.
 *
 * @param s Base string to decorate.
 * @param language Language code; `'en'` is treated as the default and adds no suffix.
 * @returns `s` unchanged for `'en'`, otherwise `s` followed by `_` and the language code.
 */
function appendLanguage(s: string, language: string): string {
  const isDefaultLanguage = language === 'en';
  return isDefaultLanguage ? s : `${s}_${language}`;
}

export function getModelPath(): string {
export function getModelPathByLanguage(language: string): string {
return path.join(
ROOT_DIR,
`lib/common/cheetah_params.pv`
`${appendLanguage('lib/common/cheetah_params', language)}.pv`
);
}

/**
 * Builds the path of a test audio sample.
 *
 * @param audioFile File name (or relative path) inside `resources/audio_samples`.
 * @returns `ROOT_DIR`, `resources/audio_samples` and `audioFile` joined into one path.
 */
export function getAudioFile(audioFile: string): string {
  const segments = [ROOT_DIR, 'resources/audio_samples', audioFile];
  return path.join(...segments);
}

export function getTestParameters(): [
/**
 * Reads the CPU part identifier reported by `/proc/cpuinfo`.
 *
 * @returns The last space-separated token of the first line containing
 * `CPU part`, lower-cased; an empty string when `/proc/cpuinfo` does not exist
 * or contains no such line.
 */
function getCpuPart(): string {
  const cpuInfoPath = '/proc/cpuinfo';
  if (fs.existsSync(cpuInfoPath)) {
    const partLine = fs
      .readFileSync(cpuInfoPath, 'ascii')
      .split('\n')
      .find(line => line.includes('CPU part'));
    if (partLine !== undefined) {
      // The value is the final token on the line, e.g. "CPU part : 0xd03".
      const fields = partLine.split(' ');
      return fields[fields.length - 1].toLowerCase();
    }
  }
  return "";
}

/**
 * Looks up the on-disk size of the model file for a language.
 *
 * @param language Language code passed to `getModelPathByLanguage`.
 * @returns The `size` reported by `fs.statSync` for that model path.
 * @throws Whatever `fs.statSync` throws when the path cannot be stat'ed.
 */
function getModelSize(language: string): number {
  return fs.statSync(getModelPathByLanguage(language)).size;
}

export function getLanguageTestParameters(): [
string,
string,
string,
string[],
number,
string
][] {
return [
["en", TRANSCRIPT, TRANSCRIPT_WITH_PUNCTUATION, 0.025, "test.wav"]
];
const cpuPart = getCpuPart();
let parametersJson = TEST_DATA_JSON.tests.language_tests;
if (cpuPart === "0xd03") {
parametersJson = parametersJson.filter((x: any) => (getModelSize(x.language) < MB_50));
}
return parametersJson.map((x: any) => [
x.language,
x.audio_file,
x.transcript,
x.punctuations,
x.error_rate,
]);
}
2 changes: 1 addition & 1 deletion demo/nodejs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"author": "Picovoice Inc.",
"license": "Apache-2.0",
"dependencies": {
"@picovoice/cheetah-node": "=2.0.3",
"@picovoice/cheetah-node": "../../binding/nodejs",
"@picovoice/pvrecorder-node": "^1.2.4",
"commander": "^6.1.0",
"readline": "^1.3.0",
Expand Down
6 changes: 2 additions & 4 deletions demo/nodejs/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@
# yarn lockfile v1


"@picovoice/cheetah-node@=2.0.3":
version "2.0.3"
resolved "https://registry.yarnpkg.com/@picovoice/cheetah-node/-/cheetah-node-2.0.3.tgz#6b426ded58c2cf21e82a3282582f46698f3ddc32"
integrity sha512-BqcDV72PhjE41GQohlnfu/1xr52QTSMlpo504tTY+JgUHcoHnwT0jEp0AbpZgdXLIexYgH/dzUt8Ls12yXyCgQ==
"@picovoice/cheetah-node@../../binding/nodejs":
version "2.1.0"

"@picovoice/pvrecorder-node@^1.2.4":
version "1.2.4"
Expand Down

0 comments on commit ab77f71

Please sign in to comment.