Skip to content

Commit

Permalink
v2.1 Python (#360)
Browse files Browse the repository at this point in the history
  • Loading branch information
ben-w-conrad authored Nov 29, 2024
1 parent 4408ab5 commit 69550b2
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 44 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/python-demos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ jobs:
- name: Pre-build dependencies
run: python -m pip install --upgrade pip

# ************** REMOVE AFTER RELEASE ********************
- name: Build binding
run: |
pip install wheel setuptools && cd ../../binding/python && python setup.py sdist bdist_wheel && pip install dist/pvcheetah-2.1.0-py3-none-any.whl
# ********************************************************

- name: Install dependencies
run: pip install -r requirements.txt

Expand All @@ -55,6 +61,12 @@ jobs:
steps:
- uses: actions/checkout@v3

# ************** REMOVE AFTER RELEASE ********************
- name: Build binding
run: |
pip3 install wheel setuptools && cd ../../binding/python && python3 setup.py sdist bdist_wheel && pip3 install dist/pvcheetah-2.1.0-py3-none-any.whl
# ********************************************************

- name: Install dependencies
run: pip3 install -r requirements.txt

Expand Down
14 changes: 7 additions & 7 deletions binding/python/_cheetah.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#
# Copyright 2018-2023 Picovoice Inc.
# Copyright 2018-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#

import os
Expand All @@ -16,7 +16,7 @@


class CheetahError(Exception):
def __init__(self, message: str = '', message_stack: Sequence[str] = None):
def __init__(self, message: str = '', message_stack: Optional[Sequence[str]] = None):
super().__init__(message)

self._message = message
Expand Down
4 changes: 2 additions & 2 deletions binding/python/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2022-2023 Picovoice Inc.
# Copyright 2022-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
Expand Down Expand Up @@ -48,7 +48,7 @@

setuptools.setup(
name="pvcheetah",
version="2.0.2",
version="2.1.0",
author="Picovoice",
author_email="[email protected]",
description="Cheetah Speech-to-Text Engine.",
Expand Down
38 changes: 20 additions & 18 deletions binding/python/test_cheetah.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
#
# Copyright 2018-2023 Picovoice Inc.
# Copyright 2018-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#

import os
import struct
import sys
import unittest
import wave

from parameterized import parameterized

Expand All @@ -22,7 +20,7 @@
from test_util import *


parameters = load_test_data()
language_tests = load_test_data()


class CheetahTestCase(unittest.TestCase):
Expand All @@ -32,25 +30,25 @@ def setUpClass(cls):
cls._audio_directory = os.path.join('..', '..', 'resources', 'audio_samples')

@classmethod
def _create_cheetah(cls, enable_automatic_punctuation: bool) -> Cheetah:
def _create_cheetah(cls, language: str, enable_automatic_punctuation: bool) -> Cheetah:
return Cheetah(
access_key=cls._access_key,
model_path=default_model_path('../..'),
model_path=get_model_path_by_language(language=language),
library_path=default_library_path('../..'),
enable_automatic_punctuation=enable_automatic_punctuation)

@parameterized.expand(parameters)
@parameterized.expand(language_tests)
def test_process(
self,
_: str,
language: str,
audio_file: str,
expected_transcript: str,
punctuations: List[str],
error_rate: float):
o = None

try:
o = self._create_cheetah(False)
o = self._create_cheetah(language=language, enable_automatic_punctuation=False)

pcm = read_wav_file(
file_name=os.path.join(self._audio_directory, audio_file),
Expand All @@ -77,18 +75,18 @@ def test_process(
if o is not None:
o.delete()

@parameterized.expand(parameters)
@parameterized.expand(language_tests)
def test_process_with_punctuation(
self,
_: str,
language: str,
audio_file: str,
expected_transcript: str,
punctuations: List[str],
error_rate: float):
o = None

try:
o = self._create_cheetah(True)
o = self._create_cheetah(language=language, enable_automatic_punctuation=True)

pcm = read_wav_file(
file_name=os.path.join(self._audio_directory, audio_file),
Expand All @@ -112,7 +110,11 @@ def test_process_with_punctuation(
o.delete()

def test_version(self):
o = self._create_cheetah(False)
o = Cheetah(
access_key=self._access_key,
model_path=default_model_path('../..'),
library_path=default_library_path('../..'),
enable_automatic_punctuation=True)
self.assertIsInstance(o.version, str)
self.assertGreater(len(o.version), 0)

Expand Down
12 changes: 6 additions & 6 deletions binding/python/test_cheetah_perf.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#
# Copyright 2022 Picovoice Inc.
# Copyright 2022-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#

import os
Expand Down
38 changes: 29 additions & 9 deletions binding/python/test_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2023 Picovoice Inc.
# Copyright 2023-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
Expand All @@ -9,23 +9,32 @@
# specific language governing permissions and limitations under the License.
#

import json
import os
import struct
import wave

from typing import *


def load_test_data() -> List[Tuple[str, str, str, List[str], float]]:
parameters = [
data_file_path = os.path.join(os.path.dirname(__file__), "../../resources/.test/test_data.json")
with open(data_file_path, encoding="utf8") as data_file:
json_test_data = data_file.read()
test_data = json.loads(json_test_data)['tests']

language_tests = [
(
"en",
"test.wav",
"Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.",
["."],
0.025
t['language'],
t['audio_file'],
t['transcript'],
t['punctuations'],
t['error_rate'],
)
for t in test_data['language_tests']
]

return parameters
return language_tests


def read_wav_file(file_name: str, sample_rate: int) -> Tuple:
Expand All @@ -48,14 +57,25 @@ def read_wav_file(file_name: str, sample_rate: int) -> Tuple:
return frames[::channels]


def get_model_path_by_language(language: str) -> str:
    """Build the path to the Cheetah model file (`.pv`) for a language.

    The path is anchored at the directory containing this module and points
    at `../../lib/common/cheetah_params`, with a language suffix applied by
    `_append_language`, followed by the `.pv` extension.

    :param language: Language code of the model to locate.
    :return: Path to the model file. The path is not normalised and its
        existence is not checked.
    """
    module_dir = os.path.dirname(__file__)
    model_stem = _append_language('../../lib/common/cheetah_params', language)
    return os.path.join(module_dir, f'{model_stem}.pv')


def _append_language(s: str, language: str) -> str:
if language == 'en':
return s
return "%s_%s" % (s, language)


def get_word_error_rate(transcript: str, expected_transcript: str, use_cer: bool = False) -> float:
    """Compute an error rate between a transcript and the expected transcript.

    Both strings are tokenised (into characters when `use_cer` is true,
    otherwise into whitespace-separated words), and the edit distance between
    the two token sequences is divided by `len(transcript)`.

    :param transcript: Transcript produced by the engine.
    :param expected_transcript: Reference transcript.
    :param use_cer: Compare character by character instead of word by word.
    :return: Edit distance divided by the character length of `transcript`.
        For an empty `transcript`, returns `0.0` when the expected transcript
        has no tokens either and `1.0` otherwise.
    """
    expected_split = list(expected_transcript) if use_cer else expected_transcript.split()

    # An empty transcript would make the denominator zero; report a clean
    # value instead of raising ZeroDivisionError in the middle of a test.
    if not transcript:
        return 1.0 if expected_split else 0.0

    transcript_split = list(transcript) if use_cer else transcript.split()

    # NOTE(review): the denominator is the character count of `transcript`
    # even in word mode, not the number of reference tokens as in the usual
    # WER definition. Kept as-is because the error-rate thresholds in the test
    # data are presumably calibrated against this value; confirm before
    # changing.
    return _levenshtein_distance(transcript_split, expected_split) / len(transcript)


def _levenshtein_distance(words1: Sequence[str], words2: Sequence[str]) -> int:
res = [[0] * (len(words1) + 2) for _ in range(len(words2) + 1)]
res = [[0] * (len(words2) + 1) for _ in range(len(words1) + 1)]
for i in range(len(words1) + 1):
res[i][0] = i
for j in range(len(words2) + 1):
Expand Down
2 changes: 1 addition & 1 deletion demo/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
pvcheetah==2.0.2
pvcheetah==2.1.0
pvrecorder==1.2.3
2 changes: 1 addition & 1 deletion demo/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

setuptools.setup(
name="pvcheetahdemo",
version="2.0.2",
version="2.1.0",
author="Picovoice",
author_email="[email protected]",
description="Cheetah speech-to-text engine demos",
Expand Down

0 comments on commit 69550b2

Please sign in to comment.