Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2.1 Python #360

Merged
merged 5 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/python-demos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ jobs:
- name: Pre-build dependencies
run: python -m pip install --upgrade pip

# ************** REMOVE AFTER RELEASE ********************
- name: Build binding
run: |
pip install wheel setuptools && cd ../../binding/python && python setup.py sdist bdist_wheel && pip install dist/pvcheetah-2.1.0-py3-none-any.whl
# ********************************************************

- name: Install dependencies
run: pip install -r requirements.txt

Expand All @@ -55,6 +61,12 @@ jobs:
steps:
- uses: actions/checkout@v3

# ************** REMOVE AFTER RELEASE ********************
- name: Build binding
run: |
pip3 install wheel setuptools && cd ../../binding/python && python3 setup.py sdist bdist_wheel && pip3 install dist/pvcheetah-2.1.0-py3-none-any.whl
# ********************************************************

- name: Install dependencies
run: pip3 install -r requirements.txt

Expand Down
14 changes: 7 additions & 7 deletions binding/python/_cheetah.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#
# Copyright 2018-2023 Picovoice Inc.
# Copyright 2018-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#

import os
Expand All @@ -16,7 +16,7 @@


class CheetahError(Exception):
def __init__(self, message: str = '', message_stack: Sequence[str] = None):
def __init__(self, message: str = '', message_stack: Optional[Sequence[str]] = None):
super().__init__(message)

self._message = message
Expand Down
4 changes: 2 additions & 2 deletions binding/python/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2022-2023 Picovoice Inc.
# Copyright 2022-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
Expand Down Expand Up @@ -48,7 +48,7 @@

setuptools.setup(
name="pvcheetah",
version="2.0.2",
version="2.1.0",
author="Picovoice",
author_email="[email protected]",
description="Cheetah Speech-to-Text Engine.",
Expand Down
38 changes: 20 additions & 18 deletions binding/python/test_cheetah.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
#
# Copyright 2018-2023 Picovoice Inc.
# Copyright 2018-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#

import os
import struct
import sys
import unittest
import wave

from parameterized import parameterized

Expand All @@ -22,7 +20,7 @@
from test_util import *


parameters = load_test_data()
language_tests = load_test_data()


class CheetahTestCase(unittest.TestCase):
Expand All @@ -32,25 +30,25 @@ def setUpClass(cls):
cls._audio_directory = os.path.join('..', '..', 'resources', 'audio_samples')

@classmethod
def _create_cheetah(cls, enable_automatic_punctuation: bool) -> Cheetah:
def _create_cheetah(cls, language: str, enable_automatic_punctuation: bool) -> Cheetah:
return Cheetah(
access_key=cls._access_key,
model_path=default_model_path('../..'),
model_path=get_model_path_by_language(language=language),
library_path=default_library_path('../..'),
enable_automatic_punctuation=enable_automatic_punctuation)

@parameterized.expand(parameters)
@parameterized.expand(language_tests)
def test_process(
self,
_: str,
language: str,
audio_file: str,
expected_transcript: str,
punctuations: List[str],
error_rate: float):
o = None

try:
o = self._create_cheetah(False)
o = self._create_cheetah(language=language, enable_automatic_punctuation=False)

pcm = read_wav_file(
file_name=os.path.join(self._audio_directory, audio_file),
Expand All @@ -77,18 +75,18 @@ def test_process(
if o is not None:
o.delete()

@parameterized.expand(parameters)
@parameterized.expand(language_tests)
def test_process_with_punctuation(
self,
_: str,
language: str,
audio_file: str,
expected_transcript: str,
punctuations: List[str],
error_rate: float):
o = None

try:
o = self._create_cheetah(True)
o = self._create_cheetah(language=language, enable_automatic_punctuation=True)

pcm = read_wav_file(
file_name=os.path.join(self._audio_directory, audio_file),
Expand All @@ -112,7 +110,11 @@ def test_process_with_punctuation(
o.delete()

def test_version(self):
o = self._create_cheetah(False)
o = Cheetah(
access_key=self._access_key,
model_path=default_model_path('../..'),
library_path=default_library_path('../..'),
enable_automatic_punctuation=True)
self.assertIsInstance(o.version, str)
self.assertGreater(len(o.version), 0)

Expand Down
12 changes: 6 additions & 6 deletions binding/python/test_cheetah_perf.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#
# Copyright 2022 Picovoice Inc.
# Copyright 2022-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#

import os
Expand Down
38 changes: 29 additions & 9 deletions binding/python/test_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2023 Picovoice Inc.
# Copyright 2023-2024 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
Expand All @@ -9,23 +9,32 @@
# specific language governing permissions and limitations under the License.
#

import json
import os
import struct
import wave

from typing import *


def load_test_data() -> List[Tuple[str, str, str, List[str], float]]:
parameters = [
data_file_path = os.path.join(os.path.dirname(__file__), "../../resources/.test/test_data.json")
with open(data_file_path, encoding="utf8") as data_file:
json_test_data = data_file.read()
test_data = json.loads(json_test_data)['tests']

language_tests = [
(
"en",
"test.wav",
"Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.",
["."],
0.025
t['language'],
t['audio_file'],
t['transcript'],
t['punctuations'],
t['error_rate'],
)
for t in test_data['language_tests']
]

return parameters
return language_tests


def read_wav_file(file_name: str, sample_rate: int) -> Tuple:
Expand All @@ -48,14 +57,25 @@ def read_wav_file(file_name: str, sample_rate: int) -> Tuple:
return frames[::channels]


def get_model_path_by_language(language: str) -> str:
    """
    Build the path of the Cheetah model file (`.pv`) for the given language.

    The path is anchored at the directory containing this module and points into
    `../../lib/common`; the language-specific file stem is produced by `_append_language`.

    :param language: Language code passed through to `_append_language`.
    :return: Path to the `.pv` model file (joined, not normalised or checked for existence).
    """
    module_dir = os.path.dirname(__file__)
    model_path_subdir = _append_language('../../lib/common/cheetah_params', language)
    return os.path.join(module_dir, f'{model_path_subdir}.pv')


def _append_language(s: str, language: str) -> str:
    """
    Return `s` with a `_<language>` suffix, except for English which is left unsuffixed.

    :param s: Base string (e.g. a path stem) to decorate.
    :param language: Language code; the exact value 'en' means "no suffix".
    :return: `s` unchanged when `language == 'en'`, otherwise `s` followed by `_` and `language`.
    """
    return s if language == 'en' else "%s_%s" % (s, language)


def get_word_error_rate(transcript: str, expected_transcript: str, use_cer: bool = False) -> float:
    """
    Compute the word (or character) error rate of `transcript` against `expected_transcript`.

    Both strings are tokenised the same way, and the Levenshtein distance between the two token
    sequences is divided by the number of tokens in the reference (`expected_transcript`).

    :param transcript: Hypothesis text to score.
    :param expected_transcript: Reference text the hypothesis is compared with.
    :param use_cer: If True, compare character by character; otherwise compare whitespace-separated words.
    :return: Edit distance per reference token. 0.0 means the token sequences are identical; the value
    can exceed 1.0 when the hypothesis contains many more tokens than the reference.
    """
    transcript_split = list(transcript) if use_cer else transcript.split()
    expected_split = list(expected_transcript) if use_cer else expected_transcript.split()

    # Normalise by the reference token count rather than by the character length of the hypothesis
    # string: the latter under-reports word errors and raises ZeroDivisionError on an empty transcript.
    # max(..., 1) keeps an empty reference from dividing by zero (the raw edit count is returned then).
    reference_length = max(len(expected_split), 1)
    return _levenshtein_distance(transcript_split, expected_split) / reference_length


def _levenshtein_distance(words1: Sequence[str], words2: Sequence[str]) -> int:
res = [[0] * (len(words1) + 2) for _ in range(len(words2) + 1)]
res = [[0] * (len(words2) + 1) for _ in range(len(words1) + 1)]
for i in range(len(words1) + 1):
res[i][0] = i
for j in range(len(words2) + 1):
Expand Down
2 changes: 1 addition & 1 deletion demo/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
pvcheetah==2.0.2
pvcheetah==2.1.0
pvrecorder==1.2.3
2 changes: 1 addition & 1 deletion demo/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

setuptools.setup(
name="pvcheetahdemo",
version="2.0.2",
version="2.1.0",
author="Picovoice",
author_email="[email protected]",
description="Cheetah speech-to-text engine demos",
Expand Down
Loading