diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index fb0c9c4b0b..b6abbefac0 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -53,17 +53,17 @@ jobs:
wget https://huggingface.co/smangrul/tinyllama_lora_sql/resolve/main/adapter_model.safetensors?download=true -O adapter_model.safetensors
- run: >
. ./ov/setupvars.sh
- && timeout 25s ./build/samples/cpp/multinomial_causal_lm/multinomial_causal_lm ./open_llama_3b_v2/ a
+ && timeout 35s ./build/samples/cpp/multinomial_causal_lm/multinomial_causal_lm ./open_llama_3b_v2/ a
env:
PYTHONPATH: "./build"
- run: >
. ./ov/setupvars.sh
- && timeout 25s ./samples/python/multinomial_causal_lm/multinomial_causal_lm.py ./open_llama_3b_v2/ b
+ && timeout 35s ./samples/python/multinomial_causal_lm/multinomial_causal_lm.py ./open_llama_3b_v2/ b
env:
PYTHONPATH: "./build"
- run: >
. ./ov/setupvars.sh
- && timeout 25s ./build/samples/cpp/text_generation/greedy_causal_lm ./open_llama_3b_v2/ "return 0"
+ && timeout 35s ./build/samples/cpp/text_generation/greedy_causal_lm ./open_llama_3b_v2/ "return 0"
| diff <(timeout 25s samples/python/text_generation/greedy_causal_lm.py ./open_llama_3b_v2/ "return 0") -
env:
PYTHONPATH: "./build"
diff --git a/.github/workflows/genai-tools.yml b/.github/workflows/genai-tools.yml
index bd6cb46362..333bee3e11 100644
--- a/.github/workflows/genai-tools.yml
+++ b/.github/workflows/genai-tools.yml
@@ -44,7 +44,7 @@ jobs:
with:
platform: ubuntu22
commit_packages_to_provide: wheels
- revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
+ revision: latest_available_commit
llm_bench:
name: 'LLM bench tests'
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 0d7a5b7bae..0a991e2a54 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -52,7 +52,7 @@ jobs:
with:
platform: ubuntu22
commit_packages_to_provide: wheels
- revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
+ revision: latest_available_commit
- name: Clone docker tag from OpenVINO repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 062b83fc27..7cb0ff98d3 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -17,7 +17,7 @@ concurrency:
env:
PYTHON_VERSION: '3.10'
- OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e
+ OV_BRANCH: 'master'
OV_TARBALL: ''
jobs:
diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
index 3b01697f26..e0bf5371b3 100644
--- a/.github/workflows/stable_diffusion_1_5_cpp.yml
+++ b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -45,7 +45,7 @@ jobs:
with:
platform: ubuntu22
commit_packages_to_provide: wheels
- revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
+ revision: latest_available_commit
openvino_download_windows:
name: Download OpenVINO for Windows
@@ -71,7 +71,7 @@ jobs:
with:
platform: windows
commit_packages_to_provide: wheels
- revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
+ revision: latest_available_commit
stable_diffusion_1_5_cpp-linux:
runs-on: ubuntu-22.04-8-cores
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 95a713d7a1..e65972110b 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -17,7 +17,7 @@ concurrency:
env:
PYTHON_VERSION: '3.11'
- OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e
+ OV_BRANCH: 'master'
OV_TARBALL: ''
jobs:
@@ -310,6 +310,12 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
+
+          # This pinned optimum-intel commit installs transformers 4.46.3,
+          # which enables the return_timestamps tests.
+          # The check is enabled on Windows only. Ticket: 160205.
+ python -m pip install git+https://github.com/huggingface/optimum-intel.git@753f84db6e0966580eb9eaa74a808213be730631
+
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_python_lib_vlm:
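For context on the optimum-intel pin above: a minimal sketch, not part of the workflow, of the transformers version gate it relies on. It mirrors the check added to tests/python_tests/test_whisper_pipeline.py later in this patch; the helper name is illustrative.

```python
# Sketch only: chunk-level timestamp comparison is meaningful on
# transformers < 4.47 (e.g. the 4.46.3 build pulled in by the pinned
# optimum-intel commit); 4.47 changed return_timestamps. Ticket: 160205.
import importlib.metadata as metadata
from packaging.version import parse

def timestamp_chunks_comparable() -> bool:
    return parse(metadata.version("transformers")) < parse("4.47.0")

print(timestamp_chunks_comparable())
```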
diff --git a/README.md b/README.md
index c5cf799973..cea1e358bc 100644
--- a/README.md
+++ b/README.md
@@ -73,9 +73,9 @@ optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weigh
### Run generation using LLMPipeline API in Python
```python
-import openvino_genai as ov_genai
+import openvino_genai
# Will run the model on CPU; GPU and NPU are also possible options
-pipe = ov_genai.LLMPipeline("./TinyLlama-1.1B-Chat-v1.0/", "CPU")
+pipe = openvino_genai.LLMPipeline("./TinyLlama-1.1B-Chat-v1.0/", "CPU")
print(pipe.generate("The Sun is yellow because", max_new_tokens=100))
```
@@ -128,11 +128,11 @@ curl -O "https://storage.openvinotoolkit.org/test_data/images/dog.jpg"
```python
import numpy as np
import openvino as ov
-import openvino_genai as ov_genai
+import openvino_genai
from PIL import Image
# Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
-pipe = ov_genai.VLMPipeline("./InternVL2-1B", "CPU")
+pipe = openvino_genai.VLMPipeline("./InternVL2-1B", "CPU")
pipe.start_chat()
image = Image.open("dog.jpg")
diff --git a/SUPPORTED_MODELS.md b/SUPPORTED_MODELS.md
index 6b45f47890..79333fa45c 100644
--- a/SUPPORTED_MODELS.md
+++ b/SUPPORTED_MODELS.md
@@ -14,7 +14,6 @@
ChatGLM |
|
diff --git a/samples/export-requirements.txt b/samples/export-requirements.txt
index 2f71891b7b..af38558656 100644
--- a/samples/export-requirements.txt
+++ b/samples/export-requirements.txt
@@ -2,7 +2,7 @@
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
openvino-tokenizers~=2025.0.0.0.dev
-optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@753f84db6e0966580eb9eaa74a808213be730631
+optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
numpy<2.0.0; sys_platform == 'darwin'
einops==0.8.0 # For Qwen
transformers_stream_generator==0.0.5 # For Qwen
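Dropping the commit pin means pip resolves whatever the optimum-intel default branch points to at install time. As a hedged illustration (not part of this patch), the resolved commit can be read back from the PEP 610 `direct_url.json` record that pip writes for VCS installs:

```python
# Sketch only: report which optimum-intel commit the unpinned VCS requirement
# actually resolved to, using the standard PEP 610 metadata written by pip.
import importlib.metadata as metadata
import json

dist = metadata.distribution("optimum-intel")
raw = dist.read_text("direct_url.json")  # present only for VCS/direct-URL installs
if raw:
    info = json.loads(raw)
    print(info.get("url"), info.get("vcs_info", {}).get("commit_id"))
else:
    print("optimum-intel was not installed from a VCS URL")
```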
diff --git a/tests/python_tests/models/real_models b/tests/python_tests/models/real_models
index 420f8f53b6..5fd8fe0500 100644
--- a/tests/python_tests/models/real_models
+++ b/tests/python_tests/models/real_models
@@ -27,7 +27,6 @@ Salesforce/codegen-350M-multi
Salesforce/codegen-350M-nl
Salesforce/codegen2-1b
# Salesforce/xgen-7b-8k-base: Transformers issue - Object of type method is not JSON serializable (https://huggingface.co/Salesforce/xgen-7b-8k-base/discussions/32)
-THUDM/chatglm2-6b
THUDM/chatglm3-6b
TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ
TinyLlama/TinyLlama-1.1B-Chat-v0.6
diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py
index 66fb58f46d..ff55c3c378 100644
--- a/tests/python_tests/ov_genai_test_utils.py
+++ b/tests/python_tests/ov_genai_test_utils.py
@@ -26,7 +26,7 @@ def get_models_list():
"facebook/opt-125m",
"microsoft/phi-1_5",
"microsoft/phi-2",
- "THUDM/chatglm2-6b",
+ "THUDM/chatglm3-6b",
"Qwen/Qwen2-0.5B-Instruct",
"Qwen/Qwen-7B-Chat",
"Qwen/Qwen1.5-7B-Chat",
diff --git a/tests/python_tests/requirements.txt b/tests/python_tests/requirements.txt
index e23eaacc21..c851c71ee5 100644
--- a/tests/python_tests/requirements.txt
+++ b/tests/python_tests/requirements.txt
@@ -1,6 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/cpu
diffusers==0.32.1
-optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@753f84db6e0966580eb9eaa74a808213be730631
+optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
numpy<2.0.0; platform_system == "Darwin" and platform_machine == "x86_64"
onnx==1.17.0
pytest
diff --git a/tests/python_tests/test_whisper_pipeline.py b/tests/python_tests/test_whisper_pipeline.py
index aa78666e32..c046d1ae2c 100644
--- a/tests/python_tests/test_whisper_pipeline.py
+++ b/tests/python_tests/test_whisper_pipeline.py
@@ -11,11 +11,13 @@
from optimum.intel.openvino import OVModelForSpeechSeq2Seq
import gc
import json
-import time
import typing
import numpy as np
import os
import pathlib
+import importlib.metadata as metadata
+from packaging.version import parse
+
@pytest.fixture(scope="class", autouse=True)
def run_gc_after_test():
@@ -27,36 +29,29 @@ def run_gc_after_test():
gc.collect()
-def get_whisper_models_list(tiny_only=False, multilingual=False, en_only=False):
- precommit_models = [
+def get_whisper_models_list(tiny_only=False):
+ model_ids = [
"openai/whisper-tiny",
- "openai/whisper-tiny.en",
"distil-whisper/distil-small.en",
]
- if multilingual:
- precommit_models = ["openai/whisper-tiny"]
- if en_only:
- precommit_models = ["openai/whisper-tiny.en", "distil-whisper/distil-small.en"]
- if tiny_only:
- precommit_models = ["openai/whisper-tiny"]
-
- nightly_models = []
- if pytest.run_marker == "precommit":
- model_ids = precommit_models
- else:
- model_ids = nightly_models
+ if tiny_only:
+ model_ids = ["openai/whisper-tiny"]
if pytest.selected_model_ids:
- model_ids = [model_id for model_id in model_ids if model_id in pytest.selected_model_ids.split(' ')]
+ model_ids = [
+ model_id
+ for model_id in model_ids
+ if model_id in pytest.selected_model_ids.split(" ")
+ ]
- prefix = pathlib.Path(os.getenv('GENAI_MODELS_PATH_PREFIX', ''))
- return [(model_id, prefix / model_id.split('/')[1]) for model_id in model_ids]
+ prefix = pathlib.Path(os.getenv("GENAI_MODELS_PATH_PREFIX", ""))
+ return [(model_id, prefix / model_id.split("/")[1]) for model_id in model_ids]
# the whisper models used here are relatively small
# cache them in memory to speed up tests
-@functools.lru_cache(3)
+@functools.lru_cache()
def read_whisper_model(params, **tokenizer_kwargs):
model_id, path = params
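A small illustration (not part of the test suite) of the caching change above: `functools.lru_cache()` with no arguments defaults to `maxsize=128`, so every model converted during a session stays cached, whereas the previous `lru_cache(3)` kept only the three most recently used entries.

```python
# Illustrative only: the second call is served from the cache, so the
# expensive conversion (printed here) runs once per distinct model id.
import functools

@functools.lru_cache()  # default maxsize=128
def load_model(model_id: str) -> str:
    print(f"converting {model_id}")
    return f"<pipeline for {model_id}>"

load_model("openai/whisper-tiny")
load_model("openai/whisper-tiny")  # cache hit, no second "converting" line
```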
@@ -90,6 +85,7 @@ def read_whisper_model(params, **tokenizer_kwargs):
model_id,
export=True,
trust_remote_code=True,
+ stateful=False,
compile=False,
device="CPU",
load_in_8bit=False,
@@ -114,30 +110,39 @@ def read_whisper_model(params, **tokenizer_kwargs):
)
-def compare_genai_and_opt_pipelines(opt_pipe, genai_pipe, dataset_id):
- ds = datasets.load_dataset(dataset_id, "clean", split="validation")
- opt_infer_time = 0
- genai_infer_time = 0
-
- for ds_row in ds:
- audio_sample = ds_row["audio"]
+def run_huggingface(
+ pipeline,
+ sample,
+ config: ov_genai.WhisperGenerationConfig | None = None,
+):
+ if not config:
+ config = ov_genai.WhisperGenerationConfig()
+
+ return pipeline(
+ sample,
+ max_new_tokens=min(config.max_new_tokens, 444),
+ return_timestamps=config.return_timestamps,
+ generate_kwargs={"language": config.language, "task": config.task},
+ )
- streamer_result = []
- start = time.time()
- genai_result = genai_pipe.generate(
- audio_sample["array"].tolist(), streamer=lambda x: streamer_result.append(x)
- )
- genai_infer_time += time.time() - start
+def run_genai(
+ pipeline: ov_genai.WhisperPipeline,
+ sample,
+ config: ov_genai.WhisperGenerationConfig | None = None,
+ streamer: typing.Callable[[str], bool] | None = None,
+):
+ if not config:
+ config = ov_genai.WhisperGenerationConfig()
- start = time.time()
- result = opt_pipe(audio_sample)
- opt_infer_time += time.time() - start
+ genai_config = pipeline.get_generation_config()
- assert genai_result.texts[0] == result["text"]
- assert "".join(streamer_result) == result["text"]
+ genai_config.max_new_tokens = config.max_new_tokens
+ genai_config.return_timestamps = config.return_timestamps
+ genai_config.task = config.task
+ genai_config.language = f"<|{config.language}|>" if config.language else None
- print(f"Inference time\nOpt: {opt_infer_time}\nGenAI: {genai_infer_time}")
+ return pipeline.generate(sample, genai_config, streamer=streamer)
def get_samples_from_dataset(
@@ -166,13 +171,50 @@ def get_samples_from_dataset(
return [x["audio"]["array"] for x in ds]
-@pytest.mark.parametrize("model_descr", get_whisper_models_list())
-@pytest.mark.parametrize("dataset_id", ["hf-internal-testing/librispeech_asr_dummy"])
-@pytest.mark.precommit
-def test_whisper_on_hf_dataset(model_descr, dataset_id):
- model_id, path, opt_pipe, genai_pipe = read_whisper_model(model_descr)
+def run_pipeline_with_ref(
+ model_id: str,
+ tmp_path: str,
+ sample: np.ndarray | list[np.ndarray],
+ generation_config: ov_genai.WhisperGenerationConfig | None = None,
+ streamer: typing.Callable[[str], bool] | None = None,
+):
+ _, _, hf_pipe, genai_pipe = read_whisper_model((model_id, tmp_path))
+
+ if type(sample) is np.ndarray and len(sample.shape) == 1:
+ sample = np.expand_dims(sample, 0)
+
+ for _sample in sample:
+ genai_result = run_genai(genai_pipe, _sample, generation_config, streamer)
+ hf_result = run_huggingface(hf_pipe, _sample, generation_config)
+
+ compare_results(hf_result, genai_result)
+
- compare_genai_and_opt_pipelines(opt_pipe, genai_pipe, dataset_id)
+def compare_results(hf_result, genai_result):
+ assert genai_result.texts[0] == hf_result["text"]
+
+    # transformers 4.47 updated the return_timestamps implementation.
+    # Remove this once the GenAI implementation is aligned with transformers. Ticket: 160205.
+ transformers_version_greater_4_47 = parse(
+ metadata.version("transformers")
+ ) >= parse("4.47.0")
+
+ if transformers_version_greater_4_47:
+ return
+
+ if "chunks" not in hf_result and genai_result.chunks is None:
+ return
+
+ assert len(genai_result.chunks) == len(hf_result["chunks"])
+
+ for opt_chunk, genai_chunk in zip(hf_result["chunks"], genai_result.chunks):
+ assert opt_chunk["text"] == genai_chunk.text
+ assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
+ if opt_chunk["timestamp"][1]:
+ assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
+ else:
+            assert opt_chunk["timestamp"][1] is None
+ assert round(genai_chunk.end_ts, 2) == -1.0
@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@@ -182,16 +224,11 @@ def test_whisper_on_hf_dataset(model_descr, dataset_id):
)
@pytest.mark.precommit
def test_smoke(model_descr, test_sample):
- model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
- expected = opt_pipe(test_sample)
-
- genai_result = pipe.generate(test_sample)
-
- assert genai_result.texts[0] == expected["text"]
-
- assert "chunks" not in expected
- assert genai_result.chunks == None
+ run_pipeline_with_ref(
+ model_id=model_descr[0],
+ tmp_path=model_descr[1],
+ sample=test_sample,
+ )
@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@@ -259,79 +296,55 @@ def test_whisper_constructors(model_descr, test_sample):
def test_max_new_tokens(model_descr, test_sample):
model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
- expected = opt_pipe(test_sample, max_new_tokens=10)["text"]
+ expected = opt_pipe(test_sample, max_new_tokens=10)
genai_result = pipe.generate(test_sample, max_new_tokens=10)
- assert genai_result.texts[0] == expected
-
- genai_result = pipe.generate(test_sample)
-
- assert genai_result.texts[0] != expected
+ compare_results(expected, genai_result)
config = pipe.get_generation_config()
config.max_new_tokens = 10
genai_result = pipe.generate(test_sample, config)
- assert genai_result.texts[0] == expected
+ compare_results(expected, genai_result)
@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
- "test_sample", get_samples_from_dataset(language="fr", length=3)
+ "test_samples",
+ [
+ (get_samples_from_dataset(language="fr", length=1), "fr"),
+ (get_samples_from_dataset(language="de", length=1), "de"),
+ ],
)
@pytest.mark.precommit
-def test_language_mode_fr(model_descr, test_sample):
- model_id, path = model_descr
+def test_language_mode(model_descr, test_samples):
model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
+ samples, language = test_samples
expected = opt_pipe(
- test_sample, max_new_tokens=30, generate_kwargs={"language": "fr"}
+ samples[0], max_new_tokens=30, generate_kwargs={"language": language}
)
- genai_result = pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>")
-
- assert genai_result.texts[0] == expected["text"]
-
- config = pipe.get_generation_config()
- config.max_new_tokens = 30
- config.language = "<|fr|>"
- genai_result = pipe.generate(test_sample, config)
-
- assert genai_result.texts[0] == expected["text"]
-
-
-@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
-@pytest.mark.parametrize(
- "test_sample", get_samples_from_dataset(language="de", length=3)
-)
-@pytest.mark.precommit
-def test_language_mode_de(model_descr, test_sample):
- model_id, path = model_descr
- model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
- expected = opt_pipe(
- test_sample, max_new_tokens=30, generate_kwargs={"language": "de"}
+ genai_result = pipe.generate(
+ samples[0], max_new_tokens=30, language=f"<|{language}|>"
)
- genai_result = pipe.generate(test_sample, max_new_tokens=30, language="<|de|>")
-
- assert genai_result.texts[0] == expected["text"]
+ compare_results(expected, genai_result)
config = pipe.get_generation_config()
config.max_new_tokens = 30
- config.language = "<|de|>"
- genai_result = pipe.generate(test_sample, config)
+ config.language = f"<|{language}|>"
+ genai_result = pipe.generate(samples[0], config)
- assert genai_result.texts[0] == expected["text"]
+ compare_results(expected, genai_result)
@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
- "test_sample", get_samples_from_dataset(language="fr", length=3)
+ "test_sample", get_samples_from_dataset(language="fr", length=1)
)
@pytest.mark.precommit
def test_task_mode(model_descr, test_sample):
- model_id, path = model_descr
model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
expected = opt_pipe(
@@ -344,7 +357,7 @@ def test_task_mode(model_descr, test_sample):
test_sample, max_new_tokens=30, language="<|fr|>", task="translate"
)
- assert genai_result.texts[0] == expected["text"]
+ compare_results(expected, genai_result)
config = pipe.get_generation_config()
config.max_new_tokens = 30
@@ -352,27 +365,7 @@ def test_task_mode(model_descr, test_sample):
config.task = "translate"
genai_result = pipe.generate(test_sample, config)
- assert genai_result.texts[0] == expected["text"]
-
- expected = opt_pipe(
- test_sample,
- max_new_tokens=30,
- generate_kwargs={"language": "ru", "task": "translate"},
- )
-
- genai_result = pipe.generate(
- test_sample, max_new_tokens=30, language="<|ru|>", task="translate"
- )
-
- assert genai_result.texts[0] == expected["text"]
-
- config = pipe.get_generation_config()
- config.max_new_tokens = 30
- config.language = "<|ru|>"
- config.task = "translate"
- genai_result = pipe.generate(test_sample, config)
-
- assert genai_result.texts[0] == expected["text"]
+ compare_results(expected, genai_result)
# seems to be equivalent to translate task
expected = opt_pipe(
@@ -385,7 +378,7 @@ def test_task_mode(model_descr, test_sample):
test_sample, max_new_tokens=30, language="<|en|>", task="transcribe"
)
- assert genai_result.texts[0] == expected["text"]
+ compare_results(expected, genai_result)
config = pipe.get_generation_config()
config.max_new_tokens = 30
@@ -393,21 +386,20 @@ def test_task_mode(model_descr, test_sample):
config.task = "transcribe"
genai_result = pipe.generate(test_sample, config)
- assert genai_result.texts[0] == expected["text"]
+ compare_results(expected, genai_result)
@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
"test_sample",
[
- *get_samples_from_dataset(language="fr", length=2),
- *get_samples_from_dataset(language="de", length=2),
- *get_samples_from_dataset(language="es", length=2),
+ *get_samples_from_dataset(language="fr", length=1),
+ *get_samples_from_dataset(language="de", length=1),
+ *get_samples_from_dataset(language="es", length=1),
],
)
@pytest.mark.precommit
def test_language_autodetect(model_descr, test_sample):
- model_id, path = model_descr
model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
input_features = opt_pipe.feature_extractor(test_sample)
@@ -415,188 +407,84 @@ def test_language_autodetect(model_descr, test_sample):
# ensure the detected language is not English
assert language_id != pipe.get_generation_config().lang_to_id["<|en|>"]
- expected = opt_pipe(
- test_sample,
- max_new_tokens=30,
+ run_pipeline_with_ref(
+ model_id=model_descr[0],
+ tmp_path=model_descr[1],
+ sample=test_sample,
+ generation_config=ov_genai.WhisperGenerationConfig(max_new_tokens=30),
)
- genai_result = pipe.generate(test_sample, max_new_tokens=30)
-
- assert genai_result.texts[0] == expected["text"]
-
@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
-@pytest.mark.parametrize(
- "test_sample",
- [
- *get_samples_from_dataset(language="en", length=10, long_form=True),
- ],
-)
+@pytest.mark.parametrize("test_sample", get_samples_from_dataset(length=1))
@pytest.mark.precommit
def test_return_timestamps_short_form(model_descr, test_sample):
- model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
- # long form audio not supported yet
- test_sample = test_sample[: 16000 * 30]
-
- expected = opt_pipe(
- test_sample,
- return_timestamps=True,
- )
-
- genai_result = pipe.generate(
- test_sample.tolist(),
- return_timestamps=True,
+ run_pipeline_with_ref(
+ model_id=model_descr[0],
+ tmp_path=model_descr[1],
+ sample=test_sample,
+ generation_config=ov_genai.WhisperGenerationConfig(return_timestamps=True),
)
- assert genai_result.texts[0] == expected["text"]
-
- assert len(genai_result.chunks) == len(expected["chunks"])
-
- for opt_chunk, genai_chunk in zip(expected["chunks"], genai_result.chunks):
- assert opt_chunk["text"] == genai_chunk.text
- assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
- assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
-
@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
-@pytest.mark.parametrize(
- "test_sample",
- [
- *get_samples_from_dataset(language="en", length=10, long_form=True),
- ],
-)
+@pytest.mark.parametrize("test_sample", get_samples_from_dataset(length=1))
@pytest.mark.precommit
def test_return_timestamps_max_new_tokens_short_form(model_descr, test_sample):
- model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
- # long form audio not supported yet
- test_sample = test_sample[: 16000 * 30]
-
- expected = opt_pipe(
- test_sample,
- return_timestamps=True,
- max_new_tokens=15,
- generate_kwargs={"language": "en"},
- )
-
- genai_result = pipe.generate(
- test_sample.tolist(),
- max_new_tokens=15,
- return_timestamps=True,
- language="<|en|>",
+ run_pipeline_with_ref(
+ model_id=model_descr[0],
+ tmp_path=model_descr[1],
+ sample=test_sample,
+ generation_config=ov_genai.WhisperGenerationConfig(
+ return_timestamps=True, language="en", max_new_tokens=30
+ ),
)
- assert genai_result.texts[0] == expected["text"]
-
- assert len(genai_result.chunks) == len(expected["chunks"])
- for opt_chunk, genai_chunk in zip(expected["chunks"], genai_result.chunks):
- assert opt_chunk["text"] == genai_chunk.text
- assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
- if opt_chunk["timestamp"][1]:
- assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
- else:
- assert opt_chunk["timestamp"][1] == None
- assert round(genai_chunk.end_ts, 2) == -1.0
-
-
-@pytest.mark.parametrize("model_descr", get_whisper_models_list(multilingual=True))
+@pytest.mark.parametrize("model_descr", get_whisper_models_list())
@pytest.mark.parametrize(
- "test_sample",
- [
- *get_samples_from_dataset(language="en", length=10, long_form=True),
- *get_samples_from_dataset(language="fr", length=10, long_form=True),
- ],
+ "test_sample", get_samples_from_dataset(length=10, long_form=True)
)
@pytest.mark.precommit
-def test_longform_audio_return_timestamps_multilingual(model_descr, test_sample):
- model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
- expected = opt_pipe(
- test_sample,
- return_timestamps=True,
- )
+def test_longform_audio(model_descr, test_sample):
+ _, _, hf_pipe, genai_pipe = read_whisper_model(model_descr)
streamer_result = []
- genai_result = pipe.generate(
+ genai_result = run_genai(
+ genai_pipe,
test_sample,
- return_timestamps=True,
+ config=ov_genai.WhisperGenerationConfig(return_timestamps=True),
streamer=lambda x: streamer_result.append(x),
)
- assert genai_result.texts[0] == expected["text"]
- assert "".join(streamer_result) == expected["text"]
-
- assert len(genai_result.chunks) == len(expected["chunks"])
-
- for opt_chunk, genai_chunk in zip(expected["chunks"], genai_result.chunks):
- assert opt_chunk["text"] == genai_chunk.text
- assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
- if opt_chunk["timestamp"][1]:
- assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
- else:
- assert opt_chunk["timestamp"][1] == None
- assert round(genai_chunk.end_ts, 2) == -1.0
-
-
-@pytest.mark.parametrize("model_descr", get_whisper_models_list(en_only=True))
-@pytest.mark.parametrize(
- "test_sample",
- [
- *get_samples_from_dataset(language="en", length=10, long_form=True),
- ],
-)
-@pytest.mark.precommit
-def test_longform_audio_return_timestamps_en(model_descr, test_sample):
- model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
- expected = opt_pipe(
- test_sample,
- return_timestamps=True,
- )
-
- streamer_result = []
-
- genai_result = pipe.generate(
+ hf_result = run_huggingface(
+ hf_pipe,
test_sample,
- return_timestamps=True,
- streamer=lambda x: streamer_result.append(x),
+ config=ov_genai.WhisperGenerationConfig(return_timestamps=True),
)
- assert genai_result.texts[0] == expected["text"]
- assert "".join(streamer_result) == expected["text"]
-
- assert len(genai_result.chunks) == len(expected["chunks"])
+ compare_results(hf_result, genai_result)
- for opt_chunk, genai_chunk in zip(expected["chunks"], genai_result.chunks):
- assert opt_chunk["text"] == genai_chunk.text
- assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
- if opt_chunk["timestamp"][1]:
- assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
- else:
- assert opt_chunk["timestamp"][1] == None
- assert round(genai_chunk.end_ts, 2) == -1.0
+ assert "".join(streamer_result) == hf_result["text"]
-@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
-@pytest.mark.parametrize(
- "test_sample",
- [
- *get_samples_from_dataset(language="en", length=3, long_form=True),
- *get_samples_from_dataset(language="sp", length=3, long_form=True),
- ],
-)
+@pytest.mark.parametrize("model_descr", get_whisper_models_list())
@pytest.mark.precommit
-def test_longform_audio(model_descr, test_sample):
- model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
- expected = opt_pipe(test_sample, return_timestamps=True)
-
- genai_result = pipe.generate(test_sample)
+def test_shortform(model_descr):
+ samples = []
+ ds = datasets.load_dataset(
+ "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation"
+ )
- assert genai_result.texts[0] == expected["text"]
+ for ds_row in ds:
+ samples.append(ds_row["audio"]["array"])
- assert genai_result.chunks == None
+ run_pipeline_with_ref(
+ model_id=model_descr[0],
+ tmp_path=model_descr[1],
+ sample=samples,
+ )
@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers
index bcfd3eda25..d5f0abf827 160000
--- a/thirdparty/openvino_tokenizers
+++ b/thirdparty/openvino_tokenizers
@@ -1 +1 @@
-Subproject commit bcfd3eda25ae3ec423502a4074e35c774506c732
+Subproject commit d5f0abf8271f3cd8fc98d747b3e569fbeacca532