Skip to content

Commit

Permalink
Master increase priority for rt info to fix Phi-3.5-vision-instruct and Phi-3-vision-128k-instruct (#1626)
Browse files Browse the repository at this point in the history
  • Loading branch information
Wovchena authored Jan 24, 2025
1 parent 01ad253 commit b6ec4b7
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 22 deletions.
32 changes: 11 additions & 21 deletions src/cpp/src/tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,22 +173,12 @@ class Tokenizer::TokenizerImpl {
ov_detokenizer = core.read_model(models_path / "openvino_detokenizer.xml", {}, properties);
}

read_config(models_path);
read_special_tokens_map(models_path);
// Try to read tokenizer_config if some token ids or token str are not defined.
read_tokenizer_config_if_necessary(models_path);
setup_tokenizer(std::make_pair(ov_tokenizer, ov_detokenizer), properties);

// If special tokens were not found from IR, try to read them from config.
// This will be triggered only for IRs older than 2024.3.
if (m_pad_token_id == -1 || m_bos_token_id == -1 || m_eos_token_id == -1 ||
m_pad_token.empty() || m_bos_token.empty() || m_eos_token.empty()) {
read_config(models_path);
read_special_tokens_map(models_path);
// Try to read tokenizer_config if some token ids or token str are not defined.
read_tokenizer_config_if_necessary(models_path);
}

// If chat_template was not found in IR, try to read them from config.
if (m_chat_template.empty()) {
m_chat_template = chat_template_from_tokenizer_json_if_exists(models_path);
}
m_chat_template = chat_template_from_tokenizer_json_if_exists(models_path);
}

void setup_tokenizer(const std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::Model>>& models, const ov::AnyMap& properties) {
Expand Down Expand Up @@ -250,12 +240,12 @@ class Tokenizer::TokenizerImpl {
m_chat_template = patch_chat_template(m_chat_template);
if (m_detokenizer) {
// Unset/-1 token causes exception in SentencePiece detokenization.
if (m_pad_token_id != -1)
m_pad_token = decode(std::vector{m_pad_token_id});
if (m_bos_token_id != -1)
m_bos_token = decode(std::vector{m_bos_token_id});
if (m_eos_token_id != -1)
m_eos_token = decode(std::vector{m_eos_token_id});
if (m_pad_token_id != -1 && m_pad_token.empty())
m_pad_token = decode(std::vector{m_pad_token_id}, {ov::genai::add_special_tokens(true)});
if (m_bos_token_id != -1 && m_bos_token.empty())
m_bos_token = decode(std::vector{m_bos_token_id}, {ov::genai::add_special_tokens(true)});
if (m_eos_token_id != -1 && m_eos_token.empty())
m_eos_token = decode(std::vector{m_eos_token_id}, {ov::genai::add_special_tokens(true)});
}
}

Expand Down
17 changes: 17 additions & 0 deletions tests/python_tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import shutil
import pytest
import openvino

from optimum.intel import OVModelForCausalLM
from pathlib import Path
Expand Down Expand Up @@ -513,3 +514,19 @@ def get_image_by_link(link):
image = image.convert('RGB')
image_data = np.array((np.array(image.getdata()) - 128).astype(np.byte)).reshape(1, 3, image.size[1], image.size[0])
return Tensor(image_data)


def delete_rt_info(configs: List[Tuple], temp_path):
    """rt_info has the highest priority. Delete it to respect configs.

    Strips every key that appears in the given configs from the rt_info of
    both openvino_tokenizer.xml and openvino_detokenizer.xml located under
    temp_path, then saves each model back in place. Keys absent from a
    model's rt_info are silently skipped.

    Args:
        configs: list of (config_dict, config_filename) pairs; only the
            dict keys are used for deletion.
        temp_path: directory (Path-like) containing the tokenizer and
            detokenizer IR files.
    """
    core = openvino.Core()
    # Disable mmap so the model files are not kept mapped, allowing
    # save_model to overwrite them on the same path.
    core.set_property({'ENABLE_MMAP': False})
    model_paths = (temp_path / "openvino_tokenizer.xml",
                   temp_path / "openvino_detokenizer.xml")
    for model_path in model_paths:
        model = core.read_model(model_path)
        rt_info = model.get_rt_info()
        for config, _ in configs:
            for key in config.keys():
                try:
                    del rt_info[key]
                except KeyError:
                    # Key not present in this model's rt_info — nothing to do.
                    pass
        openvino.save_model(model, model_path)
3 changes: 2 additions & 1 deletion tests/python_tests/ov_genai_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import json

import openvino_genai as ov_genai
from common import get_default_properties
from common import get_default_properties, delete_rt_info

def get_models_list():
precommit_models = [
Expand Down Expand Up @@ -173,6 +173,7 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path):
# remove existing jsons from previous tests
for json_file in temp_path.glob("*.json"):
json_file.unlink()
delete_rt_info(configs, temp_path)

for config_json, config_name in configs:
with (temp_path / config_name).open('w') as f:
Expand Down
3 changes: 3 additions & 0 deletions tests/python_tests/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import openvino_genai
import json

from common import delete_rt_info
from ov_genai_test_utils import (
get_models_list,
get_chat_models_list,
Expand All @@ -18,6 +19,8 @@


def load_genai_tokenizer_with_configs(configs: List[Tuple], temp_path):
delete_rt_info(configs, temp_path)

for json_file in temp_path.glob("*.json"):
json_file.unlink()

Expand Down

0 comments on commit b6ec4b7

Please sign in to comment.