Merge branch 'master' into feature/nodejs-bindings
vishniakov-nikolai authored Nov 25, 2024
2 parents 6853446 + d490c18 commit e433a19
Showing 31 changed files with 604 additions and 556 deletions.
25 changes: 21 additions & 4 deletions README.md
@@ -117,17 +117,34 @@ optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code --
 
 ### Run generation using VLMPipeline API in Python
 
+See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/visual_language_chat) for a demo application.
+
+Run the following command to download a sample image:
+
+```sh
+curl -O "https://storage.openvinotoolkit.org/test_data/images/dog.jpg"
+```
+
 ```python
+import numpy as np
+import openvino as ov
 import openvino_genai as ov_genai
-#Will run model on CPU, GPU is a possible option
+from PIL import Image
+
+# Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
 pipe = ov_genai.VLMPipeline("./MiniCPM-V-2_6/", "CPU")
-rgb = read_image("cat.jpg")
-print(pipe.generate(prompt, image=rgb, max_new_tokens=100))
+
+image = Image.open("dog.jpg")
+image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
+image_data = ov.Tensor(image_data)
+
+prompt = "Can you describe the image?"
+print(pipe.generate(prompt, image=image_data, max_new_tokens=100))
 ```
 
 ### Run generation using VLMPipeline in C++
 
-Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details)
+Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details). See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/visual_language_chat) for a demo application.
 
 ```cpp
 #include "load_image.hpp"
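The rest of the C++ sample is collapsed in this view. As a rough sketch of the usage it demonstrates, assuming the `utils::load_image` helper from `load_image.hpp`, the header path, and the property-style `generate` overload (all assumptions, not verified against this commit):

```cpp
#include "load_image.hpp"
#include "openvino/genai/visual_language/pipeline.hpp"

#include <iostream>

int main(int argc, char* argv[]) {
    std::string models_path = argv[1];  // sketch: no argument checking
    // Use "GPU" instead of "CPU" to run on an Intel integrated or discrete GPU.
    ov::genai::VLMPipeline pipe(models_path, "CPU");
    ov::Tensor rgb = utils::load_image(argv[2]);  // assumed helper from load_image.hpp
    std::cout << pipe.generate("Can you describe the image?",
                               ov::genai::image(rgb),
                               ov::genai::max_new_tokens(100));
}
```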
8 changes: 6 additions & 2 deletions samples/CMakeLists.txt
@@ -19,8 +19,12 @@ add_subdirectory(cpp/text2image)
 add_subdirectory(cpp/visual_language_chat)
 add_subdirectory(cpp/whisper_speech_recognition)
 
-install(FILES requirements.txt DESTINATION samples
-        COMPONENT cpp_samples_genai)
+install(FILES
+    deployment-requirements.txt
+    export-requirements.txt
+    requirements.txt
+    DESTINATION samples
+    COMPONENT cpp_samples_genai)
 
 install(DIRECTORY
     cpp/beam_search_causal_lm
6 changes: 4 additions & 2 deletions samples/cpp/text2image/README.md
@@ -46,14 +46,16 @@ You can also add a callback to the `main.cpp` file to interrupt the image generation
 Please find the template of the callback usage below.
 
 ```cpp
-auto callback = [](size_t step, ov::Tensor& intermediate_res) -> bool {
+ov::genai::Text2ImagePipeline pipe(models_path, device);
+
+auto callback = [&](size_t step, ov::Tensor& intermediate_res) -> bool {
     std::cout << "Image generation step: " << step << std::endl;
+    ov::Tensor img = pipe.decode(intermediate_res); // get intermediate image tensor
     if (your_condition) // return true if you want to interrupt image generation
         return true;
     return false;
 };
 
-ov::genai::Text2ImagePipeline pipe(models_path, device);
 ov::Tensor image = pipe.generate(prompt,
     ...
     ov::genai::callback(callback)
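As one concrete instance of `your_condition`, the fragment below interrupts generation after a fixed step budget. It is a sketch in the same style as the template: `models_path`, `device`, and `prompt` are assumed defined, and the two-argument callback signature is the one this commit documents:

```cpp
ov::genai::Text2ImagePipeline pipe(models_path, device);

const size_t step_budget = 10;  // hypothetical limit for this sketch
auto callback = [&](size_t step, ov::Tensor& intermediate_res) -> bool {
    std::cout << "Image generation step: " << step << std::endl;
    return step + 1 >= step_budget;  // returning true interrupts generation
};

ov::Tensor image = pipe.generate(prompt, ov::genai::callback(callback));
```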
4 changes: 3 additions & 1 deletion samples/python/text2image/README.md
@@ -46,13 +46,15 @@ You can also add a callback to the `main.py` file to interrupt the image generation
 Please find the template of the callback usage below.
 
 ```python
+pipe = openvino_genai.Text2ImagePipeline(model_dir, device)
+
 def callback(step, intermediate_res):
     print("Image generation step: ", step)
+    image_tensor = pipe.decode(intermediate_res) # get intermediate image tensor
     if your_condition: # return True if you want to interrupt image generation
         return True
     return False
 
-pipe = openvino_genai.Text2ImagePipeline(model_dir, device)
 image = pipe.generate(
     ...
     callback = callback
2 changes: 1 addition & 1 deletion src/cpp/include/openvino/genai/generation_config.hpp
@@ -156,7 +156,7 @@ static constexpr ov::Property<bool> ignore_eos{"ignore_eos"};
 static constexpr ov::Property<size_t> min_new_tokens{"min_new_tokens"};
 static constexpr ov::Property<std::vector<std::string>> stop_strings{"stop_strings"};
 static constexpr ov::Property<bool> include_stop_str_in_output{"include_stop_str_in_output"};
-static constexpr ov::Property<std::vector<std::vector<int64_t>>> stop_token_ids{"stop_token_ids"};
+static constexpr ov::Property<std::set<int64_t>> stop_token_ids{"stop_token_ids"};
 
 static constexpr ov::Property<size_t> num_beam_groups{"num_beam_groups"};
 static constexpr ov::Property<size_t> num_beams{"num_beams"};
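With this change the property carries a flat set of token ids rather than a list of id sequences. A hedged usage sketch against the LLMPipeline property API (the model folder and the token id `2` are made up for illustration):

```cpp
#include <cstdint>
#include <iostream>
#include <set>
#include <string>

#include "openvino/genai/llm_pipeline.hpp"

int main() {
    ov::genai::LLMPipeline pipe("./model_dir", "CPU");  // hypothetical model folder
    std::string result = pipe.generate(
        "The Sun is yellow because",
        ov::genai::max_new_tokens(100),
        ov::genai::stop_token_ids(std::set<int64_t>{2}));  // stop if token 2 is sampled
    std::cout << result << '\n';
}
```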
36 changes: 20 additions & 16 deletions src/cpp/src/image_generation/flux_pipeline.hpp
@@ -297,33 +297,33 @@ class FluxPipeline : public DiffusionPipeline {
     ov::Tensor generate(const std::string& positive_prompt,
                         ov::Tensor initial_image,
                         const ov::AnyMap& properties) override {
-        ImageGenerationConfig generation_config = m_generation_config;
-        generation_config.update_generation_config(properties);
+        m_custom_generation_config = m_generation_config;
+        m_custom_generation_config.update_generation_config(properties);
 
         if (!initial_image) {
             // in case of typical text to image generation, we need to ignore 'strength'
-            generation_config.strength = 1.0f;
+            m_custom_generation_config.strength = 1.0f;
         }
 
         const size_t vae_scale_factor = m_vae->get_vae_scale_factor();
         const auto& transformer_config = m_transformer->get_config();
 
-        if (generation_config.height < 0)
-            generation_config.height = transformer_config.m_default_sample_size * vae_scale_factor;
-        if (generation_config.width < 0)
-            generation_config.width = transformer_config.m_default_sample_size * vae_scale_factor;
+        if (m_custom_generation_config.height < 0)
+            m_custom_generation_config.height = transformer_config.m_default_sample_size * vae_scale_factor;
+        if (m_custom_generation_config.width < 0)
+            m_custom_generation_config.width = transformer_config.m_default_sample_size * vae_scale_factor;
 
-        check_inputs(generation_config, initial_image);
+        check_inputs(m_custom_generation_config, initial_image);
 
-        compute_hidden_states(positive_prompt, generation_config);
+        compute_hidden_states(positive_prompt, m_custom_generation_config);
 
-        ov::Tensor latents = prepare_latents(initial_image, generation_config);
+        ov::Tensor latents = prepare_latents(initial_image, m_custom_generation_config);
 
         size_t image_seq_len = latents.get_shape()[1];
         float mu = m_scheduler->calculate_shift(image_seq_len);
 
-        float linspace_end = 1.0f / generation_config.num_inference_steps;
-        std::vector<float> sigmas = numpy_utils::linspace<float>(1.0f, linspace_end, generation_config.num_inference_steps, true);
+        float linspace_end = 1.0f / m_custom_generation_config.num_inference_steps;
+        std::vector<float> sigmas = numpy_utils::linspace<float>(1.0f, linspace_end, m_custom_generation_config.num_inference_steps, true);
 
         m_scheduler->set_timesteps_with_sigma(sigmas, mu);
         std::vector<float> timesteps = m_scheduler->get_float_timesteps();
@@ -345,7 +345,7 @@
 
             ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep);
 
-            auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, generation_config.generator);
+            auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, m_custom_generation_config.generator);
             latents = scheduler_step_result["latent"];
 
             if (do_callback) {
@@ -355,12 +355,16 @@
             }
         }
 
-        latents = unpack_latents(latents, generation_config.height, generation_config.width, vae_scale_factor);
+        latents = unpack_latents(latents, m_custom_generation_config.height, m_custom_generation_config.width, vae_scale_factor);
         return m_vae->decode(latents);
     }
 
     ov::Tensor decode(const ov::Tensor latent) override {
-        return m_vae->decode(latent);
+        ov::Tensor unpacked_latent = unpack_latents(latent,
+                                                    m_custom_generation_config.height,
+                                                    m_custom_generation_config.width,
+                                                    m_vae->get_vae_scale_factor());
+        return m_vae->decode(unpacked_latent);
    }
 
 private:
@@ -407,7 +411,7 @@ class FluxPipeline : public DiffusionPipeline {
     std::shared_ptr<CLIPTextModel> m_clip_text_encoder;
     std::shared_ptr<T5EncoderModel> m_t5_text_encoder;
     std::shared_ptr<AutoencoderKL> m_vae;
-
+    ImageGenerationConfig m_custom_generation_config;
 };
 
 } // namespace genai
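The sigma schedule built in `generate` is an endpoint-inclusive linspace from 1.0 down to 1/`num_inference_steps`. A standalone sketch of that computation, assuming `numpy_utils::linspace<float>` mirrors NumPy's endpoint-inclusive behavior:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Endpoint-inclusive linspace, as numpy_utils::linspace<float> is assumed to behave.
std::vector<float> linspace(float start, float end, size_t num, bool endpoint) {
    std::vector<float> out(num);
    if (num == 1) {
        out[0] = start;
        return out;
    }
    const float step = (end - start) / static_cast<float>(endpoint ? num - 1 : num);
    for (size_t i = 0; i < num; ++i)
        out[i] = start + step * static_cast<float>(i);
    return out;
}

int main() {
    const size_t num_inference_steps = 4;
    // Same schedule FluxPipeline::generate builds before set_timesteps_with_sigma().
    const float linspace_end = 1.0f / num_inference_steps;
    for (float sigma : linspace(1.0f, linspace_end, num_inference_steps, true))
        std::cout << sigma << ' ';  // prints: 1 0.75 0.5 0.25
    std::cout << '\n';
}
```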
7 changes: 7 additions & 0 deletions src/cpp/src/llm_pipeline_static.cpp
@@ -530,6 +530,13 @@ template <typename T>
 T pop_or_default(ov::AnyMap& config, const std::string& key, const T& default_value) {
     auto anyopt = pop_option(config, key);
     if (anyopt.has_value()) {
+        if (anyopt.value().empty()) {
+            if (ov::genai::utils::is_container<T>)
+                return T{};
+            else {
+                OPENVINO_THROW("Got empty ov::Any for key: " + key);
+            }
+        }
         return anyopt.value().as<T>();
     }
     return default_value;
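The new branch treats an empty `ov::Any` as "use an empty container" for container-typed options and as an error for scalar ones. A self-contained sketch of that rule, with `std::any` standing in for `ov::Any` and a plain map standing in for `ov::AnyMap`:

```cpp
#include <any>
#include <cstdint>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

// Same begin()/end() detection idiom as utils.hpp.
template <typename, typename = void>
constexpr bool is_container = false;
template <typename T>
constexpr bool is_container<T, std::void_t<decltype(std::declval<T>().begin()),
                                           decltype(std::declval<T>().end())>> = true;

template <typename T>
T pop_or_default(std::map<std::string, std::any>& config, const std::string& key, const T& default_value) {
    auto it = config.find(key);
    if (it == config.end())
        return default_value;
    std::any value = std::move(it->second);
    config.erase(it);
    if (!value.has_value()) {          // empty value, like an empty ov::Any
        if constexpr (is_container<T>)
            return T{};                // containers degrade to "empty"
        else
            throw std::runtime_error("Got empty value for key: " + key);
    }
    return std::any_cast<T>(value);
}

int main() {
    std::map<std::string, std::any> config{{"ids", std::any{}}};
    auto ids = pop_or_default<std::vector<int64_t>>(config, "ids", {1, 2});
    std::cout << ids.size() << '\n';   // 0: the empty entry yields an empty container
}
```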
22 changes: 21 additions & 1 deletion src/cpp/src/utils.hpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #pragma once
+#include <type_traits>
 
 #include "openvino/genai/llm_pipeline.hpp"
 #include "openvino/runtime/core.hpp"
@@ -12,6 +13,16 @@ namespace ov {
 namespace genai {
 namespace utils {
 
+// Variable template that checks if a type has begin() and end() member functions
+template<typename, typename = void>
+constexpr bool is_container = false;
+
+template<typename T>
+constexpr bool is_container<T,
+    std::void_t<decltype(std::declval<T>().begin()),
+                decltype(std::declval<T>().end())>> = true;
+
+
 Tensor init_attention_mask(const Tensor& position_ids);
 
 void print_tensor(const ov::Tensor& tensor);
@@ -31,7 +42,16 @@ template <typename T>
 void read_anymap_param(const ov::AnyMap& config_map, const std::string& name, T& param) {
     auto it = config_map.find(name);
     if (it != config_map.end()) {
-        param = it->second.as<typename OmitOptional<T>::value>();
+        if (it->second.empty()) {
+            if (ov::genai::utils::is_container<T>)
+                param = T{};
+            else {
+                OPENVINO_THROW("Got empty ov::Any for parameter name: " + name);
+            }
+        }
+        else {
+            param = it->second.as<typename OmitOptional<T>::value>();
+        }
     }
 }
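The detection idiom can be checked in isolation. Note it is true for anything exposing `begin()`/`end()`, including `std::string`; the sketch below copies the variable template outside the `ov::genai::utils` namespace:

```cpp
#include <cstdint>
#include <set>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

// Copy of the utils.hpp variable template for standalone testing.
template <typename, typename = void>
constexpr bool is_container = false;

template <typename T>
constexpr bool is_container<T, std::void_t<decltype(std::declval<T>().begin()),
                                           decltype(std::declval<T>().end())>> = true;

static_assert(is_container<std::vector<int>>, "vector has begin()/end()");
static_assert(is_container<std::set<int64_t>>, "set has begin()/end()");
static_assert(is_container<std::string>, "string counts as a container here");
static_assert(!is_container<int>, "scalars are not containers");

int main() { return 0; }
```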
4 changes: 3 additions & 1 deletion src/python/CMakeLists.txt
@@ -182,12 +182,14 @@ if(pybind11_stubgen_AVAILABLE)
             VERBATIM)
 
     add_custom_target(${TARGET_NAME}_stub ALL DEPENDS ${output_file})
-else()
+elseif(OpenVINODeveloperPackage_FOUND)
     # Produce warning message at build time as well
     add_custom_command(OUTPUT pybind11_stub_gen_not_found.txt
         COMMAND ${CMAKE_COMMAND}
             -E cmake_echo_color --red "Warning: Please, install ${pybind11_stubgen_dep}")
     add_custom_target(${TARGET_NAME}_stub ALL DEPENDS pybind11_stub_gen_not_found.txt)
+else()
+    add_custom_target(${TARGET_NAME}_stub ALL)
 endif()
 
 add_dependencies(${TARGET_NAME}_stub ${TARGET_NAME})
2 changes: 1 addition & 1 deletion src/python/openvino_genai/py_openvino_genai.pyi
@@ -1296,7 +1296,7 @@ class Tokenizer:
         openvino_genai.Tokenizer object is used to initialize Tokenizer
         if it's located in a different path than the main model.
         """
-    def __init__(self, tokenizer_path: os.PathLike, properties: dict[str, typing.Any] = {}) -> None:
+    def __init__(self, tokenizer_path: os.PathLike, properties: dict[str, typing.Any] = {}, **kwargs) -> None:
         ...
     def apply_chat_template(self, history: list[dict[str, str]], add_generation_prompt: bool, chat_template: str = '') -> str:
         """
108 changes: 3 additions & 105 deletions src/python/py_image_generation_pipelines.cpp
@@ -67,108 +67,6 @@ auto text2image_generate_docstring = R"(
 )";
 
 
-void update_image_generation_config_from_kwargs(
-    ov::genai::ImageGenerationConfig& config,
-    const py::kwargs& kwargs) {
-    for (const auto& item : kwargs) {
-        std::string key = py::cast<std::string>(item.first);
-        py::object value = py::cast<py::object>(item.second);
-
-        if (key == "prompt_2") {
-            config.prompt_2 = py::cast<std::string>(value);
-        } else if (key == "prompt_3") {
-            config.prompt_3 = py::cast<std::string>(value);
-        } else if (key == "negative_prompt") {
-            config.negative_prompt = py::cast<std::string>(value);
-        } else if (key == "negative_prompt_2") {
-            config.negative_prompt_2 = py::cast<std::string>(value);
-        } else if (key == "negative_prompt_3") {
-            config.negative_prompt_3 = py::cast<std::string>(value);
-        } else if (key == "num_images_per_prompt") {
-            config.num_images_per_prompt = py::cast<size_t>(value);
-        } else if (key == "guidance_scale") {
-            config.guidance_scale = py::cast<float>(value);
-        } else if (key == "height") {
-            config.height = py::cast<int64_t>(value);
-        } else if (key == "width") {
-            config.width = py::cast<int64_t>(value);
-        } else if (key == "num_inference_steps") {
-            config.num_inference_steps = py::cast<size_t>(value);
-        } else if (key == "generator") {
-            auto py_generator = py::cast<std::shared_ptr<ov::genai::Generator>>(value);
-            config.generator = py_generator;
-        } else if (key == "adapters") {
-            config.adapters = py::cast<ov::genai::AdapterConfig>(value);
-        } else if (key == "strength") {
-            config.strength = py::cast<float>(value);
-        } else if (key == "max_sequence_length") {
-            config.max_sequence_length = py::cast<size_t>(value);
-        } else {
-            throw(std::invalid_argument("'" + key + "' is unexpected parameter name. "
-                                        "Use help(openvino_genai.ImageGenerationConfig) to get list of acceptable parameters."));
-        }
-    }
-}
-
-ov::AnyMap text2image_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_compile_properties=true) {
-    ov::AnyMap params = {};
-
-    for (const auto& item : kwargs) {
-        std::string key = py::cast<std::string>(item.first);
-        py::object value = py::cast<py::object>(item.second);
-
-        if (key == "prompt_2") {
-            params.insert({ov::genai::prompt_2(std::move(py::cast<std::string>(value)))});
-        } else if (key == "prompt_3") {
-            params.insert({ov::genai::prompt_3(std::move(py::cast<std::string>(value)))});
-        } else if (key == "negative_prompt") {
-            params.insert({ov::genai::negative_prompt(std::move(py::cast<std::string>(value)))});
-        } else if (key == "negative_prompt_2") {
-            params.insert({ov::genai::negative_prompt_2(std::move(py::cast<std::string>(value)))});
-        } else if (key == "negative_prompt_3") {
-            params.insert({ov::genai::negative_prompt_3(std::move(py::cast<std::string>(value)))});
-        } else if (key == "num_images_per_prompt") {
-            params.insert({ov::genai::num_images_per_prompt(std::move(py::cast<size_t>(value)))});
-        } else if (key == "guidance_scale") {
-            params.insert({ov::genai::guidance_scale(std::move(py::cast<float>(value)))});
-        } else if (key == "height") {
-            params.insert({ov::genai::height(std::move(py::cast<int64_t>(value)))});
-        } else if (key == "width") {
-            params.insert({ov::genai::width(std::move(py::cast<int64_t>(value)))});
-        } else if (key == "num_inference_steps") {
-            params.insert({ov::genai::num_inference_steps(std::move(py::cast<size_t>(value)))});
-        } else if (key == "generator") {
-            auto py_generator = py::cast<std::shared_ptr<ov::genai::Generator>>(value);
-            params.insert({ov::genai::generator(std::move(py_generator))});
-        } else if (key == "adapters") {
-            params.insert({ov::genai::adapters(std::move(py::cast<ov::genai::AdapterConfig>(value)))});
-        } else if (key == "strength") {
-            params.insert({ov::genai::strength(std::move(py::cast<float>(value)))});
-        } else if (key == "max_sequence_length") {
-            params.insert({ov::genai::max_sequence_length(std::move(py::cast<size_t>(value)))});
-        } else if (key == "callback") {
-            params.insert({ov::genai::callback(std::move(py::cast<std::function<bool(size_t, ov::Tensor&)>>(value)))});
-        }
-        else {
-            if (allow_compile_properties) {
-                // convert arbitrary objects to ov::Any
-                // not supported properties are not checked, as these properties are passed to compile(), which will throw exception in case of unsupported property
-                if (pyutils::py_object_is_any_map(value)) {
-                    auto map = pyutils::py_object_to_any_map(value);
-                    params.insert(map.begin(), map.end());
-                } else {
-                    params[key] = pyutils::py_object_to_any(value);
-                }
-            }
-            else {
-                // generate doesn't run compile(), so only Text2ImagePipeline specific properties are allowed
-                throw(std::invalid_argument("'" + key + "' is unexpected parameter name. "
-                                            "Use help(openvino_genai.Text2ImagePipeline.generate) to get list of acceptable parameters."));
-            }
-        }
-    }
-    return params;
-}
-
 } // namespace
 
@@ -230,7 +128,7 @@ void init_image_generation_pipelines(py::module_& m) {
         .def("update_generation_config", [](
             ov::genai::ImageGenerationConfig config,
             const py::kwargs& kwargs) {
-            update_image_generation_config_from_kwargs(config, kwargs);
+            config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));
         });
 
     auto text2image_pipeline = py::class_<ov::genai::Text2ImagePipeline>(m, "Text2ImagePipeline", "This class is used for generation with text-to-image models.")
@@ -252,7 +150,7 @@
             const py::kwargs& kwargs
         ) {
             ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
-            return std::make_unique<ov::genai::Text2ImagePipeline>(models_path, device, text2image_kwargs_to_any_map(kwargs, true));
+            return std::make_unique<ov::genai::Text2ImagePipeline>(models_path, device, pyutils::kwargs_to_any_map(kwargs));
         }),
         py::arg("models_path"), "folder with exported model files.",
         py::arg("device"), "device on which inference will be done",
@@ -289,7 +187,7 @@
             const std::string& prompt,
             const py::kwargs& kwargs
         ) -> py::typing::Union<ov::Tensor> {
-            ov::AnyMap params = text2image_kwargs_to_any_map(kwargs, false);
+            ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
             return py::cast(pipe.generate(prompt, params));
         },
         py::arg("prompt"), "Input string",