From 6cd66d0274ddc8fde544643f74113fb6c40d2394 Mon Sep 17 00:00:00 2001 From: Anna Likholat Date: Wed, 20 Nov 2024 19:17:38 +0100 Subject: [PATCH 1/8] Text2Image Readme update: decode method usage (#1237) --- samples/cpp/text2image/README.md | 6 ++-- samples/python/text2image/README.md | 4 ++- .../src/image_generation/flux_pipeline.hpp | 36 ++++++++++--------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/samples/cpp/text2image/README.md b/samples/cpp/text2image/README.md index c5ffd53a84..ac736b2383 100644 --- a/samples/cpp/text2image/README.md +++ b/samples/cpp/text2image/README.md @@ -46,14 +46,16 @@ You can also add a callback to the `main.cpp` file to interrupt the image genera Please find the template of the callback usage below. ```cpp -auto callback = [](size_t step, ov::Tensor& intermediate_res) -> bool { +ov::genai::Text2ImagePipeline pipe(models_path, device); + +auto callback = [&](size_t step, ov::Tensor& intermediate_res) -> bool { std::cout << "Image generation step: " << step << std::endl; + ov::Tensor img = pipe.decode(intermediate_res); // get intermediate image tensor if (your_condition) // return true if you want to interrupt image generation return true; return false; }; -ov::genai::Text2ImagePipeline pipe(models_path, device); ov::Tensor image = pipe.generate(prompt, ... ov::genai::callback(callback) diff --git a/samples/python/text2image/README.md b/samples/python/text2image/README.md index 9421061885..2e841673d3 100644 --- a/samples/python/text2image/README.md +++ b/samples/python/text2image/README.md @@ -46,13 +46,15 @@ You can also add a callback to the `main.py` file to interrupt the image generat Please find the template of the callback usage below. ```python +pipe = openvino_genai.Text2ImagePipeline(model_dir, device) + def callback(step, intermediate_res): print("Image generation step: ", step) + image_tensor = pipe.decode(intermediate_res) # get intermediate image tensor if your_condition: # return True if you want to interrupt image generation return True return False -pipe = openvino_genai.Text2ImagePipeline(model_dir, device) image = pipe.generate( ... 
callback = callback diff --git a/src/cpp/src/image_generation/flux_pipeline.hpp b/src/cpp/src/image_generation/flux_pipeline.hpp index 101401d434..e684443e47 100644 --- a/src/cpp/src/image_generation/flux_pipeline.hpp +++ b/src/cpp/src/image_generation/flux_pipeline.hpp @@ -297,33 +297,33 @@ class FluxPipeline : public DiffusionPipeline { ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties) override { - ImageGenerationConfig generation_config = m_generation_config; - generation_config.update_generation_config(properties); + m_custom_generation_config = m_generation_config; + m_custom_generation_config.update_generation_config(properties); if (!initial_image) { // in case of typical text to image generation, we need to ignore 'strength' - generation_config.strength = 1.0f; + m_custom_generation_config.strength = 1.0f; } const size_t vae_scale_factor = m_vae->get_vae_scale_factor(); const auto& transformer_config = m_transformer->get_config(); - if (generation_config.height < 0) - generation_config.height = transformer_config.m_default_sample_size * vae_scale_factor; - if (generation_config.width < 0) - generation_config.width = transformer_config.m_default_sample_size * vae_scale_factor; + if (m_custom_generation_config.height < 0) + m_custom_generation_config.height = transformer_config.m_default_sample_size * vae_scale_factor; + if (m_custom_generation_config.width < 0) + m_custom_generation_config.width = transformer_config.m_default_sample_size * vae_scale_factor; - check_inputs(generation_config, initial_image); + check_inputs(m_custom_generation_config, initial_image); - compute_hidden_states(positive_prompt, generation_config); + compute_hidden_states(positive_prompt, m_custom_generation_config); - ov::Tensor latents = prepare_latents(initial_image, generation_config); + ov::Tensor latents = prepare_latents(initial_image, m_custom_generation_config); size_t image_seq_len = latents.get_shape()[1]; float mu = m_scheduler->calculate_shift(image_seq_len); - float linspace_end = 1.0f / generation_config.num_inference_steps; - std::vector sigmas = numpy_utils::linspace(1.0f, linspace_end, generation_config.num_inference_steps, true); + float linspace_end = 1.0f / m_custom_generation_config.num_inference_steps; + std::vector sigmas = numpy_utils::linspace(1.0f, linspace_end, m_custom_generation_config.num_inference_steps, true); m_scheduler->set_timesteps_with_sigma(sigmas, mu); std::vector timesteps = m_scheduler->get_float_timesteps(); @@ -345,7 +345,7 @@ class FluxPipeline : public DiffusionPipeline { ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep); - auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, generation_config.generator); + auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, m_custom_generation_config.generator); latents = scheduler_step_result["latent"]; if (do_callback) { @@ -355,12 +355,16 @@ class FluxPipeline : public DiffusionPipeline { } } - latents = unpack_latents(latents, generation_config.height, generation_config.width, vae_scale_factor); + latents = unpack_latents(latents, m_custom_generation_config.height, m_custom_generation_config.width, vae_scale_factor); return m_vae->decode(latents); } ov::Tensor decode(const ov::Tensor latent) override { - return m_vae->decode(latent); + ov::Tensor unpacked_latent = unpack_latents(latent, + m_custom_generation_config.height, + m_custom_generation_config.width, + 
m_vae->get_vae_scale_factor()); + return m_vae->decode(unpacked_latent); } private: @@ -407,7 +411,7 @@ class FluxPipeline : public DiffusionPipeline { std::shared_ptr m_clip_text_encoder; std::shared_ptr m_t5_text_encoder; std::shared_ptr m_vae; - + ImageGenerationConfig m_custom_generation_config; }; } // namespace genai From cd05c8eb9ce1eb22411c2107afcdb1b3e2344fa9 Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Wed, 20 Nov 2024 20:03:33 +0100 Subject: [PATCH 2/8] Fixed passing of generation config params to VLM generate. (#1180) - Fixed passing of generation config params to VLM generate(). - Updated generation config params params list in `update_config_from_kwargs()` method. Ticket: CVS-157050 --------- Co-authored-by: Ilya Lavrenov --- .../openvino/genai/generation_config.hpp | 2 +- src/cpp/src/llm_pipeline_static.cpp | 7 + src/cpp/src/utils.hpp | 22 +- .../openvino_genai/py_openvino_genai.pyi | 2 +- src/python/py_image_generation_pipelines.cpp | 108 +-------- src/python/py_tokenizer.cpp | 13 +- src/python/py_utils.cpp | 209 ++++++++++++------ src/python/py_utils.hpp | 2 +- src/python/py_vlm_pipeline.cpp | 44 +--- src/python/py_whisper_pipeline.cpp | 55 +---- tests/cpp/utils.cpp | 21 ++ tests/python_tests/test_generate_api.py | 11 +- tests/python_tests/test_sampling.py | 6 +- 13 files changed, 220 insertions(+), 282 deletions(-) create mode 100644 tests/cpp/utils.cpp diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index 22edcb98c0..8d23b298ba 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -156,7 +156,7 @@ static constexpr ov::Property ignore_eos{"ignore_eos"}; static constexpr ov::Property min_new_tokens{"min_new_tokens"}; static constexpr ov::Property> stop_strings{"stop_strings"}; static constexpr ov::Property include_stop_str_in_output{"include_stop_str_in_output"}; -static constexpr ov::Property>> stop_token_ids{"stop_token_ids"}; +static constexpr ov::Property> stop_token_ids{"stop_token_ids"}; static constexpr ov::Property num_beam_groups{"num_beam_groups"}; static constexpr ov::Property num_beams{"num_beams"}; diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index 40089384a8..2beb7d64be 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -530,6 +530,13 @@ template T pop_or_default(ov::AnyMap& config, const std::string& key, const T& default_value) { auto anyopt = pop_option(config, key); if (anyopt.has_value()) { + if (anyopt.value().empty()) { + if (ov::genai::utils::is_container) + return T{}; + else { + OPENVINO_THROW("Got empty ov::Any for key: " + key); + } + } return anyopt.value().as(); } return default_value; diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 9adc46c87a..3487fccb81 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once +#include #include "openvino/genai/llm_pipeline.hpp" #include "openvino/runtime/core.hpp" @@ -12,6 +13,16 @@ namespace ov { namespace genai { namespace utils { +// Variable template that checks if a type has begin() and end() member functions +template +constexpr bool is_container = false; + +template +constexpr bool is_container().begin()), + decltype(std::declval().end())>> = true; + + Tensor init_attention_mask(const Tensor& position_ids); void print_tensor(const ov::Tensor& tensor); @@ -31,7 +42,16 @@ 
template void read_anymap_param(const ov::AnyMap& config_map, const std::string& name, T& param) { auto it = config_map.find(name); if (it != config_map.end()) { - param = it->second.as::value>(); + if (it->second.empty()) { + if (ov::genai::utils::is_container) + param = T{}; + else { + OPENVINO_THROW("Got empty ov::Any for parameter name: " + name); + } + } + else { + param = it->second.as::value>(); + } } } diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index a16b74b703..df290a9744 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -1296,7 +1296,7 @@ class Tokenizer: openvino_genai.Tokenizer object is used to initialize Tokenizer if it's located in a different path than the main model. """ - def __init__(self, tokenizer_path: os.PathLike, properties: dict[str, typing.Any] = {}) -> None: + def __init__(self, tokenizer_path: os.PathLike, properties: dict[str, typing.Any] = {}, **kwargs) -> None: ... def apply_chat_template(self, history: list[dict[str, str]], add_generation_prompt: bool, chat_template: str = '') -> str: """ diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index f70faaca61..dade8a170e 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -67,108 +67,6 @@ auto text2image_generate_docstring = R"( )"; -void update_image_generation_config_from_kwargs( - ov::genai::ImageGenerationConfig& config, - const py::kwargs& kwargs) { - for (const auto& item : kwargs) { - std::string key = py::cast(item.first); - py::object value = py::cast(item.second); - - if (key == "prompt_2") { - config.prompt_2 = py::cast(value); - } else if (key == "prompt_3") { - config.prompt_3 = py::cast(value); - } else if (key == "negative_prompt") { - config.negative_prompt = py::cast(value); - } else if (key == "negative_prompt_2") { - config.negative_prompt_2 = py::cast(value); - } else if (key == "negative_prompt_3") { - config.negative_prompt_3 = py::cast(value); - } else if (key == "num_images_per_prompt") { - config.num_images_per_prompt = py::cast(value); - } else if (key == "guidance_scale") { - config.guidance_scale = py::cast(value); - } else if (key == "height") { - config.height = py::cast(value); - } else if (key == "width") { - config.width = py::cast(value); - } else if (key == "num_inference_steps") { - config.num_inference_steps = py::cast(value); - } else if (key == "generator") { - auto py_generator = py::cast>(value); - config.generator = py_generator; - } else if (key == "adapters") { - config.adapters = py::cast(value); - } else if (key == "strength") { - config.strength = py::cast(value); - } else if (key == "max_sequence_length") { - config.max_sequence_length = py::cast(value); - } else { - throw(std::invalid_argument("'" + key + "' is unexpected parameter name. 
" - "Use help(openvino_genai.ImageGenerationConfig) to get list of acceptable parameters.")); - } - } -} - -ov::AnyMap text2image_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_compile_properties=true) { - ov::AnyMap params = {}; - - for (const auto& item : kwargs) { - std::string key = py::cast(item.first); - py::object value = py::cast(item.second); - - if (key == "prompt_2") { - params.insert({ov::genai::prompt_2(std::move(py::cast(value)))}); - } else if (key == "prompt_3") { - params.insert({ov::genai::prompt_3(std::move(py::cast(value)))}); - } else if (key == "negative_prompt") { - params.insert({ov::genai::negative_prompt(std::move(py::cast(value)))}); - } else if (key == "negative_prompt_2") { - params.insert({ov::genai::negative_prompt_2(std::move(py::cast(value)))}); - } else if (key == "negative_prompt_3") { - params.insert({ov::genai::negative_prompt_3(std::move(py::cast(value)))}); - } else if (key == "num_images_per_prompt") { - params.insert({ov::genai::num_images_per_prompt(std::move(py::cast(value)))}); - } else if (key == "guidance_scale") { - params.insert({ov::genai::guidance_scale(std::move(py::cast(value)))}); - } else if (key == "height") { - params.insert({ov::genai::height(std::move(py::cast(value)))}); - } else if (key == "width") { - params.insert({ov::genai::width(std::move(py::cast(value)))}); - } else if (key == "num_inference_steps") { - params.insert({ov::genai::num_inference_steps(std::move(py::cast(value)))}); - } else if (key == "generator") { - auto py_generator =py::cast>(value); - params.insert({ov::genai::generator(std::move(py_generator))}); - } else if (key == "adapters") { - params.insert({ov::genai::adapters(std::move(py::cast(value)))}); - } else if (key == "strength") { - params.insert({ov::genai::strength(std::move(py::cast(value)))}); - } else if (key == "max_sequence_length") { - params.insert({ov::genai::max_sequence_length(std::move(py::cast(value)))}); - } else if (key == "callback") { - params.insert({ov::genai::callback(std::move(py::cast>(value)))}); - } - else { - if (allow_compile_properties) { - // convert arbitrary objects to ov::Any - // not supported properties are not checked, as these properties are passed to compile(), which will throw exception in case of unsupported property - if (pyutils::py_object_is_any_map(value)) { - auto map = pyutils::py_object_to_any_map(value); - params.insert(map.begin(), map.end()); - } else { - params[key] = pyutils::py_object_to_any(value); - } - } - else { - // generate doesn't run compile(), so only Text2ImagePipeline specific properties are allowed - throw(std::invalid_argument("'" + key + "' is unexpected parameter name. 
" - "Use help(openvino_genai.Text2ImagePipeline.generate) to get list of acceptable parameters.")); - } - } - } - return params; -} } // namespace @@ -230,7 +128,7 @@ void init_image_generation_pipelines(py::module_& m) { .def("update_generation_config", []( ov::genai::ImageGenerationConfig config, const py::kwargs& kwargs) { - update_image_generation_config_from_kwargs(config, kwargs); + config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); }); auto text2image_pipeline = py::class_(m, "Text2ImagePipeline", "This class is used for generation with text-to-image models.") @@ -252,7 +150,7 @@ void init_image_generation_pipelines(py::module_& m) { const py::kwargs& kwargs ) { ScopedVar env_manager(pyutils::ov_tokenizers_module_path()); - return std::make_unique(models_path, device, text2image_kwargs_to_any_map(kwargs, true)); + return std::make_unique(models_path, device, pyutils::kwargs_to_any_map(kwargs)); }), py::arg("models_path"), "folder with exported model files.", py::arg("device"), "device on which inference will be done", @@ -289,7 +187,7 @@ void init_image_generation_pipelines(py::module_& m) { const std::string& prompt, const py::kwargs& kwargs ) -> py::typing::Union { - ov::AnyMap params = text2image_kwargs_to_any_map(kwargs, false); + ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs); return py::cast(pipe.generate(prompt, params)); }, py::arg("prompt"), "Input string", diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp index b3c52cd28b..2ccccff4c0 100644 --- a/src/python/py_tokenizer.cpp +++ b/src/python/py_tokenizer.cpp @@ -30,9 +30,18 @@ void init_tokenizer(py::module_& m) { R"(openvino_genai.Tokenizer object is used to initialize Tokenizer if it's located in a different path than the main model.)") - .def(py::init([](const std::filesystem::path& tokenizer_path, const std::map& properties) { + .def(py::init([](const std::filesystem::path& tokenizer_path, const std::map& properties, const py::kwargs& kwargs) { ScopedVar env_manager(pyutils::ov_tokenizers_module_path()); - return std::make_unique(tokenizer_path, pyutils::properties_to_any_map(properties)); + auto kwargs_properties = pyutils::kwargs_to_any_map(kwargs); + if (properties.size()) { + PyErr_WarnEx(PyExc_DeprecationWarning, + "'properties' parameters is deprecated, please use kwargs to pass config properties instead.", + 1); + auto map_properties = pyutils::properties_to_any_map(properties); + kwargs_properties.insert(map_properties.begin(), map_properties.end()); + } + + return std::make_unique(tokenizer_path, kwargs_properties); }), py::arg("tokenizer_path"), py::arg("properties") = ov::AnyMap({})) .def("encode", [](Tokenizer& tok, std::vector& prompts, bool add_special_tokens) { diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp index a2e8630059..579fe6b789 100644 --- a/src/python/py_utils.cpp +++ b/src/python/py_utils.cpp @@ -6,11 +6,15 @@ #include #include #include +#include #include #include "tokenizers_path.hpp" #include "openvino/genai/llm_pipeline.hpp" +#include "openvino/genai/visual_language/pipeline.hpp" +#include "openvino/genai/image_generation/generation_config.hpp" +#include "openvino/genai/whisper_generation_config.hpp" namespace py = pybind11; namespace ov::genai::pybind::utils { @@ -43,7 +47,7 @@ bool py_object_is_any_map(const py::object& py_obj) { }); } -ov::Any py_object_to_any(const py::object& py_obj); +ov::Any py_object_to_any(const py::object& py_obj, std::string property_name); ov::AnyMap py_object_to_any_map(const py::object& py_obj) { 
OPENVINO_ASSERT(py_object_is_any_map(py_obj), "Unsupported attribute type."); @@ -54,16 +58,34 @@ ov::AnyMap py_object_to_any_map(const py::object& py_obj) { if (py_object_is_any_map(value)) { return_value[key] = py_object_to_any_map(value); } else { - return_value[key] = py_object_to_any(value); + return_value[key] = py_object_to_any(value, key); } } return return_value; } -ov::Any py_object_to_any(const py::object& py_obj) { +ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) { // Python types + // TODO: Remove this after ov::Any is fixed to allow pass types, that can be casted to target type. Ticket: 157622 + std::set size_t_properties = { + "max_new_tokens", + "max_length", + "min_new_tokens", + "logprobs", + "num_beam_groups", + "num_beams", + "num_return_sequences", + "no_repeat_ngram_size", + "top_k", + "rng_seed", + "num_assistant_tokens", + "max_initial_timestamp_index", + "num_images_per_prompt", + "num_inference_steps", + "max_sequence_length" + }; + py::object float_32_type = py::module_::import("numpy").attr("float32"); - if (py::isinstance(py_obj)) { return py_obj.cast(); } else if (py::isinstance(py_obj)) { @@ -71,16 +93,19 @@ ov::Any py_object_to_any(const py::object& py_obj) { } else if (py::isinstance(py_obj)) { return py_obj.cast(); } else if (py::isinstance(py_obj)) { - return py_obj.cast(); + return py_obj.cast(); } else if (py::isinstance(py_obj, float_32_type)) { return py_obj.cast(); } else if (py::isinstance(py_obj)) { + if (size_t_properties.find(property_name) != size_t_properties.end()) { + return py_obj.cast(); + } return py_obj.cast(); } else if (py::isinstance(py_obj)) { return {}; } else if (py::isinstance(py_obj)) { auto _list = py_obj.cast(); - enum class PY_TYPE : int { UNKNOWN = 0, STR, INT, FLOAT, BOOL, PARTIAL_SHAPE }; + enum class PY_TYPE : int { UNKNOWN = 0, STR, INT, FLOAT, BOOL, PARTIAL_SHAPE, TENSOR}; PY_TYPE detected_type = PY_TYPE::UNKNOWN; for (const auto& it : _list) { auto check_type = [&](PY_TYPE type) { @@ -88,7 +113,7 @@ ov::Any py_object_to_any(const py::object& py_obj) { detected_type = type; return; } - OPENVINO_THROW("Incorrect attribute. Mixed types in the list are not allowed."); + OPENVINO_THROW("Incorrect value in \"" + property_name + "\". Mixed types in the list are not allowed."); }; if (py::isinstance(it)) { check_type(PY_TYPE::STR); @@ -100,6 +125,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { check_type(PY_TYPE::BOOL); } else if (py::isinstance(it)) { check_type(PY_TYPE::PARTIAL_SHAPE); + } else if (py::isinstance(it)) { + check_type(PY_TYPE::TENSOR); } } @@ -117,10 +144,89 @@ ov::Any py_object_to_any(const py::object& py_obj) { return _list.cast>(); case PY_TYPE::PARTIAL_SHAPE: return _list.cast>(); + case PY_TYPE::TENSOR: + return _list.cast>(); + default: + OPENVINO_THROW("Property \"" + property_name + "\" got unsupported type."); + } + + } else if (py::isinstance(py_obj)) { + auto _dict = py_obj.cast(); + enum class PY_TYPE : int { UNKNOWN = 0, STR, INT}; + PY_TYPE detected_key_type = PY_TYPE::UNKNOWN; + PY_TYPE detected_value_type = PY_TYPE::UNKNOWN; + for (const auto& it : _dict) { + auto check_type = [&](PY_TYPE type, PY_TYPE& detected_type) { + if (detected_type == PY_TYPE::UNKNOWN || detected_type == type) { + detected_type = type; + return; + } + OPENVINO_THROW("Incorrect value in \"" + property_name + "\". 
Mixed types in the dict are not allowed."); + }; + // check key type + if (py::isinstance(it.first)) { + check_type(PY_TYPE::STR, detected_key_type); + } + + // check value type + if (py::isinstance(it.second)) { + check_type(PY_TYPE::INT, detected_value_type); + } + } + if (_dict.empty()) { + return ov::Any(); + } + + switch (detected_key_type) { + case PY_TYPE::STR: + switch (detected_value_type) { + case PY_TYPE::INT: + return _dict.cast>(); + default: + OPENVINO_THROW("Property \"" + property_name + "\" got unsupported type."); + } + default: + OPENVINO_THROW("Property \"" + property_name + "\" got unsupported type."); + } + } else if (py::isinstance(py_obj)) { + auto _set = py_obj.cast(); + enum class PY_TYPE : int { UNKNOWN = 0, STR, INT, FLOAT, BOOL}; + PY_TYPE detected_type = PY_TYPE::UNKNOWN; + for (const auto& it : _set) { + auto check_type = [&](PY_TYPE type) { + if (detected_type == PY_TYPE::UNKNOWN || detected_type == type) { + detected_type = type; + return; + } + OPENVINO_THROW("Incorrect value in \"" + property_name + "\". Mixed types in the set are not allowed."); + }; + if (py::isinstance(it)) { + check_type(PY_TYPE::STR); + } else if (py::isinstance(it)) { + check_type(PY_TYPE::INT); + } else if (py::isinstance(it)) { + check_type(PY_TYPE::FLOAT); + } else if (py::isinstance(it)) { + check_type(PY_TYPE::BOOL); + } + } + + if (_set.empty()) + return ov::Any(); + + switch (detected_type) { + case PY_TYPE::STR: + return _set.cast>(); + case PY_TYPE::FLOAT: + return _set.cast>(); + case PY_TYPE::INT: + return _set.cast>(); + case PY_TYPE::BOOL: + return _set.cast>(); default: - OPENVINO_ASSERT(false, "Unsupported attribute type."); + OPENVINO_THROW("Property \"" + property_name + "\" got unsupported type."); } - + // OV types } else if (py_object_is_any_map(py_obj)) { return py_object_to_any_map(py_obj); @@ -156,18 +262,33 @@ ov::Any py_object_to_any(const py::object& py_obj) { return py::cast>(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); - } else if (py::isinstance(py_obj)) { + } else if (py::isinstance(py_obj)) { return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast>(py_obj); + } else if (py::isinstance(py_obj) && property_name == "callback") { + return py::cast>(py_obj); + } else if ((py::isinstance(py_obj) || py::isinstance(py_obj) || py::isinstance(py_obj)) && property_name == "streamer") { + auto streamer = py::cast(py_obj); + return ov::genai::streamer(pystreamer_to_streamer(streamer)).second; } else if (py::isinstance(py_obj)) { return py_obj; } - OPENVINO_ASSERT(false, "Unsupported attribute type."); + OPENVINO_THROW("Property \"" + property_name + "\" got unsupported type."); } std::map properties_to_any_map(const std::map& properties) { std::map properties_to_cpp; for (const auto& property : properties) { - properties_to_cpp[property.first] = py_object_to_any(property.second); + properties_to_cpp[property.first] = py_object_to_any(property.second, property.first); } return properties_to_cpp; } @@ -179,11 +300,16 @@ ov::AnyMap kwargs_to_any_map(const py::kwargs& kwargs) { for (const auto& item : kwargs) { std::string key = py::cast(item.first); py::object value = py::cast(item.second); - if (utils::py_object_is_any_map(value)) { + // we need to unpack 
only dictionaries, which are passed with "config" name, + // because there are dictionary properties that should not be unpacked + if (utils::py_object_is_any_map(value) && key == "config") { auto map = utils::py_object_to_any_map(value); params.insert(map.begin(), map.end()); } else { - params[key] = utils::py_object_to_any(value); + if (py::isinstance(value)) { + OPENVINO_ASSERT(!py::isinstance(value), "Property \"", key, "\" can't be None."); + } + params[key] = utils::py_object_to_any(value, key); } } @@ -227,60 +353,9 @@ ov::genai::OptionalGenerationConfig update_config_from_kwargs(const ov::genai::O ov::genai::GenerationConfig res_config; if(config.has_value()) res_config = *config; - - for (const auto& item : kwargs) { - std::string key = py::cast(item.first); - py::object value = py::cast(item.second); - - if (item.second.is_none()) { - // Even if argument key name does not fit GenerationConfig name - // it's not an error if it's not defined. - // Some HF configs can have parameters for methods currently unsupported in ov_genai - // but if their values are not set / None, then this should not block - // us from reading such configs, e.g. {"typical_p": None, 'top_p': 1.0,...} - return res_config; - } - if (key == "max_new_tokens") { - res_config.max_new_tokens = py::cast(item.second); - } else if (key == "max_length") { - res_config.max_length = py::cast(item.second); - } else if (key == "ignore_eos") { - res_config.ignore_eos = py::cast(item.second); - } else if (key == "num_beam_groups") { - res_config.num_beam_groups = py::cast(item.second); - } else if (key == "num_beams") { - res_config.num_beams = py::cast(item.second); - } else if (key == "diversity_penalty") { - res_config.diversity_penalty = py::cast(item.second); - } else if (key == "length_penalty") { - res_config.length_penalty = py::cast(item.second); - } else if (key == "num_return_sequences") { - res_config.num_return_sequences = py::cast(item.second); - } else if (key == "no_repeat_ngram_size") { - res_config.no_repeat_ngram_size = py::cast(item.second); - } else if (key == "stop_criteria") { - res_config.stop_criteria = py::cast(item.second); - } else if (key == "temperature") { - res_config.temperature = py::cast(item.second); - } else if (key == "top_p") { - res_config.top_p = py::cast(item.second); - } else if (key == "top_k") { - res_config.top_k = py::cast(item.second); - } else if (key == "do_sample") { - res_config.do_sample = py::cast(item.second); - } else if (key == "repetition_penalty") { - res_config.repetition_penalty = py::cast(item.second); - } else if (key == "eos_token_id") { - res_config.set_eos_token_id(py::cast(item.second)); - } else if (key == "adapters") { - res_config.adapters = py::cast(item.second); - } else { - throw(std::invalid_argument("'" + key + "' is incorrect GenerationConfig parameter name. 
" - "Use help(openvino_genai.GenerationConfig) to get list of acceptable parameters.")); - } - } - + res_config.update_generation_config(kwargs_to_any_map(kwargs)); return res_config; } + } // namespace ov::genai::pybind::utils diff --git a/src/python/py_utils.hpp b/src/python/py_utils.hpp index 9213060660..20094196a6 100644 --- a/src/python/py_utils.hpp +++ b/src/python/py_utils.hpp @@ -28,7 +28,7 @@ py::list handle_utf8(const std::vector& decoded_res); py::str handle_utf8(const std::string& text); -ov::Any py_object_to_any(const py::object& py_obj); +ov::Any py_object_to_any(const py::object& py_obj, std::string property_name); bool py_object_is_any_map(const py::object& py_obj); diff --git a/src/python/py_vlm_pipeline.cpp b/src/python/py_vlm_pipeline.cpp index 30e2e04a14..9572652204 100644 --- a/src/python/py_vlm_pipeline.cpp +++ b/src/python/py_vlm_pipeline.cpp @@ -72,46 +72,6 @@ py::object call_vlm_generate( return py::cast(pipe.generate(prompt, images, updated_config, streamer)); } -ov::AnyMap vlm_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_compile_properties=true) { - ov::AnyMap params = {}; - - for (const auto& item : kwargs) { - std::string key = py::cast(item.first); - py::object value = py::cast(item.second); - - if (key == "images") { - params.insert({ov::genai::images(std::move(py::cast>(value)))}); - } else if (key == "image") { - params.insert({ov::genai::image(std::move(py::cast(value)))}); - } else if (key == "generation_config") { - params.insert({ov::genai::generation_config(std::move(py::cast(value)))}); - } else if (key == "streamer") { - auto py_streamer = py::cast(value); - params.insert({ov::genai::streamer(std::move(pyutils::pystreamer_to_streamer(py_streamer)))}); - - } - else { - if (allow_compile_properties) { - // convert arbitrary objects to ov::Any - // not supported properties are not checked, as these properties are passed to compile(), which will throw exception in case of unsupported property - if (pyutils::py_object_is_any_map(value)) { - auto map = pyutils::py_object_to_any_map(value); - params.insert(map.begin(), map.end()); - } else { - params[key] = pyutils::py_object_to_any(value); - } - } - else { - // generate doesn't run compile(), so only VLMPipeline specific properties are allowed - throw(std::invalid_argument("'" + key + "' is unexpected parameter name. 
" - "Use help(openvino_genai.VLMPipeline.generate) to get list of acceptable parameters.")); - } - } - } - - return params; -} - void init_vlm_pipeline(py::module_& m) { py::class_(m, "VLMPipeline", "This class is used for generation with VLMs") .def(py::init([]( @@ -120,7 +80,7 @@ void init_vlm_pipeline(py::module_& m) { const py::kwargs& kwargs ) { ScopedVar env_manager(pyutils::ov_tokenizers_module_path()); - return std::make_unique(models_path, device, vlm_kwargs_to_any_map(kwargs, true)); + return std::make_unique(models_path, device, pyutils::kwargs_to_any_map(kwargs)); }), py::arg("models_path"), "folder with exported model files", py::arg("device"), "device on which inference will be done" @@ -177,7 +137,7 @@ void init_vlm_pipeline(py::module_& m) { const std::string& prompt, const py::kwargs& kwargs ) -> py::typing::Union { - return py::cast(pipe.generate(prompt, vlm_kwargs_to_any_map(kwargs, false))); + return py::cast(pipe.generate(prompt, pyutils::kwargs_to_any_map(kwargs))); }, py::arg("prompt"), "Input string", (vlm_generate_kwargs_docstring + std::string(" \n ")).c_str() diff --git a/src/python/py_whisper_pipeline.cpp b/src/python/py_whisper_pipeline.cpp index 3bf777f739..d34bd5f3b6 100644 --- a/src/python/py_whisper_pipeline.cpp +++ b/src/python/py_whisper_pipeline.cpp @@ -162,60 +162,7 @@ OptionalWhisperGenerationConfig update_whisper_config_from_kwargs(const Optional WhisperGenerationConfig res_config; if (config.has_value()) res_config = *config; - - for (const auto& item : kwargs) { - std::string key = py::cast(item.first); - py::object value = py::cast(item.second); - - if (item.second.is_none()) { - // Even if argument key name does not fit GenerationConfig name - // it's not an error if it's not defined. - // Some HF configs can have parameters for methods currently unsupported in ov_genai - // but if their values are not set / None, then this should not block - // us from reading such configs, e.g. 
{"typical_p": None, 'top_p': 1.0,...} - return res_config; - } - - if (key == "max_new_tokens") { - res_config.max_new_tokens = py::cast(item.second); - } else if (key == "max_length") { - res_config.max_length = py::cast(item.second); - } else if (key == "decoder_start_token_id") { - res_config.decoder_start_token_id = py::cast(item.second); - } else if (key == "pad_token_id") { - res_config.pad_token_id = py::cast(item.second); - } else if (key == "translate_token_id") { - res_config.translate_token_id = py::cast(item.second); - } else if (key == "transcribe_token_id") { - res_config.transcribe_token_id = py::cast(item.second); - } else if (key == "no_timestamps_token_id") { - res_config.no_timestamps_token_id = py::cast(item.second); - } else if (key == "max_initial_timestamp_index") { - res_config.max_initial_timestamp_index = py::cast(item.second); - } else if (key == "begin_suppress_tokens") { - res_config.begin_suppress_tokens = py::cast>(item.second); - } else if (key == "suppress_tokens") { - res_config.suppress_tokens = py::cast>(item.second); - } else if (key == "is_multilingual") { - res_config.is_multilingual = py::cast(item.second); - } else if (key == "language") { - res_config.language = py::cast(item.second); - } else if (key == "lang_to_id") { - res_config.lang_to_id = py::cast>(item.second); - } else if (key == "task") { - res_config.task = py::cast(item.second); - } else if (key == "return_timestamps") { - res_config.return_timestamps = py::cast(item.second); - } else if (key == "eos_token_id") { - res_config.set_eos_token_id(py::cast(item.second)); - } else { - throw(std::invalid_argument( - "'" + key + - "' is incorrect WhisperGenerationConfig parameter name. " - "Use help(openvino_genai.WhisperGenerationConfig) to get list of acceptable parameters.")); - } - } - + res_config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); return res_config; } diff --git a/tests/cpp/utils.cpp b/tests/cpp/utils.cpp new file mode 100644 index 0000000000..d00edae6fb --- /dev/null +++ b/tests/cpp/utils.cpp @@ -0,0 +1,21 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "utils.hpp" + + +using namespace ov::genai::utils; +using map_type = std::map; + +TEST(TestIsContainer, test_is_container) { + EXPECT_EQ(is_container, false); + EXPECT_EQ(is_container, false); + EXPECT_EQ(is_container, false); + EXPECT_EQ(is_container, false); + EXPECT_EQ(is_container, true); + EXPECT_EQ(is_container>, true); + EXPECT_EQ(is_container, true); + EXPECT_EQ(is_container>, true); +} \ No newline at end of file diff --git a/tests/python_tests/test_generate_api.py b/tests/python_tests/test_generate_api.py index ba934e3bda..80df79f31b 100644 --- a/tests/python_tests/test_generate_api.py +++ b/tests/python_tests/test_generate_api.py @@ -38,7 +38,7 @@ def run_hf_ov_genai_comparison_batched(model_descr, generation_config: Dict, pro # Need to set explicitly to False, but only if test arguments omitted this arg. # Do not apply 'repetition_penalty' if sampling is not used. config['do_sample'] = False - config['repetition_penalty'] = None + config['repetition_penalty'] = 1.0 # 1.0 means no penalty generation_config_hf = config.copy() if generation_config_hf.get('stop_criteria'): @@ -78,7 +78,7 @@ def run_hf_ov_genai_comparison(model_descr, generation_config: Dict, prompt: str # Need to set explicitly to False, but only if test arguments omitted this arg. # Do not apply 'repetition_penalty' if sampling is not used. 
config['do_sample'] = False - config['repetition_penalty'] = None + config['repetition_penalty'] = 1.0 # 1.0 means no penalty generation_config_hf = config.copy() if generation_config_hf.get('stop_criteria'): @@ -117,7 +117,7 @@ def hf_ov_genai_tensors_comparison( # Need to set explicitly to False, but only if test arguments omitted this arg. # Do not apply 'repetition_penalty' if sampling is not used. config['do_sample'] = False - config['repetition_penalty'] = None + config['repetition_penalty'] = 1.0 # 1.0 means no penalty generation_config_hf = config.copy() if generation_config_hf.get('stop_criteria'): @@ -635,7 +635,8 @@ def test_valid_configs(model_tmp_path): invalid_py_configs = [ dict(num_beam_groups=3, num_beams=15, do_sample=True), - dict(unexisting_key_name=True), # no eos_token_id no max_new_tokens, no max_len + # TODO: Currently unexpected params do not cause exceptions. Need to implement it in c++ and return this test + # dict(unexisting_key_name=True), # no eos_token_id no max_new_tokens, no max_len dict(eos_token_id=42, ignore_eos=True), # no max_new_tokens, no max_len with ignore_eos dict(repetition_penalty=-1.0, eos_token_id=42, max_new_tokens=20), # invalid penalty dict(temperature=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid temp @@ -763,7 +764,7 @@ def run_perf_metrics_collection(model_descr, generation_config: Dict, prompt: st # Need to set explicitly to False, but only if test arguments omitted this arg. # Do not apply 'repetition_penalty' if sampling is not used. config['do_sample'] = False - config['repetition_penalty'] = None + config['repetition_penalty'] = 1.0 # 1.0 means no penalty return pipe.generate([prompt], **config).perf_metrics diff --git a/tests/python_tests/test_sampling.py b/tests/python_tests/test_sampling.py index 9973e20e1d..9aa6931d85 100644 --- a/tests/python_tests/test_sampling.py +++ b/tests/python_tests/test_sampling.py @@ -334,7 +334,7 @@ def test_echo_without_completion(tmp_path, get_generation_config, max_num_batche model_path : Path = tmp_path / model_id save_ov_model_from_optimum(model, hf_tokenizer, model_path) - pipe = ContinuousBatchingPipeline(model_path.absolute().as_posix(), Tokenizer(model_path.absolute().as_posix(), {}), scheduler_config, "CPU", {}) + pipe = ContinuousBatchingPipeline(model_path.absolute().as_posix(), Tokenizer(model_path.absolute().as_posix()), scheduler_config, "CPU", {}) outputs = pipe.generate(["What is OpenVINO?"], generation_configs) assert(len(outputs)) @@ -361,7 +361,7 @@ def test_echo_with_completion(tmp_path, get_generation_config, max_num_batched_t model_path : Path = tmp_path / model_id save_ov_model_from_optimum(model, hf_tokenizer, model_path) - pipe = ContinuousBatchingPipeline(model_path.absolute().as_posix(), Tokenizer(model_path.absolute().as_posix(), {}), scheduler_config, "CPU", {}) + pipe = ContinuousBatchingPipeline(model_path.absolute().as_posix(), Tokenizer(model_path.absolute().as_posix()), scheduler_config, "CPU", {}) outputs = pipe.generate(["What is OpenVINO?"], generation_configs) assert(len(outputs)) @@ -389,7 +389,7 @@ def test_post_oom_health(tmp_path, sampling_config): models_path : Path = tmp_path / model_id save_ov_model_from_optimum(model, hf_tokenizer, models_path) - pipe = ContinuousBatchingPipeline(models_path.absolute().as_posix(), Tokenizer(models_path.absolute().as_posix(), {}), scheduler_config, "CPU", {}) + pipe = ContinuousBatchingPipeline(models_path.absolute().as_posix(), Tokenizer(models_path.absolute().as_posix()), scheduler_config, "CPU", 
{}) # First run should return incomplete response output = pipe.generate(["What is OpenVINO?"], generation_configs) assert (len(output)) From 89865c3e3856abec5fe6b7896a5e42cb81f5ff75 Mon Sep 17 00:00:00 2001 From: Helena Kloosterman Date: Thu, 21 Nov 2024 07:28:53 +0100 Subject: [PATCH 3/8] Update Python VLM example in README (#1178) Existing example uses an undefined "read_image" function, and using max_new_tokens in pipe.generate() resulted in an error with latest nightly. I updated the example to work out of the box. Makes it a bit longer, but this section is hidden by default in the README, so it doesn't add to visual clutter for people just visiting the repo. Also added links to the relevant samples. --------- Co-authored-by: Vladimir Zlobin --- README.md | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fe18205028..c1217a0215 100644 --- a/README.md +++ b/README.md @@ -117,17 +117,34 @@ optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code -- ### Run generation using VLMPipeline API in Python +See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/visual_language_chat) for a demo application. + +Run the following command to download a sample image: + +```sh +curl -O "https://storage.openvinotoolkit.org/test_data/images/dog.jpg" +``` + ```python +import numpy as np +import openvino as ov import openvino_genai as ov_genai -#Will run model on CPU, GPU is a possible option +from PIL import Image + +# Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU pipe = ov_genai.VLMPipeline("./MiniCPM-V-2_6/", "CPU") -rgb = read_image("cat.jpg") -print(pipe.generate(prompt, image=rgb, max_new_tokens=100)) + +image = Image.open("dog.jpg") +image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8) +image_data = ov.Tensor(image_data) + +prompt = "Can you describe the image?" +print(pipe.generate(prompt, image=image_data, max_new_tokens=100)) ``` ### Run generation using VLMPipeline in C++ -Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details) +Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details). See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/visual_language_chat) for a demo application. 
 ```cpp
 #include "load_image.hpp"

From 799454f5731518e795193721a77b44c95b45fb0f Mon Sep 17 00:00:00 2001
From: Vladimir Zlobin
Date: Thu, 21 Nov 2024 13:59:58 +0400
Subject: [PATCH 4/8] Install deployment and export requirements.txt (#1231) (#1241)

Ticket 157649

Co-authored-by: Ilya Lavrenov
---
 samples/CMakeLists.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index 229eccb3fe..860ced072b 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -14,8 +14,12 @@ add_subdirectory(cpp/text2image)
 add_subdirectory(cpp/visual_language_chat)
 add_subdirectory(cpp/whisper_speech_recognition)
 
-install(FILES requirements.txt DESTINATION samples
-        COMPONENT cpp_samples_genai)
+install(FILES
+    deployment-requirements.txt
+    export-requirements.txt
+    requirements.txt
+    DESTINATION samples
+    COMPONENT cpp_samples_genai)
 
 install(DIRECTORY
     cpp/beam_search_causal_lm

From 5d5fe7512398778681e0e2d2f5325e9c7995a7d0 Mon Sep 17 00:00:00 2001
From: Vladimir Zlobin
Date: Thu, 21 Nov 2024 18:08:22 +0400
Subject: [PATCH 5/8] Allow missing OpenVINODeveloperPackage (#1243)

Otherwise, compiling GenAI against OpenVINO archives prints
"Warning: Please, install pybind11-stubgen==2.5.1".
---
 src/python/CMakeLists.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 898e18b895..25d81277d6 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -182,12 +182,14 @@ if(pybind11_stubgen_AVAILABLE)
         VERBATIM)
 
     add_custom_target(${TARGET_NAME}_stub ALL DEPENDS ${output_file})
-else()
+elseif(OpenVINODeveloperPackage_FOUND)
     # Produce warning message at build time as well
     add_custom_command(OUTPUT pybind11_stub_gen_not_found.txt
         COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --red "Warning: Please, install ${pybind11_stubgen_dep}")
 
     add_custom_target(${TARGET_NAME}_stub ALL DEPENDS pybind11_stub_gen_not_found.txt)
+else()
+    add_custom_target(${TARGET_NAME}_stub ALL)
 endif()
 
 add_dependencies(${TARGET_NAME}_stub ${TARGET_NAME})

From ff8846ae599bc2a05b3173c0dd05a027a376e32c Mon Sep 17 00:00:00 2001
From: guozhong wang
Date: Fri, 22 Nov 2024 09:11:36 +0800
Subject: [PATCH 6/8] Fix wrong token latency when batch size is greater than 1 (#1244)

Fix the wrong 2nd token latency when batch size is greater than 1.
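The intended arithmetic, as a standalone sketch with made-up timestamps (in the benchmark the real values come from `perf_metrics.raw_metrics.m_new_token_times`; the variable names below are illustrative, not the benchmark's own code). The report that motivated the change follows.

```python
import numpy as np

# Illustrative values only: timestamps (ms) at which each generation step
# finished; every step emits `batch_size` tokens (one per sequence in the batch).
new_token_times_ms = np.array([100.0, 170.0, 240.0, 310.0])
batch_size = 16

# Per-step latency is the gap between consecutive step timestamps.
second_token_latencies_ms = new_token_times_ms[1:] - new_token_times_ms[:-1]
avg_latency_ms = second_token_latencies_ms.mean()

# Throughput counts every token emitted in a step, hence the batch_size factor.
throughput_tokens_per_s = batch_size * 1000.0 / avg_latency_ms

# The old code divided only the second timestamp array by batch_size inside the
# subtraction, so each "latency" degenerated into roughly an absolute timestamp,
# which is where the huge bogus numbers in the report below came from.
print(f"2nd token latency: {avg_latency_ms:.2f} ms/{batch_size}tokens, "
      f"2nd tokens throughput: {throughput_tokens_per_s:.2f} tokens/s")
```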
python benchmark.py -m /mnt/llm_irs/models_6c715998_ww45.4_optimum/llama-2-7b-chat/pytorch/dldt/FP16 -n 1 --genai -ic 128 -bs 16 [ INFO ] [Average] P[0] Input token size: 128, 1st token latency: **0.36 ms/16tokens**, **2nd token latency: 1958228200.33 ms/16tokens**, 2nd tokens throughput: **0.00** 16tokenss/s Fix result: [ INFO ] [Average] P[0] Input token size: 128, 1st token latency: 91.54 ms/16tokens, 2nd token latency: 69.81 ms/16tokens, 2nd tokens throughput: 229.18 tokens/s --- tools/llm_bench/llm_bench_utils/metrics_print.py | 4 ++-- tools/llm_bench/task/speech_to_text_generation.py | 4 ++-- tools/llm_bench/task/text_generation.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/llm_bench/llm_bench_utils/metrics_print.py b/tools/llm_bench/llm_bench_utils/metrics_print.py index 905decf72b..de9d0126f8 100644 --- a/tools/llm_bench/llm_bench_utils/metrics_print.py +++ b/tools/llm_bench/llm_bench_utils/metrics_print.py @@ -149,7 +149,7 @@ def output_avg_statis_tokens(prompt_dict, prompt_idx_list, iter_data_list, batch avg_input_size = int(avg_input_size / index_num) if avg_2nd_tokens_latency > 0: avg_2nd_token_tput = (1 / avg_2nd_tokens_latency) * batch_size * 1000 - latency_unit = 'token' if is_text_gen is True else 'step' + tput_unit = latency_unit = 'token' if is_text_gen is True else 'step' if batch_size > 1: if is_text_gen is True: latency_unit = '{}tokens'.format(batch_size) @@ -157,7 +157,7 @@ def output_avg_statis_tokens(prompt_dict, prompt_idx_list, iter_data_list, batch latency_unit = '{}steps'.format(batch_size) avg_1st_token_latency = 'NA' if avg_1st_token_latency < 0 else f'{avg_1st_token_latency:.2f} ms/{latency_unit}' avg_2nd_tokens_latency = 'NA' if avg_2nd_tokens_latency < 0 else f'{avg_2nd_tokens_latency:.2f} ms/{latency_unit}' - avg_2nd_token_tput = 'NA' if avg_2nd_tokens_latency == 'NA' else f'{avg_2nd_token_tput:.2f} {latency_unit}s/s' + avg_2nd_token_tput = 'NA' if avg_2nd_tokens_latency == 'NA' else f'{avg_2nd_token_tput:.2f} {tput_unit}s/s' prefix = f'[ INFO ] [Average] P[{p_idx}]L[{loop_idx}]' if loop_idx != -1 else f'[ INFO ] [Average] P[{p_idx}]' if is_text_gen is True: output_info = '' diff --git a/tools/llm_bench/task/speech_to_text_generation.py b/tools/llm_bench/task/speech_to_text_generation.py index ad49109bab..be9c9ab295 100644 --- a/tools/llm_bench/task/speech_to_text_generation.py +++ b/tools/llm_bench/task/speech_to_text_generation.py @@ -51,10 +51,10 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list): ) end = time.perf_counter() perf_metrics = result_text.perf_metrics - first_token_time = perf_metrics.get_ttft().mean / args["batch_size"] + first_token_time = perf_metrics.get_ttft().mean second_tokens_durations = ( np.array(perf_metrics.raw_metrics.m_new_token_times[1:]) - - np.array(perf_metrics.raw_metrics.m_new_token_times[:-1]) / args["batch_size"] + - np.array(perf_metrics.raw_metrics.m_new_token_times[:-1]) ).tolist() tm_list = (np.array([first_token_time] + second_tokens_durations) / 1000).tolist() tm_infer_list = [] diff --git a/tools/llm_bench/task/text_generation.py b/tools/llm_bench/task/text_generation.py index 029bcdf16d..7718773560 100644 --- a/tools/llm_bench/task/text_generation.py +++ b/tools/llm_bench/task/text_generation.py @@ -240,10 +240,10 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data per_token_time = generation_time * 1000 / (num_tokens / args['batch_size']) else: log.warning("No generated tokens") - first_token_time = 
(perf_metrics.get_ttft().mean - perf_metrics.raw_metrics.tokenization_durations[-1] / 1000) / args["batch_size"]
+    first_token_time = (perf_metrics.get_ttft().mean - perf_metrics.raw_metrics.tokenization_durations[-1] / 1000) * args["batch_size"]
     second_tokens_durations = (
         np.array(perf_metrics.raw_metrics.m_new_token_times[1:])
-        - np.array(perf_metrics.raw_metrics.m_new_token_times[:-1]) / args["batch_size"]
+        - np.array(perf_metrics.raw_metrics.m_new_token_times[:-1])
     ).tolist()
 
     tm_list = np.array([first_token_time] + second_tokens_durations) / 1000

From 18e8d5b59c9f4776a59811db4f299c2da1ea974f Mon Sep 17 00:00:00 2001
From: Alexander Kozlov
Date: Fri, 22 Nov 2024 16:12:08 +0400
Subject: [PATCH 7/8] [WWB]: Updated readme with the latest information (#1248)

---
 tools/who_what_benchmark/README.md | 34 ++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/tools/who_what_benchmark/README.md b/tools/who_what_benchmark/README.md
index 012782bad3..0e597859d2 100644
--- a/tools/who_what_benchmark/README.md
+++ b/tools/who_what_benchmark/README.md
@@ -9,12 +9,12 @@ WWB provides default datasets for the supported use cases. However, it is relati
 * Command-line interface for Hugging Face and OpenVINO models and API to support broader inference backends.
 * Simple and quick accuracy test for compressed, quantized, pruned, distilled LLMs. It works with any model that supports HuggingFace Transformers text generation API including:
     * HuggingFace Transformers compressed models via [Bitsandbytes](https://huggingface.co/docs/transformers/main_classes/quantization#transformers.BitsAndBytesConfig)
+    * [OpenVINO](https://github.com/openvinotoolkit/openvino) and [NNCF](https://github.com/openvinotoolkit/nncf) via [Optimum-Intel](https://github.com/huggingface/optimum-intel) and OpenVINO [GenAI](https://github.com/openvinotoolkit/openvino.genai)
    * [GPTQ](https://huggingface.co/docs/transformers/main_classes/quantization#transformers.GPTQConfig) via HuggingFace API
    * Llama.cpp via [BigDL-LLM](https://github.com/intel-analytics/BigDL/tree/main/python/llm)
-    * [OpenVINO](https://github.com/openvinotoolkit/openvino) and [NNCF](https://github.com/openvinotoolkit/nncf) via [Optimum-Intel](https://github.com/huggingface/optimum-intel)
 * Support of custom datasets of the user choice
-* Validation of text-to-image pipelines. Computes similarity score between generated images:
-    * Supports Diffusers library and Optimum-Intel via `Text2ImageEvaluator` class.
+* Validation of text-to-image pipelines. Computes similarity score between generated images with Diffusers library, Optimum-Intel, and OpenVINO GenAI via `Text2ImageEvaluator` class.
+* Validation of Visual Language pipelines. Computes similarity score between generated answers with Transformers library, Optimum-Intel, and OpenVINO GenAI via `VisualTextEvaluator` class.
 
 ### Installation
 Install WWB and its requirements from the source using `pip` or any other package manager. For example,
@@ -41,18 +41,30 @@ wwb --target-model phi-3-openvino --gt-data gt.csv --model-type text
 wwb --target-model phi-3-openvino --gt-data gt.csv --model-type text --genai
 ```
 
-### Compare Text-to-image models (Diffusers)
+> **NOTE**: use --verbose option for debug to see the outputs with the largest difference.
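The same comparison can be driven from Python through the evaluator classes named above. A minimal sketch, assuming the `TextEvaluator` entry point and `score()` signature (verify the exact names against the API section and the `whowhatbench` evaluator modules before relying on them):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import whowhatbench

model_id = "microsoft/Phi-3-mini-4k-instruct"
base_model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Ground-truth answers are collected once from the reference model.
evaluator = whowhatbench.TextEvaluator(base_model=base_model, tokenizer=tokenizer)

# Any model with a compatible generate() can then be scored against those
# references, e.g. a quantized variant exported through Optimum-Intel.
optimized_model = AutoModelForCausalLM.from_pretrained(model_id)  # placeholder target
metrics_per_prompt, metrics = evaluator.score(optimized_model)
print("similarity:", metrics["similarity"][0])
```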
+
+### Compare Text-to-image models
 ```sh
-# Export FP16 model to OpenVINO
-optimum-cli export openvino -m SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 sd-lcm-fp16
 # Export model with 8-bit quantized weights to OpenVINO
 optimum-cli export openvino -m SimianLuo/LCM_Dreamshaper_v7 --weight-format int8 sd-lcm-int8
-# Collect the references and save the mappling in the .json file.
-# Reference images will be stored in the "reference" subfolder under the same path with .json.
-wwb --base-model sd-lcm-fp16 --gt-data lcm_test/sd_xl.json --model-type text-to-image
+# Collect the references and save the mapping in the .csv file.
+# Reference images will be stored in the "reference" subfolder under the same path with .csv.
+wwb --base-model SimianLuo/LCM_Dreamshaper_v7 --gt-data lcm_test/gt.csv --model-type text-to-image --hf
+# Compute the metric
+# Target images will be stored in the "target" subfolder under the same path with .csv.
+wwb --target-model sd-lcm-int8 --gt-data lcm_test/gt.csv --model-type text-to-image --genai
 ```
+### Compare Visual Language Models (VLMs)
+```sh
+# Export model with 8-bit quantized weights to OpenVINO
+optimum-cli export openvino -m llava-hf/llava-v1.6-mistral-7b-hf --weight-format int8 llava-int8
+# Collect the references and save the mapping in the .csv file.
+# Reference images will be stored in the "reference" subfolder under the same path with .csv.
+wwb --base-model llava-hf/llava-v1.6-mistral-7b-hf --gt-data llava_test/gt.csv --model-type visual-text --hf
 # Compute the metric
-# Target images will be stored in the "target" subfolder under the same path with .json.
-wwb --target-model sd-lcm-int8 --gt-data lcm_test/sd_xl.json --model-type text-to-image
+# Target images will be stored in the "target" subfolder under the same path with .csv.
+wwb --target-model llava-int8 --gt-data llava_test/gt.csv --model-type visual-text --genai ``` ### API From d490c18aabe6c9491fab6d6601948e91f10d6fc3 Mon Sep 17 00:00:00 2001 From: Alexander Kozlov Date: Mon, 25 Nov 2024 10:38:04 +0300 Subject: [PATCH 8/8] [WWB]: Added ability to compare results for previously collected outputs w/o models provided (#1238) - Compare outputs collected from the previous runs - Kept only "similarity" metric by default as the only one that is used in CI Example: ```shell optimum-cli export openvino -m Qwen/Qwen2-0.5B-Instruct --weight-format fp16 models/Qwen2-0.5B-Instruct-fp16 mkdir qwen2_N_FP16 # References from NAT FP16 wwb --base-model Qwen/Qwen2-0.5B-Instruct --gt-data qwen2_N_FP16/gt.csv --hf --num-samples 4 # Compare N_O_FP16, save Optimum data for references wwb --target-model models/Qwen2-0.5B-Instruct-fp16 --gt-data qwen2_N_FP16/gt.csv --output qwen2_N_O_FP16 --num-samples 4 # Compare N_G_FP16, save GenAI data for references wwb --target-model models/Qwen2-0.5B-Instruct-fp16 --gt-data qwen2_N_FP16/gt.csv --genai --output qwen2_N_G_FP16 --num-samples 4 # Compare O_G_FP16, use pre-generated grout truth and target data from the previous runs wwb --target-data qwen2_N_G_FP16/target.csv --gt-data qwen2_N_O_FP16/target.csv --genai --output qwen2_O_G_FP16 --num-samples 4 # The same for INT8 optimum-cli export openvino -m Qwen/Qwen2-0.5B-Instruct --weight-format int8 models/Qwen2-0.5B-Instruct-int8 # Compare N_G_INT8, save GenAI data for references wwb --target-model models/Qwen2-0.5B-Instruct-int8 --gt-data qwen2_N_FP16/gt.csv --genai --output qwen2_N_G_INT8 --num-samples 4 ``` --- .../tests/test_cli_image.py | 166 ++++++++++-------- .../who_what_benchmark/tests/test_cli_text.py | 128 ++++++++------ .../who_what_benchmark/tests/test_cli_vlm.py | 142 ++++++++------- .../whowhatbench/registry.py | 2 +- .../whowhatbench/text2image_evaluator.py | 14 +- .../whowhatbench/text_evaluator.py | 10 +- .../whowhatbench/visualtext_evaluator.py | 8 +- tools/who_what_benchmark/whowhatbench/wwb.py | 59 ++++--- 8 files changed, 298 insertions(+), 231 deletions(-) diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py index 374df2a1ec..b2c2015f80 100644 --- a/tools/who_what_benchmark/tests/test_cli_image.py +++ b/tools/who_what_benchmark/tests/test_cli_image.py @@ -14,7 +14,6 @@ def run_wwb(args): logger.info(" ".join(["TRANSFOREMRS_VERBOSITY=debug wwb"] + args)) result = subprocess.run(["wwb"] + args, capture_output=True, text=True) logger.info(result) - print(" ".join(["TRANSFOREMRS_VERBOSITY=debug wwb"] + args)) return result @@ -27,7 +26,7 @@ def run_wwb(args): ], ) def test_image_model_types(model_id, model_type, backend): - GT_FILE = "test_sd.json" + GT_FILE = "test_sd.csv" wwb_args = [ "--base-model", model_id, @@ -70,79 +69,94 @@ def test_image_model_types(model_id, model_type, backend): ], ) def test_image_model_genai(model_id, model_type): - GT_FILE = "test_sd.json" - MODEL_PATH = tempfile.TemporaryDirectory().name - - result = subprocess.run(["optimum-cli", "export", - "openvino", "-m", model_id, - MODEL_PATH], capture_output=True, text=True) - assert result.returncode == 0 - - wwb_args = [ - "--base-model", - MODEL_PATH, - "--num-samples", - "1", - "--gt-data", - GT_FILE, - "--device", - "CPU", - "--model-type", - model_type, - ] - result = run_wwb(wwb_args) - assert result.returncode == 0 - assert os.path.exists(GT_FILE) - assert os.path.exists("reference") - - wwb_args = [ - "--target-model", - 
MODEL_PATH, - "--num-samples", - "1", - "--gt-data", - GT_FILE, - "--device", - "CPU", - "--model-type", - model_type, - "--genai", - ] - result = run_wwb(wwb_args) - - assert result.returncode == 0 - assert "Metrics for model" in result.stderr - similarity = float(str(result.stderr).split(" ")[-1]) - assert similarity >= 0.98 - assert os.path.exists("target") - - output_dir = tempfile.TemporaryDirectory().name - wwb_args = [ - "--target-model", - MODEL_PATH, - "--num-samples", - "1", - "--gt-data", - GT_FILE, - "--device", - "CPU", - "--model-type", - model_type, - "--output", - output_dir, - ] - result = run_wwb(wwb_args) - assert os.path.exists(os.path.join(output_dir, "target")) - assert os.path.exists(os.path.join(output_dir, "target.json")) - - try: - os.remove(GT_FILE) - except OSError: - pass - shutil.rmtree("reference", ignore_errors=True) - shutil.rmtree("target", ignore_errors=True) - shutil.rmtree(MODEL_PATH, ignore_errors=True) - shutil.rmtree(output_dir, ignore_errors=True) + with tempfile.TemporaryDirectory() as temp_dir: + GT_FILE = os.path.join(temp_dir, "gt.csv") + MODEL_PATH = os.path.join(temp_dir, model_id.replace("/", "--")) + + result = subprocess.run(["optimum-cli", "export", + "openvino", "-m", model_id, + MODEL_PATH], + capture_output=True, text=True) + assert result.returncode == 0 + + wwb_args = [ + "--base-model", + MODEL_PATH, + "--num-samples", + "1", + "--gt-data", + GT_FILE, + "--device", + "CPU", + "--model-type", + model_type, + ] + result = run_wwb(wwb_args) + assert result.returncode == 0 + assert os.path.exists(GT_FILE) + assert os.path.exists(os.path.join(temp_dir, "reference")) + + wwb_args = [ + "--target-model", + MODEL_PATH, + "--num-samples", + "1", + "--gt-data", + GT_FILE, + "--device", + "CPU", + "--model-type", + model_type, + "--genai", + ] + result = run_wwb(wwb_args) + + assert result.returncode == 0 + assert "Metrics for model" in result.stderr + similarity = float(str(result.stderr).split(" ")[-1]) + assert similarity >= 0.98 + assert os.path.exists(os.path.join(temp_dir, "target")) + + output_dir = tempfile.TemporaryDirectory().name + wwb_args = [ + "--target-model", + MODEL_PATH, + "--num-samples", + "1", + "--gt-data", + GT_FILE, + "--device", + "CPU", + "--model-type", + model_type, + "--output", + output_dir, + ] + result = run_wwb(wwb_args) + assert result.returncode == 0 + assert os.path.exists(os.path.join(output_dir, "target")) + assert os.path.exists(os.path.join(output_dir, "target.csv")) + + # test w/o models + wwb_args = [ + "--target-data", + os.path.join(output_dir, "target.csv"), + "--num-samples", + "1", + "--gt-data", + GT_FILE, + "--device", + "CPU", + "--model-type", + model_type, + ] + result = run_wwb(wwb_args) + assert result.returncode == 0 + + shutil.rmtree("reference", ignore_errors=True) + shutil.rmtree("target", ignore_errors=True) + shutil.rmtree(MODEL_PATH, ignore_errors=True) + shutil.rmtree(output_dir, ignore_errors=True) @pytest.mark.parametrize( @@ -152,7 +166,7 @@ def test_image_model_genai(model_id, model_type): ], ) def test_image_custom_dataset(model_id, model_type, backend): - GT_FILE = "test_sd.json" + GT_FILE = "test_sd.csv" wwb_args = [ "--base-model", model_id, diff --git a/tools/who_what_benchmark/tests/test_cli_text.py b/tools/who_what_benchmark/tests/test_cli_text.py index cf71adc08a..0baf60a5a4 100644 --- a/tools/who_what_benchmark/tests/test_cli_text.py +++ b/tools/who_what_benchmark/tests/test_cli_text.py @@ -73,29 +73,28 @@ def test_text_target_model(): @pytest.fixture def 
test_text_gt_data(): - with tempfile.NamedTemporaryFile(suffix=".csv") as tmpfile: - temp_file_name = tmpfile.name + with tempfile.TemporaryDirectory() as temp_dir: + temp_file_name = os.path.join(temp_dir, "gt.csv") - result = run_wwb( - [ - "--base-model", - base_model_path, - "--gt-data", - temp_file_name, - "--dataset", - "EleutherAI/lambada_openai,en", - "--dataset-field", - "text", - "--split", - "test", - "--num-samples", - "2", - "--device", - "CPU", - ] - ) - data = pd.read_csv(temp_file_name) - os.remove(temp_file_name) + result = run_wwb( + [ + "--base-model", + base_model_path, + "--gt-data", + temp_file_name, + "--dataset", + "EleutherAI/lambada_openai,en", + "--dataset-field", + "text", + "--split", + "test", + "--num-samples", + "2", + "--device", + "CPU", + ] + ) + data = pd.read_csv(temp_file_name) assert result.returncode == 0 assert len(data["questions"].values) == 2 @@ -107,6 +106,8 @@ def test_text_output_directory(): [ "--base-model", base_model_path, + "--gt-data", + os.path.join(temp_dir, "gt.csv"), "--target-model", target_model_path, "--num-samples", @@ -121,7 +122,23 @@ def test_text_output_directory(): assert "Metrics for model" in result.stderr assert os.path.exists(os.path.join(temp_dir, "metrics_per_qustion.csv")) assert os.path.exists(os.path.join(temp_dir, "metrics.csv")) - assert os.path.exists(os.path.join(temp_dir, "target.json")) + assert os.path.exists(os.path.join(temp_dir, "target.csv")) + + # test measurtement w/o models + result = run_wwb( + [ + "--gt-data", + os.path.join(temp_dir, "gt.csv"), + "--target-data", + os.path.join(temp_dir, "target.csv"), + "--num-samples", + "2", + "--device", + "CPU", + ] + ) + assert result.returncode == 0 + assert "Metrics for model" in result.stderr def test_text_verbose(): @@ -143,46 +160,43 @@ def test_text_verbose(): def test_text_language_autodetect(): - temp_file_name = tempfile.NamedTemporaryFile(suffix=".csv").name - - result = run_wwb( - [ - "--base-model", - "Qwen/Qwen2-0.5B", - "--gt-data", - temp_file_name, - "--num-samples", - "2", - "--device", - "CPU", - ] - ) - data = pd.read_csv(temp_file_name) - os.remove(temp_file_name) + with tempfile.TemporaryDirectory() as temp_dir: + temp_file_name = os.path.join(temp_dir, "gt.csv") + result = run_wwb( + [ + "--base-model", + "Qwen/Qwen2-0.5B", + "--gt-data", + temp_file_name, + "--num-samples", + "2", + "--device", + "CPU", + ] + ) + data = pd.read_csv(temp_file_name) assert result.returncode == 0 assert "马克" in data["prompts"].values[0] def test_text_hf_model(): - with tempfile.NamedTemporaryFile(suffix=".csv") as tmpfile: - temp_file_name = tmpfile.name - - result = run_wwb( - [ - "--base-model", - model_id, - "--gt-data", - temp_file_name, - "--num-samples", - "2", - "--device", - "CPU", - "--hf", - ] - ) - data = pd.read_csv(temp_file_name) - os.remove(temp_file_name) + with tempfile.TemporaryDirectory() as temp_dir: + temp_file_name = os.path.join(temp_dir, "gt.csv") + result = run_wwb( + [ + "--base-model", + model_id, + "--gt-data", + temp_file_name, + "--num-samples", + "2", + "--device", + "CPU", + "--hf", + ] + ) + data = pd.read_csv(temp_file_name) assert result.returncode == 0 assert len(data["prompts"].values) == 2 diff --git a/tools/who_what_benchmark/tests/test_cli_vlm.py b/tools/who_what_benchmark/tests/test_cli_vlm.py index d45283493e..5b33abf33c 100644 --- a/tools/who_what_benchmark/tests/test_cli_vlm.py +++ b/tools/who_what_benchmark/tests/test_cli_vlm.py @@ -24,70 +24,88 @@ def run_wwb(args): ], ) def test_vlm_basic(model_id, model_type): 
- GT_FILE = tempfile.NamedTemporaryFile(suffix=".json").name - MODEL_PATH = tempfile.TemporaryDirectory().name + with tempfile.TemporaryDirectory() as temp_dir: + GT_FILE = os.path.join(temp_dir, "gt.csv") + MODEL_PATH = os.path.join(temp_dir, model_id.replace("/", "--")) - result = subprocess.run(["optimum-cli", "export", - "openvino", "-m", model_id, - MODEL_PATH, "--task", - "image-text-to-text", - "--trust-remote-code"], - capture_output=True, - text=True, - ) - assert result.returncode == 0 + result = subprocess.run(["optimum-cli", "export", + "openvino", "-m", model_id, + MODEL_PATH, "--task", + "image-text-to-text", + "--trust-remote-code"], + capture_output=True, + text=True, + ) + assert result.returncode == 0 - wwb_args = [ - "--base-model", - model_id, - "--num-samples", - "1", - "--gt-data", - GT_FILE, - "--device", - "CPU", - "--model-type", - model_type, - "--hf", - ] - result = run_wwb(wwb_args) - assert result.returncode == 0 + # Collect reference with HF model + wwb_args = [ + "--base-model", + model_id, + "--num-samples", + "1", + "--gt-data", + GT_FILE, + "--device", + "CPU", + "--model-type", + model_type, + "--hf", + ] + result = run_wwb(wwb_args) + assert result.returncode == 0 - wwb_args = [ - "--target-model", - MODEL_PATH, - "--num-samples", - "1", - "--gt-data", - GT_FILE, - "--device", - "CPU", - "--model-type", - model_type, - ] - result = run_wwb(wwb_args) - assert result.returncode == 0 + # test Optimum + wwb_args = [ + "--target-model", + MODEL_PATH, + "--num-samples", + "1", + "--gt-data", + GT_FILE, + "--device", + "CPU", + "--model-type", + model_type, + ] + result = run_wwb(wwb_args) + assert result.returncode == 0 - wwb_args = [ - "--target-model", - MODEL_PATH, - "--num-samples", - "1", - "--gt-data", - GT_FILE, - "--device", - "CPU", - "--model-type", - model_type, - "--genai", - ] - result = run_wwb(wwb_args) - assert result.returncode == 0 + # test GenAI + wwb_args = [ + "--target-model", + MODEL_PATH, + "--num-samples", + "1", + "--gt-data", + GT_FILE, + "--device", + "CPU", + "--model-type", + model_type, + "--genai", + "--output", + "target", + ] + result = run_wwb(wwb_args) + assert result.returncode == 0 - try: - os.remove(GT_FILE) - except OSError: - pass - shutil.rmtree("reference", ignore_errors=True) - shutil.rmtree("target", ignore_errors=True) - shutil.rmtree(MODEL_PATH, ignore_errors=True) + # test w/o models + wwb_args = [ + "--target-data", + "target/target.csv", + "--num-samples", + "1", + "--gt-data", + GT_FILE, + "--device", + "CPU", + "--model-type", + model_type, + "--genai", + ] + result = run_wwb(wwb_args) + assert result.returncode == 0 + shutil.rmtree("reference", ignore_errors=True) + shutil.rmtree("target", ignore_errors=True) + shutil.rmtree(MODEL_PATH, ignore_errors=True) diff --git a/tools/who_what_benchmark/whowhatbench/registry.py b/tools/who_what_benchmark/whowhatbench/registry.py index 85fabf618e..0cfbf8e440 100644 --- a/tools/who_what_benchmark/whowhatbench/registry.py +++ b/tools/who_what_benchmark/whowhatbench/registry.py @@ -29,7 +29,7 @@ def dump_predictions(self, csv_name: str): pass @abstractmethod - def score(self, model, **kwargs): + def score(self, model_or_data, **kwargs): pass @abstractmethod diff --git a/tools/who_what_benchmark/whowhatbench/text2image_evaluator.py b/tools/who_what_benchmark/whowhatbench/text2image_evaluator.py index 2663414917..1ff7ff5e21 100644 --- a/tools/who_what_benchmark/whowhatbench/text2image_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/text2image_evaluator.py @@ -84,15 
+84,19 @@ def __init__( def get_generation_fn(self): return self.generation_fn - def score(self, model, gen_image_fn=None, output_dir=None, **kwargs): - model.resolution = self.resolution + def score(self, model_or_data, gen_image_fn=None, output_dir=None, **kwargs): if output_dir is None: image_folder = os.path.join(self.gt_dir, "target") else: image_folder = os.path.join(output_dir, "target") - predictions = self._generate_data( - model, gen_image_fn, image_folder - ) + + if isinstance(model_or_data, str) and os.path.exists(model_or_data): + predictions = pd.read_csv(model_or_data, keep_default_na=False) + else: + model_or_data.resolution = self.resolution + predictions = self._generate_data( + model_or_data, gen_image_fn, image_folder + ) self.predictions = predictions all_metrics_per_prompt = {} diff --git a/tools/who_what_benchmark/whowhatbench/text_evaluator.py b/tools/who_what_benchmark/whowhatbench/text_evaluator.py index eb89083496..50ce224def 100644 --- a/tools/who_what_benchmark/whowhatbench/text_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/text_evaluator.py @@ -1,5 +1,6 @@ from typing import Any, Union +import os import pandas as pd from tqdm import tqdm @@ -97,7 +98,7 @@ def __init__( tokenizer: Any = None, gt_data: str = None, test_data: Union[str, list] = None, - metrics=("similarity", "divergency"), + metrics="similarity", similarity_model_id: str = "sentence-transformers/all-mpnet-base-v2", max_new_tokens=128, crop_question=True, @@ -155,8 +156,11 @@ def __init__( def get_generation_fn(self): return self.generation_fn - def score(self, model, gen_answer_fn=None, **kwargs): - predictions = self._generate_data(model, gen_answer_fn, self.generation_config) + def score(self, model_or_data, gen_answer_fn=None, **kwargs): + if isinstance(model_or_data, str) and os.path.exists(model_or_data): + predictions = pd.read_csv(model_or_data, keep_default_na=False) + else: + predictions = self._generate_data(model_or_data, gen_answer_fn, self.generation_config) self.predictions = predictions all_metrics_per_prompt = {} diff --git a/tools/who_what_benchmark/whowhatbench/visualtext_evaluator.py b/tools/who_what_benchmark/whowhatbench/visualtext_evaluator.py index ef10bdafcf..99027971d8 100644 --- a/tools/who_what_benchmark/whowhatbench/visualtext_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/visualtext_evaluator.py @@ -1,5 +1,6 @@ from typing import Any, Union +import os import datasets import pandas as pd from diffusers.utils.loading_utils import load_image @@ -64,8 +65,11 @@ def __init__( seqs_per_request=seqs_per_request, ) - def score(self, model, gen_answer_fn=None, **kwargs): - predictions = self._generate_data(model, gen_answer_fn) + def score(self, model_or_data, gen_answer_fn=None, **kwargs): + if isinstance(model_or_data, str) and os.path.exists(model_or_data): + predictions = pd.read_csv(model_or_data, keep_default_na=False) + else: + predictions = self._generate_data(model_or_data, gen_answer_fn, self.generation_config) self.predictions = predictions all_metrics_per_prompt = {} diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index f3c5f8224a..0a01a8e8df 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -271,12 +271,17 @@ def parse_args(): default=None, help="Tokenizer for divergency metric. 
If not provided, it will be load from base_model or target_model.",
     )
-
     parser.add_argument(
         "--gt-data",
         default=None,
-        help="CSV file containing GT outputs from base_model. If defined and exists then base_model will not used."
-        " If the files does not exist, it will be generated by base_model evaluation.",
+        help="CSV file containing GT outputs from --base-model. If defined and exists then --base-model will not be used."
+        " If the file does not exist, it will be generated by --base-model evaluation.",
+    )
+    parser.add_argument(
+        "--target-data",
+        default=None,
+        help="CSV file containing outputs from target model. If defined and exists then --target-model will not be used."
+        " If the file does not exist, it will be generated by --target-model evaluation.",
     )
     parser.add_argument(
         "--model-type",
@@ -385,14 +390,11 @@ def parse_args():
 
 
 def check_args(args):
-    if args.base_model is None and args.target_model is None:
-        raise ValueError(
-            "Wether --base-model or --target-model should be provided")
     if args.base_model is None and args.gt_data is None:
         raise ValueError("Wether --base-model or --gt-data should be provided")
-    if args.target_model is None and args.gt_data is None:
+    if args.target_model is None and args.gt_data is None and args.target_data:
         raise ValueError(
-            "Wether --target-model or --gt-data should be provided")
+            "Wether --target-model, --target-data or --gt-data should be provided")
 
 
 def load_tokenizer(args):
@@ -405,7 +407,7 @@ def load_tokenizer(args):
         tokenizer = AutoTokenizer.from_pretrained(
             args.base_model, trust_remote_code=True
         )
-    else:
+    elif args.target_model is not None:
         tokenizer = AutoTokenizer.from_pretrained(
             args.target_model, trust_remote_code=True
         )
@@ -419,7 +421,7 @@ def load_processor(args):
         processor = AutoProcessor.from_pretrained(
             args.base_model, trust_remote_code=True
         )
-    else:
+    elif args.target_model is not None:
         processor = AutoProcessor.from_pretrained(
             args.target_model, trust_remote_code=True
         )
@@ -611,20 +613,27 @@ def main():
             evaluator.dump_gt(args.gt_data)
         del base_model
 
-    if args.target_model:
-        target_model = load_model(
-            args.model_type,
-            args.target_model,
-            args.device,
-            args.ov_config,
-            args.hf,
-            args.genai,
-        )
-        all_metrics_per_question, all_metrics = evaluator.score(
-            target_model,
-            evaluator.get_generation_fn() if args.genai else None,
-            output_dir=args.output
-        )
+    if args.target_data or args.target_model:
+        if args.target_data and os.path.exists(args.target_data):
+            all_metrics_per_question, all_metrics = evaluator.score(
+                args.target_data,
+                None,
+                output_dir=args.output
+            )
+        else:
+            target_model = load_model(
+                args.model_type,
+                args.target_model,
+                args.device,
+                args.ov_config,
+                args.hf,
+                args.genai,
+            )
+            all_metrics_per_question, all_metrics = evaluator.score(
+                target_model,
+                evaluator.get_generation_fn() if args.genai else None,
+                output_dir=args.output
+            )
 
         logger.info("Metrics for model: %s", args.target_model)
         logger.info(all_metrics)
@@ -635,7 +644,7 @@ def main():
         df.to_csv(os.path.join(args.output, "metrics_per_qustion.csv"))
         df = pd.DataFrame(all_metrics)
         df.to_csv(os.path.join(args.output, "metrics.csv"))
-        evaluator.dump_predictions(os.path.join(args.output, "target.json"))
+        evaluator.dump_predictions(os.path.join(args.output, "target.csv"))
 
     if args.verbose and args.target_model is not None:
         if args.model_type == "text" or args.model_type == "visual-text":