diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index 0da478a397..82a450b619 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -100,8 +100,39 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { bool is_greedy_decoding() const; bool is_beam_search() const; bool is_multinomial() const; - static GenerationConfig anymap_to_generation_config(const ov::AnyMap& config_map = {}); + void update_generation_config(const ov::AnyMap& config_map = {}); }; +/* + * utils that allow to use generate and operator() in the following way: + * pipe.generate(input_ids, ov::max_new_tokens(200), ov::temperature(1.0f),...) + * pipe(text, ov::max_new_tokens(200), ov::temperature(1.0f),...) +*/ +static constexpr ov::Property max_new_tokens{"max_new_tokens"}; +static constexpr ov::Property max_length{"max_length"}; +static constexpr ov::Property ignore_eos{"ignore_eos"}; + +static constexpr ov::Property num_beam_groups{"num_beam_groups"}; +static constexpr ov::Property num_beams{"num_beams"}; +static constexpr ov::Property diversity_penalty{"diversity_penalty"}; +static constexpr ov::Property length_penalty{"length_penalty"}; +static constexpr ov::Property num_return_sequences{"num_return_sequences"}; +static constexpr ov::Property no_repeat_ngram_size{"no_repeat_ngram_size"}; +static constexpr ov::Property stop_criteria{"stop_criteria"}; + +static constexpr ov::Property temperature{"temperature"}; +static constexpr ov::Property top_p{"top_p"}; +static constexpr ov::Property top_k{"top_k"}; +static constexpr ov::Property do_sample{"do_sample"}; +static constexpr ov::Property repetition_penalty{"repetition_penalty"}; + + +static constexpr ov::Property pad_token_id{"pad_token_id"}; +static constexpr ov::Property bos_token_id{"bos_token_id"}; +static constexpr ov::Property eos_token_id{"eos_token_id"}; + +static constexpr ov::Property bos_token{"bos_token"}; +static constexpr ov::Property eos_token{"eos_token"}; + } // namespace genai } // namespace ov diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp index d16ec0dc8b..c8c6ea9231 100644 --- a/src/cpp/include/openvino/genai/llm_pipeline.hpp +++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp @@ -14,9 +14,10 @@ namespace ov { namespace genai { -using StreamerVariant = std::variant, std::shared_ptr>; +using StreamerVariant = std::variant, std::shared_ptr, std::monostate>; using OptionalGenerationConfig = std::optional; -using OptionalStreamerVariant = std::optional; +using EncodedInputs = std::variant, TokenizedInputs>; +using StringInputs = std::variant>; /** * @brief Structure to store resulting batched tokens and scores for each batch sequence @@ -43,6 +44,13 @@ class DecodedResults { // @brief Convert DecodedResults to a vector of strings. // @return A std::vector containing the texts from the DecodedResults object. + operator std::string() const { + OPENVINO_ASSERT(texts.size() == 1, "DecodedResults can be converted to string only if contains a single prompt"); + return texts.at(0); + } + + // @brief Convert DecodedResults to a single string. + // @return std::string containing the texts from the DecodedResults object. 
operator std::vector() const { return texts; } @@ -71,11 +79,27 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline { * @param model_path Path to the dir model xml/bin files, tokenizers and generation_configs.json * @param device optional device * @param plugin_config optional plugin_config - * @param ov_tokenizers_path optional path to an extension to add. Empty adds openvino_tokenizers from openvini_genai library folder. */ - LLMPipeline(const std::string& path, const std::string& device="CPU", - const ov::AnyMap& plugin_config={}, - const std::string& ov_tokenizers_path=""); + LLMPipeline( + const std::string& path, + const std::string& device="CPU", + const ov::AnyMap& plugin_config={} + ); + + /** + * @brief Constructs an LLMPipeline from already existing infer InferRequest and Tokenizer + * + * @param request infer request of the model + * @param tokenizer initialized Tokenizer + * @param generation_config optional generation_config, be default will be initialized for greedy decoding + * @param device optional device + * @param plugin_config optional plugin_config + */ + LLMPipeline( + const ov::InferRequest& request, + const ov::genai::Tokenizer& tokenizer, + OptionalGenerationConfig generation_config=std::nullopt + ); /** * @brief Constructs a LLMPipeline when ov::Tokenizer is initialized manually using file from the different dirs. @@ -95,76 +119,84 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline { ~LLMPipeline(); /** - * @brief High level generate for the input with a single prompt which encodes inputs and returns decoded output + * @brief High level generate that receives prompts as a string or a vector of strings and returns decoded output. * - * @param text input prompt + * @param inputs input prompt or a vector of prompts * @param generation_config optional GenerationConfig * @param streamer optional streamer - * @return std::string decoded resulting text + * @return DecodedResults decoded resulting text + */ + DecodedResults generate( + StringInputs inputs, + OptionalGenerationConfig generation_config=std::nullopt, + StreamerVariant streamer=std::monostate() + ); + + /** + * @brief High level generate that receives prompts as a string or a vector of strings and returns decoded output. + * properties can be in any order pipe.generate(..., ov::genai::max_new_tokens(100), ov::genai::streamer(lambda_func)). + * + * @param inputs input prompt or a vector of prompts + * @param properties properties + * @return DecodedResults decoded resulting text */ - std::string generate(std::string text, OptionalGenerationConfig generation_config=std::nullopt, OptionalStreamerVariant streamer=std::nullopt); - template - util::EnableIfAllStringAny generate( - std::string text, + util::EnableIfAllStringAny generate( + StringInputs inputs, Properties&&... properties) { - return generate(text, AnyMap{std::forward(properties)...}); + return generate(inputs, AnyMap{std::forward(properties)...}); + } + DecodedResults generate(StringInputs inputs, const ov::AnyMap& config_map); + + + DecodedResults operator()( + StringInputs inputs, + OptionalGenerationConfig generation_config=std::nullopt, + StreamerVariant streamer=std::monostate() + ) { + return generate(inputs, generation_config, streamer); } - std::string generate(std::string text, const ov::AnyMap& config); template - util::EnableIfAllStringAny generate( - ov::Tensor input_ids, - Properties&&... properties) { - return generate(input_ids, AnyMap{std::forward(properties)...}); + util::EnableIfAllStringAny operator()( + StringInputs inputs, + Properties&&... 
properties) { + return generate(inputs, AnyMap{std::forward(properties)...}); } - EncodedResults generate(ov::Tensor input_ids, const ov::AnyMap& config); /** - * @brief High level generate for batched prompts which encodes inputs and returns decoded outputs. + * @brief Low level generate to be called with already encoded input_ids tokens. * Streamer cannot be used for multibatch inputs. * - * @param text input prompt + * @param input_ids or pair of (input_ids, attentino_mask) encoded input prompt tokens * @param generation_config optional GenerationConfig - * @return DecodedResults a structure with resulting texts & scores + * @param streamer optional streamer + * @return EncodedResults a structure with resulting tokens and scores + * @throws Exception if the stremaer is set for inputs_ids with multiple batches */ - DecodedResults generate(const std::vector& texts, OptionalGenerationConfig generation_config); + EncodedResults generate( + const EncodedInputs& inputs, + OptionalGenerationConfig generation_config=std::nullopt, + StreamerVariant streamer=std::monostate() + ); /** * @brief Low level generate to be called with already encoded input_ids tokens. * Streamer cannot be used for multibatch inputs. * - * @param input_ids encoded input prompt tokens - * @param attention_mask optional attention_mask - * @param generation_config optional GenerationConfig - * @param streamer optional streamer + * @param input_ids or pair of (input_ids, attentino_mask) encoded input prompt tokens + * @param generation config params * @return EncodedResults a structure with resulting tokens and scores * @throws Exception if the stremaer is set for inputs_ids with multiple batches */ - EncodedResults generate(ov::Tensor input_ids, - std::optional attention_mask, - OptionalGenerationConfig generation_config=std::nullopt, - OptionalStreamerVariant streamer=std::nullopt); - - template - util::EnableIfAllStringAny operator()( - InputsType text, - Properties&&... properties) { - return generate(text, AnyMap{std::forward(properties)...}); - } - - DecodedResults operator()(const std::vector& text, OptionalGenerationConfig generation_config=std::nullopt) { - return generate(text, generation_config); - } - - std::string operator()( - std::string text, - OptionalGenerationConfig generation_config=std::nullopt, - OptionalStreamerVariant streamer=std::nullopt - ) { - return generate(text, generation_config, streamer); + template + util::EnableIfAllStringAny generate( + const EncodedInputs& inputs, + Properties&&... properties) { + return generate(inputs, AnyMap{std::forward(properties)...}); } - + EncodedResults generate(const EncodedInputs& inputs, const ov::AnyMap& config_map); + ov::genai::Tokenizer get_tokenizer(); GenerationConfig get_generation_config() const; void set_generation_config(const GenerationConfig& generation_config); @@ -178,40 +210,8 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline { std::unique_ptr m_pimpl; }; -/* - * utils that allow to use generate and operator() in the following way: - * pipe.generate(input_ids, ov::max_new_tokens(200), ov::temperature(1.0f),...) - * pipe(text, ov::max_new_tokens(200), ov::temperature(1.0f),...) 
-*/ -static constexpr ov::Property max_new_tokens{"max_new_tokens"}; -static constexpr ov::Property max_length{"max_length"}; -static constexpr ov::Property ignore_eos{"ignore_eos"}; - -static constexpr ov::Property num_beam_groups{"num_beam_groups"}; -static constexpr ov::Property num_beams{"num_beams"}; -static constexpr ov::Property diversity_penalty{"diversity_penalty"}; -static constexpr ov::Property length_penalty{"length_penalty"}; -static constexpr ov::Property num_return_sequences{"num_return_sequences"}; -static constexpr ov::Property no_repeat_ngram_size{"no_repeat_ngram_size"}; -static constexpr ov::Property stop_criteria{"stop_criteria"}; - -static constexpr ov::Property temperature{"temperature"}; -static constexpr ov::Property top_p{"top_p"}; -static constexpr ov::Property top_k{"top_k"}; -static constexpr ov::Property do_sample{"do_sample"}; -static constexpr ov::Property repetition_penalty{"repetition_penalty"}; - - -static constexpr ov::Property pad_token_id{"pad_token_id"}; -static constexpr ov::Property bos_token_id{"bos_token_id"}; -static constexpr ov::Property eos_token_id{"eos_token_id"}; - -static constexpr ov::Property bos_token{"bos_token"}; -static constexpr ov::Property eos_token{"eos_token"}; - -// only lambda streamer can be set via ov::streamer(),... syntaxic sugar, -// because std::variant> can not be stored in AnyMap -static constexpr ov::Property> streamer{"streamer"}; +std::pair streamer(StreamerVariant func); +std::pair generation_config(const GenerationConfig& config); } // namespace genai } // namespace ov diff --git a/src/cpp/include/openvino/genai/streamer_base.hpp b/src/cpp/include/openvino/genai/streamer_base.hpp index 7731b51c1c..ba6287c66a 100644 --- a/src/cpp/include/openvino/genai/streamer_base.hpp +++ b/src/cpp/include/openvino/genai/streamer_base.hpp @@ -15,10 +15,6 @@ namespace genai { */ class StreamerBase { public: - Tokenizer m_tokenizer; - explicit StreamerBase(Tokenizer tokenizer): m_tokenizer(tokenizer) {} - StreamerBase() = default; - /// @brief put is called every time new token is decoded virtual void put(int64_t token) = 0; @@ -26,5 +22,6 @@ class StreamerBase { virtual void end() = 0; }; + } // namespace genai } // namespace ov diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp index e0214fcfbb..5dcc1a2670 100644 --- a/src/cpp/include/openvino/genai/tokenizer.hpp +++ b/src/cpp/include/openvino/genai/tokenizer.hpp @@ -12,6 +12,11 @@ namespace ov { namespace genai { +struct TokenizedInputs { + ov::Tensor input_ids; + ov::Tensor attention_mask; +}; + /** * @brief class is used to encode prompts and decode resulting tokens */ @@ -22,22 +27,22 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { * @param tokenizer_path openvino_tokenizer.xml and openvino_detokenizer.xml should be located in the tokenizer_path * @param device device. Currently only 'CPU' is supported */ - Tokenizer(const std::string& tokenizers_path, const std::string& device="CPU", const std::string& ov_tokenizers_path=""); + Tokenizer(const std::string& tokenizers_path, const std::string& device="CPU"); /** * @brief encode a single prompt * @return pair of [input_ids, attention_mask] */ - std::pair encode(const std::string prompt); + TokenizedInputs encode(const std::string prompt); /** * @brief encode batch of prompts. 
Left padding will be applied by default * @param prompts vector storing batch of prompts * @return pair of [input_ids, attention_mask] */ - std::pair encode(std::vector& prompts); - std::pair encode(std::vector&& prompts); - std::pair encode(std::initializer_list& prompts); + TokenizedInputs encode(std::vector& prompts); + TokenizedInputs encode(std::vector&& prompts); + TokenizedInputs encode(std::initializer_list& prompts); /** * @brief decode sequence of tokens diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index e2e95262de..5569a759b0 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -55,32 +55,29 @@ GenerationConfig::GenerationConfig(std::string json_path) { } -GenerationConfig GenerationConfig::anymap_to_generation_config(const ov::AnyMap& config_map) { +void GenerationConfig::update_generation_config(const ov::AnyMap& config_map) { using ov::genai::utils::read_anymap_param; - GenerationConfig config; - read_anymap_param(config_map, "max_new_tokens", config.max_new_tokens); - read_anymap_param(config_map, "max_length", config.max_length); - read_anymap_param(config_map, "ignore_eos", config.ignore_eos); - read_anymap_param(config_map, "num_beam_groups", config.num_beam_groups); - read_anymap_param(config_map, "num_beams", config.num_beams); - read_anymap_param(config_map, "diversity_penalty", config.diversity_penalty); - read_anymap_param(config_map, "length_penalty", config.length_penalty); - read_anymap_param(config_map, "num_return_sequences", config.num_return_sequences); - read_anymap_param(config_map, "no_repeat_ngram_size", config.no_repeat_ngram_size); - read_anymap_param(config_map, "stop_criteria", config.stop_criteria); - read_anymap_param(config_map, "temperature", config.temperature); - read_anymap_param(config_map, "top_p", config.top_p); - read_anymap_param(config_map, "top_k", config.top_k); - read_anymap_param(config_map, "do_sample", config.do_sample); - read_anymap_param(config_map, "repetition_penalty", config.repetition_penalty); - read_anymap_param(config_map, "pad_token_id", config.pad_token_id); - read_anymap_param(config_map, "bos_token_id", config.bos_token_id); - read_anymap_param(config_map, "eos_token_id", config.eos_token_id); - read_anymap_param(config_map, "bos_token", config.bos_token); - read_anymap_param(config_map, "eos_token", config.eos_token); - - return config; + read_anymap_param(config_map, "max_new_tokens", max_new_tokens); + read_anymap_param(config_map, "max_length", max_length); + read_anymap_param(config_map, "ignore_eos", ignore_eos); + read_anymap_param(config_map, "num_beam_groups", num_beam_groups); + read_anymap_param(config_map, "num_beams", num_beams); + read_anymap_param(config_map, "diversity_penalty", diversity_penalty); + read_anymap_param(config_map, "length_penalty", length_penalty); + read_anymap_param(config_map, "num_return_sequences", num_return_sequences); + read_anymap_param(config_map, "no_repeat_ngram_size", no_repeat_ngram_size); + read_anymap_param(config_map, "stop_criteria", stop_criteria); + read_anymap_param(config_map, "temperature", temperature); + read_anymap_param(config_map, "top_p", top_p); + read_anymap_param(config_map, "top_k", top_k); + read_anymap_param(config_map, "do_sample", do_sample); + read_anymap_param(config_map, "repetition_penalty", repetition_penalty); + read_anymap_param(config_map, "pad_token_id", pad_token_id); + read_anymap_param(config_map, "bos_token_id", bos_token_id); + read_anymap_param(config_map, 
"eos_token_id", eos_token_id); + read_anymap_param(config_map, "bos_token", bos_token); + read_anymap_param(config_map, "eos_token", eos_token); } size_t GenerationConfig::get_max_new_tokens(size_t prompt_length) const { diff --git a/src/cpp/src/generation_config_helper.hpp b/src/cpp/src/generation_config_helper.hpp deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/cpp/src/greedy_decoding.cpp b/src/cpp/src/greedy_decoding.cpp index 51e8023b42..3cc5efd26e 100644 --- a/src/cpp/src/greedy_decoding.cpp +++ b/src/cpp/src/greedy_decoding.cpp @@ -92,7 +92,7 @@ EncodedResults greedy_decoding( for (size_t i = 0; i < max_tokens - 1; ++i) { utils::update_position_ids(m_model_runner.get_tensor("position_ids"), m_model_runner.get_tensor("attention_mask")); m_model_runner.set_tensor("attention_mask", utils::extend_attention(m_model_runner.get_tensor("attention_mask"))); - + // todo: consider replacing with start_async and run callback right after that m_model_runner.infer(); auto logits = m_model_runner.get_tensor("logits"); diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp index 3f4b9f3f89..3a824a7ec5 100644 --- a/src/cpp/src/llm_pipeline.cpp +++ b/src/cpp/src/llm_pipeline.cpp @@ -44,6 +44,9 @@ std::string get_absolute_file_path(const std::string& path) { namespace { +const std::string STREAMER_ARG_NAME = "streamer"; +const std::string CONFIG_ARG_NAME = "generation_config"; + ov::genai::GenerationConfig from_config_json_if_exists(const std::string& path) { constexpr char generation_config_fname[] = "generation_config.json"; constexpr char config_fname[] = "config.json"; @@ -83,16 +86,7 @@ std::string from_tokenizer_json_if_exists(const std::string& path) { return res; } -std::filesystem::path with_openvino_tokenizers(const std::filesystem::path& path) { -#ifdef _WIN32 - constexpr char tokenizers[] = "openvino_tokenizers.dll"; -#elif __linux__ - constexpr char tokenizers[] = "libopenvino_tokenizers.so"; -#elif __APPLE__ - constexpr char tokenizers[] = "libopenvino_tokenizers.dylib"; -#endif - return path.parent_path() / tokenizers; -} + std::string get_ov_genai_library_path() { #ifdef _WIN32 @@ -116,6 +110,27 @@ std::string get_ov_genai_library_path() { #endif // _WIN32 } +ov::genai::StreamerVariant get_streamer_from_map(const ov::AnyMap& config_map) { + ov::genai::StreamerVariant streamer = std::monostate(); + + if (config_map.count(STREAMER_ARG_NAME)) { + auto any_val = config_map.at(STREAMER_ARG_NAME); + if (any_val.is>()) { + streamer = any_val.as>(); + } else if (any_val.is>()) { + streamer = any_val.as>(); + } + } + return streamer; +} + +ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config_map) { + if (config_map.count(CONFIG_ARG_NAME)) + return config_map.at(CONFIG_ARG_NAME).as(); + else + return std::nullopt; +} + } namespace ov { @@ -149,6 +164,15 @@ class LLMPipeline::LLMPipelineImpl { std::string m_chat_template = ""; bool is_chat_conversation = false; + LLMPipelineImpl( + const ov::InferRequest& request, + const ov::genai::Tokenizer& tokenizer, + OptionalGenerationConfig generation_config=std::nullopt + ): m_model_runner(request), m_tokenizer(tokenizer) { + GenerationConfig default_config; + m_generation_config = (generation_config.has_value()) ? 
*generation_config : default_config; + } + LLMPipelineImpl( const std::string& model_path, const ov::genai::Tokenizer& tokenizer, @@ -159,24 +183,212 @@ class LLMPipeline::LLMPipelineImpl { LLMPipelineImpl( const std::string& path, const std::string& device, - const ov::AnyMap& config, - const std::string& ov_tokenizers_path="" + const ov::AnyMap& config ); - GenerationConfig generation_config() const; + DecodedResults generate( + StringInputs inputs, + OptionalGenerationConfig generation_config, + StreamerVariant streamer + ) { + GenerationConfig config = (generation_config.has_value()) ? *generation_config : m_generation_config; + + + EncodedInputs encoded_input; + if (auto input_vector = std::get_if>(&inputs)) { + encoded_input = m_tokenizer.encode(*input_vector); + } else if (auto input_str = std::get_if(&inputs)) { + + std::string text = *input_str; + // todo: make for batched inputs as well + if (is_chat_conversation) + text = apply_chat_template(text); + + + // previous prompt generation in chat dialog stops with the end of sentence token, + // need to append this token to the current prompt + auto kv_cache_len = m_model_runner.query_state()[0].get_state().get_shape()[2]; + if (is_chat_conversation && kv_cache_len > 0) + text = config.eos_token + text; + + auto res = m_tokenizer.encode(text); + auto input_ids = res.input_ids; + auto attention_mask = res.attention_mask; + + + // todo: W/A If sentence begins with a specfial tokens (, , etc.) openvino_tokenizer inserts 2 special extra tokens and "▁", + // but HF does not do that. Moreover openvino_tokenizer always inserts but in chat scenario HF does not do that because skip_special_tokens=True. + // Need to remove both of that tokens manually to get exact token by token alignment with HF + auto size = input_ids.get_shape(); + int64_t* inputs_data = input_ids.data(); + std::vector tmp_ids(inputs_data, inputs_data + input_ids.get_size()); // todo: works only for batch 1 + // tmp_ids.erase(tmp_ids.begin()); + + auto attention_mask_data = attention_mask.data(); + std::vector tmp_attn_mask(attention_mask_data, attention_mask_data + attention_mask.get_size()); + // tmp_attn_mask.erase(tmp_attn_mask.begin()); + + std::vector prefixes_to_exclude = {config.eos_token, config.bos_token}; + auto prefix_match = [&text](std::string prefix) { return text.substr(0, prefix.length()) == prefix; }; + if (std::any_of(prefixes_to_exclude.begin(), prefixes_to_exclude.end(), prefix_match)) { + tmp_ids.erase(tmp_ids.begin()); + tmp_attn_mask.erase(tmp_attn_mask.begin()); + } + + input_ids = ov::Tensor(input_ids.get_element_type(), {1, tmp_ids.size()}); + for (size_t i = 0; i < tmp_ids.size(); i++) + input_ids.data()[i] = tmp_ids.data()[i]; + attention_mask = ov::Tensor(attention_mask.get_element_type(), {1, tmp_attn_mask.size()}); + for (size_t i = 0; i < tmp_attn_mask.size(); i++) + attention_mask.data()[i] = tmp_attn_mask.data()[i]; + + encoded_input = TokenizedInputs{input_ids, attention_mask}; + } + auto encoded_results = generate(encoded_input, config, streamer); + return {m_tokenizer.decode(encoded_results.tokens), encoded_results.scores}; + } - std::string generate(std::string text, OptionalGenerationConfig generation_config, OptionalStreamerVariant streamer); - DecodedResults generate(std::vector texts, OptionalGenerationConfig generation_config); - EncodedResults generate(ov::Tensor input_ids, std::optional attention_mask, OptionalGenerationConfig generation_config, OptionalStreamerVariant streamer); + EncodedResults generate( + const EncodedInputs& 
inputs, + OptionalGenerationConfig generation_config, + StreamerVariant streamer + ) { + ov::Tensor input_ids; + ov::Tensor attention_mask; + + // input_ids + // attention_mask + if (auto data = std::get_if(&inputs)) { + input_ids = *data; + } else if (auto data = std::get_if>(&inputs)) { + input_ids = data->first; + attention_mask = data->second; + } else if (auto data = std::get_if(&inputs)) { + input_ids = data->input_ids; + attention_mask = data->attention_mask; + } + + ov::genai::EncodedResults result; + GenerationConfig config = (generation_config.has_value()) ? *generation_config : m_generation_config; + + std::shared_ptr streamer_ptr; + + if (auto streamer_obj = std::get_if(&streamer)) { + streamer_ptr = nullptr; + } else if (auto streamer_obj = std::get_if>(&streamer)) { + streamer_ptr = *streamer_obj; + } else if (auto callback = std::get_if>(&streamer)) { + streamer_ptr = std::make_shared(m_tokenizer, *callback); + } + + auto batch_size = input_ids.get_shape().at(0); + if ((batch_size != 1 || !(config.is_greedy_decoding() || config.is_multinomial())) && streamer_ptr) { + OPENVINO_THROW("Currently streaming is possible only with batch size=1 and greedy or multinomial decoding"); + } + + // auto attention_mask_data = attention_mask.has_value() ? *attention_mask : ov::genai::utils::init_attention_mask(input_ids); + + if (config.is_greedy_decoding()) { + result = ov::genai::greedy_decoding(m_model_runner, input_ids, attention_mask, config, streamer_ptr, is_chat_conversation); + } else if (config.is_beam_search()) { + result = beam_search(m_model_runner, input_ids, attention_mask, config); + } else if (config.is_multinomial()) { + result = multinominal_decoding(m_model_runner, input_ids, attention_mask, config, streamer_ptr); + } else { + OPENVINO_THROW("No decoding algorithm found for provided configuration parameters."); + } + + if (!is_chat_conversation) + m_model_runner.reset_state(); + + return result; + } - std::string apply_chat_template(std::string prompt, std::string role = "user") const; + std::string apply_chat_template(std::string prompt, std::string role = "user") const { + jinja2::TemplateEnv env; + env.GetSettings().lstripBlocks = true; + env.GetSettings().trimBlocks = true; + jinja2::Template tpl(&env); + tpl.Load(m_chat_template); + + jinja2::ValuesMap message {{"role", role}, {"content", prompt}}; + jinja2::ValuesMap params = { + {"messages", jinja2::ValuesList({message})}, + {"bos_token", m_generation_config.bos_token}, + {"eos_token", m_generation_config.eos_token}, + {"add_generation_prompt", true}, + }; + + return tpl.RenderAsString(params).value(); + } + + std::vector apply_chat_template(std::vector& prompts, std::string role = "user") const { + std::vector res; + for (const auto& prompt: prompts) { + res.emplace_back(apply_chat_template(prompt)); + } + return res; + } }; +DecodedResults LLMPipeline::generate( + StringInputs inputs, + OptionalGenerationConfig generation_config, + StreamerVariant streamer +) { + return m_pimpl->generate(inputs, generation_config, streamer); +} + +DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config_map) { + auto config_arg = get_config_from_map(config_map); + GenerationConfig config = (config_arg.has_value()) ? 
*config_arg : get_generation_config(); + config.update_generation_config(config_map); + + return m_pimpl->generate(text, config, get_streamer_from_map(config_map)); +} + +EncodedResults LLMPipeline::generate( + const EncodedInputs& inputs, + OptionalGenerationConfig generation_config, + StreamerVariant streamer +) { + return m_pimpl->generate(inputs, generation_config, streamer); +} + +EncodedResults LLMPipeline::generate(const EncodedInputs& inputs, const ov::AnyMap& config_map) { + auto config_arg = get_config_from_map(config_map); + GenerationConfig config = (config_arg.has_value()) ? *config_arg : get_generation_config(); + config.update_generation_config(config_map); + + return m_pimpl->generate(inputs, config, get_streamer_from_map(config_map)); +} + +std::pair streamer(StreamerVariant func) { + if (auto streamer_obj = std::get_if>(&func)) { + return {STREAMER_ARG_NAME, Any::make>(*streamer_obj)}; + } else { + auto callback = std::get>(func); + return {STREAMER_ARG_NAME, Any::make>(callback)}; + } +} + +std::pair generation_config(const GenerationConfig& config) { + return {CONFIG_ARG_NAME, Any::make(config)}; +} + } // namespace genai } // namespace ov using namespace std; +ov::genai::LLMPipeline::LLMPipeline( + const ov::InferRequest& request, + const ov::genai::Tokenizer& tokenizer, + OptionalGenerationConfig generation_config +) { + m_pimpl = std::make_unique(request, tokenizer, generation_config); +} + ov::genai::LLMPipeline::LLMPipeline( const std::string& model_path, @@ -208,167 +420,27 @@ ov::genai::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl( ov::genai::LLMPipeline::LLMPipeline( const std::string& path, const std::string& device, - const ov::AnyMap& config, - const std::string& ov_tokenizers_path + const ov::AnyMap& config ) { - m_pimpl = make_unique(path, device, config, ov_tokenizers_path); + m_pimpl = make_unique(path, device, config); } ov::genai::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl( const std::string& path, const std::string& device, - const ov::AnyMap& config, - const std::string& ov_tokenizers_path + const ov::AnyMap& config ): m_model_runner{ov::Core{}.compile_model(path + "/openvino_model.xml", device, config).create_infer_request()}, - m_tokenizer{ - ov_tokenizers_path.empty() - ? Tokenizer(path, device, with_openvino_tokenizers(get_ov_genai_library_path()).string()) - : Tokenizer(path, device, ov_tokenizers_path) - }, m_generation_config{from_config_json_if_exists(path)}, m_chat_template{from_tokenizer_json_if_exists(path)} - {} + { + ov::genai::utils::GenAIEnvManager env_manager(get_ov_genai_library_path()); + m_tokenizer = Tokenizer(path, device); + } -ov::genai::GenerationConfig ov::genai::LLMPipeline::LLMPipelineImpl::generation_config() const { - return m_generation_config; -} ov::genai::GenerationConfig ov::genai::LLMPipeline::get_generation_config() const { - return m_pimpl->generation_config(); -} - -std::string ov::genai::LLMPipeline::LLMPipelineImpl::generate( - std::string text, - OptionalGenerationConfig generation_config, - OptionalStreamerVariant streamer -) { - GenerationConfig config = (generation_config.has_value()) ? 
*generation_config : m_generation_config; - - if (is_chat_conversation) { - text = apply_chat_template(text); - } - auto kv_cache_len = m_model_runner.query_state()[0].get_state().get_shape()[2]; - - // previous prompt generation in chat dialog stops with the end of sentence token, - // need to append this token to the current prompt - if (is_chat_conversation && kv_cache_len > 0) { - text = config.eos_token + text; - } - - auto [input_ids, attention_mask] = m_tokenizer.encode(text); - - // todo: W/A If sentence begins with a specfial tokens (, , etc.) openvino_tokenizer inserts 2 special extra tokens and "▁", - // but HF does not do that. Moreover openvino_tokenizer always inserts but in chat scenario HF does not do that because skip_special_tokens=True. - // Need to remove both of that tokens manually to get exact token by token alignment with HF - auto size = input_ids.get_shape(); - int64_t* inputs_data = input_ids.data(); - std::vector tmp_ids(inputs_data, inputs_data + input_ids.get_size()); // todo: works only for batch 1 - // tmp_ids.erase(tmp_ids.begin()); - - auto attention_mask_data = attention_mask.data(); - std::vector tmp_attn_mask(attention_mask_data, attention_mask_data + attention_mask.get_size()); - // tmp_attn_mask.erase(tmp_attn_mask.begin()); - - std::vector prefixes_to_exclude = {config.eos_token, config.bos_token}; - auto prefix_match = [&text](std::string prefix) { return text.substr(0, prefix.length()) == prefix; }; - if (std::any_of(prefixes_to_exclude.begin(), prefixes_to_exclude.end(), prefix_match)) { - tmp_ids.erase(tmp_ids.begin()); - tmp_attn_mask.erase(tmp_attn_mask.begin()); - } - - input_ids = ov::Tensor(input_ids.get_element_type(), {1, tmp_ids.size()}); - for (size_t i = 0; i < tmp_ids.size(); i++) - input_ids.data()[i] = tmp_ids.data()[i]; - attention_mask = ov::Tensor(attention_mask.get_element_type(), {1, tmp_attn_mask.size()}); - for (size_t i = 0; i < tmp_attn_mask.size(); i++) - attention_mask.data()[i] = tmp_attn_mask.data()[i]; - - auto generate_results = generate(input_ids, attention_mask, config, streamer); - return m_tokenizer.decode(generate_results.tokens)[0]; -} - -ov::genai::DecodedResults ov::genai::LLMPipeline::generate(const std::vector& texts, OptionalGenerationConfig generation_config) { - return m_pimpl->generate(texts, generation_config); -} - -ov::genai::DecodedResults ov::genai::LLMPipeline::LLMPipelineImpl::generate(std::vector texts, OptionalGenerationConfig generation_config) { - auto [input_ids, attention_mask] = m_tokenizer.encode(texts); - - auto generate_results = generate(input_ids, attention_mask, generation_config, {}); - - return {m_tokenizer.decode(generate_results.tokens), generate_results.scores}; -} - -ov::genai::EncodedResults ov::genai::LLMPipeline::LLMPipeline::generate(ov::Tensor input_ids, - std::optional attention_mask, - OptionalGenerationConfig generation_config, - OptionalStreamerVariant streamer) { - return m_pimpl->generate(input_ids, attention_mask, generation_config, streamer); -} - -ov::genai::EncodedResults ov::genai::LLMPipeline::LLMPipelineImpl::generate( - ov::Tensor input_ids, - std::optional attention_mask, OptionalGenerationConfig generation_config, - OptionalStreamerVariant streamer -) { - ov::genai::EncodedResults result; - GenerationConfig config = (generation_config.has_value()) ? 
*generation_config : m_generation_config; - - std::shared_ptr streamer_ptr; - if (!streamer.has_value()){ - streamer_ptr = nullptr; - } else if (auto streamer_obj = std::get_if>(&*streamer)) { - streamer_ptr = *streamer_obj; - } else if (auto callback = std::get_if>(&*streamer)) { - streamer_ptr = std::make_shared(m_tokenizer, *callback); - } - auto batch_size = input_ids.get_shape().at(0); - if ((batch_size != 1 || !(config.is_greedy_decoding() || config.is_multinomial())) && streamer_ptr) { - OPENVINO_THROW("Currently streaming is possible only with batch size=1 and greedy or multinomial decoding"); - } - - auto attention_mask_data = attention_mask.has_value() ? *attention_mask : ov::genai::utils::init_attention_mask(input_ids); - - if (config.is_greedy_decoding()) { - result = ov::genai::greedy_decoding(m_model_runner, input_ids, attention_mask_data, config, streamer_ptr, is_chat_conversation); - } else if (config.is_beam_search()) { - result = beam_search(m_model_runner, input_ids, attention_mask_data, config); - } else if (config.is_multinomial()) { - result = multinominal_decoding(m_model_runner, input_ids, attention_mask_data, config, streamer_ptr); - } else { - OPENVINO_THROW("No decoding algorithm found for provided configuration parameters."); - } - - if (!is_chat_conversation) - m_model_runner.reset_state(); - - return result; -} - -std::string ov::genai::LLMPipeline::generate(std::string text, OptionalGenerationConfig generation_config, OptionalStreamerVariant streamer) { - return m_pimpl->generate(text, generation_config, streamer); -} - -std::string ov::genai::LLMPipeline::generate(std::string text, const ov::AnyMap& config_map) { - OptionalStreamerVariant streamer; - auto config = GenerationConfig::anymap_to_generation_config(config_map); - if (config_map.count("streamer")) { - streamer = config_map.at("streamer").as>(); - } - - return m_pimpl->generate(text, config, streamer); -} - -ov::genai::EncodedResults ov::genai::LLMPipeline::generate(ov::Tensor input_ids, const ov::AnyMap& config_map) { - OptionalStreamerVariant streamer; - auto config = GenerationConfig::anymap_to_generation_config(config_map); - if (config_map.count("streamer")) { - streamer = config_map.at("streamer").as>(); - } - - std::optional attention_mask; - return m_pimpl->generate(input_ids, attention_mask, config, streamer); + return m_pimpl->m_generation_config; } ov::genai::Tokenizer ov::genai::LLMPipeline::get_tokenizer() { @@ -379,23 +451,6 @@ std::string ov::genai::LLMPipeline::apply_chat_template(std::string prompt, std: return m_pimpl->apply_chat_template(prompt, role); } -std::string ov::genai::LLMPipeline::LLMPipelineImpl::apply_chat_template(std::string prompt, std::string role) const { - jinja2::TemplateEnv env; - env.GetSettings().lstripBlocks = true; - env.GetSettings().trimBlocks = true; - jinja2::Template tpl(&env); - tpl.Load(m_chat_template); - - jinja2::ValuesMap message {{"role", role}, {"content", prompt}}; - jinja2::ValuesMap params = { - {"messages", jinja2::ValuesList({message})}, - {"bos_token", m_generation_config.bos_token}, - {"eos_token", m_generation_config.eos_token}, - {"add_generation_prompt", true}, - }; - - return tpl.RenderAsString(params).value(); -} void ov::genai::LLMPipeline::start_chat() { m_pimpl->is_chat_conversation = true; diff --git a/src/cpp/src/multinomial_decoding.cpp b/src/cpp/src/multinomial_decoding.cpp index 3dcdcdeb72..33b7e5e378 100644 --- a/src/cpp/src/multinomial_decoding.cpp +++ b/src/cpp/src/multinomial_decoding.cpp @@ -9,7 +9,6 @@ #include 
#include -#include "generation_config_helper.hpp" #include "openvino/genai/llm_pipeline.hpp" #include "utils.hpp" diff --git a/src/cpp/src/text_callback_streamer.cpp b/src/cpp/src/text_callback_streamer.cpp index bb2bec09d9..39ef3bbcfa 100644 --- a/src/cpp/src/text_callback_streamer.cpp +++ b/src/cpp/src/text_callback_streamer.cpp @@ -1,3 +1,6 @@ +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + #include "text_callback_streamer.hpp" namespace ov { @@ -10,11 +13,6 @@ TextCallbackStreamer::TextCallbackStreamer(const Tokenizer& tokenizer, std::func m_enabled = true; } -TextCallbackStreamer::TextCallbackStreamer(const Tokenizer& tokenizer, bool print_eos_token) { - m_tokenizer = tokenizer; - m_print_eos_token = print_eos_token; -} - void TextCallbackStreamer::put(int64_t token) { std::stringstream res; // do nothing if token is met and if print_eos_token=false @@ -51,19 +49,19 @@ void TextCallbackStreamer::end() { on_finalized_text(res.str()); } -void TextCallbackStreamer::set_tokenizer(Tokenizer tokenizer) { - this->m_tokenizer = tokenizer; -} +// void TextCallbackStreamer::set_tokenizer(Tokenizer tokenizer) { +// this->m_tokenizer = tokenizer; +// } -void TextCallbackStreamer::set_callback(std::function callback) { - on_decoded_text_callback = callback; - m_enabled = true; -} +// void TextCallbackStreamer::set_callback(std::function callback) { +// on_decoded_text_callback = callback; +// m_enabled = true; +// } -void TextCallbackStreamer::set_callback() { - on_decoded_text_callback = [](std::string words){}; - m_enabled = false; -} +// void TextCallbackStreamer::set_callback() { +// on_decoded_text_callback = [](std::string words){}; +// m_enabled = false; +// } void TextCallbackStreamer::on_finalized_text(const std::string& subword) { if (m_enabled) { diff --git a/src/cpp/src/text_callback_streamer.hpp b/src/cpp/src/text_callback_streamer.hpp index 3834dd01ba..766f80cf9c 100644 --- a/src/cpp/src/text_callback_streamer.hpp +++ b/src/cpp/src/text_callback_streamer.hpp @@ -1,5 +1,6 @@ // Copyright (C) 2023-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 + #pragma once #include "openvino/genai/streamer_base.hpp" @@ -13,14 +14,12 @@ class TextCallbackStreamer: public StreamerBase { void put(int64_t token) override; void end() override; - TextCallbackStreamer(const Tokenizer& tokenizer, std::function callback, bool print_eos_token = false); - TextCallbackStreamer(const Tokenizer& tokenizer, bool print_eos_token = false); - TextCallbackStreamer() = default; - ~TextCallbackStreamer() = default; + TextCallbackStreamer(const Tokenizer& tokenizer, std::function callback, bool print_eos_token = false); + // ~TextCallbackStreamer() = default; - void set_tokenizer(Tokenizer tokenizer); - void set_callback(std::function callback); - void set_callback(); + // void set_tokenizer(Tokenizer tokenizer); + // void set_callback(std::function callback); + // void set_callback(); std::function on_decoded_text_callback = [](std::string words){}; bool m_enabled = false; diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp index 11ca3d3538..05e0c0d5db 100644 --- a/src/cpp/src/tokenizer.cpp +++ b/src/cpp/src/tokenizer.cpp @@ -4,11 +4,12 @@ #include #include "openvino/genai/tokenizer.hpp" #include "utils.hpp" +#include namespace { // todo: remove when openvino-tokenizers will support left padding -std::pair pad_left(ov::Tensor&& input_ids, ov::Tensor&& attention_mask, int64_t pad_token) { +ov::genai::TokenizedInputs pad_left(ov::Tensor&& input_ids, 
ov::Tensor&& attention_mask, int64_t pad_token) { const size_t batch_size = input_ids.get_shape()[0]; const size_t sequence_length = input_ids.get_shape()[1]; int64_t* inputs_data = input_ids.data(); @@ -39,6 +40,17 @@ std::pair pad_left(ov::Tensor&& input_ids, ov::Tensor&& return {input_ids, attention_mask}; } +std::filesystem::path with_openvino_tokenizers(const std::filesystem::path& path) { +#ifdef _WIN32 + constexpr char tokenizers[] = "openvino_tokenizers.dll"; +#elif __linux__ + constexpr char tokenizers[] = "libopenvino_tokenizers.so"; +#elif __APPLE__ + constexpr char tokenizers[] = "libopenvino_tokenizers.dylib"; +#endif + return path.parent_path() / tokenizers; +} + } namespace ov { @@ -53,13 +65,19 @@ class Tokenizer::TokenizerImpl { int64_t m_eos_token_id = 2; TokenizerImpl() = default; - TokenizerImpl(std::string tokenizers_path, const std::string device, const std::string& ov_tokenizers_path) { + TokenizerImpl(std::string tokenizers_path, const std::string device) { ov::Core core; if (ov::genai::utils::is_xml(tokenizers_path)) OPENVINO_THROW("tokenizers_path should be a path to a dir not a xml file"); - - core.add_extension(ov_tokenizers_path); + + const char* ov_tokenizers_path = getenv(ov::genai::utils::get_tokenizers_env_name()); + if (ov_tokenizers_path) { + core.add_extension(with_openvino_tokenizers(ov_tokenizers_path)); + } else { + OPENVINO_THROW("openvino_tokenizers path is not set"); + } + std::shared_ptr tokenizer_model, detokenizer_model; try { tokenizer_model = core.read_model(tokenizers_path + "/openvino_tokenizer.xml"); @@ -80,14 +98,14 @@ class Tokenizer::TokenizerImpl { m_pad_token_id = rt_info["pad_token_id"].as(); } - std::pair encode(std::string prompt) { + TokenizedInputs encode(std::string prompt) { size_t batch_size = 1; m_tokenize_request.set_input_tensor(ov::Tensor{ov::element::string, {batch_size}, &prompt}); m_tokenize_request.infer(); return {m_tokenize_request.get_tensor("input_ids"), m_tokenize_request.get_tensor("attention_mask")}; } - std::pair encode(std::vector& prompts) { + TokenizedInputs encode(std::vector& prompts) { m_tokenize_request.set_input_tensor(ov::Tensor{ov::element::string, {prompts.size()}, prompts.data()}); auto size_ = m_tokenize_request.get_input_tensor().get_shape(); m_tokenize_request.infer(); @@ -139,23 +157,23 @@ class Tokenizer::TokenizerImpl { } }; -Tokenizer::Tokenizer(const std::string& tokenizers_path, const std::string& device, const std::string& ov_tokenizers_path) { - m_pimpl = std::make_shared(tokenizers_path, device, ov_tokenizers_path); +Tokenizer::Tokenizer(const std::string& tokenizers_path, const std::string& device) { + m_pimpl = std::make_shared(tokenizers_path, device); } -std::pair Tokenizer::encode(const std::string prompt) { +TokenizedInputs Tokenizer::encode(const std::string prompt) { return m_pimpl->encode(std::move(prompt)); } -std::pair Tokenizer::encode(std::vector& prompts) { +TokenizedInputs Tokenizer::encode(std::vector& prompts) { return m_pimpl->encode(prompts); } -std::pair Tokenizer::encode(std::vector&& prompts) { +TokenizedInputs Tokenizer::encode(std::vector&& prompts) { return m_pimpl->encode(prompts); } -std::pair Tokenizer::encode(std::initializer_list& text) { +TokenizedInputs Tokenizer::encode(std::initializer_list& text) { return encode(std::vector(text.begin(), text.end())); } diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index 8111dc5c94..497c4ac100 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -146,6 +146,36 @@ ov::Tensor 
extend_attention(ov::Tensor attention_mask) { return new_atten_mask; } +GenAIEnvManager::GenAIEnvManager(const std::string& path) { + #ifdef _WIN32 + char* value = nullptr; + size_t len = 0; + _dupenv_s(&value, &len, ov::genai::utils::get_tokenizers_env_name().c_str()); + if (value == nullptr) + _putenv_s(ov::genai::utils::get_tokenizers_env_name().c_str(), path.c_str()); + #else + if (!getenv(ov::genai::utils::get_tokenizers_env_name())) + setenv(ov::genai::utils::get_tokenizers_env_name(), path.c_str(), 1); + #endif + else + was_already_set = true; +} + +GenAIEnvManager::~GenAIEnvManager() { + if (!was_already_set){ + #ifdef _WIN32 + _putenv_s(ov::genai::utils::get_tokenizers_env_name()); + #else + unsetenv(ov::genai::utils::get_tokenizers_env_name()); + #endif + } +} + +const char* get_tokenizers_env_name() { + return "OPENVINO_TOKENIZERS_PATH_GENAI"; +} + + } // namespace utils } // namespace genai } // namespace ov diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 4559a8962f..292bb43505 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -59,6 +59,18 @@ void read_anymap_param(const ov::AnyMap& config_map, const std::string& name, T& } } +const char* get_tokenizers_env_name(); + +// const char* OV_TOKENIZERS_ENV_NAME = "OPENVINO_TOKENIZERS_PATH_GENAI"; + +class GenAIEnvManager { +public: + GenAIEnvManager(const std::string& path); + ~GenAIEnvManager(); +private: + bool was_already_set; +}; + } // namespace utils } // namespace genai } // namespace ov diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index f235d00fb4..178cc441a0 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -15,7 +15,7 @@ if(NOT pybind11_POPULATED) endif() pybind11_add_module(py_generate_pipeline py_generate_pipeline.cpp) -target_link_libraries(py_generate_pipeline PRIVATE openvino::genai) +target_link_libraries(py_generate_pipeline PRIVATE openvino::genai nlohmann_json::nlohmann_json) set_target_properties(py_generate_pipeline PROPERTIES LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>" ) @@ -49,3 +49,6 @@ install(TARGETS genai py_generate_pipeline LIBRARY DESTINATION . COMPONENT wheel_genai RUNTIME DESTINATION . 
COMPONENT wheel_genai EXCLUDE_FROM_ALL) + +# to be able to use utils.hpp in pybind +include_directories(${CMAKE_SOURCE_DIR}/src/cpp/src/) diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index e069157fa7..f23e447d5f 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -9,6 +9,6 @@ if hasattr(os, "add_dll_directory"): os.add_dll_directory(os.path.dirname(__file__)) -from .py_generate_pipeline import LLMPipeline, Tokenizer, GenerationConfig, DecodedResults, EncodedResults +from .py_generate_pipeline import LLMPipeline, Tokenizer, GenerationConfig, DecodedResults, EncodedResults, StreamerBase -__all__ = ['LLMPipeline', 'Tokenizer', 'GenerationConfig', 'DecodedResults', 'EncodedResults'] +__all__ = ['LLMPipeline', 'Tokenizer', 'GenerationConfig', 'DecodedResults', 'EncodedResults', 'StreamerBase'] diff --git a/src/python/py_generate_pipeline.cpp b/src/python/py_generate_pipeline.cpp index d1f8c5b3c2..3a93ddbea8 100644 --- a/src/python/py_generate_pipeline.cpp +++ b/src/python/py_generate_pipeline.cpp @@ -6,6 +6,7 @@ #include #include #include "openvino/genai/llm_pipeline.hpp" +#include "utils.hpp" #ifdef _WIN32 # include @@ -42,6 +43,7 @@ using ov::genai::EncodedResults; using ov::genai::DecodedResults; using ov::genai::StopCriteria; using ov::genai::StreamerBase; +using ov::genai::StreamerVariant; namespace { void str_to_stop_criteria(GenerationConfig& config, const std::string& stop_criteria_str){ @@ -84,17 +86,29 @@ void update_config_from_kwargs(GenerationConfig& config, const py::kwargs& kwarg if (kwargs.contains("bos_token")) config.bos_token = kwargs["bos_token"].cast(); } -// operator() and generate methods are identical, operator() is just an alias for generate -std::string call_with_kwargs(LLMPipeline& pipeline, const std::string& text, const py::kwargs& kwargs) { - // Create a new GenerationConfig instance and initialize from kwargs +py::object call_with_config(LLMPipeline& pipe, const std::string& text, const GenerationConfig& config, const StreamerVariant& streamer) { + if (config.num_return_sequences > 1) { + return py::cast(pipe.generate({text}, config, streamer).texts); + } else { + return py::cast(std::string(pipe.generate(text, config, streamer))); + } +} + +std::vector call_with_config(LLMPipeline& pipe, const std::vector& text, const GenerationConfig& config, const StreamerVariant& streamer) { + return pipe.generate(text, config, streamer); +} + +std::vector call_with_kwargs(LLMPipeline& pipeline, const std::vector& texts, const py::kwargs& kwargs) { GenerationConfig config = pipeline.get_generation_config(); update_config_from_kwargs(config, kwargs); - return pipeline(text, config); + return call_with_config(pipeline, texts, config, kwargs.contains("streamer") ? kwargs["streamer"].cast() : std::monostate()); } -std::string call_with_config(LLMPipeline& pipe, const std::string& text, const GenerationConfig& config) { - std::shared_ptr streamer; - return pipe(text, config); +py::object call_with_kwargs(LLMPipeline& pipeline, const std::string& text, const py::kwargs& kwargs) { + // Create a new GenerationConfig instance and initialize from kwargs + GenerationConfig config = pipeline.get_generation_config(); + update_config_from_kwargs(config, kwargs); + return call_with_config(pipeline, text, config, kwargs.contains("streamer") ? 
kwargs["streamer"].cast() : std::monostate()); } std::filesystem::path with_openvino_tokenizers(const std::filesystem::path& path) { @@ -138,6 +152,20 @@ std::string ov_tokenizers_module_path() { } return py::str(py::module_::import("openvino_tokenizers").attr("_ext_path")); } +class EmptyStreamer: public StreamerBase { + // It's impossible to create an instance of pure virtual class. Define EmptyStreamer instead. + void put(int64_t token) override { + PYBIND11_OVERRIDE_PURE( + void, // Return type + StreamerBase, // Parent class + put, // Name of function in C++ (must match Python name) + token // Argument(s) + ); + } + void end() override { + PYBIND11_OVERRIDE_PURE(void, StreamerBase, end); + } +}; } PYBIND11_MODULE(py_generate_pipeline, m) { @@ -147,21 +175,28 @@ PYBIND11_MODULE(py_generate_pipeline, m) { .def(py::init(), py::arg("model_path"), py::arg("tokenizer"), py::arg("device") = "CPU", py::arg("plugin_config") = ov::AnyMap{}) - .def(py::init(), - py::arg("path"), py::arg("device") = "CPU", py::arg("plugin_config") = ov::AnyMap{}, py::arg("ov_tokenizers_path") = ov_tokenizers_module_path()) + .def(py::init([](const std::string& model_path, + const std::string& device, + const ov::AnyMap& plugin_config) { + ov::genai::utils::GenAIEnvManager env_manager(ov_tokenizers_module_path()); + return std::make_unique(model_path, device, plugin_config);}), + py::arg("model_path"), "path to the model path", + py::arg("device") = "CPU", "device on which inference will be done", + py::arg("plugin_config") = ov::AnyMap(), + "LLMPipeline class constructor.\n" + " model_path (str): Path to the model file.\n" + " device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.\n" + " plugin_config (ov::AnyMap): Plugin configuration settings. Default is an empty.") + .def("__call__", py::overload_cast(&call_with_kwargs)) - .def("__call__", py::overload_cast(&call_with_config)) + .def("__call__", py::overload_cast(&call_with_config)) + + .def("generate", py::overload_cast&, const py::kwargs&>(&call_with_kwargs)) + .def("generate", py::overload_cast&, const GenerationConfig&, const StreamerVariant&>(&call_with_config)) .def("generate", py::overload_cast(&call_with_kwargs)) - .def("generate", py::overload_cast(&call_with_config)) + .def("generate", py::overload_cast(&call_with_config)) // todo: if input_ids is a ov::Tensor/numpy tensor - // todo: implement calling generate/operator() with StreamerBase or lambda streamer - // signature to be implemented: - // EncodedResults generate(ov::Tensor input_ids, - // std::optional attention_mask, - // OptionalGenerationConfig generation_config=nullopt, - // OptionalStreamerVariant streamer=nullopt); - .def("get_tokenizer", &LLMPipeline::get_tokenizer) .def("start_chat", &LLMPipeline::start_chat) @@ -174,10 +209,9 @@ PYBIND11_MODULE(py_generate_pipeline, m) { // Binding for Tokenizer py::class_(m, "Tokenizer") .def(py::init<>()) - .def(py::init(), + .def(py::init(), py::arg("tokenizers_path"), - py::arg("device") = "CPU", - py::arg("ov_tokenizers_path") = py::str(ov_tokenizers_module_path())) + py::arg("device") = "CPU") // todo: implement encode/decode when for numpy inputs and outputs .def("encode", py::overload_cast(&Tokenizer::encode), "Encode a single prompt") @@ -222,4 +256,8 @@ PYBIND11_MODULE(py_generate_pipeline, m) { .def_readwrite("tokens", &EncodedResults::tokens) .def_readwrite("scores", &EncodedResults::scores); + py::class_>(m, "StreamerBase") // Change the holder form unique_ptr to shared_ptr + .def(py::init<>()) + .def("put", 
&StreamerBase::put)
+        .def("end", &StreamerBase::end);
 }
diff --git a/tests/python_tests/generate_api_check.py b/tests/python_tests/generate_api_check.py
new file mode 100644
index 0000000000..ad0851fea2
--- /dev/null
+++ b/tests/python_tests/generate_api_check.py
@@ -0,0 +1,25 @@
+import openvino_genai as ov_genai
+model_id = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
+path = '/home/epavel/devel/openvino.genai/text_generation/causal_lm/TinyLlama-1.1B-Chat-v1.0'
+device = 'CPU'
+pipe = ov_genai.LLMPipeline(path, device)
+
+from transformers import AutoTokenizer, AutoModelForCausalLM
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id)
+
+prompt = 'table is made of'
+generation_config = {'max_new_tokens': 10}
+
+encoded_prompt = tokenizer.encode(prompt, return_tensors='pt', add_special_tokens=True)
+hf_encoded_output = model.generate(encoded_prompt, **generation_config)
+hf_output = tokenizer.decode(hf_encoded_output[0, encoded_prompt.shape[1]:])
+
+
+
+import os
+build_dir = os.getenv('GENAI_BUILD_DIR', 'build')
+ov_tokenizers_path = f'{build_dir}/openvino_tokenizers/src/'
+# pipe = ov_genai.LLMPipeline(path, device, {}, ov_tokenizers_path)
+
+ov_output = pipe.generate(prompt, **generation_config)
diff --git a/tests/python_tests/list_test_models.py b/tests/python_tests/list_test_models.py
index 99ca47b27c..90dcc83f0f 100644
--- a/tests/python_tests/list_test_models.py
+++ b/tests/python_tests/list_test_models.py
@@ -3,17 +3,17 @@ def models_list():
     model_ids = [
         ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama-1.1B-Chat-v1.0"),
+        # ("databricks/dolly-v2-3b", "dolly-v2-3b"),  # not enough free disk space on CI machine
+        ("microsoft/phi-1_5", "phi-1_5/"),
         # ("google/gemma-2b-it", "gemma-2b-it"),
         # ("google/gemma-7b-it", "gemma-7b-it"),
         # ("meta-llama/Llama-2-7b-chat-hf", "Llama-2-7b-chat-hf"),
         # ("meta-llama/Llama-2-13b-chat-hf", "Llama-2-13b-chat-hf"),
         # ("openlm-research/open_llama_3b", "open_llama_3b"),
         # ("openlm-research/open_llama_7b", "open_llama_7b"),
-        # ("databricks/dolly-v2-3b", "dolly-v2-3b"),
         # ("databricks/dolly-v2-12b", "dolly-v2-12b"),
         # ("mistralai/Mistral-7B-v0.1", "Mistral-7B-v0.1"),
         # ("ikala/redpajama-3b-chat", "redpajama-3b-chat"),
-        # ("microsoft/phi-1_5", "phi-1_5/"),
         # ("Qwen/Qwen1.5-7B-Chat", "Qwen1.5-7B-Chat"),
     ]
     import os
diff --git a/tests/python_tests/test_generate_api.py b/tests/python_tests/test_generate_api.py
index 690e1ae9dd..442201b0cd 100644
--- a/tests/python_tests/test_generate_api.py
+++ b/tests/python_tests/test_generate_api.py
@@ -3,6 +3,7 @@
 import functools
 import openvino
+import openvino_genai
 import openvino_tokenizers
 import optimum.intel
 import pytest
@@ -38,11 +39,11 @@ def run_hf_ov_genai_comparison(model_fixture, generation_config, prompt):
     hf_output = tokenizer.decode(hf_encoded_output[0, encoded_prompt.shape[1]:])
 
     device = 'CPU'
-    # pipe = ov_genai.LLMPipeline(path, device)
-
-    pipe = ov_genai.LLMPipeline(str(path), device)
+    pipe = ov_genai.LLMPipeline(path, device)
     ov_output = pipe.generate(prompt, **generation_config)
+    if generation_config.get('num_return_sequences', 1) > 1:
+        ov_output = ov_output[0]
 
     if hf_output != ov_output:
         print(f'hf_output: {hf_output}')
@@ -56,12 +57,11 @@ def stop_criteria_map():
 
 test_cases = [
     (dict(max_new_tokens=20, do_sample=False), 'table is made of'),  # generation_config, prompt
-    (dict(num_beam_groups=3, num_beams=15, num_return_sequences=15, max_new_tokens=20, diversity_penalty=1.0), 'table is made of'),
-    # (dict(num_beam_groups=3, num_beams=15, num_return_sequences=15, max_new_tokens=20, diversity_penalty=1.0), 'Alan Turing was a'),
-    # (dict(num_beam_groups=3, num_beams=15, num_return_sequences=15, max_new_tokens=30, diversity_penalty=1.0), 'Alan Turing was a'),
-    # (dict(num_beam_groups=2, num_beams=8, num_return_sequences=8, max_new_tokens=20, diversity_penalty=1.0), 'table is made of'),
-    # (dict(num_beam_groups=2, num_beams=8, num_return_sequences=8, max_new_tokens=20, diversity_penalty=1.0), 'The Sun is yellow because'),
-    # (dict(num_beam_groups=2, num_beams=8, num_return_sequences=8, max_new_tokens=20, diversity_penalty=1.5), 'The Sun is yellow because'),
+    (dict(num_beam_groups=3, num_beams=15, num_return_sequences=15, max_new_tokens=20, diversity_penalty=1.0), 'Alan Turing was a'),
+    (dict(num_beam_groups=3, num_beams=15, num_return_sequences=15, max_new_tokens=30, diversity_penalty=1.0), 'Alan Turing was a'),
+    (dict(num_beam_groups=2, num_beams=8, num_return_sequences=8, max_new_tokens=20, diversity_penalty=1.0), 'table is made of'),
+    (dict(num_beam_groups=2, num_beams=8, num_return_sequences=8, max_new_tokens=20, diversity_penalty=1.0), 'The Sun is yellow because'),
+    (dict(num_beam_groups=2, num_beams=8, num_return_sequences=8, max_new_tokens=20, diversity_penalty=1.5), 'The Sun is yellow because'),
 ]
 @pytest.mark.parametrize("generation_config,prompt", test_cases)
 def test_greedy_decoding(model_fixture, generation_config, prompt):
@@ -74,7 +74,6 @@ def test_greedy_decoding(model_fixture, generation_config, prompt):
 @pytest.mark.parametrize("max_new_tokens", [20, 15])
 @pytest.mark.parametrize("diversity_penalty", [1.0, 1.5])
 @pytest.mark.parametrize("prompt", prompts)
-@pytest.mark.skip # temporarily
 def test_beam_search_decoding(model_fixture, num_beam_groups, group_size,
                               max_new_tokens, diversity_penalty, prompt):
     generation_config = dict(
@@ -90,7 +89,6 @@ def test_beam_search_decoding(model_fixture, num_beam_groups, group_size,
 @pytest.mark.parametrize("stop_criteria", ["never", "early", "heuristic"])
 @pytest.mark.parametrize("prompt", prompts)
 @pytest.mark.parametrize("max_new_tokens", [20, 40, 300])
-@pytest.mark.skip # temporarily
 def test_stop_criteria(model_fixture, stop_criteria, prompt, max_new_tokens):
     # todo: for long sentences early stop_criteria fails
     if (stop_criteria == 'early' and max_new_tokens >= 300):
@@ -123,3 +121,95 @@ def test_beam_search_long_sentences(model_fixture, num_beam_groups, group_size,
         max_new_tokens=max_new_tokens,
     )
     run_hf_ov_genai_comparison(model_fixture, generation_config, prompt)
+
+
+def user_defined_callback(subword):
+    print(subword)
+
+
+@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
+def test_callback_one_string(model_fixture, callback):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    pipe.generate('', openvino_genai.GenerationConfig(), callback)
+
+
+@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
+def test_callback_batch_fail(model_fixture, callback):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    with pytest.raises(RuntimeError):
+        pipe.generate(['1', '2'], openvino_genai.GenerationConfig(), callback)
+
+
+@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
+def test_callback_kwargs_one_string(model_fixture, callback):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    pipe.generate('', max_new_tokens=10, streamer=callback)
+
+
+@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
+def test_callback_kwargs_batch_fail(model_fixture, callback):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    with pytest.raises(RuntimeError):
+        pipe.generate(['1', '2'], max_new_tokens=10, streamer=callback)
+
+
+class Printer(openvino_genai.StreamerBase):
+    def __init__(self, tokenizer):
+        super().__init__()
+        self.tokenizer = tokenizer
+    def put(self, token_id):
+        print(self.tokenizer.decode([token_id]))  # Incorrect way to print, but easy to implement
+    def end(self):
+        print('end')
+
+
+def test_streamer_one_string(model_fixture):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    printer = Printer(pipe.get_tokenizer())
+    pipe.generate('', openvino_genai.GenerationConfig(), printer)
+
+
+def test_streamer_batch_fail(model_fixture):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    printer = Printer(pipe.get_tokenizer())
+    with pytest.raises(RuntimeError):
+        pipe.generate(['1', '2'], openvino_genai.GenerationConfig(), printer)
+
+
+def test_streamer_kwargs_one_string(model_fixture):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    printer = Printer(pipe.get_tokenizer())
+    pipe.generate('', do_sample=True, streamer=printer)
+
+
+def test_streamer_kwargs_batch_fail(model_fixture):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    printer = Printer(pipe.get_tokenizer())
+    with pytest.raises(RuntimeError):
+        pipe.generate('', num_beams=2, streamer=printer)
+
+
+@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
+def test_operator_with_callback_one_string(model_fixture, callback):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    pipe('', openvino_genai.GenerationConfig(), callback)
+
+
+@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
+def test_operator_with_callback_batch_fail(model_fixture, callback):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    with pytest.raises(RuntimeError):
+        pipe(['1', '2'], openvino_genai.GenerationConfig(), callback)
+
+
+def test_operator_with_streamer_kwargs_one_string(model_fixture):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    printer = Printer(pipe.get_tokenizer())
+    pipe('', do_sample=True, streamer=printer)
+
+
+def test_operator_with_streamer_kwargs_batch_fail(model_fixture):
+    pipe = openvino_genai.LLMPipeline(model_fixture[1], 'CPU')
+    printer = Printer(pipe.get_tokenizer())
+    with pytest.raises(RuntimeError):
+        pipe('', num_beams=2, streamer=printer)
diff --git a/text_generation/causal_lm/cpp/chat_sample.cpp b/text_generation/causal_lm/cpp/chat_sample.cpp
index 3e215e5208..08748b3bbd 100644
--- a/text_generation/causal_lm/cpp/chat_sample.cpp
+++ b/text_generation/causal_lm/cpp/chat_sample.cpp
@@ -3,9 +3,26 @@
 #include 
 #include "openvino/genai/llm_pipeline.hpp"
+#include "openvino/genai/streamer_base.hpp"
 
 using namespace std;
 
+class CustomStreamer: public ov::genai::StreamerBase {
+public:
+    void put(int64_t token) {
+        std::cout << token << std::endl;
+        /* custom decoding/tokens processing code
+        tokens_cache.push_back(token);
+        std::string text = m_tokenizer.decode(tokens_cache);
+        ...
+        */
+    };
+
+    void end() {
+        /* custom finalization */
+    };
+};
+
 std::vector<std::string> questions = {
     "1+1=",
     "what was the previous answer?",
@@ -24,7 +41,8 @@ int main(int argc, char* argv[]) try {
     ov::genai::GenerationConfig config = pipe.get_generation_config();
     config.max_new_tokens = 10000;
 
-    auto streamer = [](std::string word) { std::cout << word << std::flush; };
+    std::function<bool(std::string)> streamer = [](std::string word) { std::cout << word << std::flush; return true; };
+    std::shared_ptr<CustomStreamer> custom_streamer = std::make_shared<CustomStreamer>();
 
     pipe.start_chat();
     for (size_t i = 0; i < questions.size(); i++) {
@@ -35,7 +53,8 @@ int main(int argc, char* argv[]) try {
         cout << prompt << endl;
 
         // auto answer_str = pipe(prompt, config, streamer);
-        auto answer_str = pipe.generate(prompt, ov::genai::max_new_tokens(10000), ov::genai::streamer(streamer));
+        auto answer_str = pipe(prompt, ov::genai::generation_config(config), ov::genai::streamer(streamer));
+        // auto answer_str = pipe.generate(prompt, ov::genai::max_new_tokens(10000), ov::genai::streamer(streamer));
         accumulated_str += answer_str;
 
         cout << "\n----------\n";
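
For reference, the streaming API exercised by the new Python tests and by chat_sample.cpp reduces to the usage sketched below. This is a minimal sketch and not part of the diff: it assumes the openvino_genai wheel is installed and that the model directory name (a placeholder here) points to a locally converted model with tokenizer files. Streaming is only supported for a single prompt; combining a streamer with a list of prompts raises RuntimeError, which is exactly what the *_batch_fail tests assert.

```python
import openvino_genai as ov_genai

model_dir = 'TinyLlama-1.1B-Chat-v1.0'  # placeholder: directory with openvino_model.xml/bin and tokenizer files
pipe = ov_genai.LLMPipeline(model_dir, 'CPU')

# Option 1: any callable that accepts a decoded subword can be passed as the streamer.
def print_subword(subword):
    print(subword, end='', flush=True)

pipe.generate('The Sun is yellow because', max_new_tokens=20, streamer=print_subword)

# Option 2: subclass StreamerBase to receive raw token ids and handle decoding yourself.
class TokenPrinter(ov_genai.StreamerBase):
    def __init__(self, tokenizer):
        super().__init__()
        self.tokenizer = tokenizer
    def put(self, token_id):
        # decoding token-by-token is a simplification, same as the Printer class in the tests above
        print(self.tokenizer.decode([token_id]), end='', flush=True)
    def end(self):
        print()

pipe.generate('table is made of', max_new_tokens=20, streamer=TokenPrinter(pipe.get_tokenizer()))
```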