
Commit 2a4884d

Merge branch 'master' into alignament_results_sd_vs_cb

iefode authored Nov 25, 2024
2 parents ac65485 + d490c18
Showing 50 changed files with 1,737 additions and 787 deletions.
2 changes: 1 addition & 1 deletion .github/labeler.yml
@@ -36,7 +36,7 @@
- 'tests/cpp/generate_config.cpp'
- 'tests/cpp/sampler.cpp'

-- 'category: LoRA':
+'category: LoRA':
- 'src/cpp/include/openvino/genai/lora_adapter.hpp'
- 'src/cpp/src/lora_adapter.cpp'
- 'src/cpp/src/lora_helper.cpp'
10 changes: 8 additions & 2 deletions .github/workflows/llm_bench-python.yml
@@ -82,6 +82,12 @@ jobs:
run: |
wget -O ./ov_models/soulcard.safetensors https://civitai.com/api/download/models/72591
python ./tools/llm_bench/benchmark.py -m ./ov_models/dreamlike-art-dreamlike-anime-1.0/FP16/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --genai --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7
+- name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Decoding mode on Linux
+run: |
+optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format fp16 ov_models/TinyLlama-1.1B-Chat-v1.0/FP16
+optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format int8 ov_models/TinyLlama-1.1B-Chat-v1.0/INT8
+python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --genai --assistant_confidence_threshold 0.4
+python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --genai --num_assistant_tokens 5
- name: Test whisper-tiny on Linux
run: |
GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --branch main --single-branch https://huggingface.co/datasets/facebook/multilingual_librispeech
@@ -96,7 +102,7 @@ jobs:
- name: WWB Tests
run: |
GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ${{ env.WWB_PATH }}/requirements.txt
-pip install git+https://github.com/huggingface/optimum.git
+pip install git+https://github.com/huggingface/optimum-intel.git
GIT_CLONE_PROTECTION_ACTIVE=false pip install ${{ env.WWB_PATH }}
python -m pip install -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --force-reinstall
python -m pytest -v tools/who_what_benchmark/tests
@@ -117,7 +123,7 @@ jobs:
- name: WWB Tests
run: |
GIT_CLONE_PROTECTION_ACTIVE=false pip install -r tools/who_what_benchmark/requirements.txt
-pip install git+https://github.com/huggingface/optimum.git
+pip install git+https://github.com/huggingface/optimum-intel.git
GIT_CLONE_PROTECTION_ACTIVE=false pip install tools/who_what_benchmark/
pip install pytest
python -m pip install -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --force-reinstall
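The new speculative decoding step above exercises these options through `benchmark.py`. For reference, a minimal sketch of the equivalent direct openvino-genai usage, assuming the two exports above and that `draft_model`, `num_assistant_tokens`, and `assistant_confidence_threshold` are available in the installed openvino-genai build:

```python
import openvino_genai

# Use the INT8 export as the draft model for speculative decoding.
draft = openvino_genai.draft_model("ov_models/TinyLlama-1.1B-Chat-v1.0/INT8", "CPU")
pipe = openvino_genai.LLMPipeline("ov_models/TinyLlama-1.1B-Chat-v1.0/FP16", "CPU", draft_model=draft)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
# Static mode: the draft proposes a fixed number of candidate tokens per step...
config.num_assistant_tokens = 5
# ...or dynamic mode, gated by draft confidence (set one of the two, not both):
# config.assistant_confidence_threshold = 0.4

print(pipe.generate("Why is the Sun yellow?", config))
```

Only one of the two knobs applies at a time, which is why the workflow invokes `benchmark.py` twice.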
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -51,11 +51,11 @@ endif()

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage ${OpenVINOGenAI_VERSION} QUIET
-COMPONENTS Runtime
+COMPONENTS Runtime Threading
PATHS "${OpenVINO_DIR}")
if(NOT OpenVINODeveloperPackage_FOUND)
find_package(OpenVINO ${OpenVINOGenAI_VERSION} REQUIRED
-COMPONENTS Runtime
+COMPONENTS Runtime Threading
PATHS "${OpenVINO_DIR_PY}")
endif()

28 changes: 24 additions & 4 deletions README.md
@@ -117,17 +117,34 @@ optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code --

### Run generation using VLMPipeline API in Python

+See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/visual_language_chat) for a demo application.
+
+Run the following command to download a sample image:
+
+```sh
+curl -O "https://storage.openvinotoolkit.org/test_data/images/dog.jpg"
+```

```python
+import numpy as np
+import openvino as ov
import openvino_genai as ov_genai
-#Will run model on CPU, GPU is a possible option
+from PIL import Image
+
+# Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
pipe = ov_genai.VLMPipeline("./MiniCPM-V-2_6/", "CPU")
-rgb = read_image("cat.jpg")
-print(pipe.generate(prompt, image=rgb, max_new_tokens=100))
+image = Image.open("dog.jpg")
+image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
+image_data = ov.Tensor(image_data)
+
+prompt = "Can you describe the image?"
+print(pipe.generate(prompt, image=image_data, max_new_tokens=100))
```

### Run generation using VLMPipeline in C++

-Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details)
+Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details). See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/visual_language_chat) for a demo application.

```cpp
#include "load_image.hpp"
@@ -163,6 +180,9 @@ For more examples check out our [LLM Inference Guide](https://docs.openvino.ai/2
```sh
#Download and convert to OpenVINO dreamlike-anime-1.0 model
optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 dreamlike_anime_1_0_ov/FP16
+#You can also use INT8 hybrid quantization to further optimize the model and reduce inference latency
+optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format int8 --dataset conceptual_captions dreamlike_anime_1_0_ov/INT8
```

### Run generation using Text2Image API in Python
8 changes: 6 additions & 2 deletions samples/CMakeLists.txt
@@ -14,8 +14,12 @@ add_subdirectory(cpp/text2image)
add_subdirectory(cpp/visual_language_chat)
add_subdirectory(cpp/whisper_speech_recognition)

-install(FILES requirements.txt DESTINATION samples
-COMPONENT cpp_samples_genai)
+install(FILES
+    deployment-requirements.txt
+    export-requirements.txt
+    requirements.txt
+    DESTINATION samples
+    COMPONENT cpp_samples_genai)

install(DIRECTORY
cpp/beam_search_causal_lm
22 changes: 22 additions & 0 deletions samples/cpp/text2image/README.md
@@ -39,6 +39,28 @@ Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk gol

![](./512x512.bmp)

## Run with callback

You can also add a callback to the `main.cpp` file to interrupt the image generation process early, for example once you are satisfied with an intermediate result, or to add logging.

A template for using the callback is shown below.

```cpp
ov::genai::Text2ImagePipeline pipe(models_path, device);

auto callback = [&](size_t step, ov::Tensor& intermediate_res) -> bool {
    std::cout << "Image generation step: " << step << std::endl;
    ov::Tensor img = pipe.decode(intermediate_res); // get intermediate image tensor
    if (your_condition) // return true if you want to interrupt image generation
        return true;
    return false;
};

ov::Tensor image = pipe.generate(prompt,
    ...
    ov::genai::callback(callback)
);
```

## Run with optional LoRA adapters

7 changes: 6 additions & 1 deletion samples/cpp/text2image/imwrite.cpp
@@ -135,7 +135,12 @@ void imwrite_single_image(const std::string& name, ov::Tensor image, bool conver


void imwrite(const std::string& name, ov::Tensor images, bool convert_bgr2rgb) {
-const ov::Shape shape = images.get_shape(), img_shape = {1, shape[1], shape[2], shape[3]};
+const ov::Shape shape = images.get_shape();
+OPENVINO_ASSERT(images.get_element_type() == ov::element::u8 && shape.size() == 4,
+                "Image of u8 type and [1, H, W, 3] shape is expected.",
+                "Given image has shape ", shape, " and element type ", images.get_element_type());
+
+const ov::Shape img_shape = {1, shape[1], shape[2], shape[3]};
uint8_t* img_data = images.data<uint8_t>();

for (int img_num = 0, num_images = shape[0], img_size = ov::shape_size(img_shape); img_num < num_images; ++img_num, img_data += img_size) {
21 changes: 21 additions & 0 deletions samples/python/text2image/README.md
@@ -39,6 +39,27 @@ Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk gol

![](./image.bmp)

## Run with callback

You can also add a callback to the `main.py` file to interrupt the image generation process early, for example once you are satisfied with an intermediate result, or to add logging.

A template for using the callback is shown below.

```python
pipe = openvino_genai.Text2ImagePipeline(model_dir, device)

def callback(step, intermediate_res):
    print("Image generation step: ", step)
    image_tensor = pipe.decode(intermediate_res)  # get intermediate image tensor
    if your_condition:  # return True if you want to interrupt image generation
        return True
    return False

image = pipe.generate(
    ...
    callback = callback
)
```

## Run with optional LoRA adapters

2 changes: 1 addition & 1 deletion src/cpp/CMakeLists.txt
@@ -71,7 +71,7 @@ target_include_directories(${TARGET_NAME}

target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${safetensors.h_SOURCE_DIR}")

-target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime PRIVATE nlohmann_json::nlohmann_json jinja2cpp)
+target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime PRIVATE openvino::threading nlohmann_json::nlohmann_json jinja2cpp)

target_compile_features(${TARGET_NAME} PUBLIC cxx_std_17)

2 changes: 1 addition & 1 deletion src/cpp/include/openvino/genai/generation_config.hpp
@@ -156,7 +156,7 @@ static constexpr ov::Property<bool> ignore_eos{"ignore_eos"};
static constexpr ov::Property<size_t> min_new_tokens{"min_new_tokens"};
static constexpr ov::Property<std::vector<std::string>> stop_strings{"stop_strings"};
static constexpr ov::Property<bool> include_stop_str_in_output{"include_stop_str_in_output"};
-static constexpr ov::Property<std::vector<std::vector<int64_t>>> stop_token_ids{"stop_token_ids"};
+static constexpr ov::Property<std::set<int64_t>> stop_token_ids{"stop_token_ids"};

static constexpr ov::Property<size_t> num_beam_groups{"num_beam_groups"};
static constexpr ov::Property<size_t> num_beams{"num_beams"};
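Note the `stop_token_ids` type change above: stop ids are now one flat `std::set<int64_t>` rather than a vector of vectors. A minimal Python sketch of how such ids are passed, assuming the Python binding mirrors the C++ type as a plain set of ints and reusing the TinyLlama export from the workflow above:

```python
import openvino_genai

pipe = openvino_genai.LLMPipeline("ov_models/TinyLlama-1.1B-Chat-v1.0/FP16", "CPU")

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
config.stop_token_ids = {2}  # hypothetical id; use your tokenizer's actual EOS/stop token ids

print(pipe.generate("Why is the Sun yellow?", config))
```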
@@ -99,7 +99,9 @@ static constexpr ov::Property<float> strength{"strength"};

static constexpr ov::Property<std::shared_ptr<Generator>> generator{"generator"};

-static constexpr ov::Property<size_t> max_sequence_length{"max_sequence_length"};
+static constexpr ov::Property<int> max_sequence_length{"max_sequence_length"};

+static constexpr ov::Property<std::function<bool(size_t, ov::Tensor&)>> callback{"callback"};

OPENVINO_GENAI_EXPORTS
std::pair<std::string, ov::Any> generation_config(const ImageGenerationConfig& generation_config);
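The `callback` property added above is what the text2image sample READMEs in this commit template. A minimal Python sketch of a logging callback that decodes an intermediate image every few steps, assuming the binding exposes `callback` as a `generate` keyword with the `(step, latent) -> bool` signature shown in the samples, and reusing the dreamlike-anime export from the README:

```python
import openvino_genai

pipe = openvino_genai.Text2ImagePipeline("dreamlike_anime_1_0_ov/FP16", "CPU")

def callback(step, latent):
    print(f"Image generation step: {step}")
    if step % 5 == 0:
        preview = pipe.decode(latent)  # intermediate image tensor, e.g. for preview or logging
    return False  # returning True would interrupt generation

image = pipe.generate(
    "cyberpunk cityscape at dusk",
    num_inference_steps=20,
    callback=callback,
)
```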
@@ -111,6 +111,8 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {
return generate(positive_prompt, ov::AnyMap{std::forward<Properties>(properties)...});
}

+ov::Tensor decode(const ov::Tensor latent);

private:
std::shared_ptr<DiffusionPipeline> m_impl;

4 changes: 4 additions & 0 deletions src/cpp/src/image_generation/diffusion_pipeline.hpp
@@ -80,8 +80,12 @@ class DiffusionPipeline {

virtual ov::Tensor prepare_latents(ov::Tensor initial_image, const ImageGenerationConfig& generation_config) const = 0;

+virtual void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) = 0;
+
virtual ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties) = 0;

+virtual ov::Tensor decode(const ov::Tensor latent) = 0;

virtual ~DiffusionPipeline() = default;

protected:
