
Commit 2a4884d

Merge branch 'master' into alignament_results_sd_vs_cb

iefode authored Nov 25, 2024
2 parents ac65485 + d490c18
Showing 50 changed files with 1,737 additions and 787 deletions.
2 changes: 1 addition & 1 deletion .github/labeler.yml
@@ -36,7 +36,7 @@
- 'tests/cpp/generate_config.cpp'
- 'tests/cpp/sampler.cpp'

-- 'category: LoRA':
+'category: LoRA':
- 'src/cpp/include/openvino/genai/lora_adapter.hpp'
- 'src/cpp/src/lora_adapter.cpp'
- 'src/cpp/src/lora_helper.cpp'
10 changes: 8 additions & 2 deletions .github/workflows/llm_bench-python.yml
@@ -82,6 +82,12 @@ jobs:
run: |
wget -O ./ov_models/soulcard.safetensors https://civitai.com/api/download/models/72591
python ./tools/llm_bench/benchmark.py -m ./ov_models/dreamlike-art-dreamlike-anime-1.0/FP16/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --genai --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7
+- name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Decoding mode on Linux
+run: |
+optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format fp16 ov_models/TinyLlama-1.1B-Chat-v1.0/FP16
+optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format int8 ov_models/TinyLlama-1.1B-Chat-v1.0/INT8
+python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --genai --assistant_confidence_threshold 0.4
+python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --genai --num_assistant_tokens 5
- name: Test whisper-tiny on Linux
run: |
GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --branch main --single-branch https://huggingface.co/datasets/facebook/multilingual_librispeech
@@ -96,7 +102,7 @@ jobs:
- name: WWB Tests
run: |
GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ${{ env.WWB_PATH }}/requirements.txt
-pip install git+https://github.com/huggingface/optimum.git
+pip install git+https://github.com/huggingface/optimum-intel.git
GIT_CLONE_PROTECTION_ACTIVE=false pip install ${{ env.WWB_PATH }}
python -m pip install -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --force-reinstall
python -m pytest -v tools/who_what_benchmark/tests
@@ -117,7 +123,7 @@ jobs:
- name: WWB Tests
run: |
GIT_CLONE_PROTECTION_ACTIVE=false pip install -r tools/who_what_benchmark/requirements.txt
-pip install git+https://github.com/huggingface/optimum.git
+pip install git+https://github.com/huggingface/optimum-intel.git
GIT_CLONE_PROTECTION_ACTIVE=false pip install tools/who_what_benchmark/
pip install pytest
python -m pip install -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --force-reinstall
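The new speculative decoding step above exercises these options through `benchmark.py`. For reference, a minimal sketch of the equivalent direct openvino-genai usage, assuming the two exports above and that `draft_model`, `num_assistant_tokens`, and `assistant_confidence_threshold` are available in the installed openvino-genai build:

```python
import openvino_genai

# Use the INT8 export as the draft model for speculative decoding.
draft = openvino_genai.draft_model("ov_models/TinyLlama-1.1B-Chat-v1.0/INT8", "CPU")
pipe = openvino_genai.LLMPipeline("ov_models/TinyLlama-1.1B-Chat-v1.0/FP16", "CPU", draft_model=draft)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
# Static mode: the draft proposes a fixed number of candidate tokens per step...
config.num_assistant_tokens = 5
# ...or dynamic mode, gated by draft confidence (set one of the two, not both):
# config.assistant_confidence_threshold = 0.4

print(pipe.generate("Why is the Sun yellow?", config))
```

Only one of the two knobs applies at a time, which is why the workflow invokes `benchmark.py` twice.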
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -51,11 +51,11 @@ endif()

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage ${OpenVINOGenAI_VERSION} QUIET
-COMPONENTS Runtime
+COMPONENTS Runtime Threading
PATHS "${OpenVINO_DIR}")
if(NOT OpenVINODeveloperPackage_FOUND)
find_package(OpenVINO ${OpenVINOGenAI_VERSION} REQUIRED
-COMPONENTS Runtime
+COMPONENTS Runtime Threading
PATHS "${OpenVINO_DIR_PY}")
endif()

28 changes: 24 additions & 4 deletions README.md
@@ -117,17 +117,34 @@ optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code --

### Run generation using VLMPipeline API in Python

+See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/visual_language_chat) for a demo application.
+
+Run the following command to download a sample image:
+
+```sh
+curl -O "https://storage.openvinotoolkit.org/test_data/images/dog.jpg"
+```

```python
+import numpy as np
+import openvino as ov
import openvino_genai as ov_genai
-#Will run model on CPU, GPU is a possible option
+from PIL import Image
+
+# Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
pipe = ov_genai.VLMPipeline("./MiniCPM-V-2_6/", "CPU")
-rgb = read_image("cat.jpg")
-print(pipe.generate(prompt, image=rgb, max_new_tokens=100))
+image = Image.open("dog.jpg")
+image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
+image_data = ov.Tensor(image_data)
+
+prompt = "Can you describe the image?"
+print(pipe.generate(prompt, image=image_data, max_new_tokens=100))
```

### Run generation using VLMPipeline in C++

-Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details)
+Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for more details). See [Visual Language Chat](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/visual_language_chat) for a demo application.

```cpp
#include "load_image.hpp"
@@ -163,6 +180,9 @@ For more examples check out our [LLM Inference Guide](https://docs.openvino.ai/2
```sh
#Download and convert to OpenVINO dreamlike-anime-1.0 model
optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 dreamlike_anime_1_0_ov/FP16
+#You can also use INT8 hybrid quantization to further optimize the model and reduce inference latency
+optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format int8 --dataset conceptual_captions dreamlike_anime_1_0_ov/INT8
```

### Run generation using Text2Image API in Python
8 changes: 6 additions & 2 deletions samples/CMakeLists.txt
@@ -14,8 +14,12 @@ add_subdirectory(cpp/text2image)
add_subdirectory(cpp/visual_language_chat)
add_subdirectory(cpp/whisper_speech_recognition)

-install(FILES requirements.txt DESTINATION samples
-COMPONENT cpp_samples_genai)
+install(FILES
+    deployment-requirements.txt
+    export-requirements.txt
+    requirements.txt
+    DESTINATION samples
+    COMPONENT cpp_samples_genai)

install(DIRECTORY
cpp/beam_search_causal_lm
22 changes: 22 additions & 0 deletions samples/cpp/text2image/README.md
@@ -39,6 +39,28 @@ Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk gol

![](./512x512.bmp)

## Run with callback

You can also add a callback to the `main.cpp` file to interrupt the image generation process early, for example once you are satisfied with an intermediate result, or to add logging.

A template for using the callback is shown below.

```cpp
ov::genai::Text2ImagePipeline pipe(models_path, device);

auto callback = [&](size_t step, ov::Tensor& intermediate_res) -> bool {
    std::cout << "Image generation step: " << step << std::endl;
    ov::Tensor img = pipe.decode(intermediate_res); // get intermediate image tensor
    if (your_condition) // return true if you want to interrupt image generation
        return true;
    return false;
};

ov::Tensor image = pipe.generate(prompt,
    ...
    ov::genai::callback(callback)
);
```

## Run with optional LoRA adapters

7 changes: 6 additions & 1 deletion samples/cpp/text2image/imwrite.cpp
@@ -135,7 +135,12 @@ void imwrite_single_image(const std::string& name, ov::Tensor image, bool conver


void imwrite(const std::string& name, ov::Tensor images, bool convert_bgr2rgb) {
-const ov::Shape shape = images.get_shape(), img_shape = {1, shape[1], shape[2], shape[3]};
+const ov::Shape shape = images.get_shape();
+OPENVINO_ASSERT(images.get_element_type() == ov::element::u8 && shape.size() == 4,
+                "Image of u8 type and [1, H, W, 3] shape is expected.",
+                "Given image has shape ", shape, " and element type ", images.get_element_type());
+
+const ov::Shape img_shape = {1, shape[1], shape[2], shape[3]};
uint8_t* img_data = images.data<uint8_t>();

for (int img_num = 0, num_images = shape[0], img_size = ov::shape_size(img_shape); img_num < num_images; ++img_num, img_data += img_size) {
21 changes: 21 additions & 0 deletions samples/python/text2image/README.md
@@ -39,6 +39,27 @@ Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk gol

![](./image.bmp)

## Run with callback

You can also add a callback to the `main.py` file to interrupt the image generation process early, for example once you are satisfied with an intermediate result, or to add logging.

A template for using the callback is shown below.

```python
pipe = openvino_genai.Text2ImagePipeline(model_dir, device)

def callback(step, intermediate_res):
    print("Image generation step: ", step)
    image_tensor = pipe.decode(intermediate_res)  # get intermediate image tensor
    if your_condition:  # return True if you want to interrupt image generation
        return True
    return False

image = pipe.generate(
    ...
    callback = callback
)
```

## Run with optional LoRA adapters

2 changes: 1 addition & 1 deletion src/cpp/CMakeLists.txt
@@ -71,7 +71,7 @@ target_include_directories(${TARGET_NAME}

target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${safetensors.h_SOURCE_DIR}")

-target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime PRIVATE nlohmann_json::nlohmann_json jinja2cpp)
+target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime PRIVATE openvino::threading nlohmann_json::nlohmann_json jinja2cpp)

target_compile_features(${TARGET_NAME} PUBLIC cxx_std_17)

2 changes: 1 addition & 1 deletion src/cpp/include/openvino/genai/generation_config.hpp
@@ -156,7 +156,7 @@ static constexpr ov::Property<bool> ignore_eos{"ignore_eos"};
static constexpr ov::Property<size_t> min_new_tokens{"min_new_tokens"};
static constexpr ov::Property<std::vector<std::string>> stop_strings{"stop_strings"};
static constexpr ov::Property<bool> include_stop_str_in_output{"include_stop_str_in_output"};
-static constexpr ov::Property<std::vector<std::vector<int64_t>>> stop_token_ids{"stop_token_ids"};
+static constexpr ov::Property<std::set<int64_t>> stop_token_ids{"stop_token_ids"};

static constexpr ov::Property<size_t> num_beam_groups{"num_beam_groups"};
static constexpr ov::Property<size_t> num_beams{"num_beams"};
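Note the `stop_token_ids` type change above: stop ids are now one flat `std::set<int64_t>` rather than a vector of vectors. A minimal Python sketch of how such ids are passed, assuming the Python binding mirrors the C++ type as a plain set of ints and reusing the TinyLlama export from the workflow above:

```python
import openvino_genai

pipe = openvino_genai.LLMPipeline("ov_models/TinyLlama-1.1B-Chat-v1.0/FP16", "CPU")

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
config.stop_token_ids = {2}  # hypothetical id; use your tokenizer's actual EOS/stop token ids

print(pipe.generate("Why is the Sun yellow?", config))
```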
@@ -99,7 +99,9 @@ static constexpr ov::Property<float> strength{"strength"};

static constexpr ov::Property<std::shared_ptr<Generator>> generator{"generator"};

-static constexpr ov::Property<size_t> max_sequence_length{"max_sequence_length"};
+static constexpr ov::Property<int> max_sequence_length{"max_sequence_length"};

+static constexpr ov::Property<std::function<bool(size_t, ov::Tensor&)>> callback{"callback"};

OPENVINO_GENAI_EXPORTS
std::pair<std::string, ov::Any> generation_config(const ImageGenerationConfig& generation_config);
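The `callback` property added above is what the text2image sample READMEs in this commit template. A minimal Python sketch of a logging callback that decodes an intermediate image every few steps, assuming the binding exposes `callback` as a `generate` keyword with the `(step, latent) -> bool` signature shown in the samples, and reusing the dreamlike-anime export from the README:

```python
import openvino_genai

pipe = openvino_genai.Text2ImagePipeline("dreamlike_anime_1_0_ov/FP16", "CPU")

def callback(step, latent):
    print(f"Image generation step: {step}")
    if step % 5 == 0:
        preview = pipe.decode(latent)  # intermediate image tensor, e.g. for preview or logging
    return False  # returning True would interrupt generation

image = pipe.generate(
    "cyberpunk cityscape at dusk",
    num_inference_steps=20,
    callback=callback,
)
```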
@@ -111,6 +111,8 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {
return generate(positive_prompt, ov::AnyMap{std::forward<Properties>(properties)...});
}

+ov::Tensor decode(const ov::Tensor latent);

private:
std::shared_ptr<DiffusionPipeline> m_impl;

4 changes: 4 additions & 0 deletions src/cpp/src/image_generation/diffusion_pipeline.hpp
@@ -80,8 +80,12 @@ class DiffusionPipeline {

virtual ov::Tensor prepare_latents(ov::Tensor initial_image, const ImageGenerationConfig& generation_config) const = 0;

+virtual void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) = 0;
+
virtual ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties) = 0;

+virtual ov::Tensor decode(const ov::Tensor latent) = 0;

virtual ~DiffusionPipeline() = default;

protected:
