Generate pipeline #334
Changes from 115 commits
@@ -0,0 +1,47 @@
name: genai_package
on: pull_request
jobs:
  ubuntu_genai_package:
    if: false
    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - run: mkdir ./ov/
    - run: curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
    - run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
    - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release --target package -j
    - run: source ./ov/setupvars.sh && cmake --install ./build/ --config Release --prefix ov
    - run: ov/samples/cpp/build_samples.sh -b "${{ github.workspace }}/s pace"
    - run: source ./ov/setupvars.sh && python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
    - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
    - run: source ./ov/setupvars.sh && optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
    - run: source ./ov/setupvars.sh && timeout 50s "${{ github.workspace }}/s pace/intel64/Release/greedy_causal_lm" ./TinyLlama-1.1B-Chat-v1.0/ ""
  windows_genai_package:
    runs-on: windows-latest
    defaults:
      run:
        shell: cmd
    steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - run: curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.2.0-15349-765302e0de1/w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64.zip
    - run: unzip ov.zip
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config Release --target package -j
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --install ./build/ --config Release --prefix w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\samples\cpp\build_samples_msvc.bat -b "${{ github.workspace }}/samples_build"
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && "${{ github.workspace }}/samples_build/intel64/Release/greedy_causal_lm" .\TinyLlama-1.1B-Chat-v1.0\ ""
@@ -0,0 +1,54 @@
name: genai_python_lib
on: pull_request
jobs:
  ubuntu_genai_python_lib:
    runs-on: ubuntu-20.04-16-cores
    steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - run: mkdir ./ov/
    - run: curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14758-22bd6ff0494/l_openvino_toolkit_centos7_2024.1.0.dev20240315_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz  # Install CentOS7 instead of Ubuntu to match PyPI distribution ABI
    - run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
    - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j
    - run: python -m pip install --pre openvino --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly  # Can't load CentOS libraries from the archive
    - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
    - run: PYTHONPATH=./src/python/ python -c "from openvino_genai import LLMPipeline"
    - run: source ./ov/setupvars.sh && python -m pip install --pre . --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
    - run: python -c "from openvino_genai import LLMPipeline"
    - name: GenAI Python API tests
      run: |
        source ./ov/setupvars.sh
        cd ./tests/python_tests/
        python -m pip install -r requirements.txt
        models=$(python list_test_models.py)
        echo "$models" | while read -r model_name model_path; do
          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model "$model_name" "$model_path"
        done
        python -m pytest test_generate_api.py
  windows_genai_python_lib:
    runs-on: windows-latest
    defaults:
      run:
        shell: cmd
    steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - run: curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.2.0-15349-765302e0de1/w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64.zip
    - run: unzip ov.zip
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config Release -j
    - run: python -m pip install "numpy<1.27"
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
    - run: set "PYTHONPATH=./src/python;" && call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -c "from openvino_genai import LLMPipeline"  # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of after solves that.
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install .
    - run: python -c "from openvino_genai import LLMPipeline"
@@ -0,0 +1,25 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

cmake_minimum_required(VERSION 3.15)

# Multi config generators such as Visual Studio ignore CMAKE_BUILD_TYPE. Multi config generators are configured with
# CMAKE_CONFIGURATION_TYPES, but limiting options in it completely removes such build options
get_property(GENERATOR_IS_MULTI_CONFIG_VAR GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT GENERATOR_IS_MULTI_CONFIG_VAR AND NOT DEFINED CMAKE_BUILD_TYPE)
    message(STATUS "CMAKE_BUILD_TYPE is not defined, 'Release' will be used")
    # Setting CMAKE_BUILD_TYPE as CACHE must go before project(). Otherwise project() sets its value and set() doesn't take effect
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...")
endif()

[Review comment] @Wovchena we can also set a default build type for Ninja: openvinotoolkit/openvino_tokenizers#162

project(openvino_genai VERSION 2024.2.0.0)

add_subdirectory(./thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
add_subdirectory(src)
add_subdirectory(text_generation/causal_lm/cpp)

install(DIRECTORY text_generation/causal_lm/cpp/ DESTINATION samples/cpp/causal_lm COMPONENT cpp_samples_genai)

install(FILES LICENSE third-party-programs.txt DESTINATION licensing_genai COMPONENT licensing_genai)  # TODO: how to merge with OpenVINO

set(CPACK_GENERATOR "ZIP")

include(CPack)
@@ -0,0 +1,41 @@
[project]
name = "openvino_genai"
version = "2024.2.0.0"
description = "Python bindings for https://github.com/openvinotoolkit/openvino.genai"
requires-python = ">=3.8"
readme = {file = "text_generation/causal_lm/cpp/README.md", content-type="text/markdown"}
license = {text = "OSI Approved :: Apache Software License"}
authors = [
    { name = "OpenVINO Developers", email = "[email protected]" },
]
classifiers = [
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
dependencies = [
    "openvino_tokenizers~=2024.1.0.0"
]

[tool.scikit-build]
cmake.source-dir = "./"
cmake.build-type = "Release"
cmake.targets = ["py_generate_pipeline", "genai"]
install.components = ["core_genai", "pygenai"]
sdist.cmake = true
wheel.packages = ["src/python/openvino_genai"]
wheel.install-dir = "openvino_genai"
wheel.build-tag = "000"
wheel.license-files = ["LICENSE", "SECURITY.md", "third-party-programs.txt"]

[[tool.scikit-build.generate]]
path = "openvino_genai/__version__.py"
template = '''
__version__ = "${version}"
'''

[build-system]
requires = ["scikit-build-core~=0.8.0"]  # See https://github.com/openvinotoolkit/openvino_tokenizers/pull/123
build-backend = "scikit_build_core.build"
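
The `[[tool.scikit-build.generate]]` entry above writes an `openvino_genai/__version__.py` module into the wheel at build time. A minimal sketch of how that generated module could be read back, assuming the wheel layout above and that the package root does not necessarily re-export the symbol:

```python
# The generated file contains a single line: __version__ = "2024.2.0.0"
from openvino_genai.__version__ import __version__

print(__version__)  # e.g. "2024.2.0.0"
```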
@@ -0,0 +1,13 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage QUIET
             PATHS "${OpenVINO_DIR}")
if(NOT OpenVINODeveloperPackage_FOUND)
    find_package(OpenVINO REQUIRED COMPONENTS Runtime)
endif()

add_subdirectory(cpp)
add_subdirectory(python)
@@ -0,0 +1,185 @@
# OpenVINO Generate API

## Usage

First of all, you need to convert your model with optimum-cli and install the package:
``` sh
optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format fp16 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0"
pip install openvino-genai
```

[Review comment] I suppose we should not explicitly use the weight format and should let Optimum decide on that matter.

LLMPipeline is the main object used for decoding. You can initialize it straight away from the folder with the converted model. It will automatically load the main model, tokenizer, detokenizer and the default generation configuration.

### Python

A minimalist example:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
print(pipe.generate("The Sun is yellow because"))
```

Calling generate with custom generation config parameters, e.g. a config for grouped beam search:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")

res = pipe.generate("The Sun is yellow because", max_new_tokens=30, num_groups=3, group_size=5)
print(res)
```

Output:
```
'it is made up of carbon atoms. The carbon atoms are arranged in a linear pattern, which gives the yellow color. The arrangement of carbon atoms in'
```

A simple chat in Python:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path)

config = {'num_groups': 3, 'group_size': 5, 'diversity_penalty': 1.1}
pipe.set_generation_config(config)

pipe.start_chat()
while True:
    print('question:')
    prompt = input()
    if prompt == 'Stop!':
        break
    print(pipe(prompt))
pipe.finish_chat()
```

[Review comment] I tried it and it does not work like this for me, should it work? It works for me if config is a GenerationConfig object obtained with get_generation_config beforehand.

[Review comment] Is it possible to add a description of the options which we can configure in the config?
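
Following the review note above, a minimal sketch of the alternative that was reported to work: fetch the GenerationConfig object with `get_generation_config()`, modify it, and set it back. The attribute names are assumed to mirror the C++ fields shown in the C++ examples below:

```python
import openvino_genai as ov_genai

pipe = ov_genai.LLMPipeline(model_path, "CPU")

# Take the pipeline's current generation config and tweak it for grouped beam search.
config = pipe.get_generation_config()
config.max_new_tokens = 256
config.num_groups = 3
config.group_size = 5
config.diversity_penalty = 1.1
pipe.set_generation_config(config)

print(pipe.generate("The Sun is yellow because"))
```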

A test to compare the outputs with Hugging Face outputs is sketched below.
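
A minimal sketch of such a test, assuming greedy decoding on both sides and the TinyLlama model exported with optimum-cli as above; the model folder and token count are illustrative:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import openvino_genai as ov_genai

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model_path = "TinyLlama-1.1B-Chat-v1.0"  # folder produced by optimum-cli above
prompt = "The Sun is yellow because"

# Reference generation with Hugging Face transformers (greedy, 30 new tokens).
hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
hf_model = AutoModelForCausalLM.from_pretrained(model_id)
inputs = hf_tokenizer(prompt, return_tensors="pt")
hf_ids = hf_model.generate(**inputs, max_new_tokens=30, do_sample=False)
hf_text = hf_tokenizer.decode(hf_ids[0, inputs["input_ids"].shape[1]:], skip_special_tokens=True)

# The same prompt through the GenAI pipeline.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
ov_text = pipe.generate(prompt, max_new_tokens=30)

assert hf_text == ov_text, f"HF: {hf_text!r} vs GenAI: {ov_text!r}"
```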

### C++

A minimalistic example:
```cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");
    std::cout << pipe.generate("The Sun is yellow because");
}
```

Using group beam search decoding:
```cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");

    ov::GenerationConfig config = pipe.get_generation_config();
    config.max_new_tokens = 256;
    config.num_groups = 3;
    config.group_size = 5;
    config.diversity_penalty = 1.0f;

    std::cout << pipe.generate("The Sun is yellow because", config);
}
```

The simplest chat in C++:
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string prompt;

    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");

    pipe.start_chat();
    for (;;) {
        std::cout << "question:\n";
        std::getline(std::cin, prompt);
        if (prompt == "Stop!")
            break;

        std::cout << pipe(prompt) << std::endl;
    }
    pipe.finish_chat();
}
```

Specifying generation_config to use grouped beam search:
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string prompt;

    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");

    ov::GenerationConfig config = pipe.get_generation_config();
    config.max_new_tokens = 256;
    config.num_groups = 3;
    config.group_size = 5;
    config.diversity_penalty = 1.0f;

    auto streamer = [](std::string word) { std::cout << word << std::flush; };

    pipe.start_chat();
    for (;;) {
        std::cout << "question:\n";
        std::getline(std::cin, prompt);
        if (prompt == "Stop!")
            break;

        auto answer = pipe(prompt, config, streamer);
        // no need to print the answer, the streamer will do that
    }
    pipe.finish_chat();
}
```

Streaming example with a lambda function:
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");

    auto streamer = [](std::string word) { std::cout << word << std::flush; };
    std::cout << pipe.generate("The Sun is yellow because", streamer);
}
```

Streaming with a custom class:
``` cpp
#include "openvino/genai/streamer_base.hpp"
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

class CustomStreamer: public StreamerBase {
public:
    void put(int64_t token) {
        /* custom decoding/tokens processing code
        tokens_cache.push_back(token);
        std::string text = m_tokenizer.decode(tokens_cache);
        ...
        */
    };

    void end() {
        /* custom finalization */
    };
};

int main(int argc, char* argv[]) {
    CustomStreamer custom_streamer;

    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");
    std::cout << pipe.generate("The Sun is yellow because", custom_streamer);
}
```

[Review comment] @Wovchena I suppose it should be installed after the tokenizers, because text_generation/causal_lm/cpp/requirements.txt forces installation of the released version of the tokenizers.