Generate pipeline #334
Changes from 115 commits
@@ -0,0 +1,47 @@
name: genai_package
on: pull_request
jobs:
  ubuntu_genai_package:
    if: false
    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - run: mkdir ./ov/
    - run: curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
    - run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
    - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release --target package -j
    - run: source ./ov/setupvars.sh && cmake --install ./build/ --config Release --prefix ov
    - run: ov/samples/cpp/build_samples.sh -b "${{ github.workspace }}/s pace"
    - run: source ./ov/setupvars.sh && python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
    - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
    - run: source ./ov/setupvars.sh && optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
    - run: source ./ov/setupvars.sh && timeout 50s "${{ github.workspace }}/s pace/intel64/Release/greedy_causal_lm" ./TinyLlama-1.1B-Chat-v1.0/ ""
  windows_genai_package:
    runs-on: windows-latest
    defaults:
      run:
        shell: cmd
    steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - run: curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.2.0-15349-765302e0de1/w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64.zip
    - run: unzip ov.zip
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config Release --target package -j
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --install ./build/ --config Release --prefix w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\samples\cpp\build_samples_msvc.bat -b "${{ github.workspace }}/samples_build"
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && "${{ github.workspace }}/samples_build/intel64/Release/greedy_causal_lm" .\TinyLlama-1.1B-Chat-v1.0\ ""
@@ -0,0 +1,54 @@
name: genai_python_lib
on: pull_request
jobs:
  ubuntu_genai_python_lib:
    runs-on: ubuntu-20.04-16-cores
    steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - run: mkdir ./ov/
    - run: curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14758-22bd6ff0494/l_openvino_toolkit_centos7_2024.1.0.dev20240315_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz  # Install CentOS7 instead of Ubuntu to match PyPI distribution ABI
    - run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
    - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j
    - run: python -m pip install --pre openvino --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly  # Can't load CentOS libraries from the archive
    - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
    - run: PYTHONPATH=./src/python/ python -c "from openvino_genai import LLMPipeline"
    - run: source ./ov/setupvars.sh && python -m pip install --pre . --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
    - run: python -c "from openvino_genai import LLMPipeline"
    - name: GenAI Python API tests
      run: |
        source ./ov/setupvars.sh
        cd ./tests/python_tests/
        python -m pip install -r requirements.txt
        models=$(python list_test_models.py)
        echo "$models" | while read -r model_name model_path; do
          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model "$model_name" "$model_path"
        done
        python -m pytest test_generate_api.py
  windows_genai_python_lib:
    runs-on: windows-latest
    defaults:
      run:
        shell: cmd
    steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - run: curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.2.0-15349-765302e0de1/w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64.zip
    - run: unzip ov.zip
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config Release -j
    - run: python -m pip install "numpy<1.27"
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
    - run: set "PYTHONPATH=./src/python;" && call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -c "from openvino_genai import LLMPipeline"  # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of after solves that.
    - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install .
    - run: python -c "from openvino_genai import LLMPipeline"
@@ -0,0 +1,25 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

cmake_minimum_required(VERSION 3.15)

# Multi config generators such as Visual Studio ignore CMAKE_BUILD_TYPE. Multi config generators are configured with
# CMAKE_CONFIGURATION_TYPES, but limiting options in it completely removes such build options
get_property(GENERATOR_IS_MULTI_CONFIG_VAR GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT GENERATOR_IS_MULTI_CONFIG_VAR AND NOT DEFINED CMAKE_BUILD_TYPE)
    message(STATUS "CMAKE_BUILD_TYPE is not defined, 'Release' will be used")
    # Setting CMAKE_BUILD_TYPE as CACHE must go before project(). Otherwise project() sets its value and set() doesn't take effect
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...")
endif()

[Review comment] @Wovchena we can also set a default build type for Ninja: openvinotoolkit/openvino_tokenizers#162

project(openvino_genai VERSION 2024.2.0.0)

add_subdirectory(./thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
add_subdirectory(src)
add_subdirectory(text_generation/causal_lm/cpp)

install(DIRECTORY text_generation/causal_lm/cpp/ DESTINATION samples/cpp/causal_lm COMPONENT cpp_samples_genai)

install(FILES LICENSE third-party-programs.txt DESTINATION licensing_genai COMPONENT licensing_genai)  # TODO: how to merge with OpenVINO

set(CPACK_GENERATOR "ZIP")

include(CPack)
@@ -0,0 +1,41 @@
[project]
name = "openvino_genai"
version = "2024.2.0.0"
description = "Python bindings for https://github.com/openvinotoolkit/openvino.genai"
requires-python = ">=3.8"
readme = {file = "text_generation/causal_lm/cpp/README.md", content-type="text/markdown"}
license = {text = "OSI Approved :: Apache Software License"}
authors = [
    { name = "OpenVINO Developers", email = "[email protected]" },
]
classifiers = [
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]
dependencies = [
    "openvino_tokenizers~=2024.1.0.0"
]

[tool.scikit-build]
cmake.source-dir = "./"
cmake.build-type = "Release"
cmake.targets = ["py_generate_pipeline", "genai"]
install.components = ["core_genai", "pygenai"]
sdist.cmake = true
wheel.packages = ["src/python/openvino_genai"]
wheel.install-dir = "openvino_genai"
wheel.build-tag = "000"
wheel.license-files = ["LICENSE", "SECURITY.md", "third-party-programs.txt"]

[[tool.scikit-build.generate]]
path = "openvino_genai/__version__.py"
template = '''
__version__ = "${version}"
'''

[build-system]
requires = ["scikit-build-core~=0.8.0"]  # See https://github.com/openvinotoolkit/openvino_tokenizers/pull/123
build-backend = "scikit_build_core.build"
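
The `[[tool.scikit-build.generate]]` entry above writes an `openvino_genai/__version__.py` module into the wheel at build time. A minimal sketch of how that generated module could be read back, assuming the wheel layout above and that the package root does not necessarily re-export the symbol:

```python
# The generated file contains a single line: __version__ = "2024.2.0.0"
from openvino_genai.__version__ import __version__

print(__version__)  # e.g. "2024.2.0.0"
```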
@@ -0,0 +1,13 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage QUIET
             PATHS "${OpenVINO_DIR}")
if(NOT OpenVINODeveloperPackage_FOUND)
    find_package(OpenVINO REQUIRED COMPONENTS Runtime)
endif()

add_subdirectory(cpp)
add_subdirectory(python)
@@ -0,0 +1,185 @@
# OpenVINO Generate API

## Usage

First of all, you need to convert your model with optimum-cli and install the package:
``` sh
optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format fp16 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0"
pip install openvino-genai
```

[Review comment] I suppose we should not explicitly use the weight format and should let Optimum decide on that matter.

LLMPipeline is the main object used for decoding. You can initialize it straight away from the folder with the converted model. It will automatically load the main model, tokenizer, detokenizer and the default generation configuration.

### Python

A minimalist example:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
print(pipe.generate("The Sun is yellow because"))
```

Calling generate with custom generation config parameters, e.g. a config for grouped beam search:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")

res = pipe.generate("The Sun is yellow because", max_new_tokens=30, num_groups=3, group_size=5)
print(res)
```

Output:
```
'it is made up of carbon atoms. The carbon atoms are arranged in a linear pattern, which gives the yellow color. The arrangement of carbon atoms in'
```

A simple chat in Python:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path)

config = {'num_groups': 3, 'group_size': 5, 'diversity_penalty': 1.1}
pipe.set_generation_config(config)

pipe.start_chat()
while True:
    print('question:')
    prompt = input()
    if prompt == 'Stop!':
        break
    print(pipe(prompt))
pipe.finish_chat()
```

[Review comment] I tried it and it does not work like this for me, should it work? It works for me if config is a GenerationConfig object obtained with get_generation_config beforehand.

[Review comment] Is it possible to add a description of the options which we can configure in the config?
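
Following the review note above, a minimal sketch of the alternative that was reported to work: fetch the GenerationConfig object with `get_generation_config()`, modify it, and set it back. The attribute names are assumed to mirror the C++ fields shown in the C++ examples below:

```python
import openvino_genai as ov_genai

pipe = ov_genai.LLMPipeline(model_path, "CPU")

# Take the pipeline's current generation config and tweak it for grouped beam search.
config = pipe.get_generation_config()
config.max_new_tokens = 256
config.num_groups = 3
config.group_size = 5
config.diversity_penalty = 1.1
pipe.set_generation_config(config)

print(pipe.generate("The Sun is yellow because"))
```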

A test to compare the outputs with Hugging Face outputs is sketched below.
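
A minimal sketch of such a test, assuming greedy decoding on both sides and the TinyLlama model exported with optimum-cli as above; the model folder and token count are illustrative:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import openvino_genai as ov_genai

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model_path = "TinyLlama-1.1B-Chat-v1.0"  # folder produced by optimum-cli above
prompt = "The Sun is yellow because"

# Reference generation with Hugging Face transformers (greedy, 30 new tokens).
hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
hf_model = AutoModelForCausalLM.from_pretrained(model_id)
inputs = hf_tokenizer(prompt, return_tensors="pt")
hf_ids = hf_model.generate(**inputs, max_new_tokens=30, do_sample=False)
hf_text = hf_tokenizer.decode(hf_ids[0, inputs["input_ids"].shape[1]:], skip_special_tokens=True)

# The same prompt through the GenAI pipeline.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
ov_text = pipe.generate(prompt, max_new_tokens=30)

assert hf_text == ov_text, f"HF: {hf_text!r} vs GenAI: {ov_text!r}"
```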

### C++

A minimalistic example:
```cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");
    std::cout << pipe.generate("The Sun is yellow because");
}
```

Using group beam search decoding:
```cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");

    ov::GenerationConfig config = pipe.get_generation_config();
    config.max_new_tokens = 256;
    config.num_groups = 3;
    config.group_size = 5;
    config.diversity_penalty = 1.0f;

    std::cout << pipe.generate("The Sun is yellow because", config);
}
```

The simplest chat in C++:
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string prompt;

    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");

    pipe.start_chat();
    for (;;) {
        std::cout << "question:\n";
        std::getline(std::cin, prompt);
        if (prompt == "Stop!")
            break;

        std::cout << pipe(prompt) << std::endl;
    }
    pipe.finish_chat();
}
```

Specifying generation_config to use grouped beam search:
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string prompt;

    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");

    ov::GenerationConfig config = pipe.get_generation_config();
    config.max_new_tokens = 256;
    config.num_groups = 3;
    config.group_size = 5;
    config.diversity_penalty = 1.0f;

    auto streamer = [](std::string word) { std::cout << word << std::flush; };

    pipe.start_chat();
    for (;;) {
        std::cout << "question:\n";
        std::getline(std::cin, prompt);
        if (prompt == "Stop!")
            break;

        auto answer = pipe(prompt, config, streamer);
        // no need to print the answer, the streamer will do that
    }
    pipe.finish_chat();
}
```

Streaming example with a lambda function:
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");

    auto streamer = [](std::string word) { std::cout << word << std::flush; };
    std::cout << pipe.generate("The Sun is yellow because", streamer);
}
```

Streaming with a custom class:
``` cpp
#include "openvino/genai/streamer_base.hpp"
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

class CustomStreamer: public StreamerBase {
public:
    void put(int64_t token) {
        /* custom decoding/tokens processing code
        tokens_cache.push_back(token);
        std::string text = m_tokenizer.decode(tokens_cache);
        ...
        */
    };

    void end() {
        /* custom finalization */
    };
};

int main(int argc, char* argv[]) {
    CustomStreamer custom_streamer;

    std::string model_path = argv[1];
    ov::LLMPipeline pipe(model_path, "CPU");
    std::cout << pipe.generate("The Sun is yellow because", custom_streamer);
}
```

[Review comment] @Wovchena I suppose it should be installed after the tokenizers, because text_generation/causal_lm/cpp/requirements.txt forces installation of the released version of the tokenizers.