
Commit 6b84952

causal_lm: migrate to string tensors
Wovchena committed Dec 20, 2023
1 parent deff497 commit 6b84952
Showing 7 changed files with 30 additions and 30 deletions.
12 changes: 6 additions & 6 deletions text_generation/causal_lm/cpp/CMakeLists.txt
@@ -8,17 +8,17 @@ project(causal_lm)
 list(APPEND CUSTOM_OPERATIONS tokenizer)
 add_subdirectory(../../../thirdparty/openvino_contrib/modules/custom_operations/ "${CMAKE_CURRENT_BINARY_DIR}/custom_operations/")
 
-add_executable(causal_lm causal_lm.cpp)
-target_compile_definitions(causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
+add_executable(greedy_causal_lm greedy_causal_lm.cpp)
+target_compile_definitions(greedy_causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
 find_package(OpenVINO REQUIRED COMPONENTS Runtime)
-target_link_libraries(causal_lm PRIVATE openvino::runtime user_ov_extensions)
-set_target_properties(causal_lm PROPERTIES CXX_STANDARD 17)
-set_target_properties(causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
+target_link_libraries(greedy_causal_lm PRIVATE openvino::runtime)
+set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD 17)
+set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
 
 add_executable(beam_search_causal_lm beam_search_causal_lm.cpp)
 target_compile_definitions(beam_search_causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
 target_include_directories(beam_search_causal_lm PRIVATE ./)
 find_package(OpenVINO REQUIRED COMPONENTS Runtime)
-target_link_libraries(beam_search_causal_lm PRIVATE openvino::runtime user_ov_extensions)
+target_link_libraries(beam_search_causal_lm PRIVATE openvino::runtime)
 set_target_properties(beam_search_causal_lm PROPERTIES CXX_STANDARD 17)
 set_target_properties(beam_search_causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
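With string tensors handled natively by the OpenVINO runtime, the samples only need openvino::runtime; the link against user_ov_extensions is dropped. The USER_OV_EXTENSIONS_PATH definition is kept because the converted tokenizer and detokenizer models still rely on custom operations that are loaded at runtime. A minimal sketch of that loading step, assuming a sample's main() (not part of the diff):

#include <openvino/openvino.hpp>

int main(int argc, char* argv[]) {
    ov::Core core;
    // USER_OV_EXTENSIONS_PATH expands to the custom-operations library path
    // injected by target_compile_definitions in the CMakeLists.txt above.
    core.add_extension(USER_OV_EXTENSIONS_PATH);
    // ...compile the tokenizer, detokenizer and LLM from here on.
    return 0;
}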
25 changes: 14 additions & 11 deletions text_generation/causal_lm/cpp/README.md
@@ -30,19 +30,22 @@ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ && cmake --build ./build/ --c
 ## Supported models
 
 1. LLaMA 2
-   1. https://huggingface.co/meta-llama/Llama-2-7b-hf
-   2. https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
-   3. https://huggingface.co/meta-llama/Llama-2-13b-hf
    4. https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
-   5. https://huggingface.co/meta-llama/Llama-2-70b-hf
+   3. https://huggingface.co/meta-llama/Llama-2-13b-hf
+   2. https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
+   1. https://huggingface.co/meta-llama/Llama-2-7b-hf
    6. https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-2. OpenLLaMA
-   1. https://huggingface.co/openlm-research/open_llama_3b
-   2. https://huggingface.co/openlm-research/open_llama_7b
+   5. https://huggingface.co/meta-llama/Llama-2-70b-hf
+2. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
+3. OpenLLaMA
    3. https://huggingface.co/openlm-research/open_llama_13b
+   1. https://huggingface.co/openlm-research/open_llama_3b
    4. https://huggingface.co/openlm-research/open_llama_3b_v2
+   2. https://huggingface.co/openlm-research/open_llama_7b
    5. https://huggingface.co/openlm-research/open_llama_7b_v2
-3. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
+4. TinyLlama
+   1. https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6
+   2. https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1195k-token-2.5T
 
 This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.

@@ -53,7 +56,7 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upg

 ```sh
 source <INSTALL_DIR>/setupvars.sh
-python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
+python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
 python -m pip uninstall openvino # Uninstall openvino from PyPI because there's one from the archive installed
 python ../../../llm_bench/python/convert.py --model_id meta-llama/Llama-2-7b-hf --output_dir ./Llama-2-7b-hf/ --precision FP16 --stateful
 python ./convert_tokenizers.py --streaming-detokenizer ./Llama-2-7b-hf/pytorch/dldt/FP16/
@@ -62,11 +65,11 @@ python ./convert_tokenizers.py --streaming-detokenizer ./Llama-2-7b-hf/pytorch/d
 ## Run
 
 Usage:
-1. `causal_lm <MODEL_DIR> "<PROMPT>"`
+1. `greedy_causal_lm <MODEL_DIR> "<PROMPT>"`
 2. `beam_search_causal_lm <MODEL_DIR> "<PROMPT>"`
 
 Examples:
-1. `./build/causal_lm ./Llama-2-7b-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"`
+1. `./build/greedy_causal_lm ./Llama-2-7b-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"`
 2. `./build/beam_search_causal_lm ./Llama-2-7b-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"`
 
 To enable Unicode characters for Windows cmd, open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
9 changes: 4 additions & 5 deletions text_generation/causal_lm/cpp/beam_search_causal_lm.cpp
@@ -3,12 +3,11 @@

 #include <group_beam_searcher.hpp>
 #include <openvino/openvino.hpp>
-#include <openvino_extensions/strings.hpp>
 
 namespace {
-std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string_view prompt) {
-    ov::Tensor destination = tokenizer.get_input_tensor();
-    openvino_extensions::pack_strings(std::array{prompt}, destination);
+std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string&& prompt) {
+    constexpr size_t BATCH_SIZE = 1;
+    tokenizer.set_input_tensor(ov::Tensor{ov::element::string, {BATCH_SIZE}, &prompt});
     tokenizer.infer();
     return {tokenizer.get_tensor("input_ids"), tokenizer.get_tensor("attention_mask")};
 }
@@ -21,7 +20,7 @@ std::string detokenize(ov::InferRequest& detokenizer, const std::vector<int64_t>
         inp.data<int64_t>()[idx] = tokens.at(idx);
     }
     detokenizer.infer();
-    return openvino_extensions::unpack_strings(detokenizer.get_output_tensor()).front();
+    return detokenizer.get_output_tensor().data<std::string>()[0];
 }
 }
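The change above replaces openvino_extensions::pack_strings/unpack_strings with native string tensors: the prompt is wrapped in an ov::element::string tensor of shape {1} that points at the caller's std::string, and the detokenized text is read back directly as std::string elements of the output tensor. A self-contained sketch of the new round trip, assuming already-converted tokenizer models on CPU (paths and prompt are illustrative, not part of the diff):

#include <openvino/openvino.hpp>
#include <algorithm>
#include <iostream>
#include <string>

int main() {
    ov::Core core;
    core.add_extension(USER_OV_EXTENSIONS_PATH);  // custom ops needed by the tokenizer models
    ov::InferRequest tokenizer = core.compile_model(
        "./Llama-2-7b-hf/pytorch/dldt/FP16/openvino_tokenizer.xml", "CPU").create_infer_request();
    ov::InferRequest detokenizer = core.compile_model(
        "./Llama-2-7b-hf/pytorch/dldt/FP16/openvino_detokenizer.xml", "CPU").create_infer_request();

    std::string prompt = "Why is the Sun yellow?";
    // A shape {1} string tensor that refers to (does not copy) prompt.
    tokenizer.set_input_tensor(ov::Tensor{ov::element::string, {1}, &prompt});
    tokenizer.infer();
    ov::Tensor input_ids = tokenizer.get_tensor("input_ids");

    // Round trip: feed the token ids straight back into the detokenizer.
    ov::Tensor inp = detokenizer.get_input_tensor();
    inp.set_shape(input_ids.get_shape());
    std::copy_n(input_ids.data<int64_t>(), input_ids.get_size(), inp.data<int64_t>());
    detokenizer.infer();
    // The output is a string tensor whose elements are std::string.
    std::cout << detokenizer.get_output_tensor().data<std::string>()[0] << '\n';
    return 0;
}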

2 changes: 1 addition & 1 deletion text_generation/causal_lm/cpp/convert_tokenizers.py
@@ -16,7 +16,7 @@ def main():
     parser.add_argument('model_dir', type=pathlib.Path)
     args = parser.parse_args()
     tokenizer, detokenizer = openvino_tokenizers.convert_tokenizer(
-        transformers.AutoTokenizer.from_pretrained(args.model_dir),
+        transformers.AutoTokenizer.from_pretrained(args.model_dir, trust_remote_code=True),
         with_detokenizer=True, streaming_detokenizer=args.streaming_detokenizer)
     openvino.save_model(tokenizer, args.model_dir / "openvino_tokenizer.xml")
     openvino.save_model(detokenizer, args.model_dir / "openvino_detokenizer.xml")
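Passing trust_remote_code=True lets AutoTokenizer also load tokenizers whose implementation ships inside the model repository rather than in transformers itself; models with built-in tokenizers are unaffected. The invocation stays the same, e.g.:

python ./convert_tokenizers.py --streaming-detokenizer ./Llama-2-7b-hf/pytorch/dldt/FP16/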
text_generation/causal_lm/cpp/{causal_lm.cpp → greedy_causal_lm.cpp}
@@ -2,13 +2,11 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #include <openvino/openvino.hpp>
-#include <openvino_extensions/strings.hpp>
 
 namespace {
-std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string_view prompt) {
+std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string&& prompt) {
     constexpr size_t BATCH_SIZE = 1;
-    ov::Tensor destination = tokenizer.get_input_tensor();
-    openvino_extensions::pack_strings(std::array<std::string_view, BATCH_SIZE>{prompt}, destination);
+    tokenizer.set_input_tensor(ov::Tensor{ov::element::string, {BATCH_SIZE}, &prompt});
     tokenizer.infer();
     return {tokenizer.get_tensor("input_ids"), tokenizer.get_tensor("attention_mask")};
 }
@@ -19,7 +17,7 @@ void print_token(ov::InferRequest& detokenizer, int64_t out_token) {
     inp.set_shape({BATCH_SIZE, 1});
     inp.data<int64_t>()[0] = out_token;
     detokenizer.infer();
-    std::cout << openvino_extensions::unpack_strings(detokenizer.get_output_tensor()).front() << std::flush;
+    std::cout << detokenizer.get_output_tensor().data<std::string>()[0] << std::flush;
 }
 }
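Note that the new tokenize takes std::string&& and the string tensor stores a pointer to that string rather than a copy, so the prompt must stay alive until tokenizer.infer() returns. Binding an rvalue reference at the call site guarantees exactly that: a temporary built from a C string lives for the whole full-expression. A sketch of the implied call site (the argv indexing is an assumption, not from the diff):

// The char* argument materializes a temporary std::string; the rvalue
// reference keeps it alive across the call, so the string tensor's pointer
// into it remains valid while tokenizer.infer() runs.
auto [input_ids, attention_mask] = tokenize(tokenizer, argv[2]);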

2 changes: 1 addition & 1 deletion text_generation/causal_lm/cpp/set_up_and_run.sh
@@ -23,4 +23,4 @@ cmake --build ./build/ --config Release -j
 wait
 
 python ./convert_tokenizers.py ./open_llama_3b_v2/pytorch/dldt/FP16/ --streaming-detokenizer
-./build/causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/ "return 0"
+./build/greedy_causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/ "return 0"
