
Commit 6b84952

causal_lm: migrate to string tensors
Wovchena committed Dec 20, 2023
1 parent deff497 commit 6b84952
Showing 7 changed files with 30 additions and 30 deletions.
12 changes: 6 additions & 6 deletions text_generation/causal_lm/cpp/CMakeLists.txt
@@ -8,17 +8,17 @@ project(causal_lm)
 list(APPEND CUSTOM_OPERATIONS tokenizer)
 add_subdirectory(../../../thirdparty/openvino_contrib/modules/custom_operations/ "${CMAKE_CURRENT_BINARY_DIR}/custom_operations/")
 
-add_executable(causal_lm causal_lm.cpp)
-target_compile_definitions(causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
+add_executable(greedy_causal_lm greedy_causal_lm.cpp)
+target_compile_definitions(greedy_causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
 find_package(OpenVINO REQUIRED COMPONENTS Runtime)
-target_link_libraries(causal_lm PRIVATE openvino::runtime user_ov_extensions)
-set_target_properties(causal_lm PROPERTIES CXX_STANDARD 17)
-set_target_properties(causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
+target_link_libraries(greedy_causal_lm PRIVATE openvino::runtime)
+set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD 17)
+set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
 
 add_executable(beam_search_causal_lm beam_search_causal_lm.cpp)
 target_compile_definitions(beam_search_causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
 target_include_directories(beam_search_causal_lm PRIVATE ./)
 find_package(OpenVINO REQUIRED COMPONENTS Runtime)
-target_link_libraries(beam_search_causal_lm PRIVATE openvino::runtime user_ov_extensions)
+target_link_libraries(beam_search_causal_lm PRIVATE openvino::runtime)
 set_target_properties(beam_search_causal_lm PROPERTIES CXX_STANDARD 17)
 set_target_properties(beam_search_causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
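With string tensors handled natively by the OpenVINO runtime, the samples only need openvino::runtime; the link against user_ov_extensions is dropped. The USER_OV_EXTENSIONS_PATH definition is kept because the converted tokenizer and detokenizer models still rely on custom operations that are loaded at runtime. A minimal sketch of that loading step, assuming a sample's main() (not part of the diff):

#include <openvino/openvino.hpp>

int main(int argc, char* argv[]) {
    ov::Core core;
    // USER_OV_EXTENSIONS_PATH expands to the custom-operations library path
    // injected by target_compile_definitions in the CMakeLists.txt above.
    core.add_extension(USER_OV_EXTENSIONS_PATH);
    // ...compile the tokenizer, detokenizer and LLM from here on.
    return 0;
}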
25 changes: 14 additions & 11 deletions text_generation/causal_lm/cpp/README.md
@@ -30,19 +30,22 @@ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ && cmake --build ./build/ --c
 ## Supported models
 
 1. LLaMA 2
-   1. https://huggingface.co/meta-llama/Llama-2-7b-hf
-   2. https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
-   3. https://huggingface.co/meta-llama/Llama-2-13b-hf
    4. https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
-   5. https://huggingface.co/meta-llama/Llama-2-70b-hf
+   3. https://huggingface.co/meta-llama/Llama-2-13b-hf
+   2. https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
+   1. https://huggingface.co/meta-llama/Llama-2-7b-hf
    6. https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-2. OpenLLaMA
-   1. https://huggingface.co/openlm-research/open_llama_3b
-   2. https://huggingface.co/openlm-research/open_llama_7b
+   5. https://huggingface.co/meta-llama/Llama-2-70b-hf
+2. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
+3. OpenLLaMA
    3. https://huggingface.co/openlm-research/open_llama_13b
+   1. https://huggingface.co/openlm-research/open_llama_3b
    4. https://huggingface.co/openlm-research/open_llama_3b_v2
+   2. https://huggingface.co/openlm-research/open_llama_7b
    5. https://huggingface.co/openlm-research/open_llama_7b_v2
-3. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
+4. TinyLlama
+   1. https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6
+   2. https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1195k-token-2.5T
 
 This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.

@@ -53,7 +56,7 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upg

 ```sh
 source <INSTALL_DIR>/setupvars.sh
-python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
+python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
 python -m pip uninstall openvino # Uninstall openvino from PyPI because there's one from the archive installed
 python ../../../llm_bench/python/convert.py --model_id meta-llama/Llama-2-7b-hf --output_dir ./Llama-2-7b-hf/ --precision FP16 --stateful
 python ./convert_tokenizers.py --streaming-detokenizer ./Llama-2-7b-hf/pytorch/dldt/FP16/
@@ -62,11 +65,11 @@ python ./convert_tokenizers.py --streaming-detokenizer ./Llama-2-7b-hf/pytorch/d
 ## Run
 
 Usage:
-1. `causal_lm <MODEL_DIR> "<PROMPT>"`
+1. `greedy_causal_lm <MODEL_DIR> "<PROMPT>"`
 2. `beam_search_causal_lm <MODEL_DIR> "<PROMPT>"`
 
 Examples:
-1. `./build/causal_lm ./Llama-2-7b-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"`
+1. `./build/greedy_causal_lm ./Llama-2-7b-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"`
 2. `./build/beam_search_causal_lm ./Llama-2-7b-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"`
 
 To enable Unicode characters for Windows cmd, open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
9 changes: 4 additions & 5 deletions text_generation/causal_lm/cpp/beam_search_causal_lm.cpp
@@ -3,12 +3,11 @@

 #include <group_beam_searcher.hpp>
 #include <openvino/openvino.hpp>
-#include <openvino_extensions/strings.hpp>
 
 namespace {
-std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string_view prompt) {
-    ov::Tensor destination = tokenizer.get_input_tensor();
-    openvino_extensions::pack_strings(std::array{prompt}, destination);
+std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string&& prompt) {
+    constexpr size_t BATCH_SIZE = 1;
+    tokenizer.set_input_tensor(ov::Tensor{ov::element::string, {BATCH_SIZE}, &prompt});
     tokenizer.infer();
     return {tokenizer.get_tensor("input_ids"), tokenizer.get_tensor("attention_mask")};
 }
@@ -21,7 +20,7 @@ std::string detokenize(ov::InferRequest& detokenizer, const std::vector<int64_t>
         inp.data<int64_t>()[idx] = tokens.at(idx);
     }
     detokenizer.infer();
-    return openvino_extensions::unpack_strings(detokenizer.get_output_tensor()).front();
+    return detokenizer.get_output_tensor().data<std::string>()[0];
 }
 }
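The change above replaces openvino_extensions::pack_strings/unpack_strings with native string tensors: the prompt is wrapped in an ov::element::string tensor of shape {1} that points at the caller's std::string, and the detokenized text is read back directly as std::string elements of the output tensor. A self-contained sketch of the new round trip, assuming already-converted tokenizer models on CPU (paths and prompt are illustrative, not part of the diff):

#include <openvino/openvino.hpp>
#include <algorithm>
#include <iostream>
#include <string>

int main() {
    ov::Core core;
    core.add_extension(USER_OV_EXTENSIONS_PATH);  // custom ops needed by the tokenizer models
    ov::InferRequest tokenizer = core.compile_model(
        "./Llama-2-7b-hf/pytorch/dldt/FP16/openvino_tokenizer.xml", "CPU").create_infer_request();
    ov::InferRequest detokenizer = core.compile_model(
        "./Llama-2-7b-hf/pytorch/dldt/FP16/openvino_detokenizer.xml", "CPU").create_infer_request();

    std::string prompt = "Why is the Sun yellow?";
    // A shape {1} string tensor that refers to (does not copy) prompt.
    tokenizer.set_input_tensor(ov::Tensor{ov::element::string, {1}, &prompt});
    tokenizer.infer();
    ov::Tensor input_ids = tokenizer.get_tensor("input_ids");

    // Round trip: feed the token ids straight back into the detokenizer.
    ov::Tensor inp = detokenizer.get_input_tensor();
    inp.set_shape(input_ids.get_shape());
    std::copy_n(input_ids.data<int64_t>(), input_ids.get_size(), inp.data<int64_t>());
    detokenizer.infer();
    // The output is a string tensor whose elements are std::string.
    std::cout << detokenizer.get_output_tensor().data<std::string>()[0] << '\n';
    return 0;
}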

2 changes: 1 addition & 1 deletion text_generation/causal_lm/cpp/convert_tokenizers.py
@@ -16,7 +16,7 @@ def main():
     parser.add_argument('model_dir', type=pathlib.Path)
     args = parser.parse_args()
     tokenizer, detokenizer = openvino_tokenizers.convert_tokenizer(
-        transformers.AutoTokenizer.from_pretrained(args.model_dir),
+        transformers.AutoTokenizer.from_pretrained(args.model_dir, trust_remote_code=True),
         with_detokenizer=True, streaming_detokenizer=args.streaming_detokenizer)
     openvino.save_model(tokenizer, args.model_dir / "openvino_tokenizer.xml")
     openvino.save_model(detokenizer, args.model_dir / "openvino_detokenizer.xml")
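Passing trust_remote_code=True lets AutoTokenizer also load tokenizers whose implementation ships inside the model repository rather than in transformers itself; models with built-in tokenizers are unaffected. The invocation stays the same, e.g.:

python ./convert_tokenizers.py --streaming-detokenizer ./Llama-2-7b-hf/pytorch/dldt/FP16/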
text_generation/causal_lm/cpp/{causal_lm.cpp → greedy_causal_lm.cpp}
@@ -2,13 +2,11 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #include <openvino/openvino.hpp>
-#include <openvino_extensions/strings.hpp>
 
 namespace {
-std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string_view prompt) {
+std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string&& prompt) {
     constexpr size_t BATCH_SIZE = 1;
-    ov::Tensor destination = tokenizer.get_input_tensor();
-    openvino_extensions::pack_strings(std::array<std::string_view, BATCH_SIZE>{prompt}, destination);
+    tokenizer.set_input_tensor(ov::Tensor{ov::element::string, {BATCH_SIZE}, &prompt});
     tokenizer.infer();
     return {tokenizer.get_tensor("input_ids"), tokenizer.get_tensor("attention_mask")};
 }
@@ -19,7 +17,7 @@ void print_token(ov::InferRequest& detokenizer, int64_t out_token) {
     inp.set_shape({BATCH_SIZE, 1});
     inp.data<int64_t>()[0] = out_token;
     detokenizer.infer();
-    std::cout << openvino_extensions::unpack_strings(detokenizer.get_output_tensor()).front() << std::flush;
+    std::cout << detokenizer.get_output_tensor().data<std::string>()[0] << std::flush;
 }
 }
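Note that the new tokenize takes std::string&& and the string tensor stores a pointer to that string rather than a copy, so the prompt must stay alive until tokenizer.infer() returns. Binding an rvalue reference at the call site guarantees exactly that: a temporary built from a C string lives for the whole full-expression. A sketch of the implied call site (the argv indexing is an assumption, not from the diff):

// The char* argument materializes a temporary std::string; the rvalue
// reference keeps it alive across the call, so the string tensor's pointer
// into it remains valid while tokenizer.infer() runs.
auto [input_ids, attention_mask] = tokenize(tokenizer, argv[2]);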

2 changes: 1 addition & 1 deletion text_generation/causal_lm/cpp/set_up_and_run.sh
@@ -23,4 +23,4 @@ cmake --build ./build/ --config Release -j
 wait
 
 python ./convert_tokenizers.py ./open_llama_3b_v2/pytorch/dldt/FP16/ --streaming-detokenizer
-./build/causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/ "return 0"
+./build/greedy_causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/ "return 0"
