Test GitHub actions #465

Closed · wants to merge 123 commits

Changes from 1 commit

Commits (123)
ba91fde
initial generate
pavel-esir Mar 26, 2024
9d85a0e
LLM pipeline
pavel-esir Mar 28, 2024
b21c6c1
Added calculating for several batches
pavel-esir Apr 2, 2024
e52e90d
Greedy search works
pavel-esir Apr 3, 2024
745a804
rename to GenerationConfig
pavel-esir Apr 4, 2024
8895ed0
Add fluent interface
pavel-esir Apr 5, 2024
b24977d
Update text_generation/causal_lm/cpp/generate_pipeline/generate_pipel…
pavel-esir Apr 5, 2024
c933ca0
cosmetic changes in main
pavel-esir Apr 5, 2024
c43e901
greedy search with batches and left padding works
pavel-esir Apr 10, 2024
5a914f6
combine LLModel with LLMPipeline
pavel-esir Apr 10, 2024
c1e0c9d
wip: enable calling tokenize/detokenize for LLMPipeline
pavel-esir Apr 10, 2024
8d66353
add callback to generate
pavel-esir Apr 11, 2024
fa12da7
cleanup generate_sample.cpp
pavel-esir Apr 11, 2024
5ceb9d5
add speculative decoding
pavel-esir Apr 16, 2024
a5083c7
separate Tokenizer
pavel-esir Apr 17, 2024
7692160
wip
pavel-esir Apr 23, 2024
d3f6339
add start/stop conversation
pavel-esir Apr 24, 2024
3776433
use text in streamer instead of raw tokens
pavel-esir Apr 23, 2024
964a5e8
add apply_chat_template
pavel-esir Apr 23, 2024
e57aa4c
fix difference between accumulating conversation as text and keeping …
pavel-esir Apr 26, 2024
d0c1341
cleanup
pavel-esir Apr 26, 2024
8dcea1f
add Jinja2cpp submodule
pavel-esir Apr 26, 2024
754a462
add ov namespace
pavel-esir May 2, 2024
9b19c6f
return scores for batched outputs
pavel-esir May 2, 2024
9bf6caa
add AnyMap
pavel-esir May 3, 2024
39fd73c
Merge remote-tracking branch 'upstream/master' into generate_pipeline
pavel-esir May 3, 2024
63d8f6d
cleanup
pavel-esir May 3, 2024
a833760
before moving to pimpl
pavel-esir May 6, 2024
1681654
move to separate include & src
pavel-esir May 6, 2024
9fe73c6
pimpl implementation
pavel-esir May 6, 2024
053708f
temporary disable jinja2cpp
pavel-esir May 6, 2024
bd6849a
add python api draft, hide implementations from user & refactor imple…
pavel-esir May 7, 2024
62c471e
extract decoding methods to separate files
pavel-esir May 7, 2024
f1d54f4
extended python api, added python api test
pavel-esir May 7, 2024
3c82e11
remove call method
pavel-esir May 8, 2024
5543cee
init
Wovchena May 6, 2024
abb8835
add_subdirectory
Wovchena May 7, 2024
0998abc
add files
Wovchena May 8, 2024
15492c4
add __init__.py
Wovchena May 8, 2024
005d3fb
removed set_streamer
pavel-esir May 8, 2024
cc44bc8
use std::optional
pavel-esir May 8, 2024
d8cab05
started to add Readme docs
pavel-esir May 8, 2024
2535394
reorder Readme
pavel-esir May 8, 2024
95c1bfb
rm generate_pipeline/python
Wovchena May 9, 2024
4510f71
update Readme; cleanup LLMPipeline and add docstring
pavel-esir May 9, 2024
507bc49
refactor folder structure
pavel-esir May 9, 2024
af747d4
cleanup generation_config and ov::Tokenizer
pavel-esir May 9, 2024
c6620d9
move includes to a separate openvino/genai folder
pavel-esir May 10, 2024
59c3e0b
Merge branch 'generate_pipeline' into package
Wovchena May 10, 2024
be84345
align names
Wovchena May 10, 2024
bced64a
Dont modify text_generation/causal_lm/cpp/CMakeLists.txt
Wovchena May 10, 2024
f4e82b6
rm -r text_generation/causal_lm/cpp/generate_pipeline/python-bindings/
Wovchena May 10, 2024
5b2b0ca
fix build
Wovchena May 10, 2024
0dd8f59
add tokenizers only once
Wovchena May 10, 2024
23638ff
change cmake.source-dir
Wovchena May 10, 2024
d8c5349
restore openvino/genai inits
Wovchena May 10, 2024
24faefe
Integrate JinjaCpp
ilya-lavrenov May 10, 2024
598dda3
install genai lib
Wovchena May 10, 2024
f274b93
Merge pull request #2 from ilya-lavrenov/jinja-integration-pavel
pavel-esir May 10, 2024
02d0eae
import openvino for win and lin
Wovchena May 10, 2024
e6695f3
Merge branch 'generate_pipeline' into package
Wovchena May 10, 2024
a27c5a7
put the line back
Wovchena May 10, 2024
0849c41
Added cmake build type before project clause
ilya-lavrenov May 10, 2024
34cddff
one line properties
Wovchena May 10, 2024
023cf1e
Merge pull request #3 from ilya-lavrenov/cmake-build-type
pavel-esir May 10, 2024
6a5d750
Export API symbols
ilya-lavrenov May 10, 2024
27f385e
Merge pull request #4 from ilya-lavrenov/generate_pipeline
pavel-esir May 10, 2024
a9332f0
Merge branch 'generate_pipeline' into package
Wovchena May 10, 2024
9ef488c
rename
Wovchena May 10, 2024
4fad7d5
add .github/workflows/genai_lib.yml
Wovchena May 10, 2024
51e03a2
on: pull_request
Wovchena May 10, 2024
e23a7bb
spelling
Wovchena May 10, 2024
fc5b753
install openvino
Wovchena May 10, 2024
09f8806
add syntactic sugar for generate, optimize value passing by reference
pavel-esir May 10, 2024
af22a8a
remove speculative decoding
pavel-esir May 11, 2024
e7db7e8
update
Wovchena May 13, 2024
f279363
add rpath
Wovchena May 13, 2024
83d77c8
add rpath to libopenvino.so
Wovchena May 13, 2024
167f924
py_generate_pipeline
Wovchena May 13, 2024
a111a3f
reorder tokenizer.cpp, add comments to BaseStreamer
pavel-esir May 11, 2024
813d80a
install centos7
Wovchena May 13, 2024
6227b65
install nightly
Wovchena May 13, 2024
74fc107
Merge branch 'generate_pipeline' into package
Wovchena May 13, 2024
9b83a7e
propagate _GLIBCXX_USE_CXX11_ABI
Wovchena May 13, 2024
2d15752
Populate python with the libraries to allow skipping wheel installation
Wovchena May 13, 2024
8025554
run setupvars
Wovchena May 13, 2024
2b14286
update .gitignore, install numpy
Wovchena May 13, 2024
1c11bc7
quotes
Wovchena May 13, 2024
e7fce82
fix PYTHONPATH
Wovchena May 13, 2024
64608d1
fix PYTHONPATH
Wovchena May 13, 2024
43b87c7
quotes
Wovchena May 13, 2024
fef9674
reorder vars
Wovchena May 14, 2024
b21286c
openvino.genai-
Wovchena May 14, 2024
d393f89
Merge pull request #1 from Wovchena/package
pavel-esir May 14, 2024
2b8954d
Merge branch 'master' into generate_pipeline
pavel-esir May 14, 2024
11e872b
Update CMakeLists.txt
pavel-esir May 14, 2024
442dcbf
move group beam searcher to src
pavel-esir May 13, 2024
53d534e
Update .gitignore (#5)
Wovchena May 15, 2024
dcb4b86
Merge remote-tracking branch 'origin/generate_pipeline' into generate…
pavel-esir May 15, 2024
72c045e
fixed difference between old greedy sample and generate
pavel-esir May 15, 2024
11fbaa2
tokenizer minor fixes
pavel-esir May 15, 2024
264e99f
apply comments
pavel-esir May 15, 2024
11032b4
remove accidentally added test_cpp_samples.py
pavel-esir May 15, 2024
7d0c80b
fix build
pavel-esir May 15, 2024
2e3cd73
fix causal_lm comparison error
pavel-esir May 15, 2024
e7fa974
fix different outputs
pavel-esir May 15, 2024
78d0b88
Archive (#7)
Wovchena May 20, 2024
5eb59ea
add tests
pavel-esir May 16, 2024
ce4eb00
Apply suggestions from code review
pavel-esir May 22, 2024
aa90e9d
names correction
pavel-esir May 22, 2024
54cbb52
update URL_HASH
Wovchena May 22, 2024
82a9449
remove submodules from .gitmodules
Wovchena May 22, 2024
5a0079b
install openvino_tokenizers for genai_python_lib
pavel-esir May 22, 2024
73e4312
Update Jinja2Cpp fork commit
Wovchena May 22, 2024
75b7c37
remove group_beam_searcher.hpp; copy fast_tokenizer
pavel-esir May 22, 2024
70f1177
Fix archive (#8)
Wovchena May 23, 2024
da729ba
Apply suggestions from code review
pavel-esir May 24, 2024
28c313b
add groups to GenerationConfig docstring
pavel-esir May 24, 2024
c395a8d
refactor namespace ov::* -> ov::genai::*
pavel-esir May 24, 2024
bbc8c25
removed ov_tokenizers_path when ov::genai::Tokenizer is passed to LLMP…
pavel-esir May 24, 2024
9e37273
Add sampling decoding (#6)
as-suvorov May 27, 2024
81ec069
Fix library loading by updating dependencies (#10)
Wovchena May 28, 2024
88c44fe
Add extension near to genai library, tokenizers from fork (#11)
Wovchena May 29, 2024
names correction
pavel-esir committed May 22, 2024
commit aa90e9d229cc2357acee7e1d202c1a7d5871a63b
10 changes: 6 additions & 4 deletions .github/workflows/genai_python_lib.yml
@@ -16,11 +16,13 @@ jobs:
     - run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
     - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j
     - run: python -m pip install --pre openvino --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly # Can't load CentOS libraries from the archive
-    - run: PYTHONPATH=./src/python/ python -c "from openvino_genai.py_generate_pipeline import LLMPipeline"
+    - run: PYTHONPATH=./src/python/ python -c "from openvino_genai import LLMPipeline"
     - run: source ./ov/setupvars.sh && python -m pip install --pre . --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+    - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
     - run: python -c "from openvino_genai import LLMPipeline"
-    - name: Install optimum-cli and run for each model
+    - name: GenAI Python API tests
      run: |
        source ./ov/setupvars.sh
+        cd ./tests/
        python -m pip install -r requirements.txt
        models=$(python3 generate_models.py)
@@ -47,6 +49,6 @@ jobs:
     - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
     - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config Release -j
     - run: python -m pip install "numpy<1.27"
-    - run: set "PYTHONPATH=./src/python;" && call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -c "from openvino_genai.py_generate_pipeline import LLMPipeline" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.
+    - run: set "PYTHONPATH=./src/python;" && call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -c "from openvino_genai import LLMPipeline" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.
     - run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install .
-    - run: python -c "from openvino_genai.py_generate_pipeline import LLMPipeline"
+    - run: python -c "from openvino_genai import LLMPipeline"
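The workflow above smoke-tests the package with `python -c "from openvino_genai import LLMPipeline"`. The same check can be written as a small helper — a hypothetical utility, demonstrated here against a stdlib module since `openvino_genai` may not be installed locally:

```python
import importlib

def has_symbol(module_name: str, symbol: str) -> bool:
    """True if `module_name` imports cleanly and exposes `symbol`,
    mirroring the workflow's `python -c "from ... import ..."` check."""
    try:
        module = importlib.import_module(module_name)
    except ImportError:
        return False
    return hasattr(module, symbol)

# Stdlib stand-in; in CI the real target is ("openvino_genai", "LLMPipeline").
print(has_symbol("json", "dumps"))  # True
```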
4 changes: 2 additions & 2 deletions src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -73,7 +73,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
     */
     LLMPipeline(std::string& path, std::string device="CPU",
                 const ov::AnyMap& plugin_config={},
-                const std::string& ov_tokenizer_path="");
+                const std::string& ov_tokenizers_path="");

     /**
     * @brief Constructs a LLMPipeline when ov::Tokenizer is initialized manually using file from the different dirs.
@@ -88,7 +88,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
         const ov::Tokenizer& tokenizer,
         const std::string device="CPU",
         const ov::AnyMap& plugin_config = {},
-        const std::string& ov_tokenizer_path=""
+        const std::string& ov_tokenizers_path=""
     );

     ~LLMPipeline();
2 changes: 1 addition & 1 deletion src/cpp/include/openvino/genai/tokenizer.hpp
@@ -21,7 +21,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
     * @param tokenizer_path openvino_tokenizer.xml and openvino_detokenizer.xml should be located in the tokenizer_path
     * @param device device. Currently only 'CPU' is supported
     */
-    Tokenizer(const std::string& tokenizers_path, const std::string& device="CPU", const std::string& ov_tokenizer_path="");
+    Tokenizer(const std::string& tokenizers_path, const std::string& device="CPU", const std::string& ov_tokenizers_path="");

     /**
     * @brief encode a single prompt
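Elsewhere in this PR, `Tokenizer::encode` returns a `std::pair<ov::Tensor, ov::Tensor>`, and the commit trail mentions left padding for batched greedy search. Assuming the pair is (input ids, attention mask) — a common convention, not stated explicitly in this diff — a toy batch encoder with left padding might look like this (vocabulary and function name invented for illustration):

```python
def encode_batch(prompts, vocab, pad_id=0):
    """Toy batch encoder: map words to ids and left-pad to the longest
    sequence, returning (input_ids, attention_mask) lists."""
    ids = [[vocab[word] for word in prompt.split()] for prompt in prompts]
    width = max(len(seq) for seq in ids)
    input_ids, attention_mask = [], []
    for seq in ids:
        pad = width - len(seq)
        # Left padding keeps the last real token at the end of each row,
        # which is what next-token generation reads.
        input_ids.append([pad_id] * pad + seq)
        attention_mask.append([0] * pad + [1] * len(seq))
    return input_ids, attention_mask

vocab = {"hello": 5, "world": 6, "hi": 7}  # invented mini-vocabulary
ids, mask = encode_batch(["hello world", "hi"], vocab)
print(ids)   # [[5, 6], [0, 7]]
print(mask)  # [[1, 1], [0, 1]]
```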
14 changes: 7 additions & 7 deletions src/cpp/src/llm_pipeline.cpp
@@ -45,11 +45,11 @@ class LLMPipeline::LLMPipelineImpl {
         const ov::Tokenizer& tokenizer,
         const std::string device,
         const ov::AnyMap& plugin_config,
-        const std::string& ov_tokenizer_path=""
+        const std::string& ov_tokenizers_path=""
     );

     LLMPipelineImpl(std::string& path, std::string device, const ov::AnyMap& config);
-    LLMPipelineImpl(std::string& path, std::string device, const ov::AnyMap& config, const std::string& ov_tokenizer_path="");
+    LLMPipelineImpl(std::string& path, std::string device, const ov::AnyMap& config, const std::string& ov_tokenizers_path="");

     GenerationConfig generation_config() const;

@@ -71,7 +71,7 @@ ov::LLMPipeline::LLMPipeline(
     const ov::Tokenizer& tokenizer,
     const std::string device,
     const ov::AnyMap& plugin_config,
-    const std::string& ov_tokenizer_path
+    const std::string& ov_tokenizers_path
 ) {
     m_pimpl = make_unique<LLMPipelineImpl>(model_path, tokenizer, device, plugin_config);
 }
@@ -81,7 +81,7 @@ ov::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl(
     const ov::Tokenizer& tokenizer,
     std::string device,
     const ov::AnyMap& plugin_config,
-    const std::string& ov_tokenizer_path
+    const std::string& ov_tokenizers_path
 ): m_tokenizer(tokenizer), m_device(device), m_plugin_config(plugin_config) {
     ov::Core core;

@@ -95,12 +95,12 @@ ov::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl(
     }
 }

-ov::LLMPipeline::LLMPipeline(std::string& path, std::string device, const ov::AnyMap& config, const std::string& ov_tokenizer_path) {
-    m_pimpl = make_unique<LLMPipelineImpl>(path, device, config, ov_tokenizer_path);
+ov::LLMPipeline::LLMPipeline(std::string& path, std::string device, const ov::AnyMap& config, const std::string& ov_tokenizers_path) {
+    m_pimpl = make_unique<LLMPipelineImpl>(path, device, config, ov_tokenizers_path);
 }

 ov::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl(std::string& path, std::string device,
-    const ov::AnyMap& config, const std::string& ov_tokenizer_path) {
+    const ov::AnyMap& config, const std::string& ov_tokenizers_path) {
     std::string config_path = path + "/" + "config.json";
     std::string tokenizer_config_path = path + "/" +"tokenizer_config.json";
     std::string generation_config_path = path + "/" +"generation_config.json";
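llm_pipeline.cpp keeps all state behind `m_pimpl` (see the "pimpl implementation" commit): the public `LLMPipeline` only forwards calls to a private implementation class. A toy Python analogue of that facade/impl split — the behavior of `generate` here is invented purely for illustration:

```python
class _LLMPipelineImpl:
    """Toy stand-in for the private LLMPipelineImpl; the real one owns
    the tokenizer, device, and plugin config."""

    def __init__(self, path: str, device: str):
        self.path = path
        self.device = device

    def generate(self, prompt: str) -> str:
        # Invented behavior: the real impl runs model inference.
        return f"[{self.device}] echo: {prompt}"


class LLMPipeline:
    """Public facade with no members except the opaque impl handle,
    mirroring the m_pimpl member in llm_pipeline.cpp."""

    def __init__(self, path: str, device: str = "CPU"):
        self._pimpl = _LLMPipelineImpl(path, device)

    def generate(self, prompt: str) -> str:
        return self._pimpl.generate(prompt)

pipe = LLMPipeline("path/to/model")
print(pipe.generate("hello"))  # [CPU] echo: hello
```

The payoff in C++ is that changing the impl's data members does not change the public header, so dependents need no recompilation; the Python version only illustrates the delegation shape.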
10 changes: 5 additions & 5 deletions src/cpp/src/tokenizer.cpp
@@ -53,17 +53,17 @@ class Tokenizer::TokenizerImpl {
     int64_t m_eos_token_id = 2;

     TokenizerImpl() = default;
-    TokenizerImpl(std::string tokenizers_path, const std::string device, const std::string& ov_tokenizer_path) {
+    TokenizerImpl(std::string tokenizers_path, const std::string device, const std::string& ov_tokenizers_path) {
         ov::Core core;

         if (ov::generate_utils::is_xml(tokenizers_path))
             OPENVINO_THROW("tokenizers_path should be a path to a dir not a xml file");

-        if (ov_tokenizer_path.empty()) {
+        if (ov_tokenizers_path.empty()) {
             // OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt
             core.add_extension(OPENVINO_TOKENIZERS_PATH);
         } else {
-            core.add_extension(ov_tokenizer_path + "/libopenvino_tokenizers.so");
+            core.add_extension(ov_tokenizers_path + "/libopenvino_tokenizers.so");
         }
         std::shared_ptr<ov::Model> tokenizer_model, detokenizer_model;
         try {
@@ -144,8 +144,8 @@ class Tokenizer::TokenizerImpl {
     }
 };

-Tokenizer::Tokenizer(const std::string& tokenizers_path, const std::string& device, const std::string& ov_tokenizer_path) {
-    m_pimpl = std::make_shared<TokenizerImpl>(tokenizers_path, device, ov_tokenizer_path);
+Tokenizer::Tokenizer(const std::string& tokenizers_path, const std::string& device, const std::string& ov_tokenizers_path) {
+    m_pimpl = std::make_shared<TokenizerImpl>(tokenizers_path, device, ov_tokenizers_path);
 }

 std::pair<ov::Tensor, ov::Tensor> Tokenizer::encode(const std::string prompt) {
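The `TokenizerImpl` constructor above falls back to the compile-time `OPENVINO_TOKENIZERS_PATH` when `ov_tokenizers_path` is empty, and otherwise loads `libopenvino_tokenizers.so` from the given directory. That selection logic in isolation, as a Python sketch — the default path below is a placeholder, not the real compiled-in value:

```python
import os

# Placeholder for the compile-time OPENVINO_TOKENIZERS_PATH definition
# (the real value is set in CMakeLists.txt).
DEFAULT_EXTENSION = "/opt/openvino/libopenvino_tokenizers.so"

def resolve_extension(ov_tokenizers_path: str) -> str:
    # Empty argument -> compiled-in default; otherwise the shared library
    # inside the caller-supplied directory, as in TokenizerImpl's constructor.
    if not ov_tokenizers_path:
        return DEFAULT_EXTENSION
    return os.path.join(ov_tokenizers_path, "libopenvino_tokenizers.so")

print(resolve_extension(""))            # the compiled-in default
print(resolve_extension("/custom/dir")) # library inside the given dir
```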
10 changes: 5 additions & 5 deletions src/python/py_generate_pipeline.cpp
@@ -62,9 +62,9 @@ std::string call_with_config(ov::LLMPipeline& pipe, const std::string& text, con
     return pipe(text, config);
 }

-std::string genai_module_path() {
+std::string ov_tokenizers_module_path() {
     py::module_ m = py::module_::import("openvino_tokenizers");
-    py::list path_list = m.attr("__path__");
+    py::list path_list = m.attr("__path__");
     return std::string(py::str(path_list[0])) + "/lib";
 }

@@ -74,9 +74,9 @@ PYBIND11_MODULE(py_generate_pipeline, m) {
     py::class_<LLMPipeline>(m, "LLMPipeline")
         .def(py::init<const std::string, const ov::Tokenizer&, const std::string, const ov::AnyMap&, const std::string&>(),
              py::arg("model_path"), py::arg("tokenizer"), py::arg("device") = "CPU",
-             py::arg("plugin_config") = ov::AnyMap{}, py::arg("ov_tokenizer_path") = genai_module_path())
+             py::arg("plugin_config") = ov::AnyMap{}, py::arg("ov_tokenizers_path") = ov_tokenizers_module_path())
         .def(py::init<std::string&, std::string, const ov::AnyMap&, const std::string>(),
-             py::arg("path"), py::arg("device") = "CPU", py::arg("plugin_config") = ov::AnyMap{}, py::arg("ov_tokenizer_path") = genai_module_path())
+             py::arg("path"), py::arg("device") = "CPU", py::arg("plugin_config") = ov::AnyMap{}, py::arg("ov_tokenizers_path") = ov_tokenizers_module_path())
         .def("__call__", py::overload_cast<ov::LLMPipeline&, const std::string&, const py::kwargs&>(&call_with_kwargs))
         .def("__call__", py::overload_cast<ov::LLMPipeline&, const std::string&, const ov::GenerationConfig&>(&call_with_config))
         .def("generate", py::overload_cast<ov::LLMPipeline&, const std::string&, const py::kwargs&>(&call_with_kwargs))
@@ -105,7 +105,7 @@ PYBIND11_MODULE(py_generate_pipeline, m) {
         .def(py::init<std::string&, const std::string&, const std::string&>(),
              py::arg("tokenizers_path"),
              py::arg("device") = "CPU",
-             py::arg("ov_tokenizer_path") = py::str(genai_module_path()))
+             py::arg("ov_tokenizers_path") = py::str(ov_tokenizers_module_path()))

         // todo: implement encode/decode when for numpy inputs and outputs
         .def("encode", py::overload_cast<const std::string>(&ov::Tokenizer::encode), "Encode a single prompt")
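`ov_tokenizers_module_path()` above resolves the extension directory from the installed `openvino_tokenizers` package: the first `__path__` entry with `/lib` appended. The same lookup in pure Python, demonstrated on a stdlib package since `openvino_tokenizers` may not be installed:

```python
def module_lib_dir(module) -> str:
    # Mirror of ov_tokenizers_module_path(): the package's first __path__
    # entry plus "/lib", where the bindings expect
    # libopenvino_tokenizers.so to be installed.
    return module.__path__[0] + "/lib"

import email  # any stdlib *package* (one with __path__) works as a demo
print(module_lib_dir(email))
```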