Fix iOS benchmark app #10542

Closed · wants to merge 18 commits
36 changes: 12 additions & 24 deletions examples/models/llama/runner/CMakeLists.txt
@@ -41,14 +41,6 @@ target_include_directories(
extension_module INTERFACE ${_common_include_directories}
)

list(
APPEND _llama_runner__srcs
${EXECUTORCH_ROOT}/extension/llm/tokenizers/src/tiktoken.cpp
)
list(APPEND _llama_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
)

if(CMAKE_TOOLCHAIN_IOS
OR ANDROID
OR APPLE
@@ -60,23 +52,8 @@ else()
add_library(llama_runner SHARED ${_llama_runner__srcs})
endif()

# find RE2 for tokenizer, build tiktoken
set(ABSL_ENABLE_INSTALL ON)
set(ABSL_PROPAGATE_CXX_STD ON)
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(
${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/abseil-cpp
${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
)
add_subdirectory(
${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/re2
${CMAKE_CURRENT_BINARY_DIR}/re2
)
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})

set(llama_runner_deps executorch extension_data_loader extension_module
extension_tensor re2::re2
extension_tensor
)

target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
@@ -85,6 +62,17 @@ target_include_directories(
llama_runner
INTERFACE ${_common_include_directories}
)

# Include tokenizers dependency
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(
${EXECUTORCH_ROOT}/extension/llm/tokenizers
${CMAKE_CURRENT_BINARY_DIR}/tokenizers
)
target_link_libraries(
llama_runner PUBLIC tokenizers
)

target_include_directories(
llama_runner
PUBLIC ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
39 changes: 31 additions & 8 deletions examples/models/llama/runner/runner.cpp
@@ -17,6 +17,7 @@
#include <executorch/extension/llm/runner/util.h>

#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#include <pytorch/tokenizers/hf_tokenizer.h>
#include <pytorch/tokenizers/llama2c_tokenizer.h>

namespace example {
@@ -36,6 +37,30 @@ static constexpr auto kMaxContextLen = "get_max_context_len";
static constexpr auto kVocabSize = "get_vocab_size";
static constexpr auto kUseKVCache = "use_kv_cache";
static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";

std::unique_ptr<::tokenizers::Tokenizer> load_tokenizer(
const std::string& tokenizer_path) {

auto json_tokenizer = std::make_unique<tokenizers::HFTokenizer>();
if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded json tokenizer");
return json_tokenizer;
}

auto tiktoken_tokenizer = get_tiktoken_for_llama();
if (tiktoken_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded TikToken tokenizer");
return tiktoken_tokenizer;
}

auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded BPE tokenizer");
return bpe_tokenizer;
}

return nullptr;
}
} // namespace

Runner::Runner(
@@ -87,25 +112,23 @@ Error Runner::load() {
return Error::Ok;
}
ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward"));
// load tokenizer. Assuming tiktoken is the default tokenizer
tokenizer_ = nullptr;
tokenizer_ = get_tiktoken_for_llama();
::tokenizers::Error err = tokenizer_->load(tokenizer_path_);
// Rely on tiktoken to throw error if the artifact is incompatible. Then we
// fallback to BPE tokenizer.
if (err != ::tokenizers::Error::Ok) {

// Load tokenizer.
tokenizer_ = load_tokenizer(tokenizer_path_);
if (tokenizer_ == nullptr) {
ET_LOG(
Info,
"Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
tokenizer_path_.c_str());
tokenizer_.reset();
// @lint-ignore CLANGTIDY facebook-hte-Deprecated
tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>();
err = tokenizer_->load(tokenizer_path_);
auto err = tokenizer_->load(tokenizer_path_);
ET_CHECK_TK_OK_OR_RETURN_ERROR(
err,
"Failed to load %s as a llama2.c tokenizer artifact",
tokenizer_path_.c_str());
return ::executorch::runtime::Error::InvalidArgument;
}

ET_LOG(Info, "Reading metadata from model");
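For context on the new fallback order: `load_tokenizer()` replaces the hard-coded Tiktoken-then-BPE logic that used to live inside `Runner::load()`, trying HF JSON, Tiktoken, and llama2.c BPE in turn and returning the first tokenizer whose `load()` succeeds. Below is a minimal standalone sketch of the same pattern, built only from the types and calls visible in this diff; the per-branch comments about artifact formats are illustrative assumptions.

```cpp
#include <memory>
#include <string>

#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#include <pytorch/tokenizers/hf_tokenizer.h>
#include <pytorch/tokenizers/llama2c_tokenizer.h>

// Try each supported tokenizer format in order; keep the first that loads.
std::unique_ptr<::tokenizers::Tokenizer> try_load(const std::string& path) {
  auto hf = std::make_unique<::tokenizers::HFTokenizer>();
  if (hf->load(path) == ::tokenizers::Error::Ok) {
    return hf;  // HF tokenizer.json artifact (assumed format)
  }
  auto tiktoken = example::get_tiktoken_for_llama();
  if (tiktoken->load(path) == ::tokenizers::Error::Ok) {
    return tiktoken;  // Llama 3 style tiktoken artifact (assumed format)
  }
  auto bpe = std::make_unique<::tokenizers::Llama2cTokenizer>();
  if (bpe->load(path) == ::tokenizers::Error::Ok) {
    return bpe;  // llama2.c binary artifact (assumed format)
  }
  return nullptr;  // caller maps this to Error::InvalidArgument
}
```

The ordering leans on each loader cleanly rejecting incompatible artifacts, the same assumption the removed "Rely on tiktoken to throw error if the artifact is incompatible" comment documented for the old two-step fallback.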
1 change: 1 addition & 0 deletions examples/models/llama/runner/targets.bzl
@@ -49,6 +49,7 @@ def define_common_targets():
"//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
"//executorch/examples/models/llama/tokenizer:tiktoken",
"//pytorch/tokenizers:llama2c_tokenizer",
"//pytorch/tokenizers:hf_tokenizer",
] + (_get_operator_lib(aten)) + ([
# Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
# Therefore enable it explicitly for now to avoid failing tests
5 changes: 4 additions & 1 deletion examples/qualcomm/CMakeLists.txt
@@ -35,7 +35,7 @@ find_package(gflags REQUIRED)
set(_common_compile_options -Wno-deprecated-declarations -fPIC)

# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories ${EXECUTORCH_ROOT}/..)
set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json/single_include)

#
# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
@@ -67,6 +67,9 @@ target_include_directories(
PUBLIC
${_common_include_directories}
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/include
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/json/single_include
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
)

# find RE2 for tokenizer
14 changes: 12 additions & 2 deletions examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
@@ -27,7 +27,12 @@ list(PREPEND _qaihub_llama2_7b_runner__srcs
# build qaihub llama2 7b runner
add_executable(qaihub_llama2_7b_runner ${_qaihub_llama2_7b_runner__srcs})
target_include_directories(
qaihub_llama2_7b_runner PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
qaihub_llama2_7b_runner PUBLIC
${_common_include_directories}
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
)
target_link_libraries(
qaihub_llama2_7b_runner
@@ -69,7 +74,12 @@ list(
# build qaihub llama3 8b runner
add_executable(qaihub_llama3_8b_runner ${_qaihub_llama3_8b_runner__srcs})
target_include_directories(
qaihub_llama3_8b_runner PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
qaihub_llama3_8b_runner PUBLIC
${_common_include_directories}
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src
)

target_link_libraries(
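A note on the repeated third-party include additions in both runners (inferred from the directory names; the PR itself does not explain them): loading HF `tokenizer.json` artifacts requires the header-only nlohmann/json vendored under `third-party/json/single_include`, and the `llama.cpp-unicode` headers supply unicode handling for byte-level BPE. A hypothetical snippet showing the kind of code those paths make compilable:

```cpp
#include <fstream>
#include <iostream>

// Resolved via the third-party/json/single_include path added above.
#include <nlohmann/json.hpp>

int main() {
  // "tokenizer.json" is a placeholder path to an HF tokenizer artifact.
  std::ifstream in("tokenizer.json");
  if (!in) {
    std::cerr << "missing tokenizer.json\n";
    return 1;
  }
  const nlohmann::json doc = nlohmann::json::parse(in);
  // HF tokenizer.json files typically carry a "model" object whose "type"
  // names the algorithm (e.g. "BPE"); this layout is assumed for illustration.
  std::cout << "model type: " << doc.at("model").at("type") << "\n";
  return 0;
}
```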