Description
#include <iostream>
#include <memory>
#include <string>
#include "tokenizers_cpp.h"

int main() {
  // LoadBytesFromFile reads the whole tokenizer.json into a string (sketch below)
  std::string tokenizer_json_path = "../tokenizer.json";
  std::string blob = LoadBytesFromFile(tokenizer_json_path);
  std::cout << "load file success!" << std::endl;
  std::unique_ptr<tokenizers::Tokenizer> m_tokenizer = tokenizers::Tokenizer::FromBlobJSON(blob);
}
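For reference, LoadBytesFromFile is a small helper defined before main that just reads the whole file into a std::string. A minimal sketch of it (assuming the usual read-through-a-stringstream approach, along the lines of the tokenizers-cpp example code):

#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>

// Read an entire file into a std::string; throw if the file cannot be opened.
std::string LoadBytesFromFile(const std::string& path) {
  std::ifstream fs(path, std::ios::in | std::ios::binary);
  if (!fs.is_open()) {
    throw std::runtime_error("Cannot open " + path);
  }
  std::ostringstream buffer;
  buffer << fs.rdbuf();
  return buffer.str();
}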
Here is my CMakeLists.txt:
cmake_minimum_required(VERSION 3.18)
project(target)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)

aux_source_directory(. SRC_FILES)
link_directories(libs)
include_directories(include)

add_executable(target ${SRC_FILES})
target_link_libraries(target
  ${PROJECT_SOURCE_DIR}/libs/libsentencepiece.a
  ${PROJECT_SOURCE_DIR}/libs/libtokenizers_c.a
  ${PROJECT_SOURCE_DIR}/libs/libtokenizers_cpp.a
)
When I build the project, the link step fails with the following errors:
/usr/bin/ld: ../libs/libtokenizers_cpp.a(huggingface_tokenizer.cc.o): in function `tokenizers::Tokenizer::FromBlobJSON(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)':
huggingface_tokenizer.cc:(.text+0x46): undefined reference to `tokenizers_new_from_str'
/usr/bin/ld: ../libs/libtokenizers_cpp.a(huggingface_tokenizer.cc.o): in function `tokenizers::Tokenizer::FromBlobByteLevelBPE(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)':
huggingface_tokenizer.cc:(.text+0x13e): undefined reference to `byte_level_bpe_tokenizers_new_from_str'
/usr/bin/ld: ../libs/libtokenizers_cpp.a(huggingface_tokenizer.cc.o): in function `tokenizers::HFTokenizer::~HFTokenizer()':
huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizerD2Ev[_ZN10tokenizers11HFTokenizerD5Ev]+0x37): undefined reference to `tokenizers_free'
/usr/bin/ld: ../libs/libtokenizers_cpp.a(huggingface_tokenizer.cc.o): in function `tokenizers::HFTokenizer::Encode(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool)':
huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizer6EncodeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEb[_ZN10tokenizers11HFTokenizer6EncodeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEb]+0x69): undefined reference to `tokenizers_encode'
/usr/bin/ld: huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizer6EncodeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEb[_ZN10tokenizers11HFTokenizer6EncodeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEb]+0xbc): undefined reference to `tokenizers_free_encode_results'
/usr/bin/ld: ../libs/libtokenizers_cpp.a(huggingface_tokenizer.cc.o): in function `tokenizers::HFTokenizer::EncodeBatch(std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, bool)':
huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizer11EncodeBatchERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS7_EEb[_ZN10tokenizers11HFTokenizer11EncodeBatchERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS7_EEb]+0x203): undefined reference to `tokenizers_encode_batch'
/usr/bin/ld: huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizer11EncodeBatchERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS7_EEb[_ZN10tokenizers11HFTokenizer11EncodeBatchERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS7_EEb]+0x344): undefined reference to `tokenizers_free_encode_results'
/usr/bin/ld: ../libs/libtokenizers_cpp.a(huggingface_tokenizer.cc.o): in function `tokenizers::HFTokenizer::Decode[abi:cxx11](std::vector<int, std::allocator<int> > const&, bool)':
huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizer6DecodeB5cxx11ERKSt6vectorIiSaIiEEb[_ZN10tokenizers11HFTokenizer6DecodeB5cxx11ERKSt6vectorIiSaIiEEb]+0x62): undefined reference to `tokenizers_decode'
/usr/bin/ld: huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizer6DecodeB5cxx11ERKSt6vectorIiSaIiEEb[_ZN10tokenizers11HFTokenizer6DecodeB5cxx11ERKSt6vectorIiSaIiEEb]+0x7d): undefined reference to `tokenizers_get_decode_str'
/usr/bin/ld: ../libs/libtokenizers_cpp.a(huggingface_tokenizer.cc.o): in function `tokenizers::HFTokenizer::GetVocabSize()':
huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizer12GetVocabSizeEv[_ZN10tokenizers11HFTokenizer12GetVocabSizeEv]+0x32): undefined reference to `tokenizers_get_vocab_size'
/usr/bin/ld: ../libs/libtokenizers_cpp.a(huggingface_tokenizer.cc.o): in function `tokenizers::HFTokenizer::IdToToken[abi:cxx11](int)':
huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizer9IdToTokenB5cxx11Ei[_ZN10tokenizers11HFTokenizer9IdToTokenB5cxx11Ei]+0x3e): undefined reference to `tokenizers_id_to_token'
/usr/bin/ld: ../libs/libtokenizers_cpp.a(huggingface_tokenizer.cc.o): in function `tokenizers::HFTokenizer::TokenToId(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)':
huggingface_tokenizer.cc:(.text._ZN10tokenizers11HFTokenizer9TokenToIdERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE[_ZN10tokenizers11HFTokenizer9TokenToIdERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE]+0x58): undefined reference to `tokenizers_token_to_id'
collect2: error: ld returned 1 exit status
make[2]: *** [CMakeFiles/target.dir/build.make:116: target] Error 1
make[1]: *** [CMakeFiles/Makefile2:83: CMakeFiles/target.dir/all] Error 2
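All of the undefined symbols (tokenizers_new_from_str, tokenizers_encode, tokenizers_decode, tokenizers_free, ...) look like the C API functions that libtokenizers_c.a is supposed to provide, so my current guess is a static-library link-order problem: GNU ld resolves archives left to right, and libtokenizers_c.a is listed before libtokenizers_cpp.a, which depends on it. Below is a sketch of the reordered target_link_libraries I intend to try; the pthread/dl/m entries are an assumption (Rust static libraries usually need them), not something I have confirmed for this build.

target_link_libraries(target
  ${PROJECT_SOURCE_DIR}/libs/libtokenizers_cpp.a  # C++ wrapper first: its objects reference the C API symbols
  ${PROJECT_SOURCE_DIR}/libs/libtokenizers_c.a    # Rust C API: should define tokenizers_new_from_str, tokenizers_encode, ...
  ${PROJECT_SOURCE_DIR}/libs/libsentencepiece.a
  pthread dl m                                    # assumption: common system deps of the Rust staticlib
)

Is the ordering really the cause here, or is something else missing from the link line?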