Skip to content

CMake external GitHub repos with successful compilation on Windows 11 #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,23 @@ cmake_minimum_required(VERSION 3.10)

project(MetalTranslate)

# ExternalProject lets the dependencies below be fetched from Git and built
# as part of this project's build.
include(ExternalProject)

# Common install prefix for every external dependency; their headers and
# libraries are picked up from here further down.
set(EXTERNAL_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/external)

# CTranslate2 is configured with the MKL backend disabled and the
# DNNL (oneDNN) backend enabled.
# NOTE(review): no GIT_TAG is given, so the checked-out revision is not
# pinned — consider pinning a release tag for reproducible builds.
ExternalProject_Add(CTranslate2
GIT_REPOSITORY https://github.com/OpenNMT/CTranslate2
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION} -DWITH_MKL=OFF -DWITH_DNNL=ON
)

# OpenNMT Tokenizer, installed into the same prefix as CTranslate2.
# NOTE(review): no GIT_TAG here either — revision is not pinned.
ExternalProject_Add(Tokenizer
GIT_REPOSITORY https://github.com/OpenNMT/Tokenizer
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION}
)

# Make the installed headers and libraries of the external projects visible.
# These directory-level commands are issued before add_executable() so that
# they apply to the target created below.
include_directories(${EXTERNAL_INSTALL_LOCATION}/include)
link_directories(${EXTERNAL_INSTALL_LOCATION}/lib)

add_executable(metaltranslate src/main.cpp)

set(TARGET_H
Expand All @@ -10,8 +27,11 @@ set(TARGET_H

target_sources(metaltranslate PRIVATE src/MetalTranslate.cpp)

# CTranslate2 and Tokenizer are fetched and built by ExternalProject_Add at
# build time. Declare them as dependencies of the executable so their headers
# and libraries are installed before metaltranslate is compiled and linked.
add_dependencies(metaltranslate CTranslate2 Tokenizer)

# The libraries are no longer vendored under third_party/ (the submodules were
# removed), so they are linked by name from the external install prefix.
#add_subdirectory(third_party/CTranslate2/)
target_link_libraries(metaltranslate ctranslate2)

#add_subdirectory(third_party/Tokenizer/)
target_link_libraries(metaltranslate OpenNMTTokenizer)

#target_link_libraries(metaltranslate cpu_features)

69 changes: 35 additions & 34 deletions src/MetalTranslate.cpp
Original file line number Diff line number Diff line change
@@ -1,52 +1,53 @@
#include "MetalTranslate.h"

#include <ctranslate2/translator_pool.h>
#include <ctranslate2/translator.h>
#include <iostream>
#include <onmt/Tokenizer.h>

namespace MetalTranslate {

MetalTranslate::MetalTranslate(MetalTranslateConfig config) {
this->_config = config;
}
MetalTranslate::MetalTranslate(MetalTranslateConfig config) {
this->_config = config;
}

std::string MetalTranslate::Translate(std::string source,
std::string source_code,
std::string target_code) {
std::string MetalTranslate::Translate(std::string source,
std::string source_code,
std::string target_code) {

// Tokenizer
onmt::Tokenizer tokenizer(this->_config.ModelPath + "sentencepiece.model");
std::vector<std::string> tokens;
tokenizer.tokenize(source, tokens);
// Tokenizer
onmt::Tokenizer tokenizer(this->_config.ModelPath + "sentencepiece.model");
std::vector<std::string> tokens;
tokenizer.tokenize(source, tokens);

std::string source_prefix = "__" + source_code + "__";
tokens.insert(tokens.begin(), source_prefix);
std::string source_prefix = "__" + source_code + "__";
tokens.insert(tokens.begin(), source_prefix);

// CTranslate2
const size_t num_translators = 1;
const size_t num_threads_per_translator = 0; // Unused with DNNL
ctranslate2::TranslatorPool translator(
num_translators, num_threads_per_translator,
this->_config.ModelPath + "model", ctranslate2::Device::CPU);
// CTranslate2
const size_t num_translators = 1;
const size_t num_threads_per_translator = 0; // Unused with DNNL

const std::vector<std::vector<std::string>> batch = {tokens};
const std::vector<std::vector<std::string>> target_prefix = {
{"__" + target_code + "__"}};
const int max_batch_size = 2024;
const std::vector<std::vector<std::string>> batch = { {"▁H", "ello", "▁world", "!"} };

const std::vector<ctranslate2::TranslationResult> results =
translator.translate_batch(batch, target_prefix);
ctranslate2::Translator translator(this->_config.ModelPath, ctranslate2::Device::CPU);
//const std::vector<ctranslate2::TranslationResult> results = translator.translate_batch(batch);

const std::vector<std::string> translatedTokens = results[0].output();
const std::vector<std::vector<std::string>> target_prefix = {
{"__" + target_code + "__"} };
const int max_batch_size = 2024;

std::string result = tokenizer.detokenize(translatedTokens);
const std::vector<ctranslate2::TranslationResult> results =
translator.translate_batch(batch, target_prefix);

// Remove target prefix
// __es__ Traducción de texto con MetalTranslate
// -> Traducción de texto con MetalTranslate
result = result.substr(7);
const std::vector<std::string> translatedTokens = results[0].output();

return result;
}
std::string result = tokenizer.detokenize(translatedTokens);

} // namespace MetalTranslate
// Remove target prefix
// __es__ Traducción de texto con MetalTranslate
// -> Traducción de texto con MetalTranslate
result = result.substr(7);

return result;
}

} // namespace MetalTranslate
2 changes: 1 addition & 1 deletion src/MetalTranslateConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
namespace MetalTranslate {
/// Runtime configuration for MetalTranslate.
class MetalTranslateConfig {
public:
  /// Directory containing the translation model files.
  /// Keep the trailing '/': file names such as "sentencepiece.model" are
  /// appended to this string directly.
  std::string ModelPath = "models/nllb-200-distilled-600M-int8/";
};
} // namespace MetalTranslate
1 change: 0 additions & 1 deletion third_party/CTranslate2
Submodule CTranslate2 deleted from 4908b9
4 changes: 0 additions & 4 deletions third_party/README.md

This file was deleted.

1 change: 0 additions & 1 deletion third_party/Tokenizer
Submodule Tokenizer deleted from 559b8e