Skip to content

Commit

Permalink
Merge pull request #441 from bluescarni/pr/multimod
Browse files Browse the repository at this point in the history
Parallel compilation
  • Loading branch information
bluescarni authored Aug 4, 2024
2 parents 6c0a9c8 + a205aab commit 3df1333
Show file tree
Hide file tree
Showing 12 changed files with 2,026 additions and 641 deletions.
12 changes: 0 additions & 12 deletions .github/workflows/gha_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,3 @@ jobs:
- uses: actions/checkout@v4
- name: Build
run: bash tools/gha_llvm15_conda_asan.sh
conda_llvm14_asan:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build
run: bash tools/gha_llvm14_conda_asan.sh
conda_llvm13_asan:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build
run: bash tools/gha_llvm13_conda_asan.sh
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ if(NOT CMAKE_BUILD_TYPE)
FORCE)
endif()

project(heyoka VERSION 5.1.0 LANGUAGES CXX C)
project(heyoka VERSION 6.0.0 LANGUAGES CXX C)

list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/yacma")

Expand Down Expand Up @@ -158,8 +158,8 @@ endif()
include(GNUInstallDirs)
find_package(LLVM REQUIRED CONFIG)

if(${LLVM_VERSION_MAJOR} LESS 13 OR ${LLVM_VERSION_MAJOR} GREATER 18)
message(FATAL_ERROR "LLVM >= 13 and <= 18 is required.")
if(${LLVM_VERSION_MAJOR} LESS 15 OR ${LLVM_VERSION_MAJOR} GREATER 18)
message(FATAL_ERROR "LLVM >= 15 and <= 18 is required.")
endif()

# List of source files.
Expand Down Expand Up @@ -334,7 +334,7 @@ if(HEYOKA_WITH_SLEEF)
endif()

# Setup the heyoka ABI version number.
set(HEYOKA_ABI_VERSION 29)
set(HEYOKA_ABI_VERSION 30)

if(HEYOKA_BUILD_STATIC_LIBRARY)
# Setup of the heyoka static library.
Expand Down
2 changes: 1 addition & 1 deletion doc/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ and several CPU architectures (x86-64, 64-bit ARM and 64-bit PowerPC).

heyoka has the following **mandatory** dependencies:

* the `LLVM <https://llvm.org/>`__ compiler infrastructure library (version >=13 and <=18),
* the `LLVM <https://llvm.org/>`__ compiler infrastructure library (version >=15 and <=18),
* the `Boost <https://www.boost.org/>`__ C++ libraries (version >=1.69),
* the `{fmt} <https://fmt.dev/latest/index.html>`__ library (version >=9 and <=11),
* the `spdlog <https://github.com/gabime/spdlog>`__ library,
Expand Down
1 change: 1 addition & 0 deletions include/heyoka/detail/fwd_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class HEYOKA_DLL_PUBLIC func;
class HEYOKA_DLL_PUBLIC param;

class HEYOKA_DLL_PUBLIC llvm_state;
class HEYOKA_DLL_PUBLIC llvm_multi_state;

template <typename>
class HEYOKA_DLL_PUBLIC_INLINE_CLASS taylor_adaptive;
Expand Down
62 changes: 58 additions & 4 deletions include/heyoka/llvm_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
#include <heyoka/config.hpp>

#include <concepts>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <memory>
#include <ostream>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <boost/numeric/conversion/cast.hpp>

Expand Down Expand Up @@ -78,6 +79,7 @@ HEYOKA_DLL_PUBLIC const target_features &get_target_features();
} // namespace detail

HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const llvm_state &);
HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const llvm_multi_state &);

template <typename T>
inline std::uint32_t recommended_simd_size()
Expand Down Expand Up @@ -132,6 +134,8 @@ HEYOKA_BEGIN_NAMESPACE
class HEYOKA_DLL_PUBLIC llvm_state
{
friend HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const llvm_state &);
friend HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const llvm_multi_state &);
friend class HEYOKA_DLL_PUBLIC llvm_multi_state;

struct jit;

Expand Down Expand Up @@ -337,15 +341,65 @@ namespace detail

// The value contained in the in-memory cache.
struct llvm_mc_value {
std::string opt_bc, opt_ir, obj;
std::vector<std::string> opt_bc, opt_ir, obj;

std::size_t total_size() const;
};

// Cache lookup and insertion.
std::optional<llvm_mc_value> llvm_state_mem_cache_lookup(const std::string &, unsigned);
void llvm_state_mem_cache_try_insert(std::string, unsigned, llvm_mc_value);
std::optional<llvm_mc_value> llvm_state_mem_cache_lookup(const std::vector<std::string> &, unsigned);
void llvm_state_mem_cache_try_insert(std::vector<std::string>, unsigned, llvm_mc_value);

} // namespace detail

// Class exposing a compile()/jit_lookup() interface together with per-module
// getters (get_n_modules(), get_ir(), get_bc(), get_object_code()).
//
// NOTE(review): judging from the constructor taking a std::vector<llvm_state>
// and from the vector-returning getters, this presumably aggregates several
// llvm_state modules so that they can be handled as a single unit - confirm
// against the implementation.
class HEYOKA_DLL_PUBLIC llvm_multi_state
{
// The stream operator is a friend so that it can access the private members.
friend HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const llvm_multi_state &);

// Opaque implementation struct (defined elsewhere), owned via unique_ptr.
struct impl;

std::unique_ptr<impl> m_impl;

// Internal helpers, defined elsewhere.
HEYOKA_DLL_LOCAL void compile_impl();
HEYOKA_DLL_LOCAL void add_obj_triggers();

// Check functions.
// NOTE(review): the const char * argument is presumably the name of the calling
// function, used for error reporting - confirm against the implementation.
HEYOKA_DLL_LOCAL void check_compiled(const char *) const;
HEYOKA_DLL_LOCAL void check_uncompiled(const char *) const;

// Boost.Serialization support, split into distinct save/load members.
// Only the binary archive overloads are declared.
friend class boost::serialization::access;
void save(boost::archive::binary_oarchive &, unsigned) const;
void load(boost::archive::binary_iarchive &, unsigned);
BOOST_SERIALIZATION_SPLIT_MEMBER()

public:
// Constructors, assignment operators and destructor. All of them are only
// declared here (impl is an incomplete type at this point).
llvm_multi_state();
explicit llvm_multi_state(std::vector<llvm_state>);
llvm_multi_state(const llvm_multi_state &);
llvm_multi_state(llvm_multi_state &&) noexcept;
llvm_multi_state &operator=(const llvm_multi_state &);
llvm_multi_state &operator=(llvm_multi_state &&) noexcept;
~llvm_multi_state();

// Compilation status flag.
[[nodiscard]] bool is_compiled() const noexcept;

// Number of modules.
[[nodiscard]] unsigned get_n_modules() const noexcept;

// Getters for the compilation settings.
// NOTE(review): presumably these are common to all the modules - confirm.
[[nodiscard]] bool fast_math() const noexcept;
[[nodiscard]] bool force_avx512() const noexcept;
[[nodiscard]] unsigned get_opt_level() const noexcept;
[[nodiscard]] bool get_slp_vectorize() const noexcept;
[[nodiscard]] code_model get_code_model() const noexcept;

// Getters for the IR, bitcode and object code.
// NOTE: get_ir() and get_bc() return by value, while get_object_code()
// returns a const reference.
// NOTE(review): presumably one string per module - confirm.
[[nodiscard]] std::vector<std::string> get_ir() const;
[[nodiscard]] std::vector<std::string> get_bc() const;
[[nodiscard]] const std::vector<std::string> &get_object_code() const;

// Trigger the compilation.
void compile();

// Look up a symbol by name, returning the result as an unsigned integer.
// NOTE(review): presumably this requires a compiled state - confirm.
std::uintptr_t jit_lookup(const std::string &);
};

HEYOKA_END_NAMESPACE

// Archive version changelog:
Expand Down
108 changes: 72 additions & 36 deletions src/detail/llvm_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <boost/algorithm/string/predicate.hpp>
#include <boost/core/demangle.hpp>
#include <boost/numeric/conversion/cast.hpp>
#include <boost/safe_numerics/safe_integer.hpp>

#include <fmt/format.h>
#include <fmt/ranges.h>
Expand Down Expand Up @@ -271,6 +272,74 @@ llvm::AttributeList llvm_ext_math_func_attrs(llvm_state &s)
return f->getAttributes();
}

// Register ptr in the llvm.used global variable of the module contained in s:
//
// https://llvm.org/docs/LangRef.html#the-llvm-used-global-variable
//
// If the module has no llvm.used variable yet, one holding only ptr is created.
// If ptr is already listed in llvm.used, the module is left untouched.
//
// NOTE: on every successful insertion all the existing entries are copied
// into a brand new array which replaces the old one. Hence, adding N pointers
// one at a time has an overall quadratic cost. This is fine as long as
// llvm.used holds only a handful of entries, which is the expected use case.
void llvm_append_used(llvm_state &s, llvm::Constant *ptr)
{
    assert(ptr != nullptr);
    assert(ptr->getType()->isPointerTy());

    auto &md = s.module();
    auto &ctx = s.context();

    // The complete list of entries for the new llvm.used array.
    std::vector<llvm::Constant *> entries;

    // Look for a pre-existing llvm.used variable.
    auto *prev_used = md.getGlobalVariable("llvm.used");

    if (prev_used != nullptr) {
        // Carry over the entries currently stored in the initializer.
        assert(prev_used->hasInitializer());
        auto *prev_init = llvm::cast<llvm::ConstantArray>(prev_used->getInitializer());

        auto n_prev = prev_init->getType()->getNumElements();
        entries.reserve(boost::safe_numerics::safe<decltype(entries.size())>(n_prev) + 1);

        for (decltype(n_prev) idx = 0; idx < n_prev; ++idx) {
            auto *cur = prev_init->getAggregateElement(boost::numeric_cast<unsigned>(idx));
            assert(cur->getType()->isPointerTy());

            if (cur->isElementWiseEqual(ptr)) {
                // ptr is already registered: nothing to do.
                return;
            }

            entries.push_back(cur);
        }
    }

    // The new pointer always goes last.
    entries.push_back(ptr);

    // Build the array constant holding all the entries.
    auto *arr_type = llvm::ArrayType::get(llvm::PointerType::getUnqual(ctx),
                                          boost::numeric_cast<std::uint64_t>(entries.size()));
    auto *arr = llvm::ConstantArray::get(arr_type, entries);

    // Drop the old variable (if any) before creating its
    // replacement under the same name.
    if (prev_used != nullptr) {
        prev_used->eraseFromParent();
    }

    auto *new_used = new llvm::GlobalVariable(md, arr->getType(), true, llvm::GlobalVariable::AppendingLinkage, arr,
                                              "llvm.used");
    new_used->setSection("llvm.metadata");
}

// Attach the vfabi attributes to "call", which must be a call to a function with scalar arguments.
// The necessary vfabi information is stored in vfi. The function returns "call".
// The attributes of the scalar function will be attached to the vector variants.
Expand Down Expand Up @@ -312,17 +381,8 @@ llvm::CallInst *llvm_add_vfabi_attrs(llvm_state &s, llvm::CallInst *call, const
= (use_fast_math && !el.lp_vf_abi_attr.empty()) ? el.lp_vf_abi_attr : el.vf_abi_attr;
vf_abi_strs.push_back(vf_abi_attr);
}
#if LLVM_VERSION_MAJOR >= 14
call->addFnAttr(llvm::Attribute::get(context, "vector-function-abi-variant",
fmt::format("{}", fmt::join(vf_abi_strs, ","))));
#else
{
auto attrs = call->getAttributes();
attrs = attrs.addAttribute(context, llvm::AttributeList::FunctionIndex, "vector-function-abi-variant",
fmt::format("{}", fmt::join(vf_abi_strs, ",")));
call->setAttributes(attrs);
}
#endif

// Now we need to:
// - add the declarations of the vector variants to the module,
Expand Down Expand Up @@ -374,33 +434,9 @@ llvm::CallInst *llvm_add_vfabi_attrs(llvm_state &s, llvm::CallInst *call, const
assert(vf_ptr->getAttributes() == f->getAttributes());
}

// Create the name of the dummy function to ensure the variant is not optimised out.
//
// NOTE: another way of doing this involves the llvm.used global variable - need
// to learn about the metadata API apparently.
//
// https://llvm.org/docs/LangRef.html#the-llvm-used-global-variable
// https://godbolt.org/z/1neaG4bYj
const auto dummy_name = fmt::format("heyoka.dummy_vector_call.{}", el_name);

if (auto *dummy_ptr = md.getFunction(dummy_name); dummy_ptr == nullptr) {
// The dummy function has not been defined yet, do it.
auto *dummy = llvm_func_create(vec_ft, llvm::Function::ExternalLinkage, dummy_name, &md);

builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", dummy));

// The dummy function just forwards its arguments to the variant.
std::vector<llvm::Value *> dummy_args;
for (auto *dummy_arg = dummy->args().begin(); dummy_arg != dummy->args().end(); ++dummy_arg) {
dummy_args.emplace_back(dummy_arg);
}

builder.CreateRet(builder.CreateCall(vf_ptr, dummy_args));
} else {
// The declaration of the dummy function is already there.
// Check that the signatures match.
assert(dummy_ptr->getFunctionType() == vec_ft);
}
// Ensure that the variant is not optimised out because it is not
// explicitly used in the code.
detail::llvm_append_used(s, vf_ptr);
}

// Restore the original insertion block.
Expand Down
Loading

0 comments on commit 3df1333

Please sign in to comment.