diff --git a/include/circuitbootstrapping.hpp b/include/circuitbootstrapping.hpp index eb1053e..a764529 100644 --- a/include/circuitbootstrapping.hpp +++ b/include/circuitbootstrapping.hpp @@ -26,7 +26,7 @@ void CircuitBootstrapping(TRGSW &trgsw, const TLWE &tlwe, const EvalKey &ek) { - std::array, privksP::targetP::l> temp; + alignas(64) std::array, privksP::targetP::l> temp; GateBootstrappingManyLUT( temp, tlwe, ek.getbkfft(), CBtestvector()); for (int i = 0; i < privksP::targetP::l; i++) { @@ -55,7 +55,7 @@ void CircuitBootstrappingFFT(TRGSWFFT &trgswfft, const TLWE &tlwe, const EvalKey &ek) { - TRGSW trgsw; + alignas(64) TRGSW trgsw; CircuitBootstrapping(trgsw, tlwe, ek); for (int i = 0; i < (privksP::targetP::k + 1) * privksP::targetP::l; i++) for (int j = 0; j < privksP::targetP::k + 1; j++) @@ -67,7 +67,7 @@ void CircuitBootstrappingFFT(TRGSWFFT &trgswfft, const TLWE &tlwe, const EvalKey &ek) { - TRGSW trgsw; + alignas(64) TRGSW trgsw; CircuitBootstrapping(trgsw, tlwe, ek); for (int i = 0; i < (privksP::targetP::k + 1) * privksP::targetP::l; i++) for (int j = 0; j < privksP::targetP::k + 1; j++) @@ -79,9 +79,9 @@ void CircuitBootstrappingSub(TRGSW &trgsw, const TLWE &tlwe, const EvalKey &ek) { - TLWE tlwelvl0; + alignas(64) TLWE tlwelvl0; IdentityKeySwitch(tlwelvl0, tlwe, ek.getiksk()); - std::array, privksP::targetP::l> temp; + alignas(64) std::array, privksP::targetP::l> temp; GateBootstrappingManyLUT( temp, tlwelvl0, ek.getbkfft(), CBtestvector()); for (int i = 0; i < privksP::targetP::l; i++) { @@ -89,7 +89,7 @@ void CircuitBootstrappingSub(TRGSW &trgsw, 1ULL << (numeric_limits::digits - (i + 1) * privksP::targetP::Bgbit - 1); for (int k = 0; k < privksP::targetP::k + 1; k++) { - TLWE subsettlwe; + alignas(64) TLWE subsettlwe; SubsetIdentityKeySwitch(subsettlwe, temp[i], ek.getsubiksk()); SubsetPrivKeySwitch( @@ -105,7 +105,7 @@ void CircuitBootstrappingSubFFT(TRGSWFFT &trgswfft, const TLWE &tlwe, const EvalKey &ek) { - TRGSW trgsw; + alignas(64) TRGSW trgsw; CircuitBootstrappingSub(trgsw, tlwe, ek); for (int i = 0; i < (privksP::targetP::k + 1) * privksP::targetP::l; i++) for (int j = 0; j < privksP::targetP::k + 1; j++) @@ -117,7 +117,7 @@ void CircuitBootstrappingFFTInv( TRGSWFFT &invtrgswfft, const TLWE &tlwe, const EvalKey &ek) { - TLWE invtlwe; + alignas(64) TLWE invtlwe; // HomNot for (int i = 0; i <= brP::domainP::k * brP::domainP::n; i++) invtlwe[i] = -tlwe[i]; @@ -129,7 +129,7 @@ void CircuitBootstrappingFFTInv( TRGSWFFT &invtrgswfft, const TLWE &tlwe, const EvalKey &ek) { - TLWE invtlwe; + alignas(64) TLWE invtlwe; // HomNot for (int i = 0; i <= iksP::domainP::k * iksP::domainP::n; i++) invtlwe[i] = -tlwe[i]; @@ -145,7 +145,7 @@ void CircuitBootstrappingFFTwithInv( constexpr array h = hgen(); - TRGSW trgsw; + alignas(64) TRGSW trgsw; CircuitBootstrapping(trgsw, tlwe, ek); for (int i = 0; i < (privksP::targetP::k + 1) * privksP::targetP::l; i++) for (int j = 0; j < privksP::targetP::k + 1; j++) { @@ -171,7 +171,7 @@ void CircuitBootstrappingFFTwithInv( constexpr array h = hgen(); - TRGSW trgsw; + alignas(64) TRGSW trgsw; CircuitBootstrapping(trgsw, tlwe, ek); for (int i = 0; i < (privksP::targetP::k + 1) * privksP::targetP::l; i++) for (int j = 0; j < privksP::targetP::k + 1; j++) { diff --git a/include/cloudkey.hpp b/include/cloudkey.hpp index a61465c..c0e613b 100644 --- a/include/cloudkey.hpp +++ b/include/cloudkey.hpp @@ -576,18 +576,15 @@ struct EvalKey { const SecretKey& sk) { if constexpr (std::is_same_v) { - privksklvl11[key] = std::make_unique_for_overwrite< - PrivateKeySwitchingKey>(); + privksklvl11[key] = std::unique_ptr>(new (std::align_val_t(64)) PrivateKeySwitchingKey()); privkskgen(*privksklvl11[key], func, sk); } else if constexpr (std::is_same_v) { - privksklvl21[key] = std::make_unique_for_overwrite< - PrivateKeySwitchingKey>(); + privksklvl21[key] = std::unique_ptr>(new (std::align_val_t(64)) PrivateKeySwitchingKey()); privkskgen(*privksklvl21[key], func, sk); } else if constexpr (std::is_same_v) { - privksklvl22[key] = std::make_unique_for_overwrite< - PrivateKeySwitchingKey>(); + privksklvl22[key] = std::unique_ptr>(new (std::align_val_t(64)) PrivateKeySwitchingKey()); privkskgen(*privksklvl22[key], func, sk); } else diff --git a/include/detwfa.hpp b/include/detwfa.hpp index d787ab6..d4f28bb 100644 --- a/include/detwfa.hpp +++ b/include/detwfa.hpp @@ -39,7 +39,7 @@ void CMUXFFTwithPolynomialMulByXaiMinusOne( const BootstrappingKeyElementFFT &cs, const int a) { if constexpr (bkP::domainP::key_value_diff == 1) { - TRLWE temp; + alignas(64) TRLWE temp; for (int k = 0; k < bkP::targetP::k + 1; k++) PolynomialMulByXaiMinusOne(temp[k], acc[k], a); diff --git a/include/keyswitch.hpp b/include/keyswitch.hpp index 8650c8a..383acd5 100644 --- a/include/keyswitch.hpp +++ b/include/keyswitch.hpp @@ -264,8 +264,8 @@ void PrivKeySwitch(TRLWE &res, mask; if (aij != 0) { - for (int p = 0; p < P::targetP::n; p++) - for (int k = 0; k < P::targetP::k + 1; k++) + for (int k = 0; k < P::targetP::k + 1; k++) + for (int p = 0; p < P::targetP::n; p++) res[k][p] -= privksk[i][j][aij - 1][k][p]; } } diff --git a/include/utils.hpp b/include/utils.hpp index c31ba0f..e0e7e48 100644 --- a/include/utils.hpp +++ b/include/utils.hpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace TFHEpp { #ifdef USE_RANDEN @@ -36,6 +37,60 @@ concept hasqbit = requires T::qbit; }; + +// https://github.com/zhourrr/aligned-memory-allocator/blob/main/aligned_allocator.h +// A minimal implementation of an allocator for C++ Standard Library, which +// allocates aligned memory (specified by the alignment argument). +// Note: +// A minimal custom allocator is preferred because C++ allocator_traits class +// provides default implementation for you. Take a look at Microsoft's +// documentation about Allocators and allocator class. +template class AlignedAllocator { + public: + using value_type = T; + + public: + // According to Microsoft's documentation, default constructor is not required + // by C++ Standard Library. + AlignedAllocator() noexcept {}; + + template AlignedAllocator(const AlignedAllocator& other) noexcept {}; + + template + inline bool operator==(const AlignedAllocator& other) const noexcept { + return true; + } + + template + inline bool operator!=(const AlignedAllocator& other) const noexcept { + return false; + } + + template struct rebind { + using other = AlignedAllocator; + }; + + // STL containers call this function to allocate uninitialized memory block to + // store (no more than n) elements of type T (value_type). + inline value_type* allocate(const std::size_t n) const { + auto size = n; + /* + If you wish, for some strange reason, that the size of allocated buffer is + also aligned to alignment, uncomment the following statement. + + Note: this increases the size of underlying memory, but STL containers + still treat it as a memory block of size n, i.e., STL containers will not + put more than n elements into the returned memory. + */ + // size = (n + alignment - 1) / alignment * alignment; + return static_cast(std::aligned_alloc(alignment, sizeof(T) * size)); + }; + + // STL containers call this function to free a memory block beginning at a + // specified position. + inline void deallocate(value_type* const ptr, std::size_t n) const noexcept { std::free(ptr); } +}; + // Double to Torus(32bit fixed-point number) inline uint16_t dtot16(double d) { diff --git a/test/circuitbootstrapping.cpp b/test/circuitbootstrapping.cpp index 4e53e8b..44209f4 100644 --- a/test/circuitbootstrapping.cpp +++ b/test/circuitbootstrapping.cpp @@ -36,9 +36,9 @@ int main() for (int j = 0; j < privksP::targetP::n; j++) pmu[i][j] = pa[i][j] ? privksP::targetP::μ : -privksP::targetP::μ; for (int i = 0; i < num_test; i++) pones[i] = true; - std::vector> ca(num_test); - std::vector> cones(num_test); - std::vector> bootedTGSW( + alignas(64) std::vector> ca(num_test); + alignas(64) std::vector> cones(num_test); + std::vector,TFHEpp::AlignedAllocator,64>> bootedTGSW( num_test); for (int i = 0; i < num_test; i++) diff --git a/test/nested_cmux.cpp b/test/nested_cmux.cpp index 97ecaad..1d378aa 100644 --- a/test/nested_cmux.cpp +++ b/test/nested_cmux.cpp @@ -49,16 +49,16 @@ int main() SecretKey skey; - std::vector guard; + alignas(64) std::vector guard; TFHEpp::Polynomial plainpoly = {}; plainpoly[0] = 1; for (size_t i = 0; i < N; i++) guard.push_back( TFHEpp::trgswfftSymEncrypt(plainpoly, skey.key.lvl1)); - TRLWELvl1 c1 = trivial_TRLWELvl1(uint2weight(1)), + alignas(64) TRLWELvl1 c1 = trivial_TRLWELvl1(uint2weight(1)), c0 = trivial_TRLWELvl1(uint2weight(0)); - TRLWELvl1 res = c1; + alignas(64) TRLWELvl1 res = c1; dump_histgram_of_phase_of_TRLWELvl1( std::cout, TFHEpp::trlwePhase(res, skey.key.lvl1)); for (size_t i = 0; i < N; i++) {