From d8252214ee2bfa727c530d135cb8e25e64972fec Mon Sep 17 00:00:00 2001 From: nindanaoto Date: Fri, 23 Aug 2024 13:49:57 +0000 Subject: [PATCH 1/2] Fix Xbyak and Randen --- thirdparties/randen/vector128.h | 2 +- .../fft_processor_spqliox_aarch64.cpp | 30 +++++++++++++++++-- .../fft_processor_spqliox_aarch64.h | 7 ++++- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/thirdparties/randen/vector128.h b/thirdparties/randen/vector128.h index 9a49f6d..b15dd97 100644 --- a/thirdparties/randen/vector128.h +++ b/thirdparties/randen/vector128.h @@ -29,7 +29,7 @@ #define RANDEN_BIG_ENDIAN 1 #include -#elif defined(__ARM_NEON) && defined(__ARM_FEATURE_CRYPTO) +#elif defined(__ARM_NEON) && defined(__ARM_FEATURE_AES) #define RANDEN_ARM 1 #include diff --git a/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.cpp b/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.cpp index 6b4212e..d897db3 100644 --- a/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.cpp +++ b/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.cpp @@ -193,6 +193,17 @@ void FFT_Processor_Spqliox_AArch64::execute_reverse_int(double *res, for (size_t i = 0; i < N; i++) res[i] = real_inout[i]; } +void FFT_Processor_Spqliox_AArch64::execute_reverse_uint(double *res, + const uint32_t *a) +{ + for (size_t i = 0; i < N; i++) real_inout[i] = (double)a[i]; + + ifft_(real_inout, NULL, NULL, &tables_reverse_, real_inout, + table_negation_reverse_.trig_tables); + + for (size_t i = 0; i < N; i++) res[i] = real_inout[i]; +} + void FFT_Processor_Spqliox_AArch64::execute_reverse_torus32(double *res, const uint32_t *a) { @@ -236,9 +247,24 @@ void FFT_Processor_Spqliox_AArch64::execute_direct_torus32_rescale( fft_(dst, sit, send, bla, &tables_direct_, real_inout, table_negation_forward_.trig_tables); for (int32_t i = 0; i < N; i++) - res[i] = uint32_t(int64_t(real_inout[i] / (Δ / 4))); + res[i] = uint32_t(int64_t(real_inout[i] / Δ)); } +void FFT_Processor_Spqliox_AArch64::execute_direct_torus64_rescale( + uint64_t *res, const double *a, const double Δ) +{ + const double *sit = a; + const double *send = a + N; + static const double _2sN = double(2) / double(N); + const double *bla = &_2sN; + double *dst = real_inout; + fft_(dst, sit, send, bla, &tables_direct_, real_inout, + table_negation_forward_.trig_tables); + for (int32_t i = 0; i < N; i++) + res[i] = uint64_t(int64_t(real_inout[i] / Δ)); +} + + void FFT_Processor_Spqliox_AArch64::execute_direct_torus64(uint64_t *res, const double *a) { @@ -267,4 +293,4 @@ void FFT_Processor_Spqliox_AArch64::execute_direct_torus64(uint64_t *res, FFT_Processor_Spqliox_AArch64::~FFT_Processor_Spqliox_AArch64() {} thread_local FFT_Processor_Spqliox_AArch64 fftplvl1(TFHEpp::lvl1param::n); -thread_local FFT_Processor_Spqliox_AArch64 fftplvl2(TFHEpp::lvl2param::n); \ No newline at end of file +thread_local FFT_Processor_Spqliox_AArch64 fftplvl2(TFHEpp::lvl2param::n); diff --git a/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.h b/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.h index 73a53e8..3041af5 100644 --- a/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.h +++ b/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.h @@ -34,6 +34,8 @@ class FFT_Processor_Spqliox_AArch64 { void execute_reverse_int(double *res, const int32_t *a); + void execute_reverse_uint(double *res, const uint32_t *a); + void execute_reverse_torus32(double *res, const uint32_t *a); void execute_direct_torus32(uint32_t *res, const double *a); @@ -41,6 +43,9 @@ class FFT_Processor_Spqliox_AArch64 { void execute_direct_torus32_rescale(uint32_t *res, const double *a, const double Δ); + void execute_direct_torus64_rescale(uint64_t *res, const double *a, + const double Δ); + void execute_reverse_torus64(double *res, const uint64_t *a); void execute_direct_torus64(uint64_t *res, const double *a); @@ -49,4 +54,4 @@ class FFT_Processor_Spqliox_AArch64 { }; extern thread_local FFT_Processor_Spqliox_AArch64 fftplvl1; -extern thread_local FFT_Processor_Spqliox_AArch64 fftplvl2; \ No newline at end of file +extern thread_local FFT_Processor_Spqliox_AArch64 fftplvl2; From 5207698deab2b53d9a0e59331526716e679eef86 Mon Sep 17 00:00:00 2001 From: nindanaoto Date: Fri, 23 Aug 2024 13:50:45 +0000 Subject: [PATCH 2/2] clang formatted --- include/mulfft.hpp | 21 +++++++++++-------- .../fft_processor_spqliox_aarch64.cpp | 3 +-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/mulfft.hpp b/include/mulfft.hpp index 6b8e43a..50c48ac 100644 --- a/include/mulfft.hpp +++ b/include/mulfft.hpp @@ -152,10 +152,10 @@ inline void TwistIFFTUInt(PolynomialInFD

&res, const Polynomial

&a) if constexpr (std::is_same_v) fftplvl1.execute_reverse_uint(res.data(), a.data()); // if constexpr (std::is_same_v) - // fftplvl1.execute_reverse_torus64(res.data(), a.data()); + // fftplvl1.execute_reverse_torus64(res.data(), a.data()); } // else if constexpr (std::is_same_v) - // fftplvl2.execute_reverse_torus64(res.data(), a.data()); + // fftplvl2.execute_reverse_torus64(res.data(), a.data()); else static_assert(false_v, "Undefined TwistIFFT!"); } @@ -316,8 +316,7 @@ inline void PolyMul(Polynomial

&res, const Polynomial

&a, } template -inline void PolyMulRescaleUnsigned(Polynomial

&res, - const Polynomial

&a, +inline void PolyMulRescaleUnsigned(Polynomial

&res, const Polynomial

&a, const Polynomial

&b) { // if constexpr (std::is_same_v) { @@ -350,16 +349,20 @@ inline void PolyMulNaive(Polynomial

&res, const Polynomial

&a, } template -inline void PolyMulNaieveRescaleUnsigned(Polynomial

&res, const Polynomial

&a, - const Polynomial

&b) +inline void PolyMulNaieveRescaleUnsigned(Polynomial

&res, + const Polynomial

&a, + const Polynomial

&b) { for (int i = 0; i < P::n; i++) { __int128_t ri = 0; for (int j = 0; j <= i; j++) - ri += static_cast<__int128_t>(a[j]) * static_cast<__int128_t>(b[i - j]); + ri += static_cast<__int128_t>(a[j]) * + static_cast<__int128_t>(b[i - j]); for (int j = i + 1; j < P::n; j++) - ri -= static_cast<__int128_t>(a[j]) * static_cast<__int128_t>(b[P::n + i - j]); - // res[i] = static_cast((ri) >> (std::numeric_limits::digits - 3)); + ri -= static_cast<__int128_t>(a[j]) * + static_cast<__int128_t>(b[P::n + i - j]); + // res[i] = static_cast((ri) >> + // (std::numeric_limits::digits - 3)); res[i] = static_cast((ri) >> 29); } } diff --git a/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.cpp b/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.cpp index d897db3..a9466e0 100644 --- a/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.cpp +++ b/thirdparties/spqliox_aarch64/fft_processor_spqliox_aarch64.cpp @@ -194,7 +194,7 @@ void FFT_Processor_Spqliox_AArch64::execute_reverse_int(double *res, } void FFT_Processor_Spqliox_AArch64::execute_reverse_uint(double *res, - const uint32_t *a) + const uint32_t *a) { for (size_t i = 0; i < N; i++) real_inout[i] = (double)a[i]; @@ -264,7 +264,6 @@ void FFT_Processor_Spqliox_AArch64::execute_direct_torus64_rescale( res[i] = uint64_t(int64_t(real_inout[i] / Δ)); } - void FFT_Processor_Spqliox_AArch64::execute_direct_torus64(uint64_t *res, const double *a) {