From 1d3aab1258d1303d2c0960ec161421109d5b5773 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Juventin?= Date: Mon, 16 Sep 2024 19:19:09 +0200 Subject: [PATCH 1/2] improve CIOS implementation --- include/evmmax/evmmax.hpp | 52 ++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/include/evmmax/evmmax.hpp b/include/evmmax/evmmax.hpp index 70feec2bc3..e766f8279a 100644 --- a/include/evmmax/evmmax.hpp +++ b/include/evmmax/evmmax.hpp @@ -83,26 +83,48 @@ class ModArith // Based on 2.3.2 from // High-Speed Algorithms & Architectures For Number-Theoretic Cryptosystems // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf + // and on 2.2 from + // EdMSM: Multi-Scalar-Multiplication for SNARKs and Faster Montgomery multiplication + // https://eprint.iacr.org/2022/1400.pdf + + constexpr uint64_t most_significant_mod_word_limit {std::numeric_limits<uint64_t>::max() >> 1}; constexpr auto S = UintT::num_words; // TODO(C++23): Make it static intx::uint<UintT::num_bits + 64> t; - for (size_t i = 0; i != S; ++i) + if (mod[S - 1] < most_significant_mod_word_limit) { - uint64_t c = 0; - for (size_t j = 0; j != S; ++j) - std::tie(c, t[j]) = addmul(t[j], x[j], y[i], c); - auto tmp = intx::addc(t[S], c); - t[S] = tmp.value; - const auto d = tmp.carry; // TODO: Carry is 0 for sparse modulus. - - const auto m = t[0] * m_mod_inv; - std::tie(c, std::ignore) = addmul(t[0], m, mod[0], 0); - for (size_t j = 1; j != S; ++j) - std::tie(c, t[j - 1]) = addmul(t[j], m, mod[j], c); - tmp = intx::addc(t[S], c); - t[S - 1] = tmp.value; - t[S] = d + tmp.carry; // TODO: Carry is 0 for sparse modulus. 
+ for (size_t i = 0; i != S; ++i) + { + uint64_t c = 0; + for (size_t j = 0; j != S; ++j) + std::tie(c, t[j]) = addmul(t[j], x[j], y[i], c); + auto const c_2 = c; + const auto m = t[0] * m_mod_inv; + std::tie(c, std::ignore) = addmul(t[0], m, mod[0], 0); + for (size_t j = 1; j != S; ++j) + std::tie(c, t[j - 1]) = addmul(t[j], m, mod[j], c); + t[S - 1] = c_2 + c; + } + } + else + { + for (size_t i = 0; i != S; ++i) + { + uint64_t c = 0; + for (size_t j = 0; j != S; ++j) + std::tie(c, t[j]) = addmul(t[j], x[j], y[i], c); + auto tmp = intx::addc(t[S], c); + t[S] = tmp.value; + const auto d = tmp.carry; // TODO: Carry is 0 for sparse modulus. + + const auto m = t[0] * m_mod_inv; + std::tie(c, std::ignore) = addmul(t[0], m, mod[0], 0); + for (size_t j = 1; j != S; ++j) + std::tie(c, t[j - 1]) = addmul(t[j], m, mod[j], c); + tmp = intx::addc(t[S], c); + t[S - 1] = tmp.value; + t[S] = d + tmp.carry; // TODO: Carry is 0 for sparse modulus. + } } if (t >= mod) From 2c9d6f6e0b0ee9308057afb4c13a70c3a3185b76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Juventin?= Date: Thu, 19 Sep 2024 18:00:34 +0200 Subject: [PATCH 2/2] improve CIOS implementation - second approach --- include/evmmax/evmmax.hpp | 52 +++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/include/evmmax/evmmax.hpp b/include/evmmax/evmmax.hpp index e766f8279a..0d7b106783 100644 --- a/include/evmmax/evmmax.hpp +++ b/include/evmmax/evmmax.hpp @@ -87,43 +87,43 @@ class ModArith // EdMSM: Multi-Scalar-Multiplication for SNARKs and Faster Montgomery multiplication // https://eprint.iacr.org/2022/1400.pdf - constexpr uint64_t most_significant_mod_word_limit {std::numeric_limits<uint64_t>::max() >> 1}; + constexpr uint64_t most_significant_mod_word_limit{ + std::numeric_limits<uint64_t>::max() >> 1}; constexpr auto S = UintT::num_words; // TODO(C++23): Make it static intx::uint<UintT::num_bits + 64> t; - if (mod[S - 1] < most_significant_mod_word_limit) + for (size_t i = 0; i != S; ++i) { - for (size_t 
i = 0; i != S; ++i) + uint64_t c = 0; + for (size_t j = 0; j != S; ++j) + std::tie(c, t[j]) = addmul(t[j], x[j], y[i], c); + + uint64_t carry = 0; + if (mod[S - 1] < most_significant_mod_word_limit) { - uint64_t c = 0; - for (size_t j = 0; j != S; ++j) - std::tie(c, t[j]) = addmul(t[j], x[j], y[i], c); - auto const c_2 = c; - const auto m = t[0] * m_mod_inv; - std::tie(c, std::ignore) = addmul(t[0], m, mod[0], 0); - for (size_t j = 1; j != S; ++j) - std::tie(c, t[j - 1]) = addmul(t[j], m, mod[j], c); - t[S - 1] = c_2 + c; + carry = c; } - } - else - { - for (size_t i = 0; i != S; ++i) + else { - uint64_t c = 0; - for (size_t j = 0; j != S; ++j) - std::tie(c, t[j]) = addmul(t[j], x[j], y[i], c); auto tmp = intx::addc(t[S], c); t[S] = tmp.value; - const auto d = tmp.carry; // TODO: Carry is 0 for sparse modulus. + carry = tmp.carry; + } + + const auto m = t[0] * m_mod_inv; + std::tie(c, std::ignore) = addmul(t[0], m, mod[0], 0); + for (size_t j = 1; j != S; ++j) + std::tie(c, t[j - 1]) = addmul(t[j], m, mod[j], c); - const auto m = t[0] * m_mod_inv; - std::tie(c, std::ignore) = addmul(t[0], m, mod[0], 0); - for (size_t j = 1; j != S; ++j) - std::tie(c, t[j - 1]) = addmul(t[j], m, mod[j], c); - tmp = intx::addc(t[S], c); + if (mod[S - 1] < most_significant_mod_word_limit) + { + t[S - 1] = carry + c; + } + else + { + auto tmp = intx::addc(t[S], c); t[S - 1] = tmp.value; - t[S] = d + tmp.carry; // TODO: Carry is 0 for sparse modulus. + t[S] = carry + tmp.carry; } }