diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ab14e19e3..e31d6f4f41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,11 @@ cmake_minimum_required(VERSION 3.27) project(placeholder) +option(GPU_PROVER "Enable GPU support" OFF) +if (GPU_PROVER) + add_compile_options(-DGPU_PROVER) +endif() + if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") add_compile_options (-fdiagnostics-color=always) elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") @@ -26,7 +31,7 @@ file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/generated-dummy.cpp) add_library(crypto3_precompiled_headers STATIC ${CMAKE_CURRENT_BINARY_DIR}/generated-dummy.cpp) set_target_properties(crypto3_precompiled_headers PROPERTIES LINKER_LANGUAGE CXX - CXX_STANDARD 20 + CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE ) diff --git a/crypto3/benchmarks/CMakeLists.txt b/crypto3/benchmarks/CMakeLists.txt index b3dea658f1..772298f8c8 100644 --- a/crypto3/benchmarks/CMakeLists.txt +++ b/crypto3/benchmarks/CMakeLists.txt @@ -40,7 +40,7 @@ macro(define_benchmark benchmark) set_target_properties(${full_name} PROPERTIES - CXX_STANDARD 20 + CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") diff --git a/crypto3/libs/algebra/example/CMakeLists.txt b/crypto3/libs/algebra/example/CMakeLists.txt index c9c0d345c2..5691506a83 100644 --- a/crypto3/libs/algebra/example/CMakeLists.txt +++ b/crypto3/libs/algebra/example/CMakeLists.txt @@ -15,7 +15,7 @@ macro(define_algebra_example name) Boost::random ) - set_target_properties(algebra_${name}_example PROPERTIES CXX_STANDARD 20) + set_target_properties(algebra_${name}_example PROPERTIES CXX_STANDARD 23) endmacro() set(EXAMPLES_NAMES diff --git a/crypto3/libs/algebra/test/CMakeLists.txt b/crypto3/libs/algebra/test/CMakeLists.txt index 2140872daa..1fceeefd07 100644 --- a/crypto3/libs/algebra/test/CMakeLists.txt +++ b/crypto3/libs/algebra/test/CMakeLists.txt @@ -23,7 +23,7 @@ macro(define_runtime_algebra_test name) ${Boost_INCLUDE_DIRS}) - 
set_target_properties(${test_name} PROPERTIES CXX_STANDARD 20 + set_target_properties(${test_name} PROPERTIES CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") @@ -44,7 +44,7 @@ macro(define_compile_time_algebra_test name) ${Boost_INCLUDE_DIRS}) - set_target_properties(algebra_${name}_compile_test PROPERTIES CXX_STANDARD 20) + set_target_properties(algebra_${name}_compile_test PROPERTIES CXX_STANDARD 23) endmacro() set(RUNTIME_TESTS_NAMES diff --git a/crypto3/libs/blueprint/example/CMakeLists.txt b/crypto3/libs/blueprint/example/CMakeLists.txt index 0043c270d2..b4f6968eae 100644 --- a/crypto3/libs/blueprint/example/CMakeLists.txt +++ b/crypto3/libs/blueprint/example/CMakeLists.txt @@ -21,7 +21,7 @@ macro(define_blueprint_example name) ${CMAKE_WORKSPACE_NAME}::multiprecision ${CMAKE_WORKSPACE_NAME}::zk ) - set_target_properties(${full_example_name} PROPERTIES CXX_STANDARD 20 CXX_STANDARD_REQUIRED TRUE) + set_target_properties(${full_example_name} PROPERTIES CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) endmacro() diff --git a/crypto3/libs/blueprint/test/CMakeLists.txt b/crypto3/libs/blueprint/test/CMakeLists.txt index 38db7a296a..a7c1aa85e2 100644 --- a/crypto3/libs/blueprint/test/CMakeLists.txt +++ b/crypto3/libs/blueprint/test/CMakeLists.txt @@ -44,7 +44,7 @@ cm_test_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} crypto3::hash Boost::unit_test_framework ) -set_target_properties(_cm_internal_tests-crypto3-blueprint-test PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests-crypto3-blueprint-test PROPERTIES CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests-crypto3-blueprint-test REUSE_FROM crypto3_precompiled_headers) macro(define_blueprint_test test) @@ -60,7 +60,7 @@ macro(define_blueprint_test test) target_include_directories(${full_test_name} PRIVATE ${Boost_INCLUDE_DIRS}) - set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${full_test_name} 
PROPERTIES CXX_STANDARD 23) target_compile_definitions(${full_test_name} PRIVATE TEST_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}/zkevm/data/") diff --git a/crypto3/libs/blueprint/test/zkevm_bbf/hardhat.cpp b/crypto3/libs/blueprint/test/zkevm_bbf/hardhat.cpp index 175409e57d..ddb796e2c3 100644 --- a/crypto3/libs/blueprint/test/zkevm_bbf/hardhat.cpp +++ b/crypto3/libs/blueprint/test/zkevm_bbf/hardhat.cpp @@ -139,7 +139,7 @@ class zkEVMHardhatTestFixture: public BBFTestFixture { // std::cout << std::endl; // Max_copy, Max_rw, Max_keccak, Max_bytecode - result =test_bbf_component( + result = test_bbf_component( "exp", {}, exp_assignment_input, exp_constraint_input, max_exp_rows, diff --git a/crypto3/libs/containers/example/CMakeLists.txt b/crypto3/libs/containers/example/CMakeLists.txt index 3c9a21834e..0f329aaccc 100644 --- a/crypto3/libs/containers/example/CMakeLists.txt +++ b/crypto3/libs/containers/example/CMakeLists.txt @@ -37,7 +37,7 @@ macro(define_containers_example example) ${CMAKE_WORKSPACE_NAME}::algebra ${CMAKE_WORKSPACE_NAME}::hash Boost::container) - set_target_properties(${target_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${target_name} PROPERTIES CXX_STANDARD 23) endmacro() set(EXAMPLES_NAMES diff --git a/crypto3/libs/containers/test/CMakeLists.txt b/crypto3/libs/containers/test/CMakeLists.txt index ec5296ad0f..f1db3ba537 100644 --- a/crypto3/libs/containers/test/CMakeLists.txt +++ b/crypto3/libs/containers/test/CMakeLists.txt @@ -33,7 +33,7 @@ cm_test_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} Boost::unit_test_framework Boost::random ) -set_target_properties(_cm_internal_tests-crypto3-containers-test PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests-crypto3-containers-test PROPERTIES CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests-crypto3-containers-test REUSE_FROM crypto3_precompiled_headers) macro(define_storage_test test) @@ -57,7 +57,7 @@ macro(define_storage_test test) Boost::random ) 
- set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 23) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") target_compile_options(${full_test_name} PRIVATE "-fconstexpr-steps=2147483647" "-ftemplate-backtrace-limit=0") diff --git a/crypto3/libs/hash/test/CMakeLists.txt b/crypto3/libs/hash/test/CMakeLists.txt index e67a041c07..5d72a47f86 100644 --- a/crypto3/libs/hash/test/CMakeLists.txt +++ b/crypto3/libs/hash/test/CMakeLists.txt @@ -12,7 +12,7 @@ cm_test_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} ${${CURRENT_PROJECT_NAME}_INTERFACE_LIBRARIES} Boost::unit_test_framework) -set_target_properties(_cm_internal_tests-crypto3-hash-test PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests-crypto3-hash-test PROPERTIES CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests-crypto3-hash-test REUSE_FROM crypto3_precompiled_headers) macro(define_hash_test name) @@ -27,7 +27,7 @@ macro(define_hash_test name) ${Boost_INCLUDE_DIRS}) set_target_properties(${test_name} PROPERTIES - CXX_STANDARD 20 + CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") diff --git a/crypto3/libs/marshalling/algebra/test/CMakeLists.txt b/crypto3/libs/marshalling/algebra/test/CMakeLists.txt index a23fdbc245..1e521ef07b 100644 --- a/crypto3/libs/marshalling/algebra/test/CMakeLists.txt +++ b/crypto3/libs/marshalling/algebra/test/CMakeLists.txt @@ -32,7 +32,7 @@ macro(define_marshalling_test name) ${Boost_INCLUDE_DIRS}) set_target_properties(${test_name} PROPERTIES - CXX_STANDARD 20 + CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") diff --git a/crypto3/libs/marshalling/core/CMakeLists.txt b/crypto3/libs/marshalling/core/CMakeLists.txt index bff49113d0..2b80e7fd18 100644 --- a/crypto3/libs/marshalling/core/CMakeLists.txt +++ b/crypto3/libs/marshalling/core/CMakeLists.txt @@ -20,7 +20,7 @@ 
if(CRYPTO3_MARSHALLING_THROWS) endif() if(NOT CMAKE_CXX_STANDARD) - set_target_properties(${test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${test_name} PROPERTIES CXX_STANDARD 23) endif() cm_setup_version(VERSION 0.1.0 PREFIX ${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME}) diff --git a/crypto3/libs/marshalling/core/test/CMakeLists.txt b/crypto3/libs/marshalling/core/test/CMakeLists.txt index f9ebe1188f..1b182d2ae2 100644 --- a/crypto3/libs/marshalling/core/test/CMakeLists.txt +++ b/crypto3/libs/marshalling/core/test/CMakeLists.txt @@ -28,7 +28,7 @@ macro(define_marshalling_test name) ${Boost_INCLUDE_DIRS}) set_target_properties(marshalling_core_${name}_test PROPERTIES - CXX_STANDARD 20 + CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) endmacro() diff --git a/crypto3/libs/marshalling/multiprecision/CMakeLists.txt b/crypto3/libs/marshalling/multiprecision/CMakeLists.txt index d5cd627942..22a7edfb98 100644 --- a/crypto3/libs/marshalling/multiprecision/CMakeLists.txt +++ b/crypto3/libs/marshalling/multiprecision/CMakeLists.txt @@ -18,7 +18,7 @@ cm_setup_version(VERSION 0.1.0 PREFIX ${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_ add_library(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} INTERFACE) add_library(${CMAKE_WORKSPACE_NAME}::${CURRENT_PROJECT_NAME} ALIAS ${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME}) -set_target_properties(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} PROPERTIES CXX_STANDARD 20) +set_target_properties(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} PROPERTIES CXX_STANDARD 23) set_target_properties(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} PROPERTIES EXPORT_NAME ${CURRENT_PROJECT_NAME}) diff --git a/crypto3/libs/marshalling/multiprecision/test/CMakeLists.txt b/crypto3/libs/marshalling/multiprecision/test/CMakeLists.txt index 07dffa77af..6227d45df9 100644 --- a/crypto3/libs/marshalling/multiprecision/test/CMakeLists.txt +++ b/crypto3/libs/marshalling/multiprecision/test/CMakeLists.txt @@ -39,7 +39,7 @@ 
macro(define_marshalling_test name) ${Boost_INCLUDE_DIRS}) set_target_properties(marshalling_${name}_test PROPERTIES - CXX_STANDARD 20 + CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") diff --git a/crypto3/libs/marshalling/zk/test/CMakeLists.txt b/crypto3/libs/marshalling/zk/test/CMakeLists.txt index f54dca72bc..199249e1fa 100644 --- a/crypto3/libs/marshalling/zk/test/CMakeLists.txt +++ b/crypto3/libs/marshalling/zk/test/CMakeLists.txt @@ -22,7 +22,7 @@ cm_test_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} ${CMAKE_WORKSPACE_NAME}::marshalling-algebra ${CMAKE_WORKSPACE_NAME}::marshalling-core) -set_target_properties(_cm_internal_tests-crypto3-marshalling-zk-test PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests-crypto3-marshalling-zk-test PROPERTIES CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests-crypto3-marshalling-zk-test REUSE_FROM crypto3_precompiled_headers) macro(define_marshalling_test test) @@ -40,7 +40,7 @@ macro(define_marshalling_test test) ${Boost_INCLUDE_DIRS}) set_target_properties(marshalling_zk_${name}_test PROPERTIES - CXX_STANDARD 20 + CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") diff --git a/crypto3/libs/math/include/nil/crypto3/math/polynomial/polynomial_dfs.hpp b/crypto3/libs/math/include/nil/crypto3/math/polynomial/polynomial_dfs.hpp index 147ea6cbc4..b56c99b5a1 100644 --- a/crypto3/libs/math/include/nil/crypto3/math/polynomial/polynomial_dfs.hpp +++ b/crypto3/libs/math/include/nil/crypto3/math/polynomial/polynomial_dfs.hpp @@ -883,4 +883,4 @@ struct std::hash> } }; -#endif // CRYPTO3_MATH_POLYNOMIAL_POLYNOM_DFT_HPP +#endif // CRYPTO3_MATH_POLYNOMIAL_POLYNOM_DFT_HPP \ No newline at end of file diff --git a/crypto3/libs/math/test/CMakeLists.txt b/crypto3/libs/math/test/CMakeLists.txt index b7e57806d2..825422c637 100644 --- a/crypto3/libs/math/test/CMakeLists.txt +++ b/crypto3/libs/math/test/CMakeLists.txt @@ -24,7 
+24,7 @@ macro(define_math_test name) ${Boost_INCLUDE_DIRS}) - set_target_properties(${test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${test_name} PROPERTIES CXX_STANDARD 23) endmacro() diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/big_uint.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/big_uint.hpp index 0207c4a7c4..9e86bad38d 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/big_uint.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/big_uint.hpp @@ -42,6 +42,7 @@ #include "nil/crypto3/multiprecision/detail/endian.hpp" #include "nil/crypto3/multiprecision/detail/force_inline.hpp" #include "nil/crypto3/multiprecision/unsigned_utils.hpp" +#include "nil/crypto3/multiprecision/detail/throw.hpp" namespace nil::crypto3::multiprecision { /** @@ -155,7 +156,7 @@ namespace nil::crypto3::multiprecision { do_assign_integral_unchecked(unsigned_or_throw(a)); if constexpr (sizeof(T) * CHAR_BIT > Bits) { if (compare(a) != 0) { - throw std::range_error("big_uint: overflow"); + NIL_THROW(std::range_error("big_uint: overflow")); } } } @@ -177,7 +178,7 @@ namespace nil::crypto3::multiprecision { do_assign_unchecked(other); if constexpr (Bits2 > Bits) { if (other.compare(*this) != 0) { - throw std::range_error("big_uint: overflow"); + NIL_THROW(std::range_error("big_uint: overflow")); } } } @@ -253,7 +254,7 @@ namespace nil::crypto3::multiprecision { } } if (bits > Bits) { - throw std::range_error("big_uint: not enough bits to store bytes"); + NIL_THROW(std::range_error("big_uint: not enough bits to store bytes")); } return *this; } @@ -315,7 +316,7 @@ namespace nil::crypto3::multiprecision { return decimal_str(); } if (!(flags & std::ios_base::hex)) { - throw std::invalid_argument("big_uint: unsupported format flags"); + NIL_THROW(std::invalid_argument("big_uint: unsupported format flags")); } auto result = hex_str(); if (flags & std::ios_base::uppercase) { @@ -370,7 
+371,7 @@ namespace nil::crypto3::multiprecision { auto result = to_unsigned_unchecked(); if constexpr (sizeof(T) * CHAR_BIT < Bits) { if (compare(result) != 0) { - throw std::overflow_error("big_uint: overflow"); + NIL_THROW(std::overflow_error("big_uint: overflow")); } } return result; @@ -382,7 +383,7 @@ namespace nil::crypto3::multiprecision { T result = static_cast(to_unsigned_unchecked>()); if constexpr (sizeof(T) * CHAR_BIT <= Bits) { if (compare(result) != 0) { - throw std::overflow_error("big_uint: overflow"); + NIL_THROW(std::overflow_error("big_uint: overflow")); } } return result; @@ -749,7 +750,7 @@ namespace nil::crypto3::multiprecision { try { return static_cast>(result); } catch (const std::range_error&) { - throw std::overflow_error("big_uint: division overflow"); + NIL_THROW(std::overflow_error("big_uint: division overflow")); } } @@ -762,7 +763,7 @@ namespace nil::crypto3::multiprecision { try { a = result; } catch (const std::range_error&) { - throw std::overflow_error("big_uint: division overflow"); + NIL_THROW(std::overflow_error("big_uint: division overflow")); } return a; } @@ -848,11 +849,11 @@ namespace nil::crypto3::multiprecision { if constexpr (Bits2 > Bits && !std::is_same_v>) { for (; i < os; ++i) { if (po[i] != 0) { - throw std::overflow_error("big_uint: bitwise_op overflow"); + NIL_THROW(std::overflow_error("big_uint: bitwise_op overflow")); } } if (normalize()) { - throw std::overflow_error("big_uint: bitwise_op overflow"); + NIL_THROW(std::overflow_error("big_uint: bitwise_op overflow")); } } } @@ -886,7 +887,7 @@ namespace nil::crypto3::multiprecision { limbs()[0] |= l; if constexpr (static_limb_count == 1) { if (normalize()) { - throw std::overflow_error("big_uint: or overflow"); + NIL_THROW(std::overflow_error("big_uint: or overflow")); } } } @@ -895,7 +896,7 @@ namespace nil::crypto3::multiprecision { limbs()[0] ^= l; if constexpr (static_limb_count == 1) { if (normalize()) { - throw std::overflow_error("big_uint: xor 
overflow"); + NIL_THROW(std::overflow_error("big_uint: xor overflow")); } } } @@ -1168,7 +1169,7 @@ namespace nil::crypto3::multiprecision { } if (index == limb_count()) { - throw std::invalid_argument("zero has no lsb"); + NIL_THROW(std::invalid_argument("zero has no lsb")); } // @@ -1189,7 +1190,7 @@ namespace nil::crypto3::multiprecision { } } if (limbs()[0] == 0) { - throw std::invalid_argument("zero has no msb"); + NIL_THROW(std::invalid_argument("zero has no msb")); } return std::bit_width(limbs()[0]) - 1; } @@ -1207,7 +1208,7 @@ namespace nil::crypto3::multiprecision { constexpr big_uint& bit_set(std::size_t index) { if (index >= Bits) { - throw std::invalid_argument("fixed precision overflow"); + NIL_THROW(std::invalid_argument("fixed precision overflow")); } std::size_t offset = index / limb_bits; std::size_t shift = index % limb_bits; @@ -1218,7 +1219,7 @@ namespace nil::crypto3::multiprecision { constexpr big_uint& bit_unset(std::size_t index) { if (index >= Bits) { - throw std::invalid_argument("fixed precision overflow"); + NIL_THROW(std::invalid_argument("fixed precision overflow")); } std::size_t offset = index / limb_bits; std::size_t shift = index % limb_bits; @@ -1229,7 +1230,7 @@ namespace nil::crypto3::multiprecision { constexpr big_uint& bit_flip(std::size_t index) { if (index >= Bits) { - throw std::invalid_argument("fixed precision overflow"); + NIL_THROW(std::invalid_argument("fixed precision overflow")); } std::size_t offset = index / limb_bits; std::size_t shift = index % limb_bits; @@ -1254,7 +1255,7 @@ namespace nil::crypto3::multiprecision { limb_type value = static_cast(bits & mask) << shift; if (value) { if (limb >= limb_count()) { - throw std::overflow_error("import_bits: overflow"); + NIL_THROW(std::overflow_error("import_bits: overflow")); } limbs()[limb] |= value; } @@ -1317,7 +1318,7 @@ namespace nil::crypto3::multiprecision { } if (normalize()) { - throw std::overflow_error("import_bits: overflow"); + 
NIL_THROW(std::overflow_error("import_bits: overflow")); } } @@ -1329,7 +1330,7 @@ namespace nil::crypto3::multiprecision { if (std::any_of(reinterpret_cast(i) + copy_len, reinterpret_cast(j), [](char c) { return c != 0; })) { - throw std::overflow_error("import_bits: overflow"); + NIL_THROW(std::overflow_error("import_bits: overflow")); } std::memcpy(reinterpret_cast(limbs()), i, copy_len); @@ -1337,7 +1338,7 @@ namespace nil::crypto3::multiprecision { limb_count() * sizeof(limb_type) - copy_len); if (normalize()) { - throw std::overflow_error("import_bits: overflow"); + NIL_THROW(std::overflow_error("import_bits: overflow")); } } diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_mod/modular_ops/goldilocks.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_mod/modular_ops/goldilocks.hpp index 01cdbc825f..0b74d6daed 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_mod/modular_ops/goldilocks.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_mod/modular_ops/goldilocks.hpp @@ -46,7 +46,7 @@ namespace nil::crypto3::multiprecision { if (sum >= goldilocks_modulus) { sum -= goldilocks_modulus; } - result = sum; + result = static_cast(sum); BOOST_ASSERT(result < goldilocks_modulus); } @@ -69,8 +69,8 @@ Goldilocks::new(t2) */ - std::uint64_t x_lo = input; - std::uint64_t x_hi = input >> 64; + std::uint64_t x_lo = static_cast(input); + std::uint64_t x_hi = static_cast(input >> 64); std::uint64_t x_hi_hi = x_hi >> 32; std::uint64_t x_hi_lo = x_hi & NEG_ORDER; diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_mod/modular_ops/montgomery.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_mod/modular_ops/montgomery.hpp index 5df98fc1db..6d26fea8cc 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_mod/modular_ops/montgomery.hpp +++ 
b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_mod/modular_ops/montgomery.hpp @@ -23,6 +23,7 @@ #include "nil/crypto3/multiprecision/detail/big_mod/modular_ops/barrett.hpp" #include "nil/crypto3/multiprecision/detail/big_uint/storage.hpp" #include "nil/crypto3/multiprecision/detail/integer_ops_base.hpp" +#include "nil/crypto3/multiprecision/detail/throw.hpp" namespace nil::crypto3::multiprecision::detail { template @@ -46,7 +47,7 @@ namespace nil::crypto3::multiprecision::detail { constexpr montgomery_modular_ops(const big_uint_t &m) : barrett_modular_ops(m) { if (!modulus_supports_montgomery(m)) { - throw std::invalid_argument("module not usable with montgomery"); + NIL_THROW(std::invalid_argument("module not usable with montgomery")); } m_montgomery_p_dash = monty_inverse(this->mod().limbs()[0]); @@ -73,7 +74,7 @@ namespace nil::crypto3::multiprecision::detail { */ static constexpr limb_type monty_inverse(const limb_type &a) { if (a % 2 == 0) { - throw std::invalid_argument("inverse does not exist"); + NIL_THROW(std::invalid_argument("inverse does not exist")); } limb_type b = 1; limb_type r = 0; diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/arithmetic.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/arithmetic.hpp index c055349e49..2244917ecb 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/arithmetic.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/arithmetic.hpp @@ -26,6 +26,7 @@ #include "nil/crypto3/multiprecision/detail/big_uint/storage.hpp" #include "nil/crypto3/multiprecision/type_traits.hpp" #include "nil/crypto3/multiprecision/unsigned_utils.hpp" +#include "nil/crypto3/multiprecision/detail/throw.hpp" namespace nil::crypto3::multiprecision { template @@ -121,7 +122,7 @@ namespace nil::crypto3::multiprecision { OverflowPolicy != overflow_policy::throw_exception) { 
if constexpr (OverflowPolicy == overflow_policy::throw_exception) { if (overflow) { - throw std::overflow_error("big_uint: addition overflow"); + NIL_THROW(std::overflow_error("big_uint: addition overflow")); } } else if constexpr (OverflowPolicy == overflow_policy::debug_assert) { BOOST_ASSERT_MSG(!overflow, "big_uint: addition overflow"); @@ -303,7 +304,7 @@ namespace nil::crypto3::multiprecision { constexpr void subtract_overflow() noexcept(OverflowPolicy != overflow_policy::throw_exception) { if constexpr (OverflowPolicy == overflow_policy::throw_exception) { - throw std::overflow_error("big_uint: subtraction overflow"); + NIL_THROW(std::overflow_error("big_uint: subtraction overflow")); } else if constexpr (OverflowPolicy == overflow_policy::debug_assert) { BOOST_ASSERT_MSG(false, "big_uint: subtraction overflow"); } @@ -454,7 +455,7 @@ namespace nil::crypto3::multiprecision { OverflowPolicy != overflow_policy::throw_exception) { if constexpr (OverflowPolicy == overflow_policy::throw_exception) { if (carry) { - throw std::overflow_error("big_uint: addition overflow"); + NIL_THROW(std::overflow_error("big_uint: addition overflow")); } } else if constexpr (OverflowPolicy == overflow_policy::debug_assert) { BOOST_ASSERT_MSG(!carry, "big_uint: addition overflow"); @@ -532,7 +533,7 @@ namespace nil::crypto3::multiprecision { */ if (y.is_zero()) { - throw std::overflow_error("integer division by zero"); + NIL_THROW(std::overflow_error("integer division by zero")); } const_limb_pointer px = x.limbs(); @@ -746,7 +747,7 @@ namespace nil::crypto3::multiprecision { OverflowPolicy != overflow_policy::throw_exception) { if constexpr (OverflowPolicy == overflow_policy::throw_exception) { if (overflow) { - throw std::overflow_error("big_uint: multiplication overflow"); + NIL_THROW(std::overflow_error("big_uint: multiplication overflow")); } } else if constexpr (OverflowPolicy == overflow_policy::debug_assert) { BOOST_ASSERT_MSG(!overflow, "big_uint: multiplication 
overflow"); diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/big_uint_impl.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/big_uint_impl.hpp new file mode 100644 index 0000000000..4b163285e8 --- /dev/null +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/big_uint_impl.hpp @@ -0,0 +1,1091 @@ +#pragma once + +// IWYU pragma: private; include "nil/crypto3/multiprecision/big_uint.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "nil/crypto3/multiprecision/detail/assert.hpp" +#include "nil/crypto3/multiprecision/detail/big_uint/arithmetic.hpp" +#include "nil/crypto3/multiprecision/detail/big_uint/parsing.hpp" // IWYU pragma: export +#include "nil/crypto3/multiprecision/detail/big_uint/storage.hpp" +#include "nil/crypto3/multiprecision/detail/big_uint/type_traits.hpp" // IWYU pragma: export +#include "nil/crypto3/multiprecision/detail/config.hpp" +#include "nil/crypto3/multiprecision/detail/endian.hpp" +#include "nil/crypto3/multiprecision/detail/type_traits.hpp" +#include "nil/crypto3/multiprecision/detail/throw.hpp" + +namespace nil::crypto3::multiprecision { + /** + * @brief Big unsigned integer type + * + * @tparam Bits Number of bits + * + * @details + * This is a class that represents a big unsigned integer with a fixed size in bits. + * + * @note + * Addition and subtraction operations are optimized, while multiplication and division are not. + * Multiplication and division should be used in compile time or in non-performance critical + * code. + * If you need fast arithmetic, you probably are looking for big_mod, which implements fast + * modular arithmetic. 
+ */ + template + class big_uint { + public: + static constexpr std::size_t Bits = Bits_; + using self_type = big_uint; + + using limb_type = detail::limb_type; + using double_limb_type = detail::double_limb_type; + using signed_limb_type = detail::signed_limb_type; + using signed_double_limb_type = detail::signed_double_limb_type; + + // Storage + + using limb_pointer = detail::limb_pointer; + using const_limb_pointer = detail::const_limb_pointer; + static constexpr std::size_t limb_bits = detail::limb_bits; + static constexpr limb_type max_limb_value = detail::max_limb_value; + + static constexpr std::size_t internal_limb_count = + (Bits / limb_bits) + (((Bits % limb_bits) != 0u) ? 1u : 0u); + static constexpr limb_type upper_limb_mask = + (Bits % limb_bits) ? (limb_type(1) << (Bits % limb_bits)) - 1 : (~limb_type(0u)); + + // + // Helper functions for getting at our internal data, and manipulating storage: + // + constexpr std::size_t limbs_count() const noexcept { + static_assert(internal_limb_count != 0, "No limbs in storage."); + return internal_limb_count; + } + constexpr limb_pointer limbs() noexcept { return m_data.data(); } + constexpr const_limb_pointer limbs() const noexcept { return m_data.data(); } + constexpr auto& limbs_array() noexcept { return m_data; } + constexpr const auto& limbs_array() const noexcept { return m_data; } + + private: + // Zeros out everything after limb[i], replaces resizing. 
+ constexpr void zero_after(std::size_t start_index) { + auto pr = this->limbs(); + for (std::size_t i = start_index; i < this->limbs_count(); ++i) { + pr[i] = 0; + } + } + + constexpr std::size_t used_limbs() const noexcept { + for (int i = internal_limb_count - 1; i >= 0; --i) { + if (limbs()[i] != 0) { + return i + 1; + } + } + return 0; + } + + constexpr std::size_t order() const noexcept { + for (int i = internal_limb_count - 1; i >= 0; --i) { + if (limbs()[i] != 0) { + return i; + } + } + return 0; + } + + // Assignment + + template && std::is_unsigned_v || std::is_same_v, int> = 0> + constexpr void do_assign_integral(const T& a) noexcept { + if constexpr (sizeof(T) <= sizeof(limb_type)) { + this->limbs()[0] = a; + this->zero_after(1); + } else { + static_assert(sizeof(T) % sizeof(limb_type) == 0); + constexpr std::size_t n = + std::min(internal_limb_count, sizeof(T) / sizeof(limb_type)); + auto a_copy = a; + for (std::size_t i = 0; i < n; ++i) { + limbs()[i] = a_copy & static_cast(static_cast(-1)); + a_copy >>= limb_bits; + } + zero_after(n); + } + this->normalize(); + if constexpr (sizeof(T) * CHAR_BIT > Bits) { + NIL_CO3_MP_ASSERT(big_uint(a).compare(*this) == 0); + } + } + + template + constexpr void do_assign(const big_uint& other) noexcept { + std::size_t count = (std::min)(other.limbs_count(), this->limbs_count()); + for (std::size_t i = 0; i < count; ++i) { + this->limbs()[i] = other.limbs()[i]; + } + // Zero out everything after (std::min)(other.limbs_count(), limbs_count()), so if size + // of other was less, we have 0s at the end. 
+ this->zero_after((std::min)(other.limbs_count(), this->limbs_count())); + this->normalize(); + } + + public: + // TODO(ioxid): this should be private + constexpr void normalize() noexcept { limbs()[internal_limb_count - 1] &= upper_limb_mask; } + + constexpr bool has_carry() const noexcept { return m_carry; } + constexpr void set_carry(bool carry) noexcept { m_carry = carry; } + + // Constructor + + constexpr big_uint() noexcept {} + + constexpr big_uint(std::string_view str) { *this = str; } + constexpr big_uint(const char* str) { *this = str; } + constexpr big_uint(const std::string &str) { *this = str; } + + template && std::is_signed_v, int> = 0> + constexpr big_uint(T val) noexcept { + NIL_CO3_MP_ASSERT_MSG(val >= 0, "big_uint: assignment from negative integer"); + do_assign_integral(static_cast>(val)); + } + + template && std::is_unsigned_v, int> = 0> + constexpr big_uint(T val) noexcept { + do_assign_integral(val); + } + + // TODO(ioxid): make this explicit for the case when Bits2 > Bits + template + constexpr big_uint(const big_uint& other) noexcept { + do_assign(other); + if constexpr (Bits2 > Bits) { + NIL_CO3_MP_ASSERT(other.compare(*this) == 0); + } + } + + template + constexpr big_uint(const std::array& bytes) noexcept { + *this = bytes; + } + + // Assignment + + constexpr big_uint& operator=(std::string_view str) { + *this = detail::parse_int(str); + return *this; + } + constexpr big_uint& operator=(const char* str) { + *this = detail::parse_int(str); + return *this; + } + constexpr big_uint& operator=(const std::string &str) { + *this = detail::parse_int(str); + return *this; + } + + template && std::is_signed_v, int> = 0> + constexpr big_uint& operator=(T val) noexcept { + NIL_CO3_MP_ASSERT_MSG(val >= 0, "big_uint: assignment from negative integer"); + do_assign_integral(static_cast>(val)); + return *this; + } + + template && std::is_unsigned_v || std::is_same_v, int> = 0> + constexpr big_uint& operator=(T val) noexcept { + 
do_assign_integral(val); + return *this; + } + + template + constexpr big_uint& operator=(const big_uint& other) noexcept { + do_assign(other); + if constexpr (Bits2 > Bits) { + NIL_CO3_MP_ASSERT(other.compare(*this) == 0); + } + return *this; + } + + template + constexpr big_uint& operator=(const std::array& bytes) { + std::size_t bits = 0; + for (std::size_t i = 0; i < bytes.size(); ++i) { + *this <<= 8; + if (bits != 0) { + bits += 8; + } + unsigned b = bytes[i]; + *this += b; + if (bits == 0 && b != 0) { + bits += std::bit_width(b); + } + } + if (bits > Bits) { + NIL_THROW(std::invalid_argument("not enough bits")); + } + return *this; + } + + // String conversion + + constexpr std::string str(std::ios_base::fmtflags flags = std::ios_base::hex | + std::ios_base::showbase | + std::ios_base::uppercase) const { + if (flags & std::ios_base::dec) { + // TODO(ioxid): this is inefficient + std::string result; + auto copy = *this; + while (!copy.is_zero()) { + result += static_cast(static_cast(copy % 10u) + '0'); + copy /= 10u; + } + std::reverse(result.begin(), result.end()); + if (result.empty()) { + result += '0'; + } + return result; + } + if (!(flags & std::ios_base::hex)) { + NIL_THROW(std::invalid_argument("big_uint: unsupported format flags")); + } + std::string result; + result.reserve(used_limbs() * limb_bits / 4); + bool found_first = false; + for (int i = internal_limb_count - 1; i >= 0; --i) { + auto limb = limbs()[i]; + bool should_pad = found_first; + found_first = found_first || limb != 0; + if (found_first) { + std::size_t len = limb == 0 ? 
1 : (std::bit_width(limb) + 3) / 4; + std::size_t padded_len = len; + if (should_pad) { + padded_len = sizeof(limb_type) * 2; + } + for (std::size_t j = 0; j < padded_len - len; ++j) { + result += '0'; + } + std::size_t start_offset = result.size(); + result.resize(result.size() + len); + auto ec = std::to_chars(result.data() + start_offset, + result.data() + result.size(), limb, 16) + .ec; + NIL_CO3_MP_ASSERT(ec == std::errc{}); + } + } + if (flags & std::ios_base::uppercase) { + for (std::size_t i = 0; i < result.size(); ++i) { + result[i] = + static_cast(std::toupper(static_cast(result[i]))); + } + } + if (result.size() == 0) { + result += '0'; + } + if (flags & std::ios_base::showbase) { + result = "0x" + result; + } + return result; + } + + template = 0> + constexpr big_uint truncate() const noexcept { + big_uint result; + result.do_assign(*this); + return result; + } + + // Cast to integral types + + template && std::is_integral_v && + std::is_unsigned_v, + int> = 0> + explicit constexpr operator T() const { + if constexpr (sizeof(T) <= sizeof(limb_type)) { + return static_cast(this->limbs()[0]); + } else { + constexpr std::size_t n = + std::min(sizeof(T) / sizeof(limb_type), internal_limb_count); + T result = 0; + for (std::size_t i = 0; i < n; ++i) { + result <<= limb_bits; + result |= limbs()[n - i - 1]; + } + return result; + } + } + + template && std::is_signed_v, int> = 0> + explicit constexpr operator T() const { + return static_cast(static_cast>(*this)); + } + + explicit constexpr operator bool() const { return !is_zero(); } + + // Comparison + + template + constexpr int compare(const big_uint& b) const noexcept { + std::size_t as = used_limbs(); + std::size_t bs = b.used_limbs(); + if (as != bs) { + return as > bs ? 1 : -1; + } + auto pa = limbs(); + auto pb = b.limbs(); + for (auto i = static_cast(as) - 1; i >= 0; --i) { + if (pa[i] != pb[i]) { + return pa[i] > pb[i] ? 
1 : -1; + } + } + return 0; + } + + template && std::is_signed_v, int> = 0> + constexpr int compare(const T& b) const noexcept { + if (b < 0) { + return 1; + } + return compare(static_cast>(b)); + } + + template && std::is_unsigned_v, int> = 0> + constexpr int compare(const T& b) const noexcept { + static_assert(sizeof(T) <= sizeof(double_limb_type)); + std::size_t s = used_limbs(); + if constexpr (sizeof(T) <= sizeof(limb_type)) { + if (s > 1) { + return 1; + } + auto lmb = this->limbs()[0]; + return lmb == b ? 0 : lmb > b ? 1 : -1; + } else { + if (s > 2) { + return 1; + } + auto dbl = static_cast(*this); + return dbl == b ? 0 : dbl > b ? 1 : -1; + } + } + + // Comparison + +#define NIL_CO3_MP_BIG_UINT_IMPL_COMPARISON_OPERATOR(OP_) \ + template, int> = 0> \ + constexpr bool operator OP_(const T& o) const noexcept { \ + return compare(o) OP_ 0; \ + } \ + \ + template, int> = 0> \ + friend constexpr bool operator OP_(const T& a, const big_uint& b) noexcept { \ + return (-(b.compare(a)))OP_ 0; \ + } + + NIL_CO3_MP_BIG_UINT_IMPL_COMPARISON_OPERATOR(<) + NIL_CO3_MP_BIG_UINT_IMPL_COMPARISON_OPERATOR(<=) + NIL_CO3_MP_BIG_UINT_IMPL_COMPARISON_OPERATOR(>) + NIL_CO3_MP_BIG_UINT_IMPL_COMPARISON_OPERATOR(>=) + NIL_CO3_MP_BIG_UINT_IMPL_COMPARISON_OPERATOR(==) + NIL_CO3_MP_BIG_UINT_IMPL_COMPARISON_OPERATOR(!=) + +#undef NIL_CO3_MP_BIG_UINT_IMPL_COMPARISON_OPERATOR + + // Arithmetic operations + + constexpr void negate() noexcept { + if (is_zero()) { + return; + } + complement(*this); + ++*this; + } + + constexpr auto& operator++() noexcept { + if (limbs()[0] < max_limb_value) { + ++limbs()[0]; + if constexpr (Bits < limb_bits) { + normalize(); + } + } else { + detail::add(*this, *this, static_cast(1u)); + } + return *this; + } + + constexpr auto operator++(int) noexcept { + auto copy = *this; + ++*this; + return copy; + } + + NIL_CO3_MP_FORCEINLINE constexpr void decrement() noexcept {} + + constexpr auto operator+() const noexcept { return *this; } + + constexpr auto& 
operator--() noexcept { + if (limbs()[0]) { + --limbs()[0]; + } else { + detail::subtract(*this, *this, static_cast(1u)); + } + return *this; + } + constexpr auto operator--(int) noexcept { + auto copy = *this; + --*this; + return copy; + } + + constexpr big_uint operator-() const noexcept { + big_uint result = *this; + result.negate(); + return result; + } + + // Arithmetic operations + + template, int> = 0> + constexpr auto operator+(const T& b) const noexcept { + detail::largest_big_uint_t result; + detail::add(result, *this, b); + return result; + } + + template, int> = 0> + friend constexpr auto operator+(const T& a, const big_uint& b) noexcept { + return b + a; + } + + template, int> = 0> + constexpr auto& operator+=(const T& b) noexcept { + detail::add(*this, *this, b); + return *this; + } + + template, int> = 0> + constexpr auto operator-(const T& b) const noexcept { + detail::largest_big_uint_t result; + detail::subtract(result, *this, b); + return result; + } + + template, int> = 0> + friend constexpr auto operator-(const T& a, const big_uint& b) noexcept { + return (-b) + a; + } + + template, int> = 0> + constexpr auto& operator-=(const T& b) noexcept { + detail::subtract(*this, *this, b); + return *this; + } + + template, int> = 0> + constexpr auto operator*(const T& b) const noexcept { + decltype(auto) b_unsigned = detail::unsigned_or_throw(b); + detail::largest_big_uint_t result; + detail::multiply(result, *this, detail::as_big_uint(b_unsigned)); + return result; + } + + template, int> = 0> + friend constexpr auto operator*(const T& a, const big_uint& b) noexcept { + return b * a; + } + + template, int> = 0> + constexpr auto& operator*=(const T& b) noexcept { + decltype(auto) b_unsigned = detail::unsigned_or_throw(b); + big_uint result; + detail::multiply(result, *this, detail::as_big_uint(b_unsigned)); + *this = result; + return *this; + } + + template && detail::is_integral_v) || + (std::is_integral_v && std::is_same_v), + int> = 0> + friend 
constexpr auto operator/(const T1& a, const T2& b) noexcept { + decltype(auto) a_unsigned = detail::unsigned_or_throw(a); + decltype(auto) b_unsigned = detail::unsigned_or_throw(b); + using big_uint_a = std::decay_t; + big_uint_a result; + big_uint_a modulus; + detail::divide(&result, detail::as_big_uint(a_unsigned), + detail::as_big_uint(b_unsigned), modulus); + return static_cast>(result); + } + + template, int> = 0> + constexpr auto& operator/=(const T& b) noexcept { + decltype(auto) b_unsigned = detail::unsigned_or_throw(b); + big_uint result; + big_uint modulus; + detail::divide(&result, *this, detail::as_big_uint(b_unsigned), modulus); + *this = result; + return *this; + } + + template && detail::is_integral_v) || + (std::is_integral_v && std::is_same_v), + int> = 0> + friend constexpr auto operator%(const T1& a, const T2& b) { + decltype(auto) a_unsigned = detail::unsigned_or_throw(a); + decltype(auto) b_unsigned = detail::unsigned_or_throw(b); + using big_uint_a = std::decay_t; + big_uint_a modulus; + detail::divide(static_cast(nullptr), detail::as_big_uint(a_unsigned), + detail::as_big_uint(b_unsigned), modulus); + return static_cast>(modulus); + } + + template, int> = 0> + constexpr auto& operator%=(const T& b) { + decltype(auto) b_unsigned = detail::unsigned_or_throw(b); + big_uint modulus; + detail::divide(static_cast(nullptr), *this, detail::as_big_uint(b_unsigned), + modulus); + *this = modulus; + return *this; + } + +#define NIL_CO3_MP_BIG_UINT_BITWISE_OPERATOR_IMPL(OP_, OP_ASSIGN_, METHOD_) \ + template, int> = 0> \ + constexpr auto operator OP_(const T& b) const noexcept { \ + detail::largest_big_uint_t result = *this; \ + result.METHOD_(detail::as_limb_type_or_big_uint(detail::unsigned_or_throw(b))); \ + return result; \ + } \ + \ + template, int> = 0> \ + friend constexpr auto operator OP_(const T& a, const big_uint& b) noexcept { \ + return b OP_ a; \ + } \ + \ + template, int> = 0> \ + constexpr auto& operator OP_ASSIGN_(const T & b) noexcept { 
\ + METHOD_(detail::as_limb_type_or_big_uint(detail::unsigned_or_throw(b))); \ + return *this; \ + } + + NIL_CO3_MP_BIG_UINT_BITWISE_OPERATOR_IMPL(&, &=, bitwise_and) + NIL_CO3_MP_BIG_UINT_BITWISE_OPERATOR_IMPL(|, |=, bitwise_or) + NIL_CO3_MP_BIG_UINT_BITWISE_OPERATOR_IMPL(^, ^=, bitwise_xor) + +#undef NIL_CO3_MP_BIG_UINT_BITWISE_OPERATOR_IMPL + + // Bitwise operations + + private: + template + constexpr void bitwise_op(const big_uint& o, Op op) noexcept { + // + // Both arguments are unsigned types, very simple case handled as a special case. + // + // First figure out how big the result needs to be and set up some data: + // + std::size_t rs = limbs_count(); + std::size_t os = o.limbs_count(); + auto [m, x] = std::minmax(rs, os); + limb_pointer pr = limbs(); + const_limb_pointer po = o.limbs(); + for (std::size_t i = rs; i < x; ++i) { + pr[i] = 0; + } + + for (std::size_t i = 0; i < os; ++i) { + pr[i] = op(pr[i], po[i]); + } + for (std::size_t i = os; i < x; ++i) { + pr[i] = op(pr[i], static_cast(0u)); + } + normalize(); + } + + template + NIL_CO3_MP_FORCEINLINE constexpr void bitwise_and(const big_uint& o) noexcept { + bitwise_op(o, std::bit_and()); + } + + template + NIL_CO3_MP_FORCEINLINE constexpr void bitwise_or(const big_uint& o) noexcept { + bitwise_op(o, std::bit_or()); + } + + template + NIL_CO3_MP_FORCEINLINE constexpr void bitwise_xor(const big_uint& o) noexcept { + bitwise_op(o, std::bit_xor()); + } + + // + // Again for operands which are single limbs: + // + + NIL_CO3_MP_FORCEINLINE constexpr void bitwise_and(limb_type l) noexcept { + limbs()[0] &= l; + zero_after(1); + } + + NIL_CO3_MP_FORCEINLINE constexpr void bitwise_or(limb_type l) noexcept { limbs()[0] |= l; } + + NIL_CO3_MP_FORCEINLINE constexpr void bitwise_xor(limb_type l) noexcept { limbs()[0] ^= l; } + + NIL_CO3_MP_FORCEINLINE constexpr void complement(const big_uint& o) noexcept { + std::size_t os = o.limbs_count(); + for (std::size_t i = 0; i < os; ++i) { + limbs()[i] = ~o.limbs()[i]; + 
} + normalize(); + } + + // Left shift will throw away upper Bits. + // This function must be called only when s % 8 == 0, i.e. we shift bytes. + void left_shift_byte(double_limb_type s) noexcept { + limb_pointer pr = limbs(); + + std::size_t bytes = static_cast(s / CHAR_BIT); + if (s >= Bits) { + // Set result to 0. + zero_after(0); + } else { + unsigned char* pc = reinterpret_cast(pr); + std::memmove(pc + bytes, pc, limbs_count() * sizeof(limb_type) - bytes); + std::memset(pc, 0, bytes); + } + } + + // Left shift will throw away upper Bits. + // This function must be called only when s % limb_bits == 0, i.e. we shift limbs, which + // are normally 64 bit. + + constexpr void left_shift_limb(double_limb_type s) noexcept { + limb_type offset = static_cast(s / limb_bits); + NIL_CO3_MP_ASSERT(static_cast(s % limb_bits) == 0); + + limb_pointer pr = limbs(); + + if (s >= Bits) { + // Set result to 0. + zero_after(0); + } else { + std::size_t i = offset; + std::size_t rs = limbs_count() + offset; + for (; i < limbs_count(); ++i) { + pr[rs - 1 - i] = pr[limbs_count() - 1 - i]; + } + for (; i < rs; ++i) { + pr[rs - 1 - i] = 0; + } + } + } + + // Left shift will throw away upper Bits. + + constexpr void left_shift_generic(double_limb_type s) noexcept { + if (s >= Bits) { + // Set result to 0. + zero_after(0); + } else { + limb_type offset = static_cast(s / limb_bits); + limb_type shift = static_cast(s % limb_bits); + + limb_pointer pr = limbs(); + std::size_t i = 0; + std::size_t rs = limbs_count(); + // This code only works when shift is non-zero, otherwise we invoke undefined + // behaviour! 
+ NIL_CO3_MP_ASSERT(shift); + for (; rs - i >= 2 + offset; ++i) { + pr[rs - 1 - i] = pr[rs - 1 - i - offset] << shift; + pr[rs - 1 - i] |= pr[rs - 2 - i - offset] >> (limb_bits - shift); + } + if (rs - i >= 1 + offset) { + pr[rs - 1 - i] = pr[rs - 1 - i - offset] << shift; + ++i; + } + for (; i < rs; ++i) { + pr[rs - 1 - i] = 0; + } + } + } + + void right_shift_byte(double_limb_type s) noexcept { + limb_type offset = static_cast(s / limb_bits); + NIL_CO3_MP_ASSERT((s % CHAR_BIT) == 0); + std::size_t ors = limbs_count(); + std::size_t rs = ors; + if (offset >= rs) { + zero_after(0); + return; + } + rs -= offset; + limb_pointer pr = limbs(); + unsigned char* pc = reinterpret_cast(pr); + limb_type shift = static_cast(s / CHAR_BIT); + std::memmove(pc, pc + shift, ors * sizeof(pr[0]) - shift); + shift = (sizeof(limb_type) - shift % sizeof(limb_type)) * CHAR_BIT; + if (shift < limb_bits) { + pr[ors - offset - 1] &= (static_cast(1u) << shift) - 1; + if (!pr[ors - offset - 1] && (rs > 1)) { + --rs; + } + } + // Set zeros after 'rs', alternative to resizing to size 'rs'. + zero_after(rs); + } + + constexpr void right_shift_limb(double_limb_type s) noexcept { + limb_type offset = static_cast(s / limb_bits); + NIL_CO3_MP_ASSERT((s % limb_bits) == 0); + std::size_t ors = limbs_count(); + std::size_t rs = ors; + if (offset >= rs) { + zero_after(0); + return; + } + rs -= offset; + limb_pointer pr = limbs(); + std::size_t i = 0; + for (; i < rs; ++i) { + pr[i] = pr[i + offset]; + } + // Set zeros after 'rs', alternative to resizing to size 'rs'. 
+ zero_after(rs); + } + + constexpr void right_shift_generic(double_limb_type s) noexcept { + limb_type offset = static_cast(s / limb_bits); + limb_type shift = static_cast(s % limb_bits); + std::size_t ors = limbs_count(); + std::size_t rs = ors; + + if (offset >= rs) { + *this = static_cast(0u); + return; + } + rs -= offset; + limb_pointer pr = limbs(); + if ((pr[ors - 1] >> shift) == 0) { + if (--rs == 0) { + *this = static_cast(0u); + return; + } + } + std::size_t i = 0; + + // This code only works for non-zero shift, otherwise we invoke undefined behaviour! + NIL_CO3_MP_ASSERT(shift); + for (; i + offset + 1 < ors; ++i) { + pr[i] = pr[i + offset] >> shift; + pr[i] |= pr[i + offset + 1] << (limb_bits - shift); + } + pr[i] = pr[i + offset] >> shift; + + // We cannot resize any more, so we need to set all the limbs to zero. + zero_after(rs); + } + + public: + constexpr auto operator~() const noexcept { + big_uint result; + result.complement(*this); + return result; + } + + // Shifting left throws away upper Bits. 
+ constexpr big_uint& operator<<=(double_limb_type s) noexcept { + if (!s) { + return *this; + } + +#if NIL_CO3_MP_ENDIAN_LITTLE_BYTE && defined(NIL_CO3_MP_USE_LIMB_SHIFT) + constexpr limb_type limb_shift_mask = limb_bits - 1; + constexpr limb_type byte_shift_mask = CHAR_BIT - 1; + + if ((s & limb_shift_mask) == 0) { + left_shift_limb(s); + } else if (((s & byte_shift_mask) == 0) && !std::is_constant_evaluated()) { + left_shift_byte(s); + } +#elif NIL_CO3_MP_ENDIAN_LITTLE_BYTE + constexpr limb_type limb_shift_mask = limb_bits - 1; + constexpr limb_type byte_shift_mask = CHAR_BIT - 1; + + if (std::is_constant_evaluated() && ((s & limb_shift_mask) == 0)) { + left_shift_limb(s); + } else if (((s & byte_shift_mask) == 0) && !std::is_constant_evaluated()) { + left_shift_byte(s); + } +#else + constexpr limb_type limb_shift_mask = limb_bits - 1; + + if ((s & limb_shift_mask) == 0) { + left_shift_limb(s); + } +#endif + else { + left_shift_generic(s); + } + normalize(); + return *this; + } + + constexpr big_uint operator<<(double_limb_type s) const noexcept { + big_uint result = *this; + result <<= s; + return result; + } + + constexpr big_uint& operator>>=(double_limb_type s) noexcept { + if (!s) { + return *this; + } + +#if NIL_CO3_MP_ENDIAN_LITTLE_BYTE && defined(NIL_CO3_MP_USE_LIMB_SHIFT) + constexpr limb_type limb_shift_mask = limb_bits - 1; + constexpr limb_type byte_shift_mask = CHAR_BIT - 1; + + if ((s & limb_shift_mask) == 0) { + right_shift_limb(s); + } else if (((s & byte_shift_mask) == 0) && !std::is_constant_evaluated()) { + right_shift_byte(s); + } +#elif NIL_CO3_MP_ENDIAN_LITTLE_BYTE + constexpr limb_type byte_shift_mask = CHAR_BIT - 1; + + constexpr limb_type limb_shift_mask = limb_bits - 1; + if (std::is_constant_evaluated() && ((s & limb_shift_mask) == 0)) { + right_shift_limb(s); + } else if (((s & byte_shift_mask) == 0) && !std::is_constant_evaluated()) { + right_shift_byte(s); + } +#else + constexpr limb_type limb_shift_mask = limb_bits - 1; + + if ((s 
& limb_shift_mask) == 0) { + right_shift_limb(s); + } +#endif + else { + right_shift_generic(s); + } + return *this; + } + + constexpr big_uint operator>>(double_limb_type s) const noexcept { + big_uint result = *this; + result >>= s; + return result; + } + + // IO + + friend std::ostream& operator<<(std::ostream& os, const big_uint& value) { + os << value.str(os.flags()); + return os; + } + + // Misc ops + + NIL_CO3_MP_FORCEINLINE constexpr bool is_zero() const noexcept { + for (std::size_t i = 0; i < limbs_count(); ++i) { + if (limbs()[i] != 0) { + return false; + } + } + return true; + } + + constexpr std::size_t lsb() const { + // + // Find the index of the least significant limb that is non-zero: + // + std::size_t index = 0; + while ((index < limbs_count()) && !limbs()[index]) { + ++index; + } + + if (index == limbs_count()) { + NIL_THROW(std::invalid_argument("zero has no lsb")); + } + + // + // Find the index of the least significant bit within that limb: + // + std::size_t result = std::countr_zero(limbs()[index]); + + return result + index * limb_bits; + } + + constexpr std::size_t msb() const { + // + // Find the index of the most significant bit that is non-zero: + // + for (std::size_t i = limbs_count() - 1; i > 0; --i) { + if (limbs()[i] != 0) { + return i * limb_bits + std::bit_width(limbs()[i]) - 1; + } + } + if (limbs()[0] == 0) { + NIL_THROW(std::invalid_argument("zero has no msb")); + } + return std::bit_width(limbs()[0]) - 1; + } + + constexpr bool bit_test(std::size_t index) const { + if (index >= Bits) { + return false; + // TODO(ioxid): this throws in multiexp tests + // NIL_THROW(std::invalid_argument("fixed precision overflow")); + } + std::size_t offset = index / limb_bits; + std::size_t shift = index % limb_bits; + limb_type mask = limb_type(1u) << shift; + return static_cast(limbs()[offset] & mask); + } + + constexpr void bit_set(std::size_t index) { + if (index >= Bits) { + NIL_THROW(std::invalid_argument("fixed precision overflow")); + 
} + std::size_t offset = index / limb_bits; + std::size_t shift = index % limb_bits; + limb_type mask = limb_type(1u) << shift; + limbs()[offset] |= mask; + } + + constexpr void bit_unset(std::size_t index) { + if (index >= Bits) { + NIL_THROW(std::invalid_argument("fixed precision overflow")); + } + std::size_t offset = index / limb_bits; + std::size_t shift = index % limb_bits; + limb_type mask = limb_type(1u) << shift; + limbs()[offset] &= ~mask; + } + + constexpr void bit_flip(big_uint& val, std::size_t index) { + if (index >= Bits) { + NIL_THROW(std::invalid_argument("fixed precision overflow")); + } + std::size_t offset = index / limb_bits; + std::size_t shift = index % limb_bits; + limb_type mask = limb_type(1u) << shift; + val.limbs()[offset] ^= mask; + } + + private: + // Data + + // m_data[0] contains the lowest bits. + std::array m_data{0}; + + // This is a temporary value which is set when a carry has happened during addition. + // If this value is true, reduction by modulus must happen next.
+ bool m_carry = false; + + // Friends + + template + friend class big_uint; + + template + friend constexpr void detail::add_constexpr_unsigned(big_uint& result, + const big_uint& a, + const big_uint& b) noexcept; + template + friend constexpr void detail::subtract_constexpr_unsigned( + big_uint& result, const big_uint& a, const big_uint& b) noexcept; + template + friend constexpr void detail::add_unsigned(big_uint& result, + const big_uint& a, + const big_uint& b) noexcept; + template + friend constexpr void detail::subtract_unsigned(big_uint& result, + const big_uint& a, + const big_uint& b) noexcept; + template + friend constexpr void detail::add_unsigned(big_uint& result, + const big_uint& a, + const limb_type& o) noexcept; + template + friend constexpr void detail::subtract_unsigned(big_uint& result, + const big_uint& a, + const limb_type& b) noexcept; + template + friend constexpr void detail::divide(big_uint* div, const big_uint& x, + const big_uint& y, big_uint& rem); + template + friend constexpr void detail::multiply(big_uint& result, const big_uint& a, + const T& b) noexcept; + }; + + // Hash + + template + constexpr std::size_t hash_value(const big_uint& val) noexcept { + std::size_t result = 0; + for (std::size_t i = 0; i < val.limbs_count(); ++i) { + boost::hash_combine(result, val.limbs()[i]); + } + return result; + } + + // Misc ops + + template + constexpr std::size_t msb(const big_uint& a) { + return a.msb(); + } + + template + constexpr std::size_t lsb(const big_uint& a) { + return a.lsb(); + } + + template + constexpr bool bit_test(const big_uint& a, std::size_t index) { + return a.bit_test(index); + } + + template + constexpr bool is_zero(const big_uint& a) { + return a.is_zero(); + } + + template + constexpr void divide_qr(const big_uint& a, const big_uint& b, big_uint& q, + big_uint& r) { + detail::divide(&q, a, b, r); + } +} // namespace nil::crypto3::multiprecision + +template +struct std::hash> { + std::size_t operator()(const 
nil::crypto3::multiprecision::big_uint& a) const noexcept { + return boost::hash>{}(a); + } +}; diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/parsing.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/parsing.hpp index 1e1598ec7c..fbe3d82711 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/parsing.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/big_uint/parsing.hpp @@ -13,6 +13,8 @@ #include #include +#include "nil/crypto3/multiprecision/detail/throw.hpp" + namespace nil::crypto3::multiprecision { template class big_uint; @@ -35,7 +37,7 @@ namespace nil::crypto3::multiprecision { template constexpr big_uint parse_int_hex(std::string_view str) { if (str.size() < 2 || str[0] != '0' || str[1] != 'x') { - throw std::invalid_argument("hex literal should start with 0x"); + NIL_THROW(std::invalid_argument("hex literal should start with 0x")); } big_uint result{0}; @@ -44,7 +46,7 @@ namespace nil::crypto3::multiprecision { for (std::size_t i = 2; i < str.size(); ++i) { char c = str[i]; if (!is_valid_hex_digit(c)) { - throw std::invalid_argument("non-hex character in literal"); + NIL_THROW(std::invalid_argument("non-hex character in literal")); } result <<= 4; if (bits != 0) { @@ -57,7 +59,7 @@ namespace nil::crypto3::multiprecision { } } if (bits > Bits) { - throw std::range_error("not enough bits to store literal"); + NIL_THROW(std::range_error("not enough bits to store literal")); } return result; } @@ -69,7 +71,7 @@ namespace nil::crypto3::multiprecision { for (std::size_t i = 0; i < str.size(); ++i) { char c = str[i]; if (c < '0' || c > '9') { - throw std::invalid_argument("non decimal character in literal"); + NIL_THROW(std::invalid_argument("non decimal character in literal")); } result *= 10u; result += static_cast(c - '0'); diff --git 
a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/config.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/config.hpp index f5a7956ace..94a2aa0e08 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/config.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/config.hpp @@ -17,3 +17,10 @@ // Disable use of int128 // #define NIL_CO3_MP_DISABLE_INT128 + +#if defined(GPU_PROVER) + #define NIL_CO3_MP_DISABLE_INT128 + #define NIL_CO3_MP_DISABLE_INTRINSICS + // TODO: document why GPU_PROVER builds need this empty float128_type placeholder + struct float128_type {}; +#endif diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/integer_ops_base.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/integer_ops_base.hpp index e21b943942..7934c10649 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/integer_ops_base.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/integer_ops_base.hpp @@ -13,13 +13,15 @@ #include #include +#include "nil/crypto3/multiprecision/detail/throw.hpp" + namespace nil::crypto3::multiprecision { template && std::is_unsigned_v, int> = 0> constexpr std::size_t lsb(T a) { if (a == 0) { - throw std::invalid_argument("zero has no lsb"); + NIL_THROW(std::invalid_argument("zero has no lsb")); } return std::countr_zero(a); } @@ -28,7 +30,7 @@ namespace nil::crypto3::multiprecision { std::enable_if_t && std::is_unsigned_v, int> = 0> constexpr std::size_t msb(T a) { if (a == 0) { - throw std::invalid_argument("zero has no msb"); + NIL_THROW(std::invalid_argument("zero has no msb")); } return std::bit_width(a) - 1; } @@ -48,7 +50,7 @@ namespace nil::crypto3::multiprecision { std::enable_if_t && std::is_unsigned_v, int> = 0> constexpr T &bit_set(T &a, std::size_t index) { if (index >= sizeof(T) * CHAR_BIT) { - throw std::invalid_argument("fixed precision overflow"); +
NIL_THROW(std::invalid_argument("fixed precision overflow")); } auto mask = static_cast(1u) << index; a |= mask; @@ -59,7 +61,7 @@ namespace nil::crypto3::multiprecision { std::enable_if_t && std::is_unsigned_v, int> = 0> constexpr T &bit_unset(T &a, std::size_t index) { if (index >= sizeof(T) * CHAR_BIT) { - throw std::invalid_argument("fixed precision overflow"); + NIL_THROW(std::invalid_argument("fixed precision overflow")); } auto mask = static_cast(1u) << index; a &= ~mask; @@ -70,7 +72,7 @@ namespace nil::crypto3::multiprecision { std::enable_if_t && std::is_unsigned_v, int> = 0> constexpr T &bit_flip(T &a, std::size_t index) { if (index >= sizeof(T) * CHAR_BIT) { - throw std::invalid_argument("fixed precision overflow"); + NIL_THROW(std::invalid_argument("fixed precision overflow")); } auto mask = static_cast(1u) << index; a ^= mask; diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/throw.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/throw.hpp new file mode 100644 index 0000000000..6bddf58699 --- /dev/null +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/detail/throw.hpp @@ -0,0 +1,36 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2025 Dmitrii Tabalin +// +// MIT License +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +//---------------------------------------------------------------------------// + +#pragma once + +#include + +// Exceptions cannot be thrown on the GPU, so under GPU_PROVER NIL_THROW(x) asserts with x.what() instead of throwing x. +// NOTE(review): both expansions already end in ';' (avoid unbraced if/else at call sites); the assert may compile out under NDEBUG - confirm. + +#ifdef GPU_PROVER +#define NIL_THROW(x) BOOST_ASSERT_MSG(false, x.what()); +#else +#define NIL_THROW(x) throw x; +#endif diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/inverse.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/inverse.hpp index afd3b575af..327dc9faa5 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/inverse.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/inverse.hpp @@ -20,6 +20,7 @@ #include "nil/crypto3/multiprecision/detail/big_int.hpp" #include "nil/crypto3/multiprecision/detail/half_extended_euclidean_algorithm.hpp" #include "nil/crypto3/multiprecision/type_traits.hpp" +#include "nil/crypto3/multiprecision/detail/throw.hpp" namespace nil::crypto3::multiprecision { template @@ -28,7 +29,7 @@ namespace nil::crypto3::multiprecision { big_int aa = a, mm = m, x, g; g = detail::half_extended_euclidean_algorithm(aa, mm, x); if (g != 1u) { - throw std::invalid_argument("no multiplicative inverse"); + NIL_THROW(std::invalid_argument("no multiplicative inverse")); } x %= m; if (x.negative()) { diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/jacobi.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/jacobi.hpp index
6581c316f5..5bca633252 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/jacobi.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/jacobi.hpp @@ -14,6 +14,7 @@ #include #include "nil/crypto3/multiprecision/big_uint.hpp" +#include "nil/crypto3/multiprecision/detail/throw.hpp" namespace nil::crypto3::multiprecision { @@ -23,7 +24,7 @@ namespace nil::crypto3::multiprecision { // TODO(ioxid): optimize if (n % 2u == 0 || n <= 1) { - throw std::invalid_argument("jacobi: second argument must be odd and > 1"); + NIL_THROW(std::invalid_argument("jacobi: second argument must be odd and > 1")); } big_uint_t x = a, y = n; diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/ressol.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/ressol.hpp index 6f15600f61..e5ad4d4f56 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/ressol.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/ressol.hpp @@ -21,6 +21,7 @@ #include "nil/crypto3/multiprecision/big_uint.hpp" #include "nil/crypto3/multiprecision/jacobi.hpp" #include "nil/crypto3/multiprecision/pow.hpp" +#include "nil/crypto3/multiprecision/detail/throw.hpp" namespace nil::crypto3::multiprecision { template @@ -49,7 +50,7 @@ namespace nil::crypto3::multiprecision { BOOST_ASSERT(p % 2u != 0u); if (jacobi(a, p) != 1) { - throw std::invalid_argument("Not a quadratic residue"); + NIL_THROW(std::invalid_argument("Not a quadratic residue")); } // We can use montgomery_big_mod because p is odd @@ -89,7 +90,7 @@ namespace nil::crypto3::multiprecision { big_uint_t z = two; while (jacobi(z, p) == 1) { if (z.is_zero()) { - throw std::invalid_argument("No quadratic nonresidue"); + NIL_THROW(std::invalid_argument("No quadratic nonresidue")); } ++z; } @@ -113,7 +114,7 @@ namespace nil::crypto3::multiprecision { if (i >= s) { // TODO(ioxid): when can this happen? 
(jacobi said that this should // not happen) Martun: the value now has a square root - throw std::invalid_argument("Not a quadratic residue"); + NIL_THROW(std::invalid_argument("Not a quadratic residue")); } } diff --git a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/unsigned_utils.hpp b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/unsigned_utils.hpp index 3e00b3a2e3..35884a6bb3 100644 --- a/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/unsigned_utils.hpp +++ b/crypto3/libs/multiprecision/include/nil/crypto3/multiprecision/unsigned_utils.hpp @@ -12,6 +12,7 @@ #include #include "nil/crypto3/multiprecision/type_traits.hpp" +#include "nil/crypto3/multiprecision/detail/throw.hpp" namespace nil::crypto3::multiprecision { // unsigned_abs returns the absolute value of a signed type as an unsigned type. @@ -39,9 +40,13 @@ namespace nil::crypto3::multiprecision { template, int> = 0> constexpr std::make_unsigned_t unsigned_or_throw(const T& a) { + // Exceptions are unavailable on GPU, so under GPU_PROVER the sign check below is compiled out + // and a negative value is converted to unsigned without any diagnostic. + #ifndef GPU_PROVER if (a < 0) { - throw std::range_error("nonnegative value expected"); + NIL_THROW(std::range_error("nonnegative value expected")); } + #endif return static_cast>(a); } diff --git a/crypto3/libs/random/example/CMakeLists.txt b/crypto3/libs/random/example/CMakeLists.txt index 8a38f3b78b..0dc63b492d 100644 --- a/crypto3/libs/random/example/CMakeLists.txt +++ b/crypto3/libs/random/example/CMakeLists.txt @@ -20,7 +20,7 @@ macro(define_random_example name) ${CMAKE_WORKSPACE_NAME}::multiprecision Boost::container) - set_target_properties(random_${name}_example PROPERTIES CXX_STANDARD 20) + set_target_properties(random_${name}_example PROPERTIES CXX_STANDARD 23) endmacro() set(EXAMPLES_NAMES diff --git a/crypto3/libs/random/test/CMakeLists.txt b/crypto3/libs/random/test/CMakeLists.txt index 484d40f82a..a758c724c9 100644 --- a/crypto3/libs/random/test/CMakeLists.txt +++
b/crypto3/libs/random/test/CMakeLists.txt @@ -12,7 +12,7 @@ include(CMTest) cm_test_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} ${CMAKE_WORKSPACE_NAME}::multiprecision Boost::unit_test_framework) -set_target_properties(_cm_internal_tests-crypto3-random-test PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests-crypto3-random-test PROPERTIES CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests-crypto3-random-test REUSE_FROM crypto3_precompiled_headers) macro(define_random_test test) @@ -27,7 +27,7 @@ macro(define_random_test test) ${Boost_INCLUDE_DIRS}) - set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 23) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") target_compile_options(${full_test_name} PRIVATE "-fconstexpr-steps=2147483647") diff --git a/crypto3/libs/transpiler/test/CMakeLists.txt b/crypto3/libs/transpiler/test/CMakeLists.txt index 6550755099..77a02e9373 100644 --- a/crypto3/libs/transpiler/test/CMakeLists.txt +++ b/crypto3/libs/transpiler/test/CMakeLists.txt @@ -18,7 +18,7 @@ cm_test_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} crypto3::zk Boost::unit_test_framework ) -set_target_properties(_cm_internal_tests-crypto3-transpiler-test PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests-crypto3-transpiler-test PROPERTIES CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests-crypto3-transpiler-test REUSE_FROM crypto3_precompiled_headers) add_custom_target(compile_and_run_transpiler_tests) @@ -39,7 +39,7 @@ macro(define_transpiler_test name) ${Boost_INCLUDE_DIRS}) - set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 23) get_target_property(target_type Boost::unit_test_framework TYPE) if(target_type STREQUAL "SHARED_LIB") diff --git a/crypto3/libs/zk/test/CMakeLists.txt b/crypto3/libs/zk/test/CMakeLists.txt index 
d1b8250b77..23e37c48fa 100644 --- a/crypto3/libs/zk/test/CMakeLists.txt +++ b/crypto3/libs/zk/test/CMakeLists.txt @@ -19,7 +19,7 @@ cm_test_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} Boost::unit_test_framework Boost::log ) -set_target_properties(_cm_internal_tests-crypto3-zk-test PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests-crypto3-zk-test PROPERTIES CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests-crypto3-zk-test REUSE_FROM crypto3_precompiled_headers) if(PROFILING_ENABLED) @@ -38,7 +38,7 @@ macro(define_zk_test test) ${Boost_INCLUDE_DIRS}) - set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 23) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") target_compile_options(${full_test_name} PRIVATE "-fconstexpr-steps=2147483647") diff --git a/debug-tools/bin/circgen/CMakeLists.txt b/debug-tools/bin/circgen/CMakeLists.txt index 003f359d2a..5cfa6dd736 100644 --- a/debug-tools/bin/circgen/CMakeLists.txt +++ b/debug-tools/bin/circgen/CMakeLists.txt @@ -16,7 +16,7 @@ add_executable(circgen set_target_properties(circgen PROPERTIES LINKER_LANGUAGE CXX EXPORT_NAME circgen - CXX_STANDARD 20 + CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) diff --git a/debug-tools/bin/excalibur/src/CMakeLists.txt b/debug-tools/bin/excalibur/src/CMakeLists.txt index 3b54265c50..1f28da1e04 100644 --- a/debug-tools/bin/excalibur/src/CMakeLists.txt +++ b/debug-tools/bin/excalibur/src/CMakeLists.txt @@ -59,7 +59,7 @@ add_executable(${C3_TARGET} set_target_properties(${C3_TARGET} PROPERTIES LINKER_LANGUAGE CXX EXPORT_NAME ${CMAKE_PROJECT_NAME} - CXX_STANDARD 20 + CXX_STANDARD 23 CXX_STANDARD_REQUIRED TRUE) target_link_directories( diff --git a/find_symbol.sh b/find_symbol.sh new file mode 100755 index 0000000000..563d0d10a8 --- /dev/null +++ b/find_symbol.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Symbol to search for +SYMBOL="$2" + +# Check if a directory was provided +if [ -z 
"$1" ]; then + echo "Usage: $0 /path/to/directory symbol" + exit 1 +fi + +# Directory to search +DIR="$1" + +# Find all .so and .a files and process them +find "$DIR" -type f \( -name '*.so*' -o -name '*.a' \) -print0 | while IFS= read -r -d '' file; do + # Run nm and search for the symbol + if nm -D -U "$file" 2>/dev/null | grep -Fq "$SYMBOL"; then + echo "Symbol found in: $file" + fi +done diff --git a/flake.nix b/flake.nix index 5c75a0af9f..5ac28442f5 100644 --- a/flake.nix +++ b/flake.nix @@ -21,6 +21,9 @@ pkgs = import nixpkgs { inherit system; overlays = [ nix-3rdparty.overlays.${system}.default ]; + config = { + allowUnfree = true; + }; }; # For proof-producer, our main target is statically linked binaries, @@ -75,26 +78,33 @@ parallel-crypto3 = (pkgs.callPackage ./parallel-crypto3.nix { + stdenv = pkgs.llvmPackages_19.stdenv; runTests = false; enableDebug = false; + enableGPU = false; }); parallel-crypto3-tests = (pkgs.callPackage ./parallel-crypto3.nix { + stdenv = pkgs.llvmPackages_19.stdenv; runTests = true; enableDebug = false; + enableGPU = true; }); parallel-crypto3-clang-bench = (pkgs.callPackage ./parallel-crypto3.nix { runTests = true; enableDebug = false; benchmarkTests = true; + enableGPU = false; }); parallel-crypto3-debug-tests = (pkgs.callPackage ./parallel-crypto3.nix { enableDebug = true; runTests = true; + enableGPU = false; }); parallel-crypto3-clang-debug = (pkgs.callPackage ./parallel-crypto3.nix { stdenv = pkgs.llvmPackages_19.stdenv; enableDebug = true; runTests = false; + enableGPU = false; }); proof-producer = (staticPkgs.callPackage ./proof-producer.nix { @@ -124,9 +134,10 @@ runTests = true; sanitize = true; crypto3_tests = true; - parallel_crypto3_tets = true; + parallel_crypto3_tests = true; crypto3_bechmarks = true; parallel_crypto3_bechmarks = true; + enableGPU = true; }); develop-clang = (pkgs.callPackage ./proof-producer.nix { @@ -136,7 +147,7 @@ runTests = true; sanitize = true; crypto3_tests = true; - parallel_crypto3_tets 
= true; + parallel_crypto3_tests = true; crypto3_bechmarks = true; parallel_crypto3_bechmarks = true; }); diff --git a/gpu_gpustat_monitor.sh b/gpu_gpustat_monitor.sh new file mode 100755 index 0000000000..0d1b41d49e --- /dev/null +++ b/gpu_gpustat_monitor.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +while true; do + gpustat --json >> gpu_utilization_gpustat_log.json +done diff --git a/lzpatcher.sh b/lzpatcher.sh new file mode 100755 index 0000000000..773782bae8 --- /dev/null +++ b/lzpatcher.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +set -e + +usage() { + echo "Usage: $0 /path/to/directory" + exit 1 +} + +if [ -z "$1" ]; then + usage +fi + +DIRECTORY="$1" + +if [ ! -d "$DIRECTORY" ]; then + echo "Error: Directory '$DIRECTORY' does not exist." + exit 1 +fi + +# Iterate over all files in the directory +find "$DIRECTORY" -maxdepth 1 -type f | while read -r FILE; do + # Check if file is an ELF executable + if file "$FILE" | grep -q 'ELF'; then + echo "Processing ELF executable: $FILE" + + # Backup the original file + cp "$FILE" "$FILE.bak" + + # Get existing RPATH + EXISTING_RPATH=$(patchelf --print-rpath "$FILE" || true) + + # Determine the new RPATH + if [ -z "$EXISTING_RPATH" ]; then + NEW_RPATH="$LIBZ_DIR" + else + NEW_RPATH="$EXISTING_RPATH:$LIBZ_DIR" + fi + + # Modify the RPATH + patchelf --set-rpath "$NEW_RPATH" "$FILE" + + echo "Updated RPATH for $FILE to $NEW_RPATH" + else + echo "Skipping non-ELF file: $FILE" + fi +done + +echo "RPATH update complete." diff --git a/nclang++ b/nclang++ new file mode 100644 index 0000000000..afded054b3 --- /dev/null +++ b/nclang++ @@ -0,0 +1,275 @@ +#! /nix/store/717iy55ncqs0wmhdkwc5fg2vci5wbmq8-bash-5.2p32/bin/bash +set -eu -o pipefail +o posix +shopt -s nullglob + +if (( "${NIX_DEBUG:-0}" >= 7 )); then + set -x +fi + +path_backup="$PATH" + +# That @-vars are substituted separately from bash evaluation makes +# shellcheck think this, and others like it, are useless conditionals. 
+# shellcheck disable=SC2157 +if [[ -n "/nix/store/ph44jcx3ddmlwh394mh1wb7f1qigxqb1-coreutils-9.5" && -n "/nix/store/lvnwdmnjm7nvaq0a3vhvvn46iy4ql7gr-gnugrep-3.11" ]]; then + PATH="/nix/store/ph44jcx3ddmlwh394mh1wb7f1qigxqb1-coreutils-9.5/bin:/nix/store/lvnwdmnjm7nvaq0a3vhvvn46iy4ql7gr-gnugrep-3.11/bin" +fi + +source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/utils.bash + + +# Parse command line options and set several variables. +# For instance, figure out if linker flags should be passed. +# GCC prints annoying warnings when they are not needed. +dontLink=0 +nonFlagArgs=0 +cc1=0 +# shellcheck disable=SC2193 +[[ "/nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++" = *++ ]] && isCxx=1 || isCxx=0 +cxxInclude=1 +cxxLibrary=1 +cInclude=1 + +expandResponseParams "$@" + +declare -ag positionalArgs=() +declare -i n=0 +nParams=${#params[@]} +while (( "$n" < "$nParams" )); do + p=${params[n]} + p2=${params[n+1]:-} # handle `p` being last one + n+=1 + + case "$p" in + -[cSEM] | -MM) dontLink=1 ;; + -cc1) cc1=1 ;; + -nostdinc) cInclude=0 cxxInclude=0 ;; + -nostdinc++) cxxInclude=0 ;; + -nostdlib) cxxLibrary=0 ;; + -x*-header) dontLink=1 ;; # both `-x c-header` and `-xc-header` are accepted by clang + -xc++*) isCxx=1 ;; # both `-xc++` and `-x c++` are accepted by clang + -x) + case "$p2" in + *-header) dontLink=1 ;; + c++*) isCxx=1 ;; + esac + ;; + --) # Everything else is positional args! + # See: https://github.com/llvm/llvm-project/commit/ed1d07282cc9d8e4c25d585e03e5c8a1b6f63a74 + + # Any positional arg (i.e. 
any argument after `--`) will be + # interpreted as a "non flag" arg: + if [[ -v "params[$n]" ]]; then nonFlagArgs=1; fi + + positionalArgs=("${params[@]:$n}") + params=("${params[@]:0:$((n - 1))}") + break; + ;; + -?*) ;; + *) nonFlagArgs=1 ;; # Includes a solitary dash (`-`) which signifies standard input; it is not a flag + esac +done + +# If we pass a flag like -Wl, then gcc will call the linker unless it +# can figure out that it has to do something else (e.g., because of a +# "-c" flag). So if no non-flag arguments are given, don't pass any +# linker flags. This catches cases like "gcc" (should just print +# "gcc: no input files") and "gcc -v" (should print the version). +if [ "$nonFlagArgs" = 0 ]; then + dontLink=1 +fi + +# Arocc does not link +if [ "" = 1 ]; then + dontLink=1 +fi + +# Optionally filter out paths not refering to the store. +if [[ "${NIX_ENFORCE_PURITY:-}" = 1 && -n "$NIX_STORE" ]]; then + kept=() + nParams=${#params[@]} + declare -i n=0 + while (( "$n" < "$nParams" )); do + p=${params[n]} + p2=${params[n+1]:-} # handle `p` being last one + n+=1 + + skipNext=false + path="" + case "$p" in + -[IL]/*) path=${p:2} ;; + -[IL] | -isystem) path=$p2 skipNext=true ;; + esac + + if [[ -n $path ]] && badPath "$path"; then + skip "$path" + $skipNext && n+=1 + continue + fi + + kept+=("$p") + done + # Old bash empty array hack + params=(${kept+"${kept[@]}"}) +fi + +# Flirting with a layer violation here. +if [ -z "${NIX_BINTOOLS_WRAPPER_FLAGS_SET_x86_64_unknown_linux_gnu:-}" ]; then + source /nix/store/lfabp2rmzyn7ddbhgls0gsjjqckzw3np-binutils-wrapper-2.43.1/nix-support/add-flags.sh +fi + +# Put this one second so libc ldflags take priority. +if [ -z "${NIX_CC_WRAPPER_FLAGS_SET_x86_64_unknown_linux_gnu:-}" ]; then + source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/add-flags.sh +fi + +# Clear march/mtune=native -- they bring impurity. 
+if [ "$NIX_ENFORCE_NO_NATIVE_x86_64_unknown_linux_gnu" = 1 ]; then + kept=() + # Old bash empty array hack + for p in ${params+"${params[@]}"}; do + if [[ "$p" = -m*=native ]]; then + skip "$p" + else + kept+=("$p") + fi + done + # Old bash empty array hack + params=(${kept+"${kept[@]}"}) +fi + +if [[ "$isCxx" = 1 ]]; then + if [[ "$cxxInclude" = 1 ]]; then + # + # The motivation for this comment is to explain the reason for appending + # the C++ stdlib to NIX_CFLAGS_COMPILE, which I initially thought should + # change and later realized it shouldn't in: + # + # https://github.com/NixOS/nixpkgs/pull/185569#issuecomment-1234959249 + # + # NIX_CFLAGS_COMPILE contains dependencies added using "-isystem", and + # NIX_CXXSTDLIB_COMPILE adds the C++ stdlib using "-isystem". Appending + # NIX_CXXSTDLIB_COMPILE to NIX_CLAGS_COMPILE emulates this part of the + # include lookup order from GCC/Clang: + # + # > 4. Directories specified with -isystem options are scanned in + # > left-to-right order. + # > 5. Standard system directories are scanned. + # > 6. Directories specified with -idirafter options are scanned + # > in left-to-right order. + # + # NIX_CXX_STDLIB_COMPILE acts as the "standard system directories" that + # are otherwise missing from CC in nixpkgs, so should be added last. + # + # This means that the C standard library should never be present inside + # NIX_CFLAGS_COMPILE, because it MUST come after the C++ stdlib. It is + # added automatically by cc-wrapper later using "-idirafter". + # + NIX_CFLAGS_COMPILE_x86_64_unknown_linux_gnu+=" $NIX_CXXSTDLIB_COMPILE_x86_64_unknown_linux_gnu" + fi + if [[ "$cxxLibrary" = 1 ]]; then + NIX_CFLAGS_LINK_x86_64_unknown_linux_gnu+=" $NIX_CXXSTDLIB_LINK_x86_64_unknown_linux_gnu" + fi +fi + +source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/add-hardening.sh + +# Add the flags for the C compiler proper. 
+extraAfter=(${hardeningCFlagsAfter[@]+"${hardeningCFlagsAfter[@]}"} $NIX_CFLAGS_COMPILE_x86_64_unknown_linux_gnu) +extraBefore=(${hardeningCFlagsBefore[@]+"${hardeningCFlagsBefore[@]}"} $NIX_CFLAGS_COMPILE_BEFORE_x86_64_unknown_linux_gnu) + +# Remove '-fzero-call-used-regs=used-gpr' from extraBefore +filteredExtraBefore=() +for arg in "${extraBefore[@]}"; do + if [[ "$arg" != "-fzero-call-used-regs=used-gpr" ]]; then + filteredExtraBefore+=("$arg") + fi +done +extraBefore=("${filteredExtraBefore[@]}") + +if [ "$dontLink" != 1 ]; then + linkType=$(checkLinkType $NIX_LDFLAGS_BEFORE_x86_64_unknown_linux_gnu "${params[@]}" ${NIX_CFLAGS_LINK_x86_64_unknown_linux_gnu:-} $NIX_LDFLAGS_x86_64_unknown_linux_gnu) + + # Add the flags that should only be passed to the compiler when + # linking. + extraAfter+=($(filterRpathFlags "$linkType" $NIX_CFLAGS_LINK_x86_64_unknown_linux_gnu)) + + # Add the flags that should be passed to the linker (and prevent + # `ld-wrapper' from adding NIX_LDFLAGS_x86_64_unknown_linux_gnu again). + for i in $(filterRpathFlags "$linkType" $NIX_LDFLAGS_BEFORE_x86_64_unknown_linux_gnu); do + extraBefore+=("-Wl,$i") + done + if [[ "$linkType" == dynamic && -n "$NIX_DYNAMIC_LINKER_x86_64_unknown_linux_gnu" ]]; then + extraBefore+=("-Wl,-dynamic-linker=$NIX_DYNAMIC_LINKER_x86_64_unknown_linux_gnu") + fi + for i in $(filterRpathFlags "$linkType" $NIX_LDFLAGS_x86_64_unknown_linux_gnu); do + if [ "${i:0:3}" = -L/ ]; then + extraAfter+=("$i") + else + extraAfter+=("-Wl,$i") + fi + done + export NIX_LINK_TYPE_x86_64_unknown_linux_gnu=$linkType +fi + +if [[ -e /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/add-local-cc-cflags-before.sh ]]; then + source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/add-local-cc-cflags-before.sh +fi + +# As a very special hack, if the arguments are just `-v', then don't +# add anything. 
This is to prevent `gcc -v' (which normally prints +# out the version number and returns exit code 0) from printing out +# `No input files specified' and returning exit code 1. +if [ "$*" = -v ]; then + extraAfter=() + extraBefore=() +fi + +# clang's -cc1 mode is not compatible with most options +# that we would pass. Rather than trying to pass only +# options that would work, let's just remove all of them. +if [ "$cc1" = 1 ]; then + extraAfter=() + extraBefore=() +fi + +# Finally, if we got any positional args, append them to `extraAfter` +# now: +if [[ "${#positionalArgs[@]}" -gt 0 ]]; then + extraAfter+=(-- "${positionalArgs[@]}") +fi + +# Optionally print debug info. +if (( "${NIX_DEBUG:-0}" >= 1 )); then + # Old bash workaround, see ld-wrapper for explanation. + echo "extra flags before to /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++:" >&2 + printf " %q\n" ${extraBefore+"${extraBefore[@]}"} >&2 + echo "original flags to /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++:" >&2 + printf " %q\n" ${params+"${params[@]}"} >&2 + echo "extra flags after to /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++:" >&2 + printf " %q\n" ${extraAfter+"${extraAfter[@]}"} >&2 +fi + +PATH="$path_backup" +# Old bash workaround, see above. + +# if a cc-wrapper-hook exists, run it. 
+if [[ -e /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/cc-wrapper-hook ]]; then + compiler=/nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++ + source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/cc-wrapper-hook +fi + +if (( "${NIX_CC_USE_RESPONSE_FILE:-1}" >= 1 )); then + responseFile=$(mktemp "${TMPDIR:-/tmp}/cc-params.XXXXXX") + trap 'rm -f -- "$responseFile"' EXIT + printf "%q\n" \ + ${extraBefore+"${extraBefore[@]}"} \ + ${params+"${params[@]}"} \ + ${extraAfter+"${extraAfter[@]}"} > "$responseFile" + /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++ "@$responseFile" +else + exec /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++ \ + ${extraBefore+"${extraBefore[@]}"} \ + ${params+"${params[@]}"} \ + ${extraAfter+"${extraAfter[@]}"} +fi diff --git a/opensycl.nix b/opensycl.nix new file mode 100644 index 0000000000..11ca5aface --- /dev/null +++ b/opensycl.nix @@ -0,0 +1,99 @@ +# stolen from nixpkgs (/pkgs/development/compilers/opensycl/default.nix +# we need a custom version because (at the time of writing) the nixpkgs version is broken +{ lib +, fetchFromGitHub +, llvmPackages_19 +, lld_19 +, python3 +, cmake +, boost +, libxml2 +, libffi +, makeWrapper +, config +, cudaPackages +, linuxPackages +, rocmPackages_5 +, ompSupport ? true +, openclSupport ? false +, rocmSupport ? 
config.rocmSupport +, cudaSupport +, autoAddDriverRunpath +}: +let + inherit (llvmPackages_19) stdenv; + # move to newer ROCm version once supported + rocmPackages = rocmPackages_5; +in +stdenv.mkDerivation rec { + pname = "AdaptiveCpp"; + version = "24.10.0"; + + src = fetchFromGitHub { + owner = "AdaptiveCpp"; + repo = "AdaptiveCpp"; + rev = "v24.10.0"; + sha256 = "sha256-ZwHDiwv1ybC+2UhiOe2f7fnfqcul+CD9Uta8PT9ICr4="; + }; + # zerocallusedregs is disabled because passing it to gpu compilers confuses them + # fortify is disabled because it was also disabled above in flake, idk why + hardeningDisable = [ "fortify" "zerocallusedregs" ]; + + nativeBuildInputs = [ + cmake + makeWrapper + ] ++ lib.optionals cudaSupport [ + autoAddDriverRunpath + linuxPackages.nvidia_x11 + cudaPackages.cuda_nvcc + cudaPackages.cuda_cudart + cudaPackages.cudatoolkit + cudaPackages.cuda_nvrtc + cudaPackages.cuda_cupti + ]; + + buildInputs = [ + libxml2 + libffi + boost + llvmPackages_19.openmp + llvmPackages_19.llvm + llvmPackages_19.libclang.dev + ] ++ lib.optionals rocmSupport [ + rocmPackages.clr + rocmPackages.rocm-runtime + ] ++ lib.optionals cudaSupport [ + linuxPackages.nvidia_x11 + cudaPackages.cuda_cudart + (lib.getOutput "stubs" cudaPackages.cuda_cudart) + ]; + + # set the gpu architecture for the cuda backend here + NIX_CXXFLAGS_COMPILE = lib.optionalString cudaSupport "--cuda-gpu-arch=sm_89"; + # opensycl makes use of clangs internal headers. 
Its cmake does not successfully discover them automatically on nixos, so we supply the path manually + cmakeFlags = [ + "-DCLANG_INCLUDE_PATH=${llvmPackages_19.libclang.dev}/include" + ] ++ lib.optionals cudaSupport [ + "-DCMAKE_CUDA_COMPILER=$(which nvcc)" + ] ++ [ + (lib.cmakeBool "WITH_CPU_BACKEND" ompSupport) + (lib.cmakeBool "WITH_CUDA_BACKEND" cudaSupport) + (lib.cmakeBool "WITH_ROCM_BACKEND" rocmSupport) + ] ++ lib.optionals (lib.versionAtLeast version "24") [ + (lib.cmakeBool "WITH_OPENCL_BACKEND" openclSupport) + ]; + + postFixup = '' + wrapProgram $out/bin/syclcc-clang \ + --prefix PATH : ${lib.makeBinPath [ python3 lld_19 ]} \ + '' + lib.optionalString rocmSupport '' + --add-flags "--rocm-device-lib-path=${rocmPackages.rocm-device-libs}/amdgcn/bitcode" + ''; + + meta = with lib; { + homepage = "https://github.com/AdaptiveCpp/AdaptiveCpp"; + description = "Multi-backend implementation of SYCL for CPUs and GPUs"; + maintainers = with maintainers; [ yboettcher ]; + license = licenses.bsd2; + }; +} \ No newline at end of file diff --git a/parallel-crypto3.nix b/parallel-crypto3.nix index 0e26f4ae6e..96d9de82a4 100644 --- a/parallel-crypto3.nix +++ b/parallel-crypto3.nix @@ -1,4 +1,5 @@ { lib, + pkgs, stdenv, ninja, pkg-config, @@ -8,7 +9,23 @@ lldb, mold, cmake_modules, + libgcc, + glibc, + libffi, + libz, + libxml2, + icu70, + ncurses, + gcc, + xz, + libedit, + llvm, + libcxx, + libstdcxx5, + llvmPackages_19, + opensycl, enableDebugging, + enableGPU ? false, enableDebug ? false, runTests ? false, sanitize? false, @@ -16,32 +33,49 @@ }: let inherit (lib) optional; + opensycl = pkgs.callPackage ./opensycl.nix { + inherit (pkgs); + cudaSupport = enableGPU; + }; + in stdenv.mkDerivation { name = "Parallel Crypto3"; src = lib.sourceByRegex ./. 
["^crypto3(/.*)?$" "^parallel-crypto3(/.*)?$" "CMakeLists.txt"]; - hardeningDisable = [ "fortify" ]; - - nativeBuildInputs = [ cmake ninja pkg-config ] ++ + hardeningDisable = [ "fortify" "zerocallusedregs" ]; + nativeBuildInputs = [ cmake ninja pkg-config llvmPackages_19.openmp opensycl ] ++ (lib.optional (!stdenv.isDarwin) gdb) ++ (lib.optional (stdenv.isDarwin) lldb); # enableDebugging will keep debug symbols in boost propagatedBuildInputs = [ (if enableDebug then (enableDebugging boost) else boost) ]; - buildInputs = [cmake_modules]; + buildInputs = [ + cmake_modules + opensycl + ] ++ (if enableGPU then [ + pkgs.cudaPackages.cudatoolkit + pkgs.cudaPackages.cuda_cudart + pkgs.cudaPackages.cuda_nvcc + pkgs.linuxPackages.nvidia_x11 + ] else []); + + makeWrapperArgs = [ + # Ensure the real NVIDIA libraries are found first + "--prefix LD_LIBRARY_PATH : ${pkgs.linuxPackages.nvidia_x11}/lib" + ]; cmakeFlags = [ (if runTests then "-DBUILD_PARALLEL_CRYPTO3_TESTS=TRUE" else "") (if sanitize then "-DSANITIZE=ON" else "-DSANITIZE=OFF") (if benchmarkTests then "-DENABLE_BENCHMARKS=ON" else "-DENABLE_BENCHMARKS=OFF") + (if enableGPU then "-DGPU_PROVER=ON" else "") "-DPARALLEL_CRYPTO3_ENABLE=TRUE" ]; cmakeBuildType = if enableDebug then "Debug" else "Release"; doCheck = runTests; # tests are inside parallel-crypto3-tests derivation - checkPhase = '' # JUNIT file without explicit file name is generated after the name of the master test suite inside `CMAKE_CURRENT_SOURCE_DIR` export BOOST_TEST_LOGGER=JUNIT:HRF @@ -52,7 +86,14 @@ in stdenv.mkDerivation { find .. -type f -name '*_test.xml' -exec cp {} ${placeholder "out"}/test-logs \; ''; - shellHook = '' + shellHook = + (if enableGPU then '' + CXX=syclcc-clang; export CXX + ACPP_ADAPTIVITY_LEVEL=2; export ACPP_ADAPTIVITY_LEVEL + '' else "") + + '' + rm -rf build + eval $configurePhase PS1="\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ " echo "Welcome to Parallel Crypto3 development environment!" 
''; diff --git a/parallel-crypto3/CMakeLists.txt b/parallel-crypto3/CMakeLists.txt index 481878b48b..9c4d573993 100644 --- a/parallel-crypto3/CMakeLists.txt +++ b/parallel-crypto3/CMakeLists.txt @@ -1,17 +1,20 @@ cmake_minimum_required(VERSION 3.22 FATAL_ERROR) project(parallel-crypto3) -option(BUILD_PARALLEL_CRYPTO3_TESTS "Enable tests" FALSE) +option(BUILD_PARALLEL_CRYPTO3_TESTS "Enable tests" TRUE) +set(CMAKE_CUDA_COMPILER nvc++) find_package(CM REQUIRED) include(CMConfig) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + # The file compile_commands.json is generated in build directory, so LSP could # pick it up and guess all include paths, defines and other stuff. # If Nix is used, LSP could not guess the locations of implicit include # directories, so we need to include them explicitly. if(CMAKE_EXPORT_COMPILE_COMMANDS) - set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES + set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES}) endif() @@ -41,7 +44,8 @@ target_link_libraries(${PROJECT_NAME}_all INTERFACE actor::containers actor::math actor::zk - actor::core) + actor::core + ) # Configure package file to be able to import headers include(CMakePackageConfigHelpers) diff --git a/parallel-crypto3/benchmarks/CMakeLists.txt b/parallel-crypto3/benchmarks/CMakeLists.txt index 6d7ed4fa39..9dcf40980f 100644 --- a/parallel-crypto3/benchmarks/CMakeLists.txt +++ b/parallel-crypto3/benchmarks/CMakeLists.txt @@ -34,7 +34,7 @@ macro(define_bench_test name) ${Boost_INCLUDE_DIRS}) - set_target_properties(${test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${test_name} PROPERTIES CXX_STANDARD 23) target_precompile_headers(${test_name} REUSE_FROM crypto3_precompiled_headers) endmacro() @@ -46,7 +46,7 @@ cm_test_link_libraries( Boost::unit_test_framework Boost::timer ) -set_target_properties(_cm_internal_tests--parallel-crypto3-benchmarks PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests--parallel-crypto3-benchmarks PROPERTIES 
CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests--parallel-crypto3-benchmarks REUSE_FROM crypto3_precompiled_headers) set(TESTS_NAMES diff --git a/parallel-crypto3/libs/parallel-containers/example/CMakeLists.txt b/parallel-crypto3/libs/parallel-containers/example/CMakeLists.txt index 1c949653b1..e8c4941415 100644 --- a/parallel-crypto3/libs/parallel-containers/example/CMakeLists.txt +++ b/parallel-crypto3/libs/parallel-containers/example/CMakeLists.txt @@ -37,7 +37,7 @@ macro(define_containers_example example) ${CMAKE_WORKSPACE_NAME}::algebra ${CMAKE_WORKSPACE_NAME}::hash) - set_target_properties(${target_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${target_name} PROPERTIES CXX_STANDARD 23) endmacro() set(EXAMPLES_NAMES diff --git a/parallel-crypto3/libs/parallel-containers/test/CMakeLists.txt b/parallel-crypto3/libs/parallel-containers/test/CMakeLists.txt index 259cfda1c4..fd322a2d17 100644 --- a/parallel-crypto3/libs/parallel-containers/test/CMakeLists.txt +++ b/parallel-crypto3/libs/parallel-containers/test/CMakeLists.txt @@ -31,7 +31,7 @@ cm_test_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} Boost::random Boost::unit_test_framework ) -set_target_properties(_cm_internal_tests-actor-containers-test PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests-actor-containers-test PROPERTIES CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests-actor-containers-test REUSE_FROM crypto3_precompiled_headers) macro(define_storage_test test) @@ -62,7 +62,7 @@ macro(define_storage_test test) ${Boost_INCLUDE_DIRS}) - set_target_properties(${target_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${target_name} PROPERTIES CXX_STANDARD 23) get_target_property(target_type Boost::unit_test_framework TYPE) if(target_type STREQUAL "SHARED_LIB") diff --git a/parallel-crypto3/libs/parallel-math/CMakeLists.txt b/parallel-crypto3/libs/parallel-math/CMakeLists.txt index ed1633ca8d..12be7d1a77 100644 --- 
a/parallel-crypto3/libs/parallel-math/CMakeLists.txt +++ b/parallel-crypto3/libs/parallel-math/CMakeLists.txt @@ -38,7 +38,7 @@ target_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} INTERFACE crypto3::algebra crypto3::multiprecision - + crypto3::random Boost::random ) diff --git a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/algorithms/make_evaluation_domain.hpp b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/algorithms/make_evaluation_domain.hpp index c016ab4105..a7f068f97f 100644 --- a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/algorithms/make_evaluation_domain.hpp +++ b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/algorithms/make_evaluation_domain.hpp @@ -120,7 +120,7 @@ namespace nil { return result; } - if (detail::is_extended_radix2_domain(m)) { + /*if (detail::is_extended_radix2_domain(m)) { result_type result; result.reset(new extended_radix2_domain(m)); return result; @@ -130,7 +130,7 @@ namespace nil { result_type result; result.reset(new step_radix2_domain(m)); return result; - } + }*/ if (detail::is_basic_radix2_domain(big + rounded_small)) { result_type result; @@ -138,7 +138,7 @@ namespace nil { return result; } - if (detail::is_extended_radix2_domain(big + rounded_small)) { + /*if (detail::is_extended_radix2_domain(big + rounded_small)) { result_type result; result.reset(new extended_radix2_domain(big + rounded_small)); return result; @@ -160,7 +160,7 @@ namespace nil { result_type result; result.reset(new arithmetic_sequence_domain(m)); return result; - } + }*/ return result_type(); } diff --git a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/basic_radix2_domain.hpp b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/basic_radix2_domain.hpp index 0d5843d84b..36a8245e81 100644 --- a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/basic_radix2_domain.hpp +++ 
b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/basic_radix2_domain.hpp @@ -54,7 +54,6 @@ namespace nil { typedef typename FieldType::value_type field_value_type; typedef ValueType value_type; typedef std::pair, std::vector> cache_type; - std::shared_ptr fft_cache; void create_fft_cache() { fft_cache = std::make_shared(std::vector(), @@ -67,6 +66,7 @@ namespace nil { typedef FieldType field_type; field_value_type omega; + std::shared_ptr fft_cache; basic_radix2_domain(const std::size_t m) : evaluation_domain(m), @@ -85,6 +85,10 @@ namespace nil { create_fft_cache(); } + std::shared_ptr get_fft_cache() override { + return fft_cache; + } + void fft(std::vector &a) override { if (a.size() != this->m) { if (a.size() < this->m) { diff --git a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/evaluation_domain.hpp b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/evaluation_domain.hpp index ec1e676b7e..953671e8d1 100644 --- a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/evaluation_domain.hpp +++ b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/evaluation_domain.hpp @@ -32,6 +32,8 @@ #include +#include + #include namespace nil { @@ -49,7 +51,7 @@ namespace nil { public: typedef FieldType field_type; - + typedef std::pair, std::vector> fft_cache_type; std::size_t m; std::size_t log2_size; @@ -69,6 +71,8 @@ namespace nil { */ virtual ~evaluation_domain() {}; + virtual std::shared_ptr get_fft_cache() = 0; + /** * Get the unity root. 
*/ diff --git a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/polynomial/polynomial_dfs.hpp b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/polynomial/polynomial_dfs.hpp index 6da143b12a..71154cf4db 100644 --- a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/polynomial/polynomial_dfs.hpp +++ b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/polynomial/polynomial_dfs.hpp @@ -230,6 +230,10 @@ namespace nil { return _d; } + void set_degree(size_type d) { + _d = d; + } + size_type max_degree() const BOOST_NOEXCEPT { return this->size(); } @@ -862,10 +866,254 @@ namespace nil { return dfs_result; } +#ifdef GPU_PROVER + template + sycl::event gpu_fft( + typename FieldType::value_type* a, + std::size_t n, + typename FieldType::value_type* omega_cache, + + sycl::queue& queue, + std::vector a_events, + sycl::event cache_event + ) { + using value_type = typename FieldType::value_type; + const std::size_t logn = log2(n); + + // swapping in place (from Storer's book) + // We can parallelize this look, since k and rk are pairs, they will never intersect. + a_events.push_back(cache_event); + auto swap_event = queue.submit([a_events, a, n, logn](sycl::handler &cgh) { + cgh.depends_on(a_events); + cgh.parallel_for(sycl::range<1>(n), [=](sycl::id<1> idx) { + const std::size_t r_idx = crypto3::math::detail::bitreverse(idx, logn); + if (idx < r_idx) { + std::swap(a[idx], a[r_idx]); + } + }); + }); + + // invariant: m = 2^{s-1} + sycl::event last_fft_event = swap_event; + for (std::size_t s = 1, m = 1, inc = n / 2; s <= logn; ++s, m <<= 1, inc >>= 1) { + // w_m is 2^s-th root of unity now + // Here we can parallelize on the both loops with 'k' and 'm', because for each value of k and m + // the ranges of array 'a' used do not intersect. Think of these 2 loops as 1. + const size_t count_k = n / (2 * m) + (n % (2 * m) ? 
1 : 0); + last_fft_event = queue.submit([count_k, m, inc, a, omega_cache, last_fft_event](sycl::handler &cgh) { + cgh.depends_on(last_fft_event); + cgh.parallel_for(sycl::range<1>(count_k * m), [=](sycl::id<1> index) { + const std::size_t k = (index / m) * m * 2; + const std::size_t j = index % m; + const std::size_t idx = j * inc; + const value_type t = a[k + j + m] * omega_cache[idx]; + a[k + j + m] = a[k + j] - t; + a[k + j] += t; + }); + }); + } + return last_fft_event; + } + template + sycl::event gpu_inverse_fft( + typename FieldType::value_type* a, + const std::size_t n, + typename FieldType::value_type* fft_cache, + + sycl::queue& queue, + std::vector a_events, + sycl::event cache_event + ) { + using value_type = typename FieldType::value_type; + auto fft_event = gpu_fft(a, n, fft_cache, queue, a_events, cache_event); + + const value_type sconst = value_type(n).inversed(); + return queue.submit([sconst, fft_event, a, n](sycl::handler &cgh) { + cgh.depends_on(fft_event); + cgh.parallel_for(sycl::range<1>(n), [=](sycl::id<1> idx) { + a[idx] *= sconst; + }); + }); + } + + template + sycl::event handle_polynomial_resizing( + typename FieldType::value_type* buffer, + const std::size_t cur_size, + const std::size_t new_size, + const std::size_t degree, + typename FieldType::value_type* current_domain_buf, + typename FieldType::value_type* new_domain_buf, + + sycl::queue& queue, + sycl::event buffer_event, + sycl::event current_domain_event, + sycl::event new_domain_event + ) { + using value_type = typename FieldType::value_type; + + if (cur_size >= new_size) { + std::cout << "cur_size >= new_size" << std::endl; + return sycl::event(); + } + + if (degree == 0) { + // add zeros to the end of the buffer + auto fill_event = queue.fill(buffer + cur_size, value_type::zero(), new_size - cur_size); + return fill_event; + } else { + auto ifft_event = gpu_inverse_fft( + buffer, cur_size, current_domain_buf, + queue, {buffer_event}, current_domain_event + ); + auto 
fill_event = queue.fill(buffer + cur_size, value_type::zero(), new_size - cur_size); + auto fft_event = gpu_fft( + buffer, new_size, new_domain_buf, + queue, {ifft_event, fill_event}, new_domain_event + ); + return fft_event; + } + } + + template + std::size_t create_domain_buffers( + const ContainerType& polynomials, + std::unordered_map>>& domain_cache, + + sycl::queue& queue, + std::unordered_map& domain_buffers, + std::unordered_map& domain_events, + typename FieldType::value_type*& max_domain_buf, + sycl::event& max_domain_buf_event + ) { + using value_type = typename FieldType::value_type; + + std::size_t max_domain_size = 0; + std::size_t total_degree = 0; + std::set needed_domain_sizes; + for (const auto& polynomial : polynomials) { + max_domain_size = std::max(max_domain_size, polynomial.size()); + total_degree += polynomial.degree(); + needed_domain_sizes.insert(polynomial.size()); + } + max_domain_size = std::max(max_domain_size, detail::power_of_two(total_degree + 1)); + needed_domain_sizes.insert(max_domain_size); + + for (const std::size_t domain_size : needed_domain_sizes) { + domain_cache[domain_size] = nullptr; + } + // We cannot use LOW level thread pool here, make_evaluation_domain uses it. 
+ parallel_foreach(needed_domain_sizes.begin(), needed_domain_sizes.end(), + [&domain_cache](std::size_t domain_size) { + domain_cache[domain_size] = make_evaluation_domain(domain_size); + }, ThreadPool::PoolLevel::HIGH); + + for (const std::size_t domain_size : needed_domain_sizes) { + auto domain = domain_cache[domain_size]; + domain_buffers[domain_size] = sycl::malloc_device( + domain_size, queue + ); + domain_events[domain_size] = queue.copy( + domain->get_fft_cache()->second.data(), domain_buffers[domain_size], domain_size + ); + } + max_domain_buf = sycl::malloc_device( + max_domain_size, queue + ); + max_domain_buf_event = queue.copy( + domain_cache[max_domain_size]->get_fft_cache()->first.data(), max_domain_buf, max_domain_size + ); + + return max_domain_size; + } + + + template + polynomial_dfs polynomial_product( + const std::vector> &multipliers + ) { + using value_type = typename FieldType::value_type; + using polynomial_type = polynomial_dfs; + + if (multipliers.size() == 0) { + throw std::invalid_argument("polynomial_product multipliers.size() == 0"); + } + if (multipliers.size() == 1) { + return multipliers[0]; + } + + sycl::queue queue(sycl::gpu_selector{}); + + value_type* max_domain_buf = nullptr; + sycl::event max_domain_buf_event = sycl::event(); + std::unordered_map domain_events; + std::unordered_map domain_buffers; + std::unordered_map>> domain_cache; + + std::size_t max_domain_size = create_domain_buffers( + multipliers, domain_cache, queue, + domain_buffers, domain_events, max_domain_buf, max_domain_buf_event + ); + + std::vector multipliers_buf(multipliers.size()); + std::vector multipliers_events(multipliers.size()); + + for (std::size_t i = 0; i < multipliers.size(); ++i) { + multipliers_buf[i] = sycl::malloc_device(max_domain_size, queue); + multipliers_events[i] = queue.copy( + multipliers[i].data(), multipliers_buf[i], multipliers[i].size() + ); + } + + // pre-resize the multipliers + std::vector buffer_events(multipliers.size()); + 
for (std::size_t i = 0; i < multipliers.size(); ++i) { + buffer_events[i] = handle_polynomial_resizing( + multipliers_buf[i], multipliers[i].size(), max_domain_size, multipliers[i].degree(), + domain_buffers[multipliers[i].size()], max_domain_buf, + queue, multipliers_events[i], domain_events[multipliers[i].size()], max_domain_buf_event + ); + } + for (std::size_t stride = 1; stride < multipliers.size(); stride <<= 1) { + const std::size_t double_stride = stride << 1; + std::size_t max_i = (multipliers.size() - stride) / double_stride; + if ((multipliers.size() - stride) % double_stride != 0) { + max_i++; + } + for (std::size_t i = 0; i < max_i; ++i) { + const std::size_t index1 = i * double_stride; + const std::size_t index2 = index1 + stride; + std::vector b_events = {buffer_events[index1], buffer_events[index2]}; + value_type* first_buf = multipliers_buf[index1]; + value_type* second_buf = multipliers_buf[index2]; + buffer_events[index1] = queue.submit([b_events, first_buf, second_buf, max_domain_size](sycl::handler &cgh) { + cgh.depends_on(b_events); + cgh.parallel_for(sycl::range<1>(max_domain_size), [=](sycl::id<1> idx) { + first_buf[idx] *= second_buf[idx]; + }); + }); + } + } + polynomial_type result(max_domain_size - 1, max_domain_size); + auto copy_back_event = queue.copy( + multipliers_buf[0], result.data(), max_domain_size, buffer_events[0] + ); + copy_back_event.wait(); + for (std::size_t i = 0; i < multipliers.size(); ++i) { + sycl::free(multipliers_buf[i], queue); + } + for (auto& domain_buffer : domain_buffers) { + sycl::free(domain_buffer.second, queue); + } + sycl::free(max_domain_buf, queue); + return result; + } +#else template static inline polynomial_dfs polynomial_product( - std::vector> multipliers) { + std::vector> &&multipliers + ) { // Pre-create all the domains. We could do this on-the-go, but we want this function to be more // parallelization-friendly. 
This single-threaded version may look a bit complicated, // but it's now very similar to what we have in parallel code. @@ -921,14 +1169,11 @@ namespace nil { domain_cache[current_domain_size], domain_cache[next_domain_size], domain_cache[new_domain_size]); - - // Free the memory we are not going to use anymore. - multipliers[index2] = polynomial_dfs(); }, ThreadPool::PoolLevel::HIGH); } - return multipliers[0]; + return std::move(multipliers[0]); } - +#endif } // namespace math } // namespace crypto3 } // namespace nil @@ -950,4 +1195,4 @@ struct std::hash> } }; -#endif // CRYPTO3_MATH_POLYNOMIAL_POLYNOM_DFT_HPP +#endif // CRYPTO3_MATH_POLYNOMIAL_POLYNOM_DFT_HPP \ No newline at end of file diff --git a/parallel-crypto3/libs/parallel-math/test/CMakeLists.txt b/parallel-crypto3/libs/parallel-math/test/CMakeLists.txt index 8339de6ebf..d463a7ee8e 100644 --- a/parallel-crypto3/libs/parallel-math/test/CMakeLists.txt +++ b/parallel-crypto3/libs/parallel-math/test/CMakeLists.txt @@ -34,7 +34,7 @@ macro(define_math_test name) ${Boost_INCLUDE_DIRS}) - set_target_properties(${test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${test_name} PROPERTIES CXX_STANDARD 23) endmacro() diff --git a/parallel-crypto3/libs/parallel-math/test/polynomial_dfs.cpp b/parallel-crypto3/libs/parallel-math/test/polynomial_dfs.cpp index 719db76f0a..bbde04f212 100644 --- a/parallel-crypto3/libs/parallel-math/test/polynomial_dfs.cpp +++ b/parallel-crypto3/libs/parallel-math/test/polynomial_dfs.cpp @@ -29,6 +29,8 @@ #include #include +#include + #include #include #include @@ -40,7 +42,7 @@ #include #include #include -#include +#include using namespace nil::crypto3::algebra; using namespace nil::crypto3::math; @@ -49,7 +51,7 @@ typedef fields::bls12_fr<381> FieldType; BOOST_AUTO_TEST_SUITE(polynomial_dfs_from_coefficients_test_suite) -BOOST_AUTO_TEST_CASE(polynomial_dfs_equal_test){ +BOOST_AUTO_TEST_CASE(polynomial_dfs_equal_test) { polynomial_dfs a = { 7, {0x35_big_uint255, 
0x26D37C08AED60085FDE335498E7DFEE2AFB1463D06E338219CD0E5DDAF27D68F_big_uint255, @@ -1332,7 +1334,7 @@ BOOST_AUTO_TEST_CASE(polynomial_dfs_zero_one_test) { BOOST_CHECK((small_poly - one * small_poly).is_zero()); } -BOOST_AUTO_TEST_CASE(polynomial_dfs_2_levels_test) { +BOOST_AUTO_TEST_CASE(polynomial_dfs_2_levels_test, *boost::unit_test::disabled()) { size_t size = 131072; polynomial_dfs poly = { @@ -1397,19 +1399,33 @@ BOOST_AUTO_TEST_CASE(polynomial_dfs_multiplication_perf_test, *boost::unit_test: std::cout << "Multiplication time: " << duration.count() << " microseconds." << std::endl; } -BOOST_AUTO_TEST_CASE(polynomial_dfs_resize_perf_test, *boost::unit_test::disabled()) { - std::vector values; - std::size_t size = 131072 * 16; - for (std::size_t i = 0; i < size; i++) { - values.push_back(nil::crypto3::algebra::random_element()); +template +polynomial_dfs + generate_random_polynomial( + std::size_t size, nil::crypto3::random::algebraic_engine& engine) { + using value_type = typename Field::value_type; + std::vector random_field_values(size); + for (std::size_t i = 0; i < size; ++i) { + random_field_values[i] = engine(); } + return polynomial_dfs(size - 1, std::move(random_field_values)); +} - polynomial_dfs poly = { - size - 1, values}; +BOOST_AUTO_TEST_CASE(polynomial_dfs_resize_perf_test, *boost::unit_test::disabled()) { + using field_type = nil::crypto3::algebra::fields::bls12_fr<381>; + using value_type = typename FieldType::value_type; + using polynomial_dfs_type = polynomial_dfs; + nil::crypto3::random::algebraic_engine alg_rnd_engine; + std::size_t size = 131072 * 16; + polynomial_dfs_type poly = + generate_random_polynomial( + size, + alg_rnd_engine + ); auto start = std::chrono::high_resolution_clock::now(); for (std::size_t i = 0; i < 10; ++i) { - auto poly2 = poly; + polynomial_dfs_type poly2 = poly; poly2.resize(8 * size); BOOST_CHECK(poly2.size() == 8 * size); } @@ -1446,4 +1462,28 @@ BOOST_AUTO_TEST_CASE(polynomial_dfs_equality_check_perf_test, 
*boost::unit_test: std::cout << "Equality check time: " << duration.count() << " microseconds." << std::endl; } +BOOST_AUTO_TEST_CASE(polynomial_product_test) { + using field_type = nil::crypto3::algebra::fields::bls12_fr<381>; + using value_type = typename field_type::value_type; + using polynomial_dfs_type = polynomial_dfs; + + nil::crypto3::random::algebraic_engine alg_rnd_engine; + std::vector random_polynomials; + random_polynomials.reserve(8); + std::vector sizes = {23, 17, 17, 16, 14, 15, 13, 15, 21, 16, 22}; + for (auto size : sizes) { + random_polynomials.emplace_back( + generate_random_polynomial( + 1u << size, + alg_rnd_engine + ) + ); + } + auto start = std::chrono::high_resolution_clock::now(); + polynomial_product(std::move(random_polynomials)); + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start); + std::cout << "Polynomial product time: " << duration.count() << " microseconds." << std::endl; +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/parallel-crypto3/libs/parallel-zk/include/nil/crypto3/zk/snark/systems/plonk/placeholder/gates_argument.hpp b/parallel-crypto3/libs/parallel-zk/include/nil/crypto3/zk/snark/systems/plonk/placeholder/gates_argument.hpp index 566a76c976..7ccfa5f24a 100644 --- a/parallel-crypto3/libs/parallel-zk/include/nil/crypto3/zk/snark/systems/plonk/placeholder/gates_argument.hpp +++ b/parallel-crypto3/libs/parallel-zk/include/nil/crypto3/zk/snark/systems/plonk/placeholder/gates_argument.hpp @@ -73,15 +73,16 @@ namespace nil { struct placeholder_gates_argument { typedef typename ParamsType::transcript_hash_type transcript_hash_type; + using value_type = typename FieldType::value_type; using transcript_type = transcript::fiat_shamir_heuristic_sequential; - using polynomial_dfs_type = math::polynomial_dfs; - using variable_type = plonk_variable; + using polynomial_dfs_type = math::polynomial_dfs; + using variable_type = plonk_variable; using polynomial_dfs_variable_type = 
plonk_variable; typedef detail::placeholder_policy policy_type; constexpr static const std::size_t argument_size = 1; - +#ifndef GPU_PROVER static inline void build_variable_value_map( const math::expression& expr, const plonk_polynomial_dfs_table& assignments, @@ -239,7 +240,188 @@ namespace nil { }; return F; } +#else + static inline std::unordered_map build_variable_value_map( + const math::expression& expr, + const plonk_polynomial_dfs_table& assignments, + std::shared_ptr> domain, + std::size_t extended_domain_size, + std::unordered_map& variable_values_out, + const polynomial_dfs_type &mask_polynomial, + const polynomial_dfs_type &lagrange_0, + + sycl::queue& queue + ) { + + std::unordered_map variable_events_set; + + math::expression_for_each_variable_visitor visitor( + [&variable_set, &variable_values_out](const variable_type& var) { + // Create the structure of the map so we can change the values later. + if (variable_events_set.find(var) == variable_events_set.end()) { + variable_events_set[var] = sycl::event(); + variable_values_out[var] = sycl::malloc_device(extended_domain_size, queue); + } + }); + + visitor.visit(expr); + + std::shared_ptr> extended_domain = + math::make_evaluation_domain(extended_domain_size); + + // TODO: move mask_polynomial/lagrange_0 conversions up the callstack + value_type* mask_polynomial_buf = sycl::malloc_device(extended_domain_size, queue); + value_type* lagrange_0_buf = sycl::malloc_device(extended_domain_size, queue); + + auto mask_polynomial_event = queue.copy( + mask_polynomial.data(), mask_polynomial_buf, + mask_polynomial.size() + ); + auto lagrange_0_event = queue.copy( + lagrange_0.data(), lagrange_0_buf, + lagrange_0.size() + ); + value_type* mask_lagrange_diff_buf = sycl::malloc_device(extended_domain_size, queue); + sycl::event mask_lagrange_diff_event = queue.submit([&](sycl::handler& cgh) { + cgh.depends_on({mask_polynomial_event, lagrange_0_event}); + cgh.parallel_for(sycl::range<1>(extended_domain_size), 
[=](sycl::id<1> idx) { + mask_lagrange_diff_buf[idx] = mask_polynomial_buf[idx] - lagrange_0_buf[idx]; + }); + }); + + for (const auto& var : variable_set) { + // Convert the variable to polynomial_dfs variable type. + polynomial_dfs_variable_type var_dfs(var.index, var.rotation, var.relative, + static_cast( + static_cast(var.type))); + + value_type* assignment = nullptr; + if( var.index == PLONK_SPECIAL_SELECTOR_ALL_USABLE_ROWS_SELECTED && var.type == variable_type::column_type::selector){ + assignment = mask_polynomial_buf; + variable_events_set[var] = mask_polynomial_event; + } else if( var.index == PLONK_SPECIAL_SELECTOR_ALL_NON_FIRST_USABLE_ROWS_SELECTED && var.type == variable_type::column_type::selector) { + assignment = mask_lagrange_diff_buf; + variable_events_set[var] = mask_lagrange_diff_event; + } else { + assignment = variable_values_out[var]; + variable_events_set[var] = handle_polynomial_resizing( + assignment, extended_domain_size, max_domain_size, + assignments.get_variable_value(var_dfs, domain).degree(), + queue, + ); + } + + } + + sycl::free(mask_polynomial_buf, queue); + sycl::free(lagrange_0_buf, queue); + sycl::free(mask_lagrange_diff_buf, queue); + return variable_set; + } + + static inline std::array prove_eval( + const typename policy_type::constraint_system_type &constraint_system, + const plonk_polynomial_dfs_table &column_polynomials, + std::shared_ptr> original_domain, + std::uint32_t max_gates_degree, + const polynomial_dfs_type &mask_polynomial, + const polynomial_dfs_type &lagrange_0, + transcript_type& transcript + ) { + PROFILE_SCOPE("gate_argument_time"); + // max_gates_degree that comes from the outside does not take into account multiplication + // by selector. 
+ ++max_gates_degree; + typename FieldType::value_type theta = transcript.template challenge(); + + auto value_type_to_polynomial_dfs = []( + const typename variable_type::assignment_type& coeff) { + return polynomial_dfs_type(0, 1, coeff); + }; + + std::vector extended_domain_sizes; + std::vector degree_limits; + std::uint32_t max_degree = std::pow(2, ceil(std::log2(max_gates_degree))); + std::uint32_t max_domain_size = original_domain->m * max_degree; + + degree_limits.push_back(max_degree); + extended_domain_sizes.push_back(max_domain_size); + degree_limits.push_back(max_degree / 2); + extended_domain_sizes.push_back(max_domain_size / 2); + + std::vector> expressions(extended_domain_sizes.size()); + auto theta_acc = FieldType::value_type::one(); + + // Every constraint has variable type 'variable_type', but we want it to use + // 'polynomial_dfs_variable_type' instead. The only difference is the coefficient type + // inside a term. We want the coefficients to be dfs polynomials here. + math::expression_variable_type_converter converter( + value_type_to_polynomial_dfs); + + math::expression_max_degree_visitor visitor; + + const auto& gates = constraint_system.gates(); + + for (const auto& gate: gates) { + std::vector> gate_results(extended_domain_sizes.size()); + for (std::size_t constraint_idx = 0; constraint_idx < gate.constraints.size(); ++constraint_idx) { + const auto& constraint = gate.constraints[constraint_idx]; + auto next_term = constraint * theta_acc; + + theta_acc *= theta; + // +1 stands for the selector multiplication. + size_t constraint_degree = visitor.compute_max_degree(constraint) + 1; + for (int i = extended_domain_sizes.size() - 1; i >= 0; --i) { + // Whatever the degree of term is, add it to the maximal degree expression. 
+ if (degree_limits[i] >= constraint_degree || i == 0) { + gate_results[i] += next_term; + break; + } + } + } + variable_type selector(gate.selector_index, 0, false, variable_type::column_type::selector); + for (size_t i = 0; i < extended_domain_sizes.size(); ++i) { + gate_results[i] *= selector; + expressions[i] += gate_results[i]; + } + } + + std::array F; + + F[0] = polynomial_dfs_type::zero(); + for (std::size_t i = 0; i < extended_domain_sizes.size(); ++i) { + std::unordered_map variable_values; + + build_variable_value_map( + expressions[i], column_polynomials, original_domain, + extended_domain_sizes[i], variable_values, + mask_polynomial, lagrange_0 + ); + + polynomial_dfs_type result(extended_domain_sizes[i] - 1, extended_domain_sizes[i]); + wait_for_all(parallel_run_in_chunks( + extended_domain_sizes[i], + [&variable_values, &extended_domain_sizes, &result, &expressions, i] + (std::size_t begin, std::size_t end) { + for (std::size_t j = begin; j < end; ++j) { + // Don't use cache here. In practice it's slower to maintain the cache + // than to re-compute the subexpression value when value type is field element. 
+ math::expression_evaluator evaluator( + expressions[i], + [&assignments=variable_values, j] + (const variable_type &var) -> const typename FieldType::value_type& { + return assignments[var][j]; + }); + result[j] = evaluator.evaluate(); + } + }, ThreadPool::PoolLevel::HIGH)); + + F[0] += result; + }; + return F; + } +#endif static inline std::array verify_eval(const std::vector>> &gates, typename policy_type::evaluation_map &evaluations, diff --git a/parallel-crypto3/libs/parallel-zk/test/CMakeLists.txt b/parallel-crypto3/libs/parallel-zk/test/CMakeLists.txt index e77dc23406..9cc408c128 100644 --- a/parallel-crypto3/libs/parallel-zk/test/CMakeLists.txt +++ b/parallel-crypto3/libs/parallel-zk/test/CMakeLists.txt @@ -19,7 +19,7 @@ cm_test_link_libraries(${CMAKE_WORKSPACE_NAME}_${CURRENT_PROJECT_NAME} Boost::unit_test_framework Boost::log ) -set_target_properties(_cm_internal_tests-actor-zk-test PROPERTIES CXX_STANDARD 20) +set_target_properties(_cm_internal_tests-actor-zk-test PROPERTIES CXX_STANDARD 23) target_precompile_headers(_cm_internal_tests-actor-zk-test REUSE_FROM crypto3_precompiled_headers) if(PROFILING_ENABLED) @@ -39,7 +39,7 @@ macro(define_zk_test test) ${Boost_INCLUDE_DIRS}) - set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${full_test_name} PROPERTIES CXX_STANDARD 23) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") target_compile_options(${full_test_name} PRIVATE "-fconstexpr-steps=2147483647") diff --git a/parallel-crypto3/libs/parallelization-utils/test/CMakeLists.txt b/parallel-crypto3/libs/parallelization-utils/test/CMakeLists.txt index 4656ac9944..dffafd0e65 100644 --- a/parallel-crypto3/libs/parallelization-utils/test/CMakeLists.txt +++ b/parallel-crypto3/libs/parallelization-utils/test/CMakeLists.txt @@ -38,7 +38,7 @@ macro(define_actor_core_test name) ${Boost_INCLUDE_DIRS}) - set_target_properties(${test_name} PROPERTIES CXX_STANDARD 20) + set_target_properties(${test_name} PROPERTIES CXX_STANDARD 23) 
get_target_property(target_type Boost::unit_test_framework TYPE) if(target_type STREQUAL "SHARED_LIB") diff --git a/proof-producer.nix b/proof-producer.nix index ec8f0c696d..8b894abb6a 100644 --- a/proof-producer.nix +++ b/proof-producer.nix @@ -1,4 +1,6 @@ { lib, + pkgs, + llvmPackages_19, stdenv, ninja, pkg-config, @@ -16,13 +18,18 @@ staticBuild ? true, runTests ? false, sanitize? false, - crypto3_tests? false, - parallel_crypto3_tets? false, - crypto3_bechmarks? false, - parallel_crypto3_bechmarks? false, + crypto3_tests ? false, + parallel_crypto3_tests ? false, + crypto3_bechmarks ? false, + parallel_crypto3_bechmarks ? false, + enableGPU ? false, }: let inherit (lib) optional; + opensycl = pkgs.callPackage ./opensycl.nix { + inherit (pkgs); + cudaSupport = enableGPU; + }; in stdenv.mkDerivation { name = "Proof-producer"; pname = "proof-producer"; @@ -30,15 +37,22 @@ in stdenv.mkDerivation { src = lib.sourceByRegex ./. ["^proof-producer(/.*)?$" "^crypto3(/.*)?$" "^parallel-crypto3(/.*)?$" "CMakeLists.txt"]; hardeningDisable = [ "fortify" ]; - nativeBuildInputs = [ cmake ninja pkg-config ] ++ + nativeBuildInputs = [ cmake ninja pkg-config opensycl llvmPackages_19.openmp ] ++ (lib.optional (!stdenv.isDarwin) gdb) ++ (lib.optional (stdenv.isDarwin) lldb); # enableDebugging will keep debug symbols in boost propagatedBuildInputs = [ (if enableDebug then (enableDebugging boost) else boost) ]; - buildInputs = [cmake_modules gtest protobuf] ++ - ( lib.optional (staticBuild) glibc.static ); + buildInputs = + [cmake_modules gtest protobuf] + ++ ( lib.optional (staticBuild) glibc.static ) + ++ (if enableGPU then [ + pkgs.cudaPackages.cudatoolkit + pkgs.cudaPackages.cuda_cudart + pkgs.cudaPackages.cuda_nvcc + pkgs.linuxPackages.nvidia_x11 + ] else []); cmakeFlags = [ @@ -48,11 +62,12 @@ in stdenv.mkDerivation { (if sanitize then "-DSANITIZE=ON" else "-DSANITIZE=OFF") "-DPROOF_PRODUCER_ENABLE=TRUE" (if crypto3_tests then "-DBUILD_CRYPTO3_TESTS=TRUE" else 
"-DBUILD_CRYPTO3_TESTS=False") - (if parallel_crypto3_tets then "-DBUILD_PARALLEL_CRYPTO3_TESTS=TRUE" else "") + (if parallel_crypto3_tests then "-DBUILD_PARALLEL_CRYPTO3_TESTS=TRUE" else "") (if parallel_crypto3_bechmarks then "-DENABLE_BENCHMARKS=ON" else "-DENABLE_BENCHMARKS=OFF") (if crypto3_bechmarks then "-DBUILD_CRYPTO3_BENCH_TESTS=ON" else "-DBUILD_CRYPTO3_BENCH_TESTS=OFF") (if staticBuild then "-DPROOF_PRODUCER_STATIC_BINARIES=ON" else "-DPROOF_PRODUCER_STATIC_BINARIES=OFF") "-G Ninja" + (if enableGPU then "-DGPU_PROVER=ON" else "-DGPU_PROVER=OFF") ]; cmakeBuildType = if enableDebug then "Debug" else "Release"; @@ -69,7 +84,14 @@ in stdenv.mkDerivation { find .. -type f -name '*_benchmark.xml' -exec cp {} ${placeholder "out"}/test-logs \; ''; - shellHook = '' + shellHook = + (if enableGPU then '' + CXX=syclcc-clang; export CXX + ACPP_ADAPTIVITY_LEVEL=2; export ACPP_ADAPTIVITY_LEVEL + '' else "") + + '' + rm -rf build + eval $configurePhase PS1="\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ " echo "Welcome to Proof-producer development environment!" ''; diff --git a/proof-producer/libs/assigner/CMakeLists.txt b/proof-producer/libs/assigner/CMakeLists.txt index 5d85ab5d19..2888d38830 100644 --- a/proof-producer/libs/assigner/CMakeLists.txt +++ b/proof-producer/libs/assigner/CMakeLists.txt @@ -36,7 +36,7 @@ add_library(proof_generatorAssigner ${PROTO_SRC} ${PROTO_HASH_HEADER} ) -set_target_properties(proof_generatorAssigner PROPERTIES CXX_STANDARD 20) +set_target_properties(proof_generatorAssigner PROPERTIES CXX_STANDARD 23) target_include_directories(proof_generatorAssigner PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}