From 43d729a40365f6ea643f779ffd2d89def205d60a Mon Sep 17 00:00:00 2001 From: root Date: Mon, 25 Nov 2024 12:16:10 +0100 Subject: [PATCH] SYCL update. --- .../math/domains/basic_radix2_domain.hpp | 12 +- find_symbol.sh | 21 ++ flake.nix | 1 + lzpatcher.sh | 59 ++++ nclang++ | 276 ++++++++++++++++++ opensycl.nix | 71 +++++ parallel-crypto3.nix | 38 ++- parallel-crypto3/CMakeLists.txt | 83 +++++- .../detail/basic_radix2_domain_aux.hpp | 74 +++-- .../parallel-math/test/polynomial_dfs.cpp | 6 +- .../actor/core/sycl_parallelization_utils.hpp | 40 ++- 11 files changed, 611 insertions(+), 70 deletions(-) create mode 100755 find_symbol.sh create mode 100755 lzpatcher.sh create mode 100644 nclang++ create mode 100644 opensycl.nix diff --git a/crypto3/libs/math/include/nil/crypto3/math/domains/basic_radix2_domain.hpp b/crypto3/libs/math/include/nil/crypto3/math/domains/basic_radix2_domain.hpp index 53fe992865..373b1f7c9e 100755 --- a/crypto3/libs/math/include/nil/crypto3/math/domains/basic_radix2_domain.hpp +++ b/crypto3/libs/math/include/nil/crypto3/math/domains/basic_radix2_domain.hpp @@ -86,10 +86,10 @@ namespace nil { } } - if (!fft_cache) { + /*if (!fft_cache) { create_fft_cache(); - } - detail::basic_radix2_fft_cached(a, fft_cache->first); + }*/ + detail::basic_radix2_fft_cached(a, omega); } void inverse_fft(std::vector &a) override { @@ -101,10 +101,10 @@ namespace nil { } } - if (!fft_cache) { + /*if (!fft_cache) { create_fft_cache(); - } - detail::basic_radix2_fft_cached(a, fft_cache->second); + }*/ + detail::basic_radix2_fft_cached(a, omega.inversed()); const field_value_type sconst = field_value_type(a.size()).inversed(); for (std::size_t i = 0; i < a.size(); ++i) { diff --git a/find_symbol.sh b/find_symbol.sh new file mode 100755 index 0000000000..563d0d10a8 --- /dev/null +++ b/find_symbol.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Symbol to search for +SYMBOL="$2" + +# Check if a directory was provided +if [ -z "$1" ]; then + echo "Usage: $0 /path/to/directory symbol" + exit 1 +fi + +# Directory to search +DIR="$1" + +# Find all .so and .a files and process them +find "$DIR" -type f \( -name '*.so*' -o -name '*.a' \) -print0 | while IFS= read -r -d '' file; do + # Run nm and search for the symbol + if nm -D -U "$file" 2>/dev/null | grep -Fq "$SYMBOL"; then + echo "Symbol found in: $file" + fi +done diff --git a/flake.nix b/flake.nix index 0cc747f9a1..1461621551 100644 --- a/flake.nix +++ b/flake.nix @@ -50,6 +50,7 @@ enableDebug = false; }); parallel-crypto3-tests = (pkgs.callPackage ./parallel-crypto3.nix { + stdenv = pkgs.llvmPackages_19.stdenv; runTests = true; enableDebug = false; }); diff --git a/lzpatcher.sh b/lzpatcher.sh new file mode 100755 index 0000000000..82fa8112fa --- /dev/null +++ b/lzpatcher.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# check-rpath.sh + +#!/bin/bash + +set -e + +# Usage function +usage() { + echo "Usage: $0 /path/to/directory" + exit 1 +} + +# Check if directory is provided +if [ -z "$1" ]; then + usage +fi + +DIRECTORY="$1" + +# Verify the directory exists +if [ ! -d "$DIRECTORY" ]; then + echo "Error: Directory '$DIRECTORY' does not exist." + exit 1 +fi + +LIBZ_DIR="/nix/store/2k9k3q1vk8z6w7743k6nb22vnb05xv06-zlib-1.3.1/lib/" +echo "Library directory: $LIBZ_DIR" + +# Iterate over all files in the directory +find "$DIRECTORY" -maxdepth 1 -type f | while read -r FILE; do + # Check if file is an ELF executable + if file "$FILE" | grep -q 'ELF'; then + echo "Processing ELF executable: $FILE" + + # Backup the original file + cp "$FILE" "$FILE.bak" + + # Get existing RPATH + EXISTING_RPATH=$(patchelf --print-rpath "$FILE" || true) + + # Determine the new RPATH + if [ -z "$EXISTING_RPATH" ]; then + NEW_RPATH="$LIBZ_DIR" + else + NEW_RPATH="$EXISTING_RPATH:$LIBZ_DIR" + fi + + # Modify the RPATH + patchelf --set-rpath "$NEW_RPATH" "$FILE" + + echo "Updated RPATH for $FILE to $NEW_RPATH" + else + echo "Skipping non-ELF file: $FILE" + fi +done + +echo "RPATH update complete." diff --git a/nclang++ b/nclang++ new file mode 100644 index 0000000000..452408bfb1 --- /dev/null +++ b/nclang++ @@ -0,0 +1,276 @@ +#! /nix/store/717iy55ncqs0wmhdkwc5fg2vci5wbmq8-bash-5.2p32/bin/bash +set -eu -o pipefail +o posix +shopt -s nullglob + +if (( "${NIX_DEBUG:-0}" >= 7 )); then + set -x +fi + +path_backup="$PATH" + +# That @-vars are substituted separately from bash evaluation makes +# shellcheck think this, and others like it, are useless conditionals. +# shellcheck disable=SC2157 +if [[ -n "/nix/store/ph44jcx3ddmlwh394mh1wb7f1qigxqb1-coreutils-9.5" && -n "/nix/store/lvnwdmnjm7nvaq0a3vhvvn46iy4ql7gr-gnugrep-3.11" ]]; then + PATH="/nix/store/ph44jcx3ddmlwh394mh1wb7f1qigxqb1-coreutils-9.5/bin:/nix/store/lvnwdmnjm7nvaq0a3vhvvn46iy4ql7gr-gnugrep-3.11/bin" +fi + +source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/utils.bash + + +# Parse command line options and set several variables. +# For instance, figure out if linker flags should be passed. +# GCC prints annoying warnings when they are not needed. +dontLink=0 +nonFlagArgs=0 +cc1=0 +# shellcheck disable=SC2193 +[[ "/nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++" = *++ ]] && isCxx=1 || isCxx=0 +cxxInclude=1 +cxxLibrary=1 +cInclude=1 + +expandResponseParams "$@" + +declare -ag positionalArgs=() +declare -i n=0 +nParams=${#params[@]} +while (( "$n" < "$nParams" )); do + p=${params[n]} + p2=${params[n+1]:-} # handle `p` being last one + n+=1 + + case "$p" in + -[cSEM] | -MM) dontLink=1 ;; + -cc1) cc1=1 ;; + -nostdinc) cInclude=0 cxxInclude=0 ;; + -nostdinc++) cxxInclude=0 ;; + -nostdlib) cxxLibrary=0 ;; + -x*-header) dontLink=1 ;; # both `-x c-header` and `-xc-header` are accepted by clang + -xc++*) isCxx=1 ;; # both `-xc++` and `-x c++` are accepted by clang + -x) + case "$p2" in + *-header) dontLink=1 ;; + c++*) isCxx=1 ;; + esac + ;; + --) # Everything else is positional args! + # See: https://github.com/llvm/llvm-project/commit/ed1d07282cc9d8e4c25d585e03e5c8a1b6f63a74 + + # Any positional arg (i.e. any argument after `--`) will be + # interpreted as a "non flag" arg: + if [[ -v "params[$n]" ]]; then nonFlagArgs=1; fi + + positionalArgs=("${params[@]:$n}") + params=("${params[@]:0:$((n - 1))}") + break; + ;; + -?*) ;; + *) nonFlagArgs=1 ;; # Includes a solitary dash (`-`) which signifies standard input; it is not a flag + esac +done + +# If we pass a flag like -Wl, then gcc will call the linker unless it +# can figure out that it has to do something else (e.g., because of a +# "-c" flag). So if no non-flag arguments are given, don't pass any +# linker flags. This catches cases like "gcc" (should just print +# "gcc: no input files") and "gcc -v" (should print the version). +if [ "$nonFlagArgs" = 0 ]; then + dontLink=1 +fi + +# Arocc does not link +if [ "" = 1 ]; then + dontLink=1 +fi + +# Optionally filter out paths not refering to the store. +if [[ "${NIX_ENFORCE_PURITY:-}" = 1 && -n "$NIX_STORE" ]]; then + kept=() + nParams=${#params[@]} + declare -i n=0 + while (( "$n" < "$nParams" )); do + p=${params[n]} + p2=${params[n+1]:-} # handle `p` being last one + n+=1 + + skipNext=false + path="" + case "$p" in + -[IL]/*) path=${p:2} ;; + -[IL] | -isystem) path=$p2 skipNext=true ;; + esac + + if [[ -n $path ]] && badPath "$path"; then + skip "$path" + $skipNext && n+=1 + continue + fi + + kept+=("$p") + done + # Old bash empty array hack + params=(${kept+"${kept[@]}"}) +fi + +# Flirting with a layer violation here. +if [ -z "${NIX_BINTOOLS_WRAPPER_FLAGS_SET_x86_64_unknown_linux_gnu:-}" ]; then + source /nix/store/lfabp2rmzyn7ddbhgls0gsjjqckzw3np-binutils-wrapper-2.43.1/nix-support/add-flags.sh +fi + +# Put this one second so libc ldflags take priority. +if [ -z "${NIX_CC_WRAPPER_FLAGS_SET_x86_64_unknown_linux_gnu:-}" ]; then + source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/add-flags.sh +fi + +# Clear march/mtune=native -- they bring impurity. +if [ "$NIX_ENFORCE_NO_NATIVE_x86_64_unknown_linux_gnu" = 1 ]; then + kept=() + # Old bash empty array hack + for p in ${params+"${params[@]}"}; do + if [[ "$p" = -m*=native ]]; then + skip "$p" + else + kept+=("$p") + fi + done + # Old bash empty array hack + params=(${kept+"${kept[@]}"}) +fi + +if [[ "$isCxx" = 1 ]]; then + if [[ "$cxxInclude" = 1 ]]; then + # + # The motivation for this comment is to explain the reason for appending + # the C++ stdlib to NIX_CFLAGS_COMPILE, which I initially thought should + # change and later realized it shouldn't in: + # + # https://github.com/NixOS/nixpkgs/pull/185569#issuecomment-1234959249 + # + # NIX_CFLAGS_COMPILE contains dependencies added using "-isystem", and + # NIX_CXXSTDLIB_COMPILE adds the C++ stdlib using "-isystem". Appending + # NIX_CXXSTDLIB_COMPILE to NIX_CLAGS_COMPILE emulates this part of the + # include lookup order from GCC/Clang: + # + # > 4. Directories specified with -isystem options are scanned in + # > left-to-right order. + # > 5. Standard system directories are scanned. + # > 6. Directories specified with -idirafter options are scanned + # > in left-to-right order. + # + # NIX_CXX_STDLIB_COMPILE acts as the "standard system directories" that + # are otherwise missing from CC in nixpkgs, so should be added last. + # + # This means that the C standard library should never be present inside + # NIX_CFLAGS_COMPILE, because it MUST come after the C++ stdlib. It is + # added automatically by cc-wrapper later using "-idirafter". + # + NIX_CFLAGS_COMPILE_x86_64_unknown_linux_gnu+=" $NIX_CXXSTDLIB_COMPILE_x86_64_unknown_linux_gnu" + fi + if [[ "$cxxLibrary" = 1 ]]; then + NIX_CFLAGS_LINK_x86_64_unknown_linux_gnu+=" $NIX_CXXSTDLIB_LINK_x86_64_unknown_linux_gnu" + fi +fi + +source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/add-hardening.sh + +# Add the flags for the C compiler proper. +extraAfter=(${hardeningCFlagsAfter[@]+"${hardeningCFlagsAfter[@]}"} $NIX_CFLAGS_COMPILE_x86_64_unknown_linux_gnu) +extraBefore=(${hardeningCFlagsBefore[@]+"${hardeningCFlagsBefore[@]}"} $NIX_CFLAGS_COMPILE_BEFORE_x86_64_unknown_linux_gnu) + +# Remove '-fzero-call-used-regs=used-gpr' from extraBefore +filteredExtraBefore=() +for arg in "${extraBefore[@]}"; do + if [[ "$arg" != "-fzero-call-used-regs=used-gpr" ]]; then + filteredExtraBefore+=("$arg") + fi +done +extraBefore=("${filteredExtraBefore[@]}") + +if [ "$dontLink" != 1 ]; then + linkType=$(checkLinkType $NIX_LDFLAGS_BEFORE_x86_64_unknown_linux_gnu "${params[@]}" ${NIX_CFLAGS_LINK_x86_64_unknown_linux_gnu:-} $NIX_LDFLAGS_x86_64_unknown_linux_gnu) + + # Add the flags that should only be passed to the compiler when + # linking. + extraAfter+=($(filterRpathFlags "$linkType" $NIX_CFLAGS_LINK_x86_64_unknown_linux_gnu)) + + # Add the flags that should be passed to the linker (and prevent + # `ld-wrapper' from adding NIX_LDFLAGS_x86_64_unknown_linux_gnu again). + for i in $(filterRpathFlags "$linkType" $NIX_LDFLAGS_BEFORE_x86_64_unknown_linux_gnu); do + extraBefore+=("-Wl,$i") + done + if [[ "$linkType" == dynamic && -n "$NIX_DYNAMIC_LINKER_x86_64_unknown_linux_gnu" ]]; then + extraBefore+=("-Wl,-dynamic-linker=$NIX_DYNAMIC_LINKER_x86_64_unknown_linux_gnu") + fi + for i in $(filterRpathFlags "$linkType" $NIX_LDFLAGS_x86_64_unknown_linux_gnu); do + if [ "${i:0:3}" = -L/ ]; then + extraAfter+=("$i") + else + extraAfter+=("-Wl,$i") + fi + done + export NIX_LINK_TYPE_x86_64_unknown_linux_gnu=$linkType +fi + +if [[ -e /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/add-local-cc-cflags-before.sh ]]; then + source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/add-local-cc-cflags-before.sh +fi + +# As a very special hack, if the arguments are just `-v', then don't +# add anything. This is to prevent `gcc -v' (which normally prints +# out the version number and returns exit code 0) from printing out +# `No input files specified' and returning exit code 1. +if [ "$*" = -v ]; then + extraAfter=() + extraBefore=() +fi + +# clang's -cc1 mode is not compatible with most options +# that we would pass. Rather than trying to pass only +# options that would work, let's just remove all of them. +if [ "$cc1" = 1 ]; then + extraAfter=() + extraBefore=() +fi + +# Finally, if we got any positional args, append them to `extraAfter` +# now: +if [[ "${#positionalArgs[@]}" -gt 0 ]]; then + extraAfter+=(-- "${positionalArgs[@]}") +fi + +#NIX_DEBUG=1 +# Optionally print debug info. +if (( "${NIX_DEBUG:-0}" >= 1 )); then + # Old bash workaround, see ld-wrapper for explanation. + echo "extra flags before to /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++:" >&2 + printf " %q\n" ${extraBefore+"${extraBefore[@]}"} >&2 + echo "original flags to /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++:" >&2 + printf " %q\n" ${params+"${params[@]}"} >&2 + echo "extra flags after to /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++:" >&2 + printf " %q\n" ${extraAfter+"${extraAfter[@]}"} >&2 +fi + +PATH="$path_backup" +# Old bash workaround, see above. + +# if a cc-wrapper-hook exists, run it. +if [[ -e /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/cc-wrapper-hook ]]; then + compiler=/nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++ + source /nix/store/5j2f6adr7awqag8c7cv6q4px0lz477gc-clang-wrapper-19.1.1/nix-support/cc-wrapper-hook +fi + +if (( "${NIX_CC_USE_RESPONSE_FILE:-1}" >= 1 )); then + responseFile=$(mktemp "${TMPDIR:-/tmp}/cc-params.XXXXXX") + trap 'rm -f -- "$responseFile"' EXIT + printf "%q\n" \ + ${extraBefore+"${extraBefore[@]}"} \ + ${params+"${params[@]}"} \ + ${extraAfter+"${extraAfter[@]}"} > "$responseFile" + /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++ "@$responseFile" +else + exec /nix/store/m4yb6xs0g07l0bc3c4i0klgv5lgz7g6s-clang-19.1.1/bin/clang++ \ + ${extraBefore+"${extraBefore[@]}"} \ + ${params+"${params[@]}"} \ + ${extraAfter+"${extraAfter[@]}"} +fi diff --git a/opensycl.nix b/opensycl.nix new file mode 100644 index 0000000000..bb3335d0b4 --- /dev/null +++ b/opensycl.nix @@ -0,0 +1,71 @@ +{ lib +, fetchFromGitHub +, llvmPackages_15 +, lld_15 +, python3 +, cmake +, boost +, libxml2 +, libffi +, makeWrapper +, config +, rocmPackages_5 +, rocmSupport ? config.rocmSupport +}: +let + inherit (llvmPackages_15) stdenv; + # move to newer ROCm version once supported + rocmPackages = rocmPackages_5; +in +stdenv.mkDerivation rec { + pname = "OpenSYCL"; + version = "0.9.4"; + + src = fetchFromGitHub { + owner = "OpenSYCL"; + repo = "OpenSYCL"; + rev = "v${version}"; + sha256 = "sha256-5YkuUOAnvoAD5xDKxKMPq0B7+1pb6hVisPAhs0Za1ls="; + }; + + nativeBuildInputs = [ + cmake + makeWrapper + ]; + + buildInputs = [ + libxml2 + libffi + boost + llvmPackages_15.openmp + llvmPackages_15.libclang.dev + llvmPackages_15.llvm + ] ++ lib.optionals rocmSupport [ + rocmPackages.clr + rocmPackages.rocm-runtime + ]; + + # opensycl makes use of clangs internal headers. Its cmake does not successfully discover them automatically on nixos, so we supply the path manually + cmakeFlags = [ + "-DCLANG_INCLUDE_PATH=${llvmPackages_15.libclang.dev}/include" + "-DNVCXX_COMPILER=/opt/nvidia/hpc_sdk/Linux_x86_64/24.9/compilers/bin/nvc++" + "-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda" + "-DWITH_CUDA_BACKEND=ON" + ]; + + postFixup = '' + wrapProgram $out/bin/syclcc-clang \ + --prefix PATH : ${lib.makeBinPath [ python3 lld_15 ]} \ + --add-flags "-L${llvmPackages_15.openmp}/lib" \ + --add-flags "-I${llvmPackages_15.openmp.dev}/include" \ + '' + lib.optionalString rocmSupport '' + --add-flags "--rocm-device-lib-path=${rocmPackages.rocm-device-libs}/amdgcn/bitcode" + ''; + + meta = with lib; { + homepage = "https://github.com/OpenSYCL/OpenSYCL"; + description = "Multi-backend implementation of SYCL for CPUs and GPUs"; + maintainers = with maintainers; [ yboettcher ]; + license = licenses.bsd2; + }; +} \ No newline at end of file diff --git a/parallel-crypto3.nix b/parallel-crypto3.nix index 1047ff921b..ec2811ab55 100644 --- a/parallel-crypto3.nix +++ b/parallel-crypto3.nix @@ -1,4 +1,5 @@ { lib, + pkgs, stdenv, ninja, pkg-config, @@ -7,8 +8,22 @@ gdb, lldb, cmake_modules, - crypto3, - opensycl, + libgcc, + glibc, + libffi, + libz, + libxml2, + icu70, + ncurses, + gcc, + xz, + libedit, + llvm, + libcxx, + libstdcxx5, + llvmPackages_19, + #opensycl, + #cudatoolkit, enableDebugging, enableDebug ? false, runTests ? false, @@ -20,8 +35,8 @@ in stdenv.mkDerivation { src = lib.sourceByRegex ./. ["^crypto3(/.*)?$" "^parallel-crypto3(/.*)?$" "CMakeLists.txt"]; hardeningDisable = [ "fortify" ]; - - nativeBuildInputs = [ cmake ninja pkg-config opensycl ] ++ + # libgcc gcc glibc libffi libxml2 icu70 ncurses xz libedit llvm libcxx libstdcxx5 libz + nativeBuildInputs = [ cmake ninja pkg-config llvmPackages_19.openmp ] ++ (lib.optional (!stdenv.isDarwin) gdb) ++ (lib.optional (stdenv.isDarwin) lldb); @@ -35,15 +50,28 @@ in stdenv.mkDerivation { (if runTests then "-DBUILD_PARALLEL_CRYPTO3_TESTS=TRUE" else "") (if enableDebug then "-DCMAKE_BUILD_TYPE=Debug" else "-DCMAKE_BUILD_TYPE=Release") "-DPARALLEL_CRYPTO3_ENABLE=TRUE" + #"-DCMAKE_CUDA_HOST_COMPILER=/nix/store/ykv9x1iirnkxfdnyzwhfzhz23csqvqn9-clang-wrapper-19.1.1/bin/clang++" + #"-DCMAKE_CXX_COMPILER_WORKS=1" ]; doCheck = runTests; # tests are inside parallel-crypto3-tests derivation - + dontFixCmake = true; checkPhase = '' cd parallel-crypto3 && ctest --verbose --output-on-failure -R && cd .. ''; shellHook = '' + NVARCH=`uname -s`_`uname -m`; export NVARCH + NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS + MANPATH=$MANPATH:$NVCOMPILERS/$NVARCH/24.9/compilers/man; export MANPATH + PATH=/root/acpp/bin/:$NVCOMPILERS/$NVARCH/24.9/compilers/bin:$PATH; export PATH + CXX=/root/acpp/bin/acpp; export CXX + # source /opt/intel/oneapi/setvars.sh + # PATH=/root/sycl_workspace/llvm/build/install/bin/:$PATH; export PATH + # DCPP_HOME=/root/sycl_workspace/; export DCPP_HOME + # LD_LIBRARY_PATH=/root/sycl_workspace/llvm/build/lib/; export LD_LIBRARY_PATH + rm -rf build + eval $configurePhase PS1="\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ " echo "Welcome to Parallel Crypto3 development environment!" ''; diff --git a/parallel-crypto3/CMakeLists.txt b/parallel-crypto3/CMakeLists.txt index dfcec9081d..ec5ec07f75 100644 --- a/parallel-crypto3/CMakeLists.txt +++ b/parallel-crypto3/CMakeLists.txt @@ -1,12 +1,88 @@ cmake_minimum_required(VERSION 3.22 FATAL_ERROR) +#set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "") +#set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") project(parallel-crypto3) -option(BUILD_PARALLEL_CRYPTO3_TESTS "Enable tests" FALSE) +option(BUILD_PARALLEL_CRYPTO3_TESTS "Enable tests" TRUE) +set(CMAKE_CUDA_COMPILER nvc++) find_package(CM REQUIRED) +# list(APPEND CMAKE_PREFIX_PATH /root/sycl/lib/cmake/AdaptiveCpp/) +# find_package(AdaptiveCpp CONFIG REQUIRED) include(CMConfig) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +# set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +# add_compile_options(-v) +# #add_compile_options(--hipsycl-platform=cuda --hipsycl-gpu-arch=sm_75 --hipsycl-targets=cuda:sm_75 --hipsycl-cuda-path=/var/empty/local/cuda/ --hipsycl-nvcxx=/var/empty/nvidia/hpc_sdk/Linux_x86_64/24.9/compilers/bin/nvc++) +# #add_compile_options(-I/nix/store/wkpm1n7q62pbs8ck6ad5i7zklr1k9il0-OpenSYCL-0.9.4/include/) +# add_compile_options(-I/root/sycl/include/AdaptiveCpp/) +# add_compile_options(-I/nix/store/x8rg4vhgd20i8vzykm1196f9qdb8klhh-gcc-13.3.0/include/c++/13.3.0/) +# add_compile_options(-I/nix/store/x8rg4vhgd20i8vzykm1196f9qdb8klhh-gcc-13.3.0/include/c++/13.3.0/x86_64-unknown-linux-gnu/) + +# add_compile_options(-I/nix/store/ifqg35znvnvay7j5zank40jc7p016zi8-cuda_cccl-12.4.99-dev/include/cuda/std/detail/libcxx/include/) + +# add_link_options(-L/nix/store/22nxhmsfcv2q2rpkmfvzwg2w5z1l231z-gcc-13.3.0-lib/lib64/) +# #add_link_options(-L/nix/store/sl141d1g77wvhr050ah87lcyz2czdxa3-glibc-2.40-36/lib64/) +# #add_link_options(-L/nix/store/yldgl6y1dy6nmvy9gza0flgh9is3k4s1-libcxx-18.1.8/lib) + +# add_link_options(-Xlinker -dynamic-linker -Xlinker /nix/store/sl141d1g77wvhr050ah87lcyz2czdxa3-glibc-2.40-36/lib64/ld-linux-x86-64.so.2) + +# add_link_options(-L/root/sycl/lib/hipSYCL/) +# add_link_options(-L/root/sycl/lib/) + +# add_link_options(-L/usr/local/cuda/lib64) +# add_link_options(-L/usr/local/cuda/lib64/stubs/) +# add_link_options(-L/opt/nvidia/hpc_sdk/Linux_x86_64/24.9/cuda/12.6/compat/) +# # add_link_options(-L/usr/lib/x86_64-linux-gnu/) + +# add_link_options(-rpath=/root/sycl/lib/hipSYCL/) +# add_link_options(-rpath=/root/sycl/lib/) + +# add_link_options(-rpath=/usr/local/cuda/lib64) +# add_link_options(-rpath=/usr/local/cuda/lib64/stubs/) +# add_link_options(-rpath=/opt/nvidia/hpc_sdk/Linux_x86_64/24.9/cuda/12.6/compat/) + +# add_link_options(-rpath=/usr/lib/x86_64-linux-gnu/) +# add_link_options(-rpath=/nix/store/53iigsmf32bwkfdhhihq2rppgk23k2rg-ncurses-6.4.20221231/lib/) + +# add_link_options(-lacpp-rt -lacpp-common -lcuda -lrt-backend-cuda) +# add_link_options(-ledit -ltinfo -lffi -lz -lxml2 -licuuc -licui18n -licudata -llzma) +# add_link_options(-lstdc++ -lc -v) +add_link_options(-L/usr/local/cuda/lib64) +add_link_options(-Wl,-rpath=/usr/local/cuda/lib64) +add_link_options(-L/usr/local/cuda/lib64/stubs/) +add_link_options(-Wl,-rpath=/usr/local/cuda/lib64/stubs/) +add_link_options(-L/root/acpp/lib/hipSYCL/) +add_link_options(-Wl,-rpath=/root/acpp/lib/hipSYCL/) +add_link_options(-L/opt/nvidia/hpc_sdk/Linux_x86_64/24.9/REDIST/cuda/12.6/compat/) +add_link_options(-Wl,-rpath=/opt/nvidia/hpc_sdk/Linux_x86_64/24.9/REDIST/cuda/12.6/compat/) +add_link_options(-L/nix/store/1m2jrj85fmj0sqjp8bk8hf1j6wnb1lpc-openmp-19.1.1/lib/) +add_link_options(-Wl,-rpath=/nix/store/1m2jrj85fmj0sqjp8bk8hf1j6wnb1lpc-openmp-19.1.1/lib/) +add_link_options(-L/opt/nvidia/hpc_sdk/Linux_x86_64/24.9/REDIST/cuda/12.6/lib64) +add_link_options(-Wl,-rpath=/opt/nvidia/hpc_sdk/Linux_x86_64/24.9/REDIST/cuda/12.6/lib64) +add_link_options(-lrt-backend-cuda -lcuda -lacpp-rt -lacpp-common -lomp -lnvJitLink -lnvidia-ptxjitcompiler) + +#add_compile_options(-I/opt/intel/oneapi/2025.0/include/) +#add_compile_options(-I/opt/intel/oneapi/2025.0/include/sycl/) +#add_compile_options(-Wl,-rpath -Wl,/opt/intel/oneapi/2025.0/lib/) +#add_compile_options(-Wl,-rpath=/root/sycl_workspace/llvm/build/install/lib/) +#add_compile_options(-I/nix/store/x8rg4vhgd20i8vzykm1196f9qdb8klhh-gcc-13.3.0/include/c++/13.3.0/tr1/) +#add_compile_options(-I/nix/store/n15bxkd7id2gvlwpihm4kp675aqkrwmj-glibc-2.40-36-dev/include/) +#add_compile_options(-I/nix/store/x8rg4vhgd20i8vzykm1196f9qdb8klhh-gcc-13.3.0/include/c++/13.3.0/x86_64-unknown-linux-gnu/) +# add_link_options(-L/opt/intel/oneapi/2025.0/lib/) +# add_link_options(-L/nix/store/sl141d1g77wvhr050ah87lcyz2czdxa3-glibc-2.40-36/lib64/) +# add_link_options(-L/nix/store/22nxhmsfcv2q2rpkmfvzwg2w5z1l231z-gcc-13.3.0-lib/lib64/) +# #add_link_options(-rpath /opt/intel/oneapi/2025.0/lib/ -rpath /root/sycl_workspace/llvm/build/install/lib/) +# add_link_options(-Xlinker -dynamic-linker -Xlinker /nix/store/sl141d1g77wvhr050ah87lcyz2czdxa3-glibc-2.40-36/lib64/ld-linux-x86-64.so.2) +# #add_link_options(-lsycl -lur_loader -limf -lsvml -lirng -lintlc -lz -v) # -lstdc++ -lc -lz + +# add_compile_options(-fsycl -fsycl-targets=nvptx64-nvidia-cuda) +# add_compile_options(-I/root/sycl_workspace/llvm/build/install/include/) +# add_compile_options(--cuda-path=/usr/local/cuda/) +# add_link_options(-L/root/sycl_workspace/llvm/build/install/lib/) +# add_link_options(-rpath /root/sycl_workspace/llvm/build/install/lib/:/opt/intel/oneapi/2025.0/lib/:/nix/store/2k9k3q1vk8z6w7743k6nb22vnb05xv06-zlib-1.3.1/lib/) +# add_link_options(-lsycl -lur_loader -limf -lsvml -lirng -lintlc -lz -v) + # The file compile_commands.json is generated in build directory, so LSP could # pick it up and guess all include paths, defines and other stuff. @@ -40,7 +116,8 @@ target_link_libraries(${PROJECT_NAME}_all INTERFACE actor::containers actor::math actor::zk - actor::core) + actor::core + ) # Configure package file to be able to import headers include(CMakePackageConfigHelpers) diff --git a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/detail/basic_radix2_domain_aux.hpp b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/detail/basic_radix2_domain_aux.hpp index 9244a37571..d1e989708c 100644 --- a/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/detail/basic_radix2_domain_aux.hpp +++ b/parallel-crypto3/libs/parallel-math/include/nil/crypto3/math/domains/detail/basic_radix2_domain_aux.hpp @@ -30,6 +30,9 @@ #include #include +#include +#include + #include #include @@ -37,7 +40,6 @@ #include #include -#include namespace nil { namespace crypto3 { @@ -81,50 +83,40 @@ namespace nil { if (n != (1u << logn)) throw std::invalid_argument("expected n == (1u << logn)"); + auto a_buffer = sycl::buffer( + a.data(), n, sycl::property::buffer::use_host_ptr{}); + sycl::queue q; // swapping in place (from Storer's book) // We can parallelize this look, since k and rk are pairs, they will never intersect. - sycl_parallel_for(0, n, - [logn, &a](std::size_t k) { - const std::size_t rk = crypto3::math::detail::bitreverse(k, logn); - if (k < rk) - std::swap(a[k], a[rk]); - } - ); - - + q.submit([&](sycl::handler &cgh) { + auto a_acc = a_buffer. template get_access(cgh); + cgh.parallel_for(sycl::range<1>(n), [=](sycl::id<1> idx) { + const std::size_t ridx = crypto3::math::detail::bitreverse(idx, logn); + if (idx < ridx) + std::swap(a_acc[idx], a_acc[ridx]); + }); + }); + q.wait(); + auto omega_cache_buffer = + sycl::buffer( + omega_cache.data(), n, sycl::property::buffer::use_host_ptr{}); // invariant: m = 2^{s-1} - value_type t; for (std::size_t s = 1, m = 1, inc = n / 2; s <= logn; ++s, m <<= 1, inc >>= 1) { - // w_m is 2^s-th root of unity now - size_t count_k = n / (2 * m) + (n % (2 * m) ? 1 : 0); - - // Here we can parallelize on the both loops with 'k' and 'm', because for each value of k and m - // the ranges of array 'a' used do not intersect. Think of these 2 loops as 1. - sycl_run_in_chunks( - m * count_k, - [&a, m, count_k, inc, &omega_cache](std::size_t begin, std::size_t end) { - size_t current_index = begin; - size_t start_k = begin / m; - value_type t; - for (std::size_t k_index = start_k; k_index < count_k; ++k_index) { - std::size_t k = k_index * 2 * m; - - std::size_t j = (start_k == k_index) ? (begin % m): 0; - std::size_t idx = j * inc; - - for (; j < m; ++j, idx += inc) { - t = a[k + j + m]; - t *= omega_cache[idx]; - a[k + j + m] = a[k + j]; - a[k + j + m] -= t; - a[k + j] += t; - - ++current_index; - if (current_index == end) - return; - } - } - })); + const size_t count_k = n / (2 * m) + (n % (2 * m) ? 1 : 0); + q.submit([&](sycl::handler &cgh) { + auto a_acc = a_buffer. template get_access(cgh); + auto omega_acc = omega_cache_buffer. template get_access(cgh); + cgh.parallel_for(sycl::range<1>(count_k * m), [=](sycl::id<1> index) { + const std::size_t k = (index / m) * m * 2; + const std::size_t j = index % m; + const std::size_t idx = j * inc; + const value_type t = a_acc[k + j + m] * omega_acc[idx]; + a_acc[k + j + m] = a_acc[k + j]; + a_acc[k + j + m] -= t; + a_acc[k + j] += t; + }); + }); + q.wait(); } } diff --git a/parallel-crypto3/libs/parallel-math/test/polynomial_dfs.cpp b/parallel-crypto3/libs/parallel-math/test/polynomial_dfs.cpp index efc6fdbdaf..50db088828 100644 --- a/parallel-crypto3/libs/parallel-math/test/polynomial_dfs.cpp +++ b/parallel-crypto3/libs/parallel-math/test/polynomial_dfs.cpp @@ -26,6 +26,8 @@ #define BOOST_TEST_MODULE polynomial_dfs_test +struct float128_type {}; + #include #include @@ -49,7 +51,7 @@ typedef fields::bls12_fr<381> FieldType; BOOST_AUTO_TEST_SUITE(polynomial_dfs_from_coefficients_test_suite) -BOOST_AUTO_TEST_CASE(polynomial_dfs_equal_test){ +/*BOOST_AUTO_TEST_CASE(polynomial_dfs_equal_test){ polynomial_dfs a = { 7, {0x35_cppui_modular253, 0x26D37C08AED60085FDE335498E7DFEE2AFB1463D06E338219CD0E5DDAF27D68F_cppui_modular253, @@ -1395,7 +1397,7 @@ BOOST_AUTO_TEST_CASE(polynomial_dfs_multiplication_perf_test, *boost::unit_test: auto duration = std::chrono::duration_cast(end - start); std::cout << "Multiplication time: " << duration.count() << " microseconds." << std::endl; -} +}*/ BOOST_AUTO_TEST_CASE(polynomial_dfs_resize_perf_test) { std::vector values; diff --git a/parallel-crypto3/libs/parallelization-utils/include/nil/actor/core/sycl_parallelization_utils.hpp b/parallel-crypto3/libs/parallelization-utils/include/nil/actor/core/sycl_parallelization_utils.hpp index f1cdebf61f..4c9da799a6 100644 --- a/parallel-crypto3/libs/parallelization-utils/include/nil/actor/core/sycl_parallelization_utils.hpp +++ b/parallel-crypto3/libs/parallelization-utils/include/nil/actor/core/sycl_parallelization_utils.hpp @@ -24,28 +24,42 @@ #pragma once -#include -#include - namespace nil { namespace crypto3 { - template + /*template void sycl_run_in_chunks( std::size_t elements_count, Function func ) { - hipsycl::queue q; - std::size_t max_compute_units = q.get_device().get_info(); + hipsycl::sycl::queue q; + // show what device we are on + std::cout << "Running on device: " << q.get_device().get_info() << std::endl; + std::size_t max_compute_units = q.get_device().get_info(); std::size_t workers_to_use = std::max(static_cast(1), std::min(elements_count, max_compute_units)); + //std::cout << "Using " << workers_to_use << " workers" << std::endl; + { + q.submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl::range<1>(workers_to_use), [=](sycl::id<1> idx) { + const std::size_t i = idx[0]; + const std::size_t chunk_size = elements_count / workers_to_use; + const std::size_t remainder = elements_count % workers_to_use; + const std::size_t begin = i * chunk_size + sycl::min(i, remainder); + const std::size_t end = begin + chunk_size + (i < remainder ? 1 : 0); + func(begin, end); + }); + }); + // The buffer destructor ensures synchronization + } { - q.submit([&](hipsycl::handler& cgh) { + q.submit([&](sycl::handler& cgh) { cgh.parallel_for( - hipsycl::range<1>(workers_to_use), [=](hipsycl::id<1> idx) { + sycl::range<1>(workers_to_use), [=](sycl::id<1> idx) { const std::size_t i = idx[0]; const std::size_t chunk_size = elements_count / workers_to_use; const std::size_t remainder = elements_count % workers_to_use; - const std::size_t begin = i * chunk_size + hipsycl::min(i, remainder); + const std::size_t begin = i * chunk_size + sycl::min(i, remainder); const std::size_t end = begin + chunk_size + (i < remainder ? 1 : 0); func(begin, end); }); @@ -60,16 +74,16 @@ namespace nil { std::size_t end, Function func ) { - hipsycl::queue q; + sycl::queue q; { - q.submit([&](hipsycl::handler& cgh) { + q.submit([&](sycl::handler& cgh) { cgh.parallel_for( - hipsycl::range<1>(end - start), [=](hipsycl::id<1> idx) { + sycl::range<1>(end - start), [=](sycl::id<1> idx) { func(start + idx[0]); }); }); // The buffer destructor ensures synchronization } - } + }*/ } // namespace crypto3 } // namespace nil \ No newline at end of file