diff --git a/velox/experimental/breeze/algorithms/reduce.h b/velox/experimental/breeze/breeze/algorithms/reduce.h similarity index 93% rename from velox/experimental/breeze/algorithms/reduce.h rename to velox/experimental/breeze/breeze/algorithms/reduce.h index 0ecd421e5c0fd..1014e963cda51 100644 --- a/velox/experimental/breeze/algorithms/reduce.h +++ b/velox/experimental/breeze/breeze/algorithms/reduce.h @@ -22,11 +22,11 @@ #pragma once -#include "functions/load.h" -#include "functions/reduce.h" -#include "platforms/platform.h" -#include "utils/block_details.h" -#include "utils/types.h" +#include "breeze/functions/load.h" +#include "breeze/functions/reduce.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/block_details.h" +#include "breeze/utils/types.h" namespace breeze { namespace algorithms { diff --git a/velox/experimental/breeze/algorithms/scan.h b/velox/experimental/breeze/breeze/algorithms/scan.h similarity index 97% rename from velox/experimental/breeze/algorithms/scan.h rename to velox/experimental/breeze/breeze/algorithms/scan.h index 6747a4d2dbfc4..6c60eef0afdc8 100644 --- a/velox/experimental/breeze/algorithms/scan.h +++ b/velox/experimental/breeze/breeze/algorithms/scan.h @@ -22,12 +22,12 @@ #pragma once -#include "functions/load.h" -#include "functions/scan.h" -#include "functions/store.h" -#include "platforms/platform.h" -#include "utils/block_details.h" -#include "utils/types.h" +#include "breeze/functions/load.h" +#include "breeze/functions/scan.h" +#include "breeze/functions/store.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/block_details.h" +#include "breeze/utils/types.h" namespace breeze { namespace algorithms { diff --git a/velox/experimental/breeze/algorithms/sort.h b/velox/experimental/breeze/breeze/algorithms/sort.h similarity index 98% rename from velox/experimental/breeze/algorithms/sort.h rename to velox/experimental/breeze/breeze/algorithms/sort.h index 9f03e93975e1e..c09218445c4f6 100644 --- a/velox/experimental/breeze/algorithms/sort.h +++ b/velox/experimental/breeze/breeze/algorithms/sort.h @@ -22,10 +22,10 @@ #pragma once -#include "functions/sort.h" -#include "platforms/platform.h" -#include "utils/block_details.h" -#include "utils/types.h" +#include "breeze/functions/sort.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/block_details.h" +#include "breeze/utils/types.h" namespace breeze { namespace algorithms { diff --git a/velox/experimental/breeze/functions/load.h b/velox/experimental/breeze/breeze/functions/load.h similarity index 99% rename from velox/experimental/breeze/functions/load.h rename to velox/experimental/breeze/breeze/functions/load.h index 45daeea7e401d..4238c661c0e9a 100644 --- a/velox/experimental/breeze/functions/load.h +++ b/velox/experimental/breeze/breeze/functions/load.h @@ -22,8 +22,8 @@ #pragma once -#include "platforms/platform.h" -#include "utils/types.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/types.h" namespace breeze { namespace functions { diff --git a/velox/experimental/breeze/functions/reduce.h b/velox/experimental/breeze/breeze/functions/reduce.h similarity index 98% rename from velox/experimental/breeze/functions/reduce.h rename to velox/experimental/breeze/breeze/functions/reduce.h index 1ead896d93b8d..3c8a5708b9081 100644 --- a/velox/experimental/breeze/functions/reduce.h +++ b/velox/experimental/breeze/breeze/functions/reduce.h @@ -22,8 +22,8 @@ #pragma once -#include "platforms/platform.h" -#include "utils/types.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/types.h" namespace breeze { namespace functions { diff --git a/velox/experimental/breeze/functions/scan.h b/velox/experimental/breeze/breeze/functions/scan.h similarity index 99% rename from velox/experimental/breeze/functions/scan.h rename to velox/experimental/breeze/breeze/functions/scan.h index cf7926d4392d0..2b563f247c71e 100644 --- a/velox/experimental/breeze/functions/scan.h +++ b/velox/experimental/breeze/breeze/functions/scan.h @@ -22,8 +22,8 @@ #pragma once -#include "platforms/platform.h" -#include "utils/types.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/types.h" namespace breeze { namespace functions { diff --git a/velox/experimental/breeze/functions/sort.h b/velox/experimental/breeze/breeze/functions/sort.h similarity index 98% rename from velox/experimental/breeze/functions/sort.h rename to velox/experimental/breeze/breeze/functions/sort.h index 079f5da5c32e8..02271c5b08569 100644 --- a/velox/experimental/breeze/functions/sort.h +++ b/velox/experimental/breeze/breeze/functions/sort.h @@ -22,11 +22,11 @@ #pragma once -#include "functions/load.h" -#include "functions/scan.h" -#include "functions/store.h" -#include "platforms/platform.h" -#include "utils/types.h" +#include "breeze/functions/load.h" +#include "breeze/functions/scan.h" +#include "breeze/functions/store.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/types.h" namespace breeze { namespace functions { diff --git a/velox/experimental/breeze/functions/store.h b/velox/experimental/breeze/breeze/functions/store.h similarity index 99% rename from velox/experimental/breeze/functions/store.h rename to velox/experimental/breeze/breeze/functions/store.h index 24dca0475951c..a9f61fd2b25f4 100644 --- a/velox/experimental/breeze/functions/store.h +++ b/velox/experimental/breeze/breeze/functions/store.h @@ -22,8 +22,8 @@ #pragma once -#include "platforms/platform.h" -#include "utils/types.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/types.h" namespace breeze { namespace functions { diff --git a/velox/experimental/breeze/platforms/cuda.cuh b/velox/experimental/breeze/breeze/platforms/cuda.cuh similarity index 99% rename from velox/experimental/breeze/platforms/cuda.cuh rename to velox/experimental/breeze/breeze/platforms/cuda.cuh index 54d8488eb708c..95c094d4bf5da 100644 --- a/velox/experimental/breeze/platforms/cuda.cuh +++ b/velox/experimental/breeze/breeze/platforms/cuda.cuh @@ -24,7 +24,7 @@ #include -#include "utils/types.h" +#include "breeze/utils/types.h" struct CudaSpecialization { template diff --git a/velox/experimental/breeze/platforms/hip.hpp b/velox/experimental/breeze/breeze/platforms/hip.hpp similarity index 100% rename from velox/experimental/breeze/platforms/hip.hpp rename to velox/experimental/breeze/breeze/platforms/hip.hpp diff --git a/velox/experimental/breeze/platforms/metal.h b/velox/experimental/breeze/breeze/platforms/metal.h similarity index 99% rename from velox/experimental/breeze/platforms/metal.h rename to velox/experimental/breeze/breeze/platforms/metal.h index 27311ba03d6bc..694b9358df863 100644 --- a/velox/experimental/breeze/platforms/metal.h +++ b/velox/experimental/breeze/breeze/platforms/metal.h @@ -24,7 +24,7 @@ #include -#include "utils/types.h" +#include "breeze/utils/types.h" struct MetalSpecialization { template diff --git a/velox/experimental/breeze/platforms/opencl.h b/velox/experimental/breeze/breeze/platforms/opencl.h similarity index 100% rename from velox/experimental/breeze/platforms/opencl.h rename to velox/experimental/breeze/breeze/platforms/opencl.h diff --git a/velox/experimental/breeze/platforms/openmp.h b/velox/experimental/breeze/breeze/platforms/openmp.h similarity index 99% rename from velox/experimental/breeze/platforms/openmp.h rename to velox/experimental/breeze/breeze/platforms/openmp.h index 3db86bc969669..8e319bcd188a8 100644 --- a/velox/experimental/breeze/platforms/openmp.h +++ b/velox/experimental/breeze/breeze/platforms/openmp.h @@ -235,7 +235,7 @@ struct OpenMPPlatform { inline unsigned lower_rank_lanemask() { static_assert(WARP_THREADS <= sizeof(unsigned) * 8, "WARP_THREADS must be less or equal to unsigned bits"); - return (1 << lane_idx()) - 1; + return (1u << lane_idx()) - 1; } inline unsigned higher_rank_lanemask() { static_assert(WARP_THREADS <= sizeof(unsigned) * 8, diff --git a/velox/experimental/breeze/platforms/platform.h b/velox/experimental/breeze/breeze/platforms/platform.h similarity index 100% rename from velox/experimental/breeze/platforms/platform.h rename to velox/experimental/breeze/breeze/platforms/platform.h diff --git a/velox/experimental/breeze/platforms/specialization/cuda-ptx.cuh b/velox/experimental/breeze/breeze/platforms/specialization/cuda-ptx.cuh similarity index 100% rename from velox/experimental/breeze/platforms/specialization/cuda-ptx.cuh rename to velox/experimental/breeze/breeze/platforms/specialization/cuda-ptx.cuh diff --git a/velox/experimental/breeze/platforms/sycl.hpp b/velox/experimental/breeze/breeze/platforms/sycl.hpp similarity index 99% rename from velox/experimental/breeze/platforms/sycl.hpp rename to velox/experimental/breeze/breeze/platforms/sycl.hpp index 7426d94645a8d..10abee7b95cc4 100644 --- a/velox/experimental/breeze/platforms/sycl.hpp +++ b/velox/experimental/breeze/breeze/platforms/sycl.hpp @@ -32,7 +32,7 @@ #include #pragma GCC diagnostic pop -#include "utils/types.h" +#include "breeze/utils/types.h" template struct SyCLPlatform { diff --git a/velox/experimental/breeze/utils/block_details.h b/velox/experimental/breeze/breeze/utils/block_details.h similarity index 98% rename from velox/experimental/breeze/utils/block_details.h rename to velox/experimental/breeze/breeze/utils/block_details.h index 796619e31bd99..6d96a1eb0be42 100644 --- a/velox/experimental/breeze/utils/block_details.h +++ b/velox/experimental/breeze/breeze/utils/block_details.h @@ -22,7 +22,7 @@ #pragma once -#include "platforms/platform.h" +#include "breeze/platforms/platform.h" namespace breeze { namespace utils { diff --git a/velox/experimental/breeze/utils/device_allocator-cuda.cuh b/velox/experimental/breeze/breeze/utils/device_allocator-cuda.cuh similarity index 98% rename from velox/experimental/breeze/utils/device_allocator-cuda.cuh rename to velox/experimental/breeze/breeze/utils/device_allocator-cuda.cuh index da7e2598e07f7..dad6b8ff4f076 100644 --- a/velox/experimental/breeze/utils/device_allocator-cuda.cuh +++ b/velox/experimental/breeze/breeze/utils/device_allocator-cuda.cuh @@ -28,7 +28,7 @@ #include #ifdef __EXCEPTIONS -#include "utils/types.h" +#include "breeze/utils/types.h" #else #include #endif diff --git a/velox/experimental/breeze/utils/device_allocator.h b/velox/experimental/breeze/breeze/utils/device_allocator.h similarity index 100% rename from velox/experimental/breeze/utils/device_allocator.h rename to velox/experimental/breeze/breeze/utils/device_allocator.h diff --git a/velox/experimental/breeze/utils/device_vector-cuda.cuh b/velox/experimental/breeze/breeze/utils/device_vector-cuda.cuh similarity index 98% rename from velox/experimental/breeze/utils/device_vector-cuda.cuh rename to velox/experimental/breeze/breeze/utils/device_vector-cuda.cuh index 559a70e564834..389487152a4d7 100644 --- a/velox/experimental/breeze/utils/device_vector-cuda.cuh +++ b/velox/experimental/breeze/breeze/utils/device_vector-cuda.cuh @@ -24,7 +24,7 @@ #include -#include "utils/device_allocator.h" +#include "breeze/utils/device_allocator.h" namespace breeze { namespace utils { diff --git a/velox/experimental/breeze/utils/device_vector.h b/velox/experimental/breeze/breeze/utils/device_vector.h similarity index 100% rename from velox/experimental/breeze/utils/device_vector.h rename to velox/experimental/breeze/breeze/utils/device_vector.h diff --git a/velox/experimental/breeze/utils/trace.h b/velox/experimental/breeze/breeze/utils/trace.h similarity index 100% rename from velox/experimental/breeze/utils/trace.h rename to velox/experimental/breeze/breeze/utils/trace.h diff --git a/velox/experimental/breeze/utils/types.h b/velox/experimental/breeze/breeze/utils/types.h similarity index 99% rename from velox/experimental/breeze/utils/types.h rename to velox/experimental/breeze/breeze/utils/types.h index 3a59aae739def..42050f091c528 100644 --- a/velox/experimental/breeze/utils/types.h +++ b/velox/experimental/breeze/breeze/utils/types.h @@ -22,7 +22,7 @@ #pragma once -#include "platforms/platform.h" +#include "breeze/platforms/platform.h" #ifdef __EXCEPTIONS #include @@ -166,7 +166,7 @@ class BadDeviceAlloc : public std::exception { ",free=" + std::to_string(free) + ",total=" + std::to_string(total) + ")") {} - virtual const char *what() const throw() { return message_.c_str(); } + virtual const char *what() const noexcept { return message_.c_str(); } private: std::string message_; diff --git a/velox/experimental/breeze/test/CMakeLists.txt b/velox/experimental/breeze/test/CMakeLists.txt index 5ea75b331c1f0..99c67a79585b7 100644 --- a/velox/experimental/breeze/test/CMakeLists.txt +++ b/velox/experimental/breeze/test/CMakeLists.txt @@ -16,8 +16,6 @@ # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 -include_directories(${CMAKE_SOURCE_DIR}/test ${CMAKE_SOURCE_DIR}) - cxx_library(test_main "${cxx_strict}" test_main.cpp) target_link_libraries( test_main diff --git a/velox/experimental/breeze/test/algorithms/algorithm-kernels.template.h b/velox/experimental/breeze/test/algorithms/algorithm-kernels.template.h index 087e0ab1b6295..e2a22132e8196 100644 --- a/velox/experimental/breeze/test/algorithms/algorithm-kernels.template.h +++ b/velox/experimental/breeze/test/algorithms/algorithm-kernels.template.h @@ -37,11 +37,11 @@ #define PLATFORM(X) [[clang::annotate("PlatformName=" X)]] #define SHARED_MEM(T, id) [[clang::annotate("SharedMem=" T ";" id)]] -#include "algorithms/reduce.h" -#include "algorithms/scan.h" -#include "algorithms/sort.h" -#include "platforms/platform.h" -#include "utils/types.h" +#include "breeze/algorithms/reduce.h" +#include "breeze/algorithms/scan.h" +#include "breeze/algorithms/sort.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/types.h" namespace kernels { diff --git a/velox/experimental/breeze/test/algorithms/algorithm_test.h b/velox/experimental/breeze/test/algorithms/algorithm_test.h index 00ba24c6fbec1..445143ed9caa1 100644 --- a/velox/experimental/breeze/test/algorithms/algorithm_test.h +++ b/velox/experimental/breeze/test/algorithms/algorithm_test.h @@ -21,17 +21,17 @@ */ #if defined(PLATFORM_CUDA) -#include "generated/algorithms/algorithm_test-cuda.cuh" +#include "test/generated/algorithms/algorithm_test-cuda.cuh" #elif defined(PLATFORM_HIP) -#include "generated/algorithms/algorithm_test-hip.hpp" +#include "test/generated/algorithms/algorithm_test-hip.hpp" #elif defined(PLATFORM_SYCL) -#include "generated/algorithms/algorithm_test-sycl.hpp" +#include "test/generated/algorithms/algorithm_test-sycl.hpp" #elif defined(PLATFORM_OPENCL) -#include "generated/algorithms/algorithm_test-opencl.h" +#include "test/generated/algorithms/algorithm_test-opencl.h" #elif defined(PLATFORM_OPENMP) -#include "generated/algorithms/algorithm_test-openmp.h" +#include "test/generated/algorithms/algorithm_test-openmp.h" #elif defined(PLATFORM_METAL) -#include "generated/algorithms/algorithm_test-metal.h" +#include "test/generated/algorithms/algorithm_test-metal.h" #else #error unsupported platform #endif diff --git a/velox/experimental/breeze/test/functions/function-kernels.template.h b/velox/experimental/breeze/test/functions/function-kernels.template.h index 41fd9b66c8b12..f264b57b11777 100644 --- a/velox/experimental/breeze/test/functions/function-kernels.template.h +++ b/velox/experimental/breeze/test/functions/function-kernels.template.h @@ -37,12 +37,12 @@ #define PLATFORM(X) [[clang::annotate("PlatformName=" X)]] #define SHARED_MEM(T, id) [[clang::annotate("SharedMem=" T ";" id)]] -#include "functions/load.h" -#include "functions/reduce.h" -#include "functions/scan.h" -#include "functions/sort.h" -#include "functions/store.h" -#include "platforms/platform.h" +#include "breeze/functions/load.h" +#include "breeze/functions/reduce.h" +#include "breeze/functions/scan.h" +#include "breeze/functions/sort.h" +#include "breeze/functions/store.h" +#include "breeze/platforms/platform.h" namespace kernels { diff --git a/velox/experimental/breeze/test/functions/function_test.h b/velox/experimental/breeze/test/functions/function_test.h index d57618958294d..a8ab0b4de4047 100644 --- a/velox/experimental/breeze/test/functions/function_test.h +++ b/velox/experimental/breeze/test/functions/function_test.h @@ -21,17 +21,17 @@ */ #if defined(PLATFORM_CUDA) -#include "generated/functions/function_test-cuda.cuh" +#include "test/generated/functions/function_test-cuda.cuh" #elif defined(PLATFORM_HIP) -#include "generated/functions/function_test-hip.hpp" +#include "test/generated/functions/function_test-hip.hpp" #elif defined(PLATFORM_SYCL) -#include "generated/functions/function_test-sycl.hpp" +#include "test/generated/functions/function_test-sycl.hpp" #elif defined(PLATFORM_OPENCL) -#include "generated/functions/function_test-opencl.h" +#include "test/generated/functions/function_test-opencl.h" #elif defined(PLATFORM_OPENMP) -#include "generated/functions/function_test-openmp.h" +#include "test/generated/functions/function_test-openmp.h" #elif defined(PLATFORM_METAL) -#include "generated/functions/function_test-metal.h" +#include "test/generated/functions/function_test-metal.h" #else #error unsupported platform #endif diff --git a/velox/experimental/breeze/test/generate.sh b/velox/experimental/breeze/test/generate.sh new file mode 100755 index 0000000000000..fc41e4d5edade --- /dev/null +++ b/velox/experimental/breeze/test/generate.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -efx -o pipefail +SCRIPTDIR=$(dirname "${BASH_SOURCE[0]}") +cd "$SCRIPTDIR" + +function generate { + BACKEND=$1 + TYPE=$2 + EXT=$3 + DIR="$TYPE"s + mkdir -p generated/"$DIR" + ./kernel_generator.py --backend="$BACKEND" --template="$DIR"/"$TYPE"-kernels.template.h --out=generated/"$DIR"/kernels-"$BACKEND"."$EXT" + ./test_fixture_generator.py --backend="$BACKEND" --template="$DIR"/"$TYPE"_test.template.h --out=generated/"$DIR"/"$TYPE"_test-"$BACKEND"."$EXT" +} + +generate openmp "algorithm" h +generate openmp "function" h diff --git a/velox/experimental/breeze/test/generated/algorithms/algorithm_test-openmp.h b/velox/experimental/breeze/test/generated/algorithms/algorithm_test-openmp.h new file mode 100644 index 0000000000000..cc49085ed80b5 --- /dev/null +++ b/velox/experimental/breeze/test/generated/algorithms/algorithm_test-openmp.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) 2024 by Rivos Inc. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include +#include + +#include "breeze/platforms/openmp.h" +#include "test/generated/algorithms/kernels-openmp.h" +#include "test/platforms/openmp_test.h" + +template +class AlgorithmTest : public ::testing::Test { + protected: + template + void Reduce(const std::vector& in, U* out, int num_blocks) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::algorithms::DeviceReduce::Scratch; + OpenMPTestLaunch( + num_blocks, + &kernels::Reduce, + in.data(), out, in.size()); + } + + template + void Scan(const std::vector& in, std::vector& out, int* next_blocks_idx, + std::vector& blocks, int num_blocks) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::algorithms::DeviceScan::Scratch; + OpenMPTestLaunch( + num_blocks, + &kernels::Scan, + in.data(), out.data(), next_blocks_idx, blocks.data(), in.size()); + } + + template + void RadixSortHistogram(const std::vector& in, std::vector& out, + int num_blocks) { + using SharedMemType = + typename breeze::algorithms::DeviceRadixSortHistogram::Scratch; + OpenMPTestLaunch( + num_blocks, + &kernels::RadixSortHistogram, + in.data(), out.data(), in.size()); + } + + template + void RadixSort(const std::vector& in, + const std::vector& in_offsets, int start_bit, + int num_pass_bits, std::vector& out, + std::vector& next_block_idx, + std::vector& blocks, int num_blocks) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = typename breeze::algorithms::DeviceRadixSort< + PlatformT, ITEMS_PER_THREAD, RADIX_BITS, T>::Scratch; + OpenMPTestLaunch( + num_blocks, + &kernels::RadixSort, + in.data(), in_offsets.data(), &start_bit, &num_pass_bits, out.data(), + next_block_idx.data(), blocks.data(), in.size()); + } +}; diff --git a/velox/experimental/breeze/test/generated/algorithms/kernels-openmp.h b/velox/experimental/breeze/test/generated/algorithms/kernels-openmp.h new file mode 100644 index 0000000000000..db8b0c0ccdeb3 --- /dev/null +++ b/velox/experimental/breeze/test/generated/algorithms/kernels-openmp.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) 2024 by Rivos Inc. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "breeze/algorithms/reduce.h" +#include "breeze/algorithms/scan.h" +#include "breeze/algorithms/sort.h" +#include "breeze/platforms/openmp.h" +#include "breeze/platforms/platform.h" +#include "breeze/utils/types.h" + +namespace kernels { + +template > +void Reduce(PlatformT p, SharedMemType* scratch, const T* in, U* out, + int num_items) { + breeze::algorithms::DeviceReduce::template Reduce< + Op, ITEMS_PER_THREAD>( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(out), + breeze::utils::make_slice(scratch), num_items); +} + +template > +void Scan(PlatformT p, SharedMemType* scratch, const T* in, U* out, + int* next_block_idx, V* blocks, int num_items) { + breeze::algorithms::DeviceScan:: + template Scan( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(out), + breeze::utils::make_slice(next_block_idx), + breeze::utils::make_slice(blocks), + breeze::utils::make_slice(scratch), num_items); +} + +template > +void RadixSortHistogram(PlatformT p, SharedMemType* scratch, const T* in, + unsigned* out, int num_items) { + breeze::algorithms::DeviceRadixSortHistogram::template Build< + ITEMS_PER_THREAD, TILE_SIZE>( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(out), + breeze::utils::make_slice(scratch), num_items); +} + +template > +void RadixSort(PlatformT p, SharedMemType* scratch, const T* in, + const unsigned* in_offsets, const int* start_bit, + const int* num_pass_bits, T* out, int* next_block_idx, + unsigned* blocks, int num_items) { + breeze::algorithms::DeviceRadixSort:: + template Sort( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(in_offsets), + *start_bit, *num_pass_bits, + breeze::utils::make_slice(out), + breeze::utils::make_slice(next_block_idx), + breeze::utils::make_slice(blocks), + breeze::utils::make_slice(scratch), num_items); +} + +} // namespace kernels diff --git a/velox/experimental/breeze/test/generated/functions/function_test-openmp.h b/velox/experimental/breeze/test/generated/functions/function_test-openmp.h new file mode 100644 index 0000000000000..825616f41541e --- /dev/null +++ b/velox/experimental/breeze/test/generated/functions/function_test-openmp.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) 2024 by Rivos Inc. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include +#include + +#include "breeze/platforms/openmp.h" +#include "test/generated/functions/kernels-openmp.h" +#include "test/platforms/openmp_test.h" + +template +class FunctionTest : public ::testing::Test { + protected: + template + void BlockLoad(const std::vector& in, std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockLoad, in.data(), + out.data(), in.size()); + } + + template + void BlockLoadIf(const std::vector& in, + const std::vector& selection_flags, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockLoadIf, in.data(), + selection_flags.data(), out.data(), in.size()); + } + + template + void BlockLoadFrom(const std::vector& in, const std::vector& offsets, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockLoadFrom, in.data(), + offsets.data(), out.data(), in.size()); + } + + template + void BlockStore(const std::vector& in, std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockStore, in.data(), + out.data(), out.size()); + } + + template + void BlockStoreIf(const std::vector& in, + const std::vector& selection_flags, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockStoreIf, in.data(), + selection_flags.data(), out.data(), out.size()); + } + + template + void BlockStoreAt(const std::vector& in, const std::vector& offsets, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockStoreAt, in.data(), + offsets.data(), out.data(), out.size()); + } + + template + void BlockStoreAtIf(const std::vector& in, const std::vector& offsets, + const std::vector& selection_flags, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockStoreAtIf, in.data(), + offsets.data(), selection_flags.data(), out.data(), out.size()); + } + + template + void BlockFill(T value, std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockFill, &value, + out.data(), out.size()); + } + + template + void BlockFillAtIf(T value, const std::vector& offsets, + const std::vector& selection_flags, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockFillAtIf, &value, + offsets.data(), selection_flags.data(), out.data(), out.size()); + } + + template + void BlockReduce(const std::vector& in, U* out) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::functions::BlockReduce::Scratch; + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockReduce, + in.data(), out, in.size()); + } + + template + void BlockScan(const std::vector& in, std::vector& out) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::functions::BlockScan::Scratch; + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockScan, + in.data(), out.data(), in.size()); + } + + template + void BlockRadixRank(const std::vector& in, std::vector& out) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::functions::BlockRadixRank::Scratch; + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockRadixRank, + in.data(), out.data(), in.size()); + } + + template + void BlockRadixSort(const std::vector& in, std::vector& out) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::functions::BlockRadixSort::Scratch; + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockRadixSort, + in.data(), out.data(), in.size()); + } +}; diff --git a/velox/experimental/breeze/test/generated/functions/kernels-openmp.h b/velox/experimental/breeze/test/generated/functions/kernels-openmp.h new file mode 100644 index 0000000000000..4274f986d5d1f --- /dev/null +++ b/velox/experimental/breeze/test/generated/functions/kernels-openmp.h @@ -0,0 +1,243 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) 2024 by Rivos Inc. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "breeze/functions/load.h" +#include "breeze/functions/reduce.h" +#include "breeze/functions/scan.h" +#include "breeze/functions/sort.h" +#include "breeze/functions/store.h" +#include "breeze/platforms/openmp.h" +#include "breeze/platforms/platform.h" + +namespace kernels { + +template > +void BlockLoad(PlatformT p, const T* in, T* out, int num_items) { + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), num_items); + breeze::functions::BlockStore( + p, breeze::utils::make_slice(items), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockLoadIf(PlatformT p, const T* in, const int* in_selection_flags, + T* out, int num_items) { + int selection_flags[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in_selection_flags), + breeze::utils::make_slice(selection_flags), num_items); + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoadIf( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(selection_flags), + breeze::utils::make_slice(items), num_items); + breeze::functions::BlockStoreIf( + p, breeze::utils::make_slice(items), + breeze::utils::make_slice(selection_flags), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockLoadFrom(PlatformT p, const T* in, const int* in_offsets, T* out, + int num_items) { + int offsets[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in_offsets), + breeze::utils::make_slice(offsets), num_items); + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoadFrom( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(offsets), breeze::utils::make_slice(items), + num_items); + breeze::functions::BlockStore( + p, breeze::utils::make_slice(items), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockStore(PlatformT p, const T* in, T* out, int num_items) { + breeze::functions::BlockStore( + p, + breeze::utils::make_slice( + in), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockStoreIf(PlatformT p, const T* in, const int* selection_flags, T* out, + int num_items) { + breeze::functions::BlockStoreIf( + p, + breeze::utils::make_slice( + in), + breeze::utils::make_slice( + selection_flags), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockStoreAt(PlatformT p, const T* in, const int* offsets, T* out, + int num_items) { + breeze::functions::BlockStoreAt( + p, + breeze::utils::make_slice( + in), + breeze::utils::make_slice( + offsets), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockStoreAtIf(PlatformT p, const T* in, const int* offsets, + const int* selection_flags, T* out, int num_items) { + breeze::functions::BlockStoreAtIf( + p, + breeze::utils::make_slice( + in), + breeze::utils::make_slice( + offsets), + breeze::utils::make_slice( + selection_flags), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockFill(PlatformT p, const T* value, T* out, int num_items) { + breeze::functions::BlockFill( + p, *value, breeze::utils::make_slice(out), + num_items); +} + +template > +void BlockFillAtIf(PlatformT p, const T* value, const int* offsets, + const int* selection_flags, T* out, int num_items) { + breeze::functions::BlockFillAtIf( + p, *value, + breeze::utils::make_slice( + offsets), + breeze::utils::make_slice( + selection_flags), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockReduce(PlatformT p, SharedMemType* scratch, const T* in, U* out, + int num_items) { + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), num_items); + U aggregate = breeze::functions::BlockReduce::template Reduce< + Op, ITEMS_PER_THREAD>( + p, breeze::utils::make_slice(items), + breeze::utils::make_slice(scratch), num_items); + p.syncthreads(); + if (p.thread_idx() == 0) { + *out = aggregate; + } +} + +template > +void BlockScan(PlatformT p, SharedMemType* scratch, const T* in, U* out, + int num_items) { + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), num_items); + U sums[ITEMS_PER_THREAD]; + breeze::functions::BlockScan::template Scan< + Op>(p, breeze::utils::make_slice(items), breeze::utils::make_slice(sums), + breeze::utils::make_slice(scratch), num_items); + breeze::functions::BlockStore( + p, breeze::utils::make_slice(sums), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockRadixRank(PlatformT p, SharedMemType* scratch, const T* in, int* out, + int num_items) { + T items[ITEMS_PER_THREAD]; + // initialize invalid items to max value + for (int i = 0; i < ITEMS_PER_THREAD; ++i) { + items[i] = static_cast((1 << RADIX_BITS) - 1); + } + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), + num_items); + int ranks[ITEMS_PER_THREAD]; + breeze::functions::BlockRadixRank:: + Rank(p, + breeze::utils::make_slice(items), + breeze::utils::make_slice(ranks), + breeze::utils::make_slice(scratch)); + breeze::functions::BlockStore( + p, + breeze::utils::make_slice(ranks), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockRadixSort(PlatformT p, SharedMemType* scratch, const T* in, T* out, + int num_items) { + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), + num_items); + breeze::functions:: + BlockRadixSort::Sort( + p, + breeze::utils::make_slice(items), + breeze::utils::make_slice(scratch), num_items); + breeze::functions::BlockStore( + p, + breeze::utils::make_slice(items), + breeze::utils::make_slice(out), num_items); +} + +} // namespace kernels diff --git a/velox/experimental/breeze/test/generator_common.py b/velox/experimental/breeze/test/generator_common.py index d56eba64cd8eb..f8f7af842d985 100644 --- a/velox/experimental/breeze/test/generator_common.py +++ b/velox/experimental/breeze/test/generator_common.py @@ -22,7 +22,23 @@ import re from datetime import datetime -COPYRIGHT = f"""// Copyright (c) {datetime.now().year} by Rivos Inc. +COPYRIGHT = f"""/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) {datetime.now().year} by Rivos Inc. // Licensed under the Apache License, Version 2.0, see LICENSE for details. // SPDX-License-Identifier: Apache-2.0 """ diff --git a/velox/experimental/breeze/test/kernel_generator.py b/velox/experimental/breeze/test/kernel_generator.py index 46f27d43b1dfc..2b4ddccf857e4 100755 --- a/velox/experimental/breeze/test/kernel_generator.py +++ b/velox/experimental/breeze/test/kernel_generator.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# @nolint # Copyright (c) Facebook, Inc. and its affiliates. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -154,7 +155,7 @@ def generate(self, tu, filename): class OpenclBackend(KernelCodeGen): @property def includes(self): - return '#include "platforms/opencl.h"\n' + return '#include "breeze/platforms/opencl.h"\n' @property def platform_type(self): @@ -213,7 +214,7 @@ def get_args(self, fn_props): class MetalBackend(KernelCodeGen): @property def includes(self): - return '#include \n#include "platforms/metal.h"\n' + return '#include \n#include "breeze/platforms/metal.h"\n' @property def platform_type(self): @@ -277,7 +278,7 @@ def get_args(self, fn_props): class OpenmpBackend(KernelCodeGen): @property def includes(self): - return '#include "platforms/openmp.h"\n' + return '#include "breeze/platforms/openmp.h"\n' @property def platform_type(self): @@ -312,7 +313,7 @@ def get_args_prefix(self, attrs): class SyclBackend(KernelCodeGen): @property def includes(self): - return '#include "platforms/sycl.hpp"\n' + return '#include "breeze/platforms/sycl.hpp"\n' @property def platform_type(self): @@ -361,7 +362,7 @@ def write_func_prologue(self, out, attrs): class CudaBackend(KernelCodeGen): @property def includes(self): - return '#include "platforms/cuda.cuh"\n' + return '#include "breeze/platforms/cuda.cuh"\n' @property def platform_type(self): @@ -390,7 +391,7 @@ def write_func_prologue(self, out, attrs): class HipBackend(CudaBackend): @property def includes(self): - return '#include "platforms/hip.hpp"\n' + return '#include "breeze/platforms/hip.hpp"\n' @property def platform_type(self): diff --git a/velox/experimental/breeze/test/test_fixture_generator.py b/velox/experimental/breeze/test/test_fixture_generator.py index a5b0302e600f7..9865245e4ac0a 100755 --- a/velox/experimental/breeze/test/test_fixture_generator.py +++ b/velox/experimental/breeze/test/test_fixture_generator.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# @nolint # Copyright (c) Facebook, Inc. and its affiliates. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -153,8 +154,8 @@ def includes(self, fixture_name): test_type = fixture_name.replace("Test", "").lower() return f""" #include - #include "generated/{test_type}s/kernels-hip.hpp" - #include "platforms/hip_test.hpp" + #include "test/generated/{test_type}s/kernels-hip.hpp" + #include "test/platforms/hip_test.hpp" """ def needs_shared_mem(self): @@ -265,9 +266,9 @@ def __init__(self): def includes(self, fixture_name): test_type = fixture_name.replace("Test", "").lower() return f""" - #include "generated/{test_type}s/kernels.cuh" - #include "platforms/cuda.cuh" - #include "platforms/cuda_test.cuh" + #include "test/generated/{test_type}s/kernels.cuh" + #include "breeze/platforms/cuda.cuh" + #include "test/platforms/cuda_test.cuh" """ @@ -282,9 +283,9 @@ def includes(self, fixture_name): #include #include - #include "generated/{test_type}s/kernels-openmp.h" - #include "platforms/openmp.h" - #include "platforms/openmp_test.h" + #include "test/generated/{test_type}s/kernels-openmp.h" + #include "breeze/platforms/openmp.h" + #include "test/platforms/openmp_test.h" """ def needs_shared_mem(self): @@ -317,8 +318,8 @@ def includes(self, fixture_name): #include #pragma GCC diagnostic pop - #include "generated/{test_type}s/kernels-sycl.hpp" - #include "platforms/sycl_test.hpp" + #include "test/generated/{test_type}s/kernels-sycl.hpp" + #include "test/platforms/sycl_test.hpp" using kernels::WARP_THREADS; """ @@ -423,8 +424,8 @@ def includes(self, fixture_name): return f""" #include - #include "platforms/opencl_test.h" - #include "type_helpers.h" + #include "test/platforms/opencl_test.h" + #include "test/type_helpers.h" """ def class_preamble(self, fixture_name): @@ -534,8 +535,8 @@ def includes(self, fixture_name): return f""" #include - #include "platforms/metal_test.h" - #include "type_helpers.h" + #include "test/platforms/metal_test.h" + #include "test/type_helpers.h" """ def class_preamble(self, fixture_name): diff --git a/velox/experimental/breeze/test/test_main.cpp b/velox/experimental/breeze/test/test_main.cpp index e73d62dc04c72..437c121a68e96 100644 --- a/velox/experimental/breeze/test/test_main.cpp +++ b/velox/experimental/breeze/test/test_main.cpp @@ -24,7 +24,7 @@ #include -#include "utils/trace.h" +#include "breeze/utils/trace.h" TRACE_TRACK_EVENT_STATIC_STORAGE(); diff --git a/velox/experimental/wave/common/tests/BreezeCudaTest.cu b/velox/experimental/wave/common/tests/BreezeCudaTest.cu new file mode 100644 index 0000000000000..9b7f0cca56d70 --- /dev/null +++ b/velox/experimental/wave/common/tests/BreezeCudaTest.cu @@ -0,0 +1,70 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define PLATFORM_CUDA + +// clang-format off +#define CUDA_PLATFORM_SPECIALIZATION_HEADER \ + breeze/platforms/specialization/cuda-ptx.cuh +// clang-format on + +#include +#include +#include +#include + +#include + +namespace breeze { +namespace { + +using namespace functions; +using namespace utils; + +constexpr int kBlockThreads = 256; +constexpr int kItemsPerThread = 8; +constexpr int kBlockItems = kBlockThreads * kItemsPerThread; +constexpr int kNumItems = 250'000; +constexpr int kNumBlocks = (kNumItems + kBlockItems - 1) / kBlockItems; + +__global__ __launch_bounds__(kBlockThreads) void reduceKernel(int* out) { + CudaPlatform p; + using BlockReduceT = BlockReduce; + __shared__ typename BlockReduceT::Scratch scratch; + int items[kItemsPerThread]; + for (int i = 0; i < kItemsPerThread; ++i) { + items[i] = 1; + } + int aggregate = BlockReduceT::template Reduce( + p, + make_slice(items), + make_slice(&scratch).template reinterpret()); + if (p.thread_idx() == 0) { + out[p.block_idx()] = aggregate; + } +} + +TEST(BreezeCudaTest, reduce) { + device_vector result(kNumBlocks); + reduceKernel<<>>(result.data()); + std::vector actual(kNumBlocks); + result.copy_to_host(actual.data(), actual.size()); + std::vector expected(kNumBlocks, kBlockThreads * kItemsPerThread); + ASSERT_EQ(actual, expected); +} + +} // namespace +} // namespace breeze diff --git a/velox/experimental/wave/common/tests/CMakeLists.txt b/velox/experimental/wave/common/tests/CMakeLists.txt index 22507b09db9fd..2d797fb893149 100644 --- a/velox/experimental/wave/common/tests/CMakeLists.txt +++ b/velox/experimental/wave/common/tests/CMakeLists.txt @@ -17,6 +17,7 @@ add_executable( GpuArenaTest.cpp CudaTest.cpp CudaTest.cu + BreezeCudaTest.cu CompileTest.cu BlockTest.cpp BlockTest.cu @@ -26,6 +27,7 @@ add_executable( add_test(velox_wave_common_test velox_wave_common_test) set_tests_properties(velox_wave_common_test PROPERTIES LABELS cuda_driver) +target_include_directories(velox_wave_common_test PRIVATE ../../../breeze) target_link_libraries( velox_wave_common_test velox_wave_common