diff --git a/velox/experimental/breeze/platforms/openmp.h b/velox/experimental/breeze/platforms/openmp.h index 3db86bc969669..8e319bcd188a8 100644 --- a/velox/experimental/breeze/platforms/openmp.h +++ b/velox/experimental/breeze/platforms/openmp.h @@ -235,7 +235,7 @@ struct OpenMPPlatform { inline unsigned lower_rank_lanemask() { static_assert(WARP_THREADS <= sizeof(unsigned) * 8, "WARP_THREADS must be less or equal to unsigned bits"); - return (1 << lane_idx()) - 1; + return (1u << lane_idx()) - 1; } inline unsigned higher_rank_lanemask() { static_assert(WARP_THREADS <= sizeof(unsigned) * 8, diff --git a/velox/experimental/breeze/test/algorithms/algorithm_test.h b/velox/experimental/breeze/test/algorithms/algorithm_test.h index 00ba24c6fbec1..445143ed9caa1 100644 --- a/velox/experimental/breeze/test/algorithms/algorithm_test.h +++ b/velox/experimental/breeze/test/algorithms/algorithm_test.h @@ -21,17 +21,17 @@ */ #if defined(PLATFORM_CUDA) -#include "generated/algorithms/algorithm_test-cuda.cuh" +#include "test/generated/algorithms/algorithm_test-cuda.cuh" #elif defined(PLATFORM_HIP) -#include "generated/algorithms/algorithm_test-hip.hpp" +#include "test/generated/algorithms/algorithm_test-hip.hpp" #elif defined(PLATFORM_SYCL) -#include "generated/algorithms/algorithm_test-sycl.hpp" +#include "test/generated/algorithms/algorithm_test-sycl.hpp" #elif defined(PLATFORM_OPENCL) -#include "generated/algorithms/algorithm_test-opencl.h" +#include "test/generated/algorithms/algorithm_test-opencl.h" #elif defined(PLATFORM_OPENMP) -#include "generated/algorithms/algorithm_test-openmp.h" +#include "test/generated/algorithms/algorithm_test-openmp.h" #elif defined(PLATFORM_METAL) -#include "generated/algorithms/algorithm_test-metal.h" +#include "test/generated/algorithms/algorithm_test-metal.h" #else #error unsupported platform #endif diff --git a/velox/experimental/breeze/test/functions/function_test.h b/velox/experimental/breeze/test/functions/function_test.h index d57618958294d..a8ab0b4de4047 100644 --- a/velox/experimental/breeze/test/functions/function_test.h +++ b/velox/experimental/breeze/test/functions/function_test.h @@ -21,17 +21,17 @@ */ #if defined(PLATFORM_CUDA) -#include "generated/functions/function_test-cuda.cuh" +#include "test/generated/functions/function_test-cuda.cuh" #elif defined(PLATFORM_HIP) -#include "generated/functions/function_test-hip.hpp" +#include "test/generated/functions/function_test-hip.hpp" #elif defined(PLATFORM_SYCL) -#include "generated/functions/function_test-sycl.hpp" +#include "test/generated/functions/function_test-sycl.hpp" #elif defined(PLATFORM_OPENCL) -#include "generated/functions/function_test-opencl.h" +#include "test/generated/functions/function_test-opencl.h" #elif defined(PLATFORM_OPENMP) -#include "generated/functions/function_test-openmp.h" +#include "test/generated/functions/function_test-openmp.h" #elif defined(PLATFORM_METAL) -#include "generated/functions/function_test-metal.h" +#include "test/generated/functions/function_test-metal.h" #else #error unsupported platform #endif diff --git a/velox/experimental/breeze/test/generate.sh b/velox/experimental/breeze/test/generate.sh new file mode 100755 index 0000000000000..fc41e4d5edade --- /dev/null +++ b/velox/experimental/breeze/test/generate.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -efx -o pipefail +SCRIPTDIR=$(dirname "${BASH_SOURCE[0]}") +cd "$SCRIPTDIR" + +function generate { + BACKEND=$1 + TYPE=$2 + EXT=$3 + DIR="$TYPE"s + mkdir -p generated/"$DIR" + ./kernel_generator.py --backend="$BACKEND" --template="$DIR"/"$TYPE"-kernels.template.h --out=generated/"$DIR"/kernels-"$BACKEND"."$EXT" + ./test_fixture_generator.py --backend="$BACKEND" --template="$DIR"/"$TYPE"_test.template.h --out=generated/"$DIR"/"$TYPE"_test-"$BACKEND"."$EXT" +} + +generate openmp "algorithm" h +generate openmp "function" h diff --git a/velox/experimental/breeze/test/generated/algorithms/algorithm_test-openmp.h b/velox/experimental/breeze/test/generated/algorithms/algorithm_test-openmp.h new file mode 100644 index 0000000000000..a8c7de29c073b --- /dev/null +++ b/velox/experimental/breeze/test/generated/algorithms/algorithm_test-openmp.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) 2024 by Rivos Inc. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include +#include + +#include "platforms/openmp.h" +#include "test/generated/algorithms/kernels-openmp.h" +#include "test/platforms/openmp_test.h" + +template +class AlgorithmTest : public ::testing::Test { + protected: + template + void Reduce(const std::vector& in, U* out, int num_blocks) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::algorithms::DeviceReduce::Scratch; + OpenMPTestLaunch( + num_blocks, + &kernels::Reduce, + in.data(), out, in.size()); + } + + template + void Scan(const std::vector& in, std::vector& out, int* next_blocks_idx, + std::vector& blocks, int num_blocks) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::algorithms::DeviceScan::Scratch; + OpenMPTestLaunch( + num_blocks, + &kernels::Scan, + in.data(), out.data(), next_blocks_idx, blocks.data(), in.size()); + } + + template + void RadixSortHistogram(const std::vector& in, std::vector& out, + int num_blocks) { + using SharedMemType = + typename breeze::algorithms::DeviceRadixSortHistogram::Scratch; + OpenMPTestLaunch( + num_blocks, + &kernels::RadixSortHistogram, + in.data(), out.data(), in.size()); + } + + template + void RadixSort(const std::vector& in, + const std::vector& in_offsets, int start_bit, + int num_pass_bits, std::vector& out, + std::vector& next_block_idx, + std::vector& blocks, int num_blocks) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = typename breeze::algorithms::DeviceRadixSort< + PlatformT, ITEMS_PER_THREAD, RADIX_BITS, T>::Scratch; + OpenMPTestLaunch( + num_blocks, + &kernels::RadixSort, + in.data(), in_offsets.data(), &start_bit, &num_pass_bits, out.data(), + next_block_idx.data(), blocks.data(), in.size()); + } +}; diff --git a/velox/experimental/breeze/test/generated/algorithms/kernels-openmp.h b/velox/experimental/breeze/test/generated/algorithms/kernels-openmp.h new file mode 100644 index 0000000000000..008bd56b063cc --- /dev/null +++ b/velox/experimental/breeze/test/generated/algorithms/kernels-openmp.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) 2024 by Rivos Inc. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "algorithms/reduce.h" +#include "algorithms/scan.h" +#include "algorithms/sort.h" +#include "platforms/openmp.h" +#include "platforms/platform.h" +#include "utils/types.h" + +namespace kernels { + +template > +void Reduce(PlatformT p, SharedMemType* scratch, const T* in, U* out, + int num_items) { + breeze::algorithms::DeviceReduce::template Reduce< + Op, ITEMS_PER_THREAD>( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(out), + breeze::utils::make_slice(scratch), num_items); +} + +template > +void Scan(PlatformT p, SharedMemType* scratch, const T* in, U* out, + int* next_block_idx, V* blocks, int num_items) { + breeze::algorithms::DeviceScan:: + template Scan( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(out), + breeze::utils::make_slice(next_block_idx), + breeze::utils::make_slice(blocks), + breeze::utils::make_slice(scratch), num_items); +} + +template > +void RadixSortHistogram(PlatformT p, SharedMemType* scratch, const T* in, + unsigned* out, int num_items) { + breeze::algorithms::DeviceRadixSortHistogram::template Build< + ITEMS_PER_THREAD, TILE_SIZE>( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(out), + breeze::utils::make_slice(scratch), num_items); +} + +template > +void RadixSort(PlatformT p, SharedMemType* scratch, const T* in, + const unsigned* in_offsets, const int* start_bit, + const int* num_pass_bits, T* out, int* next_block_idx, + unsigned* blocks, int num_items) { + breeze::algorithms::DeviceRadixSort:: + template Sort( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(in_offsets), + *start_bit, *num_pass_bits, + breeze::utils::make_slice(out), + breeze::utils::make_slice(next_block_idx), + breeze::utils::make_slice(blocks), + breeze::utils::make_slice(scratch), num_items); +} + +} // namespace kernels diff --git a/velox/experimental/breeze/test/generated/functions/function_test-openmp.h b/velox/experimental/breeze/test/generated/functions/function_test-openmp.h new file mode 100644 index 0000000000000..9b8c81c732176 --- /dev/null +++ b/velox/experimental/breeze/test/generated/functions/function_test-openmp.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) 2024 by Rivos Inc. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include +#include + +#include "platforms/openmp.h" +#include "test/generated/functions/kernels-openmp.h" +#include "test/platforms/openmp_test.h" + +template +class FunctionTest : public ::testing::Test { + protected: + template + void BlockLoad(const std::vector& in, std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockLoad, in.data(), + out.data(), in.size()); + } + + template + void BlockLoadIf(const std::vector& in, + const std::vector& selection_flags, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockLoadIf, in.data(), + selection_flags.data(), out.data(), in.size()); + } + + template + void BlockLoadFrom(const std::vector& in, const std::vector& offsets, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockLoadFrom, in.data(), + offsets.data(), out.data(), in.size()); + } + + template + void BlockStore(const std::vector& in, std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockStore, in.data(), + out.data(), out.size()); + } + + template + void BlockStoreIf(const std::vector& in, + const std::vector& selection_flags, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockStoreIf, in.data(), + selection_flags.data(), out.data(), out.size()); + } + + template + void BlockStoreAt(const std::vector& in, const std::vector& offsets, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockStoreAt, in.data(), + offsets.data(), out.data(), out.size()); + } + + template + void BlockStoreAtIf(const std::vector& in, const std::vector& offsets, + const std::vector& selection_flags, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockStoreAtIf, in.data(), + offsets.data(), selection_flags.data(), out.data(), out.size()); + } + + template + void BlockFill(T value, std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockFill, &value, + out.data(), out.size()); + } + + template + void BlockFillAtIf(T value, const std::vector& offsets, + const std::vector& selection_flags, + std::vector& out) { + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockFillAtIf, &value, + offsets.data(), selection_flags.data(), out.data(), out.size()); + } + + template + void BlockReduce(const std::vector& in, U* out) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::functions::BlockReduce::Scratch; + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockReduce, + in.data(), out, in.size()); + } + + template + void BlockScan(const std::vector& in, std::vector& out) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::functions::BlockScan::Scratch; + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockScan, + in.data(), out.data(), in.size()); + } + + template + void BlockRadixRank(const std::vector& in, std::vector& out) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::functions::BlockRadixRank::Scratch; + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockRadixRank, + in.data(), out.data(), in.size()); + } + + template + void BlockRadixSort(const std::vector& in, std::vector& out) { + using PlatformT = + OpenMPPlatform; + using SharedMemType = + typename breeze::functions::BlockRadixSort::Scratch; + OpenMPTestLaunch( + /*num_blocks=*/1, + &kernels::BlockRadixSort, + in.data(), out.data(), in.size()); + } +}; diff --git a/velox/experimental/breeze/test/generated/functions/kernels-openmp.h b/velox/experimental/breeze/test/generated/functions/kernels-openmp.h new file mode 100644 index 0000000000000..4bc9eef89fa2a --- /dev/null +++ b/velox/experimental/breeze/test/generated/functions/kernels-openmp.h @@ -0,0 +1,243 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) 2024 by Rivos Inc. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include "functions/load.h" +#include "functions/reduce.h" +#include "functions/scan.h" +#include "functions/sort.h" +#include "functions/store.h" +#include "platforms/openmp.h" +#include "platforms/platform.h" + +namespace kernels { + +template > +void BlockLoad(PlatformT p, const T* in, T* out, int num_items) { + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), num_items); + breeze::functions::BlockStore( + p, breeze::utils::make_slice(items), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockLoadIf(PlatformT p, const T* in, const int* in_selection_flags, + T* out, int num_items) { + int selection_flags[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in_selection_flags), + breeze::utils::make_slice(selection_flags), num_items); + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoadIf( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(selection_flags), + breeze::utils::make_slice(items), num_items); + breeze::functions::BlockStoreIf( + p, breeze::utils::make_slice(items), + breeze::utils::make_slice(selection_flags), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockLoadFrom(PlatformT p, const T* in, const int* in_offsets, T* out, + int num_items) { + int offsets[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in_offsets), + breeze::utils::make_slice(offsets), num_items); + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoadFrom( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(offsets), breeze::utils::make_slice(items), + num_items); + breeze::functions::BlockStore( + p, breeze::utils::make_slice(items), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockStore(PlatformT p, const T* in, T* out, int num_items) { + breeze::functions::BlockStore( + p, + breeze::utils::make_slice( + in), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockStoreIf(PlatformT p, const T* in, const int* selection_flags, T* out, + int num_items) { + breeze::functions::BlockStoreIf( + p, + breeze::utils::make_slice( + in), + breeze::utils::make_slice( + selection_flags), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockStoreAt(PlatformT p, const T* in, const int* offsets, T* out, + int num_items) { + breeze::functions::BlockStoreAt( + p, + breeze::utils::make_slice( + in), + breeze::utils::make_slice( + offsets), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockStoreAtIf(PlatformT p, const T* in, const int* offsets, + const int* selection_flags, T* out, int num_items) { + breeze::functions::BlockStoreAtIf( + p, + breeze::utils::make_slice( + in), + breeze::utils::make_slice( + offsets), + breeze::utils::make_slice( + selection_flags), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockFill(PlatformT p, const T* value, T* out, int num_items) { + breeze::functions::BlockFill( + p, *value, breeze::utils::make_slice(out), + num_items); +} + +template > +void BlockFillAtIf(PlatformT p, const T* value, const int* offsets, + const int* selection_flags, T* out, int num_items) { + breeze::functions::BlockFillAtIf( + p, *value, + breeze::utils::make_slice( + offsets), + breeze::utils::make_slice( + selection_flags), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockReduce(PlatformT p, SharedMemType* scratch, const T* in, U* out, + int num_items) { + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), num_items); + U aggregate = breeze::functions::BlockReduce::template Reduce< + Op, ITEMS_PER_THREAD>( + p, breeze::utils::make_slice(items), + breeze::utils::make_slice(scratch), num_items); + p.syncthreads(); + if (p.thread_idx() == 0) { + *out = aggregate; + } +} + +template > +void BlockScan(PlatformT p, SharedMemType* scratch, const T* in, U* out, + int num_items) { + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), num_items); + U sums[ITEMS_PER_THREAD]; + breeze::functions::BlockScan::template Scan< + Op>(p, breeze::utils::make_slice(items), breeze::utils::make_slice(sums), + breeze::utils::make_slice(scratch), num_items); + breeze::functions::BlockStore( + p, breeze::utils::make_slice(sums), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockRadixRank(PlatformT p, SharedMemType* scratch, const T* in, int* out, + int num_items) { + T items[ITEMS_PER_THREAD]; + // initialize invalid items to max value + for (int i = 0; i < ITEMS_PER_THREAD; ++i) { + items[i] = static_cast((1 << RADIX_BITS) - 1); + } + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), + num_items); + int ranks[ITEMS_PER_THREAD]; + breeze::functions::BlockRadixRank:: + Rank(p, + breeze::utils::make_slice(items), + breeze::utils::make_slice(ranks), + breeze::utils::make_slice(scratch)); + breeze::functions::BlockStore( + p, + breeze::utils::make_slice(ranks), + breeze::utils::make_slice(out), num_items); +} + +template > +void BlockRadixSort(PlatformT p, SharedMemType* scratch, const T* in, T* out, + int num_items) { + T items[ITEMS_PER_THREAD]; + breeze::functions::BlockLoad( + p, breeze::utils::make_slice(in), + breeze::utils::make_slice(items), + num_items); + breeze::functions:: + BlockRadixSort::Sort( + p, + breeze::utils::make_slice(items), + breeze::utils::make_slice(scratch), num_items); + breeze::functions::BlockStore( + p, + breeze::utils::make_slice(items), + breeze::utils::make_slice(out), num_items); +} + +} // namespace kernels diff --git a/velox/experimental/breeze/test/generator_common.py b/velox/experimental/breeze/test/generator_common.py index d56eba64cd8eb..f8f7af842d985 100644 --- a/velox/experimental/breeze/test/generator_common.py +++ b/velox/experimental/breeze/test/generator_common.py @@ -22,7 +22,23 @@ import re from datetime import datetime -COPYRIGHT = f"""// Copyright (c) {datetime.now().year} by Rivos Inc. +COPYRIGHT = f"""/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Copyright (c) {datetime.now().year} by Rivos Inc. // Licensed under the Apache License, Version 2.0, see LICENSE for details. // SPDX-License-Identifier: Apache-2.0 """ diff --git a/velox/experimental/breeze/test/test_fixture_generator.py b/velox/experimental/breeze/test/test_fixture_generator.py index a5b0302e600f7..c3f605d2d39a8 100755 --- a/velox/experimental/breeze/test/test_fixture_generator.py +++ b/velox/experimental/breeze/test/test_fixture_generator.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# @nolint # Copyright (c) Facebook, Inc. and its affiliates. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -153,8 +154,8 @@ def includes(self, fixture_name): test_type = fixture_name.replace("Test", "").lower() return f""" #include - #include "generated/{test_type}s/kernels-hip.hpp" - #include "platforms/hip_test.hpp" + #include "test/generated/{test_type}s/kernels-hip.hpp" + #include "test/platforms/hip_test.hpp" """ def needs_shared_mem(self): @@ -265,9 +266,9 @@ def __init__(self): def includes(self, fixture_name): test_type = fixture_name.replace("Test", "").lower() return f""" - #include "generated/{test_type}s/kernels.cuh" + #include "test/generated/{test_type}s/kernels.cuh" #include "platforms/cuda.cuh" - #include "platforms/cuda_test.cuh" + #include "test/platforms/cuda_test.cuh" """ @@ -282,9 +283,9 @@ def includes(self, fixture_name): #include #include - #include "generated/{test_type}s/kernels-openmp.h" + #include "test/generated/{test_type}s/kernels-openmp.h" #include "platforms/openmp.h" - #include "platforms/openmp_test.h" + #include "test/platforms/openmp_test.h" """ def needs_shared_mem(self): @@ -317,8 +318,8 @@ def includes(self, fixture_name): #include #pragma GCC diagnostic pop - #include "generated/{test_type}s/kernels-sycl.hpp" - #include "platforms/sycl_test.hpp" + #include "test/generated/{test_type}s/kernels-sycl.hpp" + #include "test/platforms/sycl_test.hpp" using kernels::WARP_THREADS; """ @@ -423,8 +424,8 @@ def includes(self, fixture_name): return f""" #include - #include "platforms/opencl_test.h" - #include "type_helpers.h" + #include "test/platforms/opencl_test.h" + #include "test/type_helpers.h" """ def class_preamble(self, fixture_name): diff --git a/velox/experimental/breeze/utils/types.h b/velox/experimental/breeze/utils/types.h index 3a59aae739def..e6cc5146127a4 100644 --- a/velox/experimental/breeze/utils/types.h +++ b/velox/experimental/breeze/utils/types.h @@ -166,7 +166,7 @@ class BadDeviceAlloc : public std::exception { ",free=" + std::to_string(free) + ",total=" + std::to_string(total) + ")") {} - virtual const char *what() const throw() { return message_.c_str(); } + virtual const char *what() const noexcept { return message_.c_str(); } private: std::string message_; diff --git a/velox/experimental/wave/common/tests/BreezeCudaTest.cu b/velox/experimental/wave/common/tests/BreezeCudaTest.cu new file mode 100644 index 0000000000000..fa5f8b55b5510 --- /dev/null +++ b/velox/experimental/wave/common/tests/BreezeCudaTest.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define PLATFORM_CUDA + +// clang-format off +#define CUDA_PLATFORM_SPECIALIZATION_HEADER \ + platforms/specialization/cuda-ptx.cuh +// clang-format on + +// FIXME: These includes should start with "breeze". +#include // @manual=//velox/experimental/breeze:breeze +#include // @manual=//velox/experimental/breeze:breeze +#include // @manual=//velox/experimental/breeze:breeze +#include // @manual=//velox/experimental/breeze:breeze + +#include + +namespace breeze { +namespace { + +using namespace functions; +using namespace utils; + +constexpr int kBlockThreads = 256; +constexpr int kItemsPerThread = 8; +constexpr int kBlockItems = kBlockThreads * kItemsPerThread; +constexpr int kNumItems = 250'000; +constexpr int kNumBlocks = (kNumItems + kBlockItems - 1) / kBlockItems; + +__global__ __launch_bounds__(kBlockThreads) void reduceKernel(int* out) { + CudaPlatform p; + using BlockReduceT = BlockReduce; + __shared__ typename BlockReduceT::Scratch scratch; + int items[kItemsPerThread]; + for (int i = 0; i < kItemsPerThread; ++i) { + items[i] = 1; + } + int aggregate = BlockReduceT::template Reduce( + p, + make_slice(items), + make_slice(&scratch).template reinterpret()); + if (p.thread_idx() == 0) { + out[p.block_idx()] = aggregate; + } +} + +TEST(BreezeCudaTest, reduce) { + device_vector result(kNumBlocks); + reduceKernel<<>>(result.data()); + std::vector actual(kNumBlocks); + result.copy_to_host(actual.data(), actual.size()); + std::vector expected(kNumBlocks, kBlockThreads * kItemsPerThread); + ASSERT_EQ(actual, expected); +} + +} // namespace +} // namespace breeze diff --git a/velox/experimental/wave/common/tests/CMakeLists.txt b/velox/experimental/wave/common/tests/CMakeLists.txt index 22507b09db9fd..2d797fb893149 100644 --- a/velox/experimental/wave/common/tests/CMakeLists.txt +++ b/velox/experimental/wave/common/tests/CMakeLists.txt @@ -17,6 +17,7 @@ add_executable( GpuArenaTest.cpp CudaTest.cpp CudaTest.cu + BreezeCudaTest.cu CompileTest.cu BlockTest.cpp BlockTest.cu @@ -26,6 +27,7 @@ add_executable( add_test(velox_wave_common_test velox_wave_common_test) set_tests_properties(velox_wave_common_test PROPERTIES LABELS cuda_driver) +target_include_directories(velox_wave_common_test PRIVATE ../../../breeze) target_link_libraries( velox_wave_common_test velox_wave_common