-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
build: check-in breeze test fixtures for cuda
- Loading branch information
David Reveman
committed
Nov 25, 2024
1 parent
011d3d5
commit 827f116
Showing
5 changed files
with
654 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 88 additions & 0 deletions
88
velox/experimental/breeze/test/generated/algorithms/algorithm_test-cuda.cuh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
/* | ||
* Copyright (c) 2024 by Rivos Inc. | ||
* Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
/* | ||
* This file is auto-generated from test_fixture_generator.py | ||
* DO NOT EDIT! | ||
*/ | ||
|
||
#include <gtest/gtest.h> | ||
|
||
#include <vector> | ||
|
||
#include "breeze/platforms/cuda.cuh" | ||
#include "test/generated/algorithms/kernels-cuda.cuh" | ||
#include "test/platforms/cuda_test.cuh" | ||
|
||
template <typename T> | ||
class AlgorithmTest : public ::testing::Test { | ||
protected: | ||
template <typename ReduceOp, int BLOCK_THREADS, int ITEMS_PER_THREAD, | ||
typename U> | ||
void Reduce(const std::vector<T>& in, U* out, int num_blocks) { | ||
std::vector<U> vec_out(1, *out); | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
num_blocks, | ||
&kernels::Reduce<ReduceOp, BLOCK_THREADS, ITEMS_PER_THREAD, T, U>, in, | ||
vec_out, in.size()); | ||
*out = vec_out[0]; | ||
} | ||
|
||
template <typename ScanOp, int BLOCK_THREADS, int ITEMS_PER_THREAD, | ||
int LOOKBACK_DISTANCE, typename U, typename V> | ||
void Scan(const std::vector<T>& in, std::vector<U>& out, int* next_blocks_idx, | ||
std::vector<V>& blocks, int num_blocks) { | ||
std::vector<int> vec_next_blocks_idx(1, *next_blocks_idx); | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
num_blocks, | ||
&kernels::Scan<ScanOp, BLOCK_THREADS, ITEMS_PER_THREAD, | ||
LOOKBACK_DISTANCE, T, U, V>, | ||
in, out, vec_next_blocks_idx, blocks, in.size()); | ||
*next_blocks_idx = vec_next_blocks_idx[0]; | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int TILE_SIZE, | ||
int RADIX_BITS> | ||
void RadixSortHistogram(const std::vector<T>& in, std::vector<unsigned>& out, | ||
int num_blocks) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
num_blocks, | ||
&kernels::RadixSortHistogram<BLOCK_THREADS, ITEMS_PER_THREAD, TILE_SIZE, | ||
RADIX_BITS, T>, | ||
in, out, in.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int RADIX_BITS> | ||
void RadixSort(const std::vector<T>& in, | ||
const std::vector<unsigned>& in_offsets, int start_bit, | ||
int num_pass_bits, std::vector<T>& out, | ||
std::vector<int>& next_block_idx, | ||
std::vector<unsigned>& blocks, int num_blocks) { | ||
const std::vector<int> vec_start_bit(1, start_bit); | ||
const std::vector<int> vec_num_pass_bits(1, num_pass_bits); | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
num_blocks, | ||
&kernels::RadixSort<BLOCK_THREADS, ITEMS_PER_THREAD, RADIX_BITS, T>, in, | ||
in_offsets, vec_start_bit, vec_num_pass_bits, out, next_block_idx, | ||
blocks, in.size()); | ||
} | ||
}; |
120 changes: 120 additions & 0 deletions
120
velox/experimental/breeze/test/generated/algorithms/kernels-cuda.cuh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
/* | ||
* Copyright (c) 2024 by Rivos Inc. | ||
* Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
/* | ||
* This file is auto-generated from kernel_generator.py | ||
* DO NOT EDIT! | ||
*/ | ||
|
||
#include "breeze/algorithms/reduce.h" | ||
#include "breeze/algorithms/scan.h" | ||
#include "breeze/algorithms/sort.h" | ||
#include "breeze/platforms/cuda.cuh" | ||
#include "breeze/platforms/platform.h" | ||
#include "breeze/utils/types.h" | ||
|
||
namespace kernels { | ||
|
||
enum { WARP_THREADS = 32 }; | ||
template <typename Op, int BLOCK_THREADS, int ITEMS_PER_THREAD, typename T, | ||
typename U> | ||
__global__ void Reduce(const T* in, U* out, int num_items) { | ||
using PlatformT = CudaPlatform<BLOCK_THREADS, WARP_THREADS>; | ||
PlatformT p; | ||
__shared__ | ||
typename breeze::algorithms::DeviceReduce<PlatformT, U>::Scratch scratch_; | ||
auto scratch = | ||
(typename breeze::algorithms::DeviceReduce<PlatformT, | ||
U>::Scratch*)&scratch_; | ||
|
||
breeze::algorithms::DeviceReduce<PlatformT, U>::template Reduce< | ||
Op, ITEMS_PER_THREAD>( | ||
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(out), | ||
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items); | ||
} | ||
|
||
template <typename Op, int BLOCK_THREADS, int ITEMS_PER_THREAD, | ||
int LOOKBACK_DISTANCE, typename T, typename U, typename V> | ||
__global__ void Scan(const T* in, U* out, int* next_block_idx, V* blocks, | ||
int num_items) { | ||
using PlatformT = CudaPlatform<BLOCK_THREADS, WARP_THREADS>; | ||
PlatformT p; | ||
__shared__ typename breeze::algorithms::DeviceScan< | ||
PlatformT, U, ITEMS_PER_THREAD, LOOKBACK_DISTANCE>::Scratch scratch_; | ||
auto scratch = (typename breeze::algorithms::DeviceScan< | ||
PlatformT, U, ITEMS_PER_THREAD, | ||
LOOKBACK_DISTANCE>::Scratch*)&scratch_; | ||
|
||
breeze::algorithms::DeviceScan<PlatformT, U, ITEMS_PER_THREAD, | ||
LOOKBACK_DISTANCE>:: | ||
template Scan<Op>( | ||
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(out), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(next_block_idx), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(blocks), | ||
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int TILE_SIZE, | ||
int RADIX_BITS, typename T> | ||
__global__ void RadixSortHistogram(const T* in, unsigned* out, int num_items) { | ||
using PlatformT = CudaPlatform<BLOCK_THREADS, WARP_THREADS>; | ||
PlatformT p; | ||
__shared__ typename breeze::algorithms::DeviceRadixSortHistogram< | ||
RADIX_BITS, T>::Scratch scratch_; | ||
auto scratch = (typename breeze::algorithms::DeviceRadixSortHistogram< | ||
RADIX_BITS, T>::Scratch*)&scratch_; | ||
|
||
breeze::algorithms::DeviceRadixSortHistogram<RADIX_BITS, T>::template Build< | ||
ITEMS_PER_THREAD, TILE_SIZE>( | ||
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(out), | ||
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int RADIX_BITS, typename T> | ||
__global__ void RadixSort(const T* in, const unsigned* in_offsets, | ||
const int* start_bit, const int* num_pass_bits, | ||
T* out, int* next_block_idx, unsigned* blocks, | ||
int num_items) { | ||
using PlatformT = CudaPlatform<BLOCK_THREADS, WARP_THREADS>; | ||
PlatformT p; | ||
__shared__ typename breeze::algorithms::DeviceRadixSort< | ||
PlatformT, ITEMS_PER_THREAD, RADIX_BITS, T>::Scratch scratch_; | ||
auto scratch = | ||
(typename breeze::algorithms::DeviceRadixSort< | ||
PlatformT, ITEMS_PER_THREAD, RADIX_BITS, T>::Scratch*)&scratch_; | ||
|
||
breeze::algorithms::DeviceRadixSort<PlatformT, ITEMS_PER_THREAD, RADIX_BITS, | ||
T>:: | ||
template Sort<unsigned>( | ||
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(in_offsets), | ||
*start_bit, *num_pass_bits, | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(out), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(next_block_idx), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(blocks), | ||
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items); | ||
} | ||
|
||
} // namespace kernels |
160 changes: 160 additions & 0 deletions
160
velox/experimental/breeze/test/generated/functions/function_test-cuda.cuh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
/* | ||
* Copyright (c) 2024 by Rivos Inc. | ||
* Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
/* | ||
* This file is auto-generated from test_fixture_generator.py | ||
* DO NOT EDIT! | ||
*/ | ||
|
||
#include <gtest/gtest.h> | ||
|
||
#include <vector> | ||
|
||
#include "breeze/platforms/cuda.cuh" | ||
#include "test/generated/functions/kernels-cuda.cuh" | ||
#include "test/platforms/cuda_test.cuh" | ||
|
||
template <typename T> | ||
class FunctionTest : public ::testing::Test { | ||
protected: | ||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD> | ||
void BlockLoad(const std::vector<T>& in, std::vector<T>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockLoad<BLOCK_THREADS, ITEMS_PER_THREAD, T>, in, out, | ||
in.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD> | ||
void BlockLoadIf(const std::vector<T>& in, | ||
const std::vector<int>& selection_flags, | ||
std::vector<T>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockLoadIf<BLOCK_THREADS, ITEMS_PER_THREAD, T>, in, | ||
selection_flags, out, in.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD> | ||
void BlockLoadFrom(const std::vector<T>& in, const std::vector<int>& offsets, | ||
std::vector<T>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockLoadFrom<BLOCK_THREADS, ITEMS_PER_THREAD, T>, in, | ||
offsets, out, in.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD> | ||
void BlockStore(const std::vector<T>& in, std::vector<T>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockStore<BLOCK_THREADS, ITEMS_PER_THREAD, T>, in, out, | ||
out.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD> | ||
void BlockStoreIf(const std::vector<T>& in, | ||
const std::vector<int>& selection_flags, | ||
std::vector<T>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockStoreIf<BLOCK_THREADS, ITEMS_PER_THREAD, T>, in, | ||
selection_flags, out, out.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD> | ||
void BlockStoreAt(const std::vector<T>& in, const std::vector<int>& offsets, | ||
std::vector<T>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockStoreAt<BLOCK_THREADS, ITEMS_PER_THREAD, T>, in, offsets, | ||
out, out.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD> | ||
void BlockStoreAtIf(const std::vector<T>& in, const std::vector<int>& offsets, | ||
const std::vector<int>& selection_flags, | ||
std::vector<T>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockStoreAtIf<BLOCK_THREADS, ITEMS_PER_THREAD, T>, in, | ||
offsets, selection_flags, out, out.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD> | ||
void BlockFill(T value, std::vector<T>& out) { | ||
const std::vector<T> vec_value(1, value); | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockFill<BLOCK_THREADS, ITEMS_PER_THREAD, T>, vec_value, out, | ||
out.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD> | ||
void BlockFillAtIf(T value, const std::vector<int>& offsets, | ||
const std::vector<int>& selection_flags, | ||
std::vector<T>& out) { | ||
const std::vector<T> vec_value(1, value); | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockFillAtIf<BLOCK_THREADS, ITEMS_PER_THREAD, T>, vec_value, | ||
offsets, selection_flags, out, out.size()); | ||
} | ||
|
||
template <typename ReduceOp, int BLOCK_THREADS, int ITEMS_PER_THREAD, | ||
typename U> | ||
void BlockReduce(const std::vector<T>& in, U* out) { | ||
std::vector<U> vec_out(1, *out); | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockReduce<ReduceOp, BLOCK_THREADS, ITEMS_PER_THREAD, T, U>, | ||
in, vec_out, in.size()); | ||
*out = vec_out[0]; | ||
} | ||
|
||
template <typename ScanOp, int BLOCK_THREADS, int ITEMS_PER_THREAD, | ||
typename U> | ||
void BlockScan(const std::vector<T>& in, std::vector<U>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockScan<ScanOp, BLOCK_THREADS, ITEMS_PER_THREAD, T, U>, in, | ||
out, in.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int RADIX_BITS> | ||
void BlockRadixRank(const std::vector<T>& in, std::vector<int>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockRadixRank<BLOCK_THREADS, ITEMS_PER_THREAD, RADIX_BITS, | ||
T>, | ||
in, out, in.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int RADIX_BITS> | ||
void BlockRadixSort(const std::vector<T>& in, std::vector<T>& out) { | ||
CudaTestLaunch<BLOCK_THREADS>( | ||
/*num_blocks=*/1, | ||
&kernels::BlockRadixSort<BLOCK_THREADS, ITEMS_PER_THREAD, RADIX_BITS, | ||
T>, | ||
in, out, in.size()); | ||
} | ||
}; |
Oops, something went wrong.