forked from facebookincubator/velox
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
11 changed files
with
663 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/bin/bash | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Shell options:
#   -e           abort as soon as a command fails
#   -f           disable pathname expansion (globbing)
#   -x           trace each command before running it
#   -o pipefail  a pipeline fails if any stage of it fails
set -e -f -x -o pipefail

# Run from the directory holding this script so that the relative paths used
# below (generator scripts, template directories, generated/) resolve.
SCRIPTDIR="$(dirname "${BASH_SOURCE[0]}")"
cd "${SCRIPTDIR}"
|
||
# Generate the kernel header and the test-fixture header for one backend and
# one test family.
#
# Arguments:
#   $1  BACKEND  backend name, forwarded to both generators as --backend and
#                embedded in the output file names
#   $2  TYPE     singular family name; "${TYPE}s" is the directory that holds
#                the templates and the sub-directory of generated/ written to
#   $3  EXT      file extension used for both generated files
#
# Returns non-zero (which aborts the script under `set -e`) when called with
# the wrong number of arguments or when a generator fails.
function generate {
  # Refuse to run with missing or extra arguments: an empty TYPE or EXT would
  # otherwise silently produce paths such as "s/-kernels.template.h".
  if [ "$#" -ne 3 ]; then
    echo "usage: generate BACKEND TYPE EXT" >&2
    return 1
  fi

  # Keep the working variables function-local so that repeated calls do not
  # leak state into the rest of the script.
  local BACKEND=$1
  local TYPE=$2
  local EXT=$3
  local DIR="$TYPE"s

  mkdir -p generated/"$DIR"
  ./kernel_generator.py --backend="$BACKEND" --template="$DIR"/"$TYPE"-kernels.template.h --out=generated/"$DIR"/kernels-"$BACKEND"."$EXT"
  ./test_fixture_generator.py --backend="$BACKEND" --template="$DIR"/"$TYPE"_test.template.h --out=generated/"$DIR"/"$TYPE"_test-"$BACKEND"."$EXT"
}
|
||
# Emit the OpenMP ".h" outputs for both test families; generate() maps the
# family names to the algorithms/ and functions/ directories.
generate 'openmp' 'algorithm' 'h'
generate 'openmp' 'function' 'h'
79 changes: 79 additions & 0 deletions
79
velox/experimental/breeze/test/generated/algorithms/algorithm_test-openmp.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
// Copyright (c) 2024 by Rivos Inc. | ||
// Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include <gtest/gtest.h> | ||
#include <omp.h> | ||
|
||
#include <cassert> | ||
#include <vector> | ||
|
||
#include "platforms/openmp.h" | ||
#include "test/generated/algorithms/kernels-openmp.h" | ||
#include "test/platforms/openmp_test.h" | ||
|
||
template <typename T> | ||
class AlgorithmTest : public ::testing::Test { | ||
protected: | ||
template <typename ReduceOp, int BLOCK_THREADS, int ITEMS_PER_THREAD, | ||
typename U> | ||
void Reduce(const std::vector<T>& in, U* out, int num_blocks) { | ||
using PlatformT = | ||
OpenMPPlatform<BLOCK_THREADS, /*WARP_THREADS=*/BLOCK_THREADS>; | ||
using SharedMemType = | ||
typename breeze::algorithms::DeviceReduce<PlatformT, U>::Scratch; | ||
OpenMPTestLaunch<BLOCK_THREADS, SharedMemType>( | ||
num_blocks, | ||
&kernels::Reduce<ReduceOp, BLOCK_THREADS, ITEMS_PER_THREAD, T, U, | ||
SharedMemType>, | ||
in.data(), out, in.size()); | ||
} | ||
|
||
template <typename ScanOp, int BLOCK_THREADS, int ITEMS_PER_THREAD, | ||
int LOOKBACK_DISTANCE, typename U, typename V> | ||
void Scan(const std::vector<T>& in, std::vector<U>& out, int* next_blocks_idx, | ||
std::vector<V>& blocks, int num_blocks) { | ||
using PlatformT = | ||
OpenMPPlatform<BLOCK_THREADS, /*WARP_THREADS=*/BLOCK_THREADS>; | ||
using SharedMemType = | ||
typename breeze::algorithms::DeviceScan<PlatformT, U, ITEMS_PER_THREAD, | ||
LOOKBACK_DISTANCE>::Scratch; | ||
OpenMPTestLaunch<BLOCK_THREADS, SharedMemType>( | ||
num_blocks, | ||
&kernels::Scan<ScanOp, BLOCK_THREADS, ITEMS_PER_THREAD, | ||
LOOKBACK_DISTANCE, T, U, V, SharedMemType>, | ||
in.data(), out.data(), next_blocks_idx, blocks.data(), in.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int TILE_SIZE, | ||
int RADIX_BITS> | ||
void RadixSortHistogram(const std::vector<T>& in, std::vector<unsigned>& out, | ||
int num_blocks) { | ||
using SharedMemType = | ||
typename breeze::algorithms::DeviceRadixSortHistogram<RADIX_BITS, | ||
T>::Scratch; | ||
OpenMPTestLaunch<BLOCK_THREADS, SharedMemType>( | ||
num_blocks, | ||
&kernels::RadixSortHistogram<BLOCK_THREADS, ITEMS_PER_THREAD, TILE_SIZE, | ||
RADIX_BITS, T, SharedMemType>, | ||
in.data(), out.data(), in.size()); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int RADIX_BITS> | ||
void RadixSort(const std::vector<T>& in, | ||
const std::vector<unsigned>& in_offsets, int start_bit, | ||
int num_pass_bits, std::vector<T>& out, | ||
std::vector<int>& next_block_idx, | ||
std::vector<unsigned>& blocks, int num_blocks) { | ||
using PlatformT = | ||
OpenMPPlatform<BLOCK_THREADS, /*WARP_THREADS=*/BLOCK_THREADS>; | ||
using SharedMemType = typename breeze::algorithms::DeviceRadixSort< | ||
PlatformT, ITEMS_PER_THREAD, RADIX_BITS, T>::Scratch; | ||
OpenMPTestLaunch<BLOCK_THREADS, SharedMemType>( | ||
num_blocks, | ||
&kernels::RadixSort<BLOCK_THREADS, ITEMS_PER_THREAD, RADIX_BITS, T, | ||
SharedMemType>, | ||
in.data(), in_offsets.data(), &start_bit, &num_pass_bits, out.data(), | ||
next_block_idx.data(), blocks.data(), in.size()); | ||
} | ||
}; |
73 changes: 73 additions & 0 deletions
73
velox/experimental/breeze/test/generated/algorithms/kernels-openmp.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// Copyright (c) 2024 by Rivos Inc. | ||
// Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include "algorithms/reduce.h" | ||
#include "algorithms/scan.h" | ||
#include "algorithms/sort.h" | ||
#include "platforms/openmp.h" | ||
#include "platforms/platform.h" | ||
#include "utils/types.h" | ||
|
||
namespace kernels { | ||
|
||
template <typename Op, int BLOCK_THREADS, int ITEMS_PER_THREAD, typename T, | ||
typename U, typename SharedMemType, | ||
typename PlatformT = OpenMPPlatform<BLOCK_THREADS, BLOCK_THREADS>> | ||
void Reduce(PlatformT p, SharedMemType* scratch, const T* in, U* out, | ||
int num_items) { | ||
breeze::algorithms::DeviceReduce<PlatformT, U>::template Reduce< | ||
Op, ITEMS_PER_THREAD>( | ||
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(out), | ||
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items); | ||
} | ||
|
||
template <typename Op, int BLOCK_THREADS, int ITEMS_PER_THREAD, | ||
int LOOKBACK_DISTANCE, typename T, typename U, typename V, | ||
typename SharedMemType, | ||
typename PlatformT = OpenMPPlatform<BLOCK_THREADS, BLOCK_THREADS>> | ||
void Scan(PlatformT p, SharedMemType* scratch, const T* in, U* out, | ||
int* next_block_idx, V* blocks, int num_items) { | ||
breeze::algorithms::DeviceScan<PlatformT, U, ITEMS_PER_THREAD, | ||
LOOKBACK_DISTANCE>:: | ||
template Scan<Op>( | ||
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(out), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(next_block_idx), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(blocks), | ||
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int TILE_SIZE, | ||
int RADIX_BITS, typename T, typename SharedMemType, | ||
typename PlatformT = OpenMPPlatform<BLOCK_THREADS, BLOCK_THREADS>> | ||
void RadixSortHistogram(PlatformT p, SharedMemType* scratch, const T* in, | ||
unsigned* out, int num_items) { | ||
breeze::algorithms::DeviceRadixSortHistogram<RADIX_BITS, T>::template Build< | ||
ITEMS_PER_THREAD, TILE_SIZE>( | ||
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(out), | ||
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items); | ||
} | ||
|
||
template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int RADIX_BITS, typename T, | ||
typename SharedMemType, | ||
typename PlatformT = OpenMPPlatform<BLOCK_THREADS, BLOCK_THREADS>> | ||
void RadixSort(PlatformT p, SharedMemType* scratch, const T* in, | ||
const unsigned* in_offsets, const int* start_bit, | ||
const int* num_pass_bits, T* out, int* next_block_idx, | ||
unsigned* blocks, int num_items) { | ||
breeze::algorithms::DeviceRadixSort<PlatformT, ITEMS_PER_THREAD, RADIX_BITS, | ||
T>:: | ||
template Sort<unsigned>( | ||
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(in_offsets), | ||
*start_bit, *num_pass_bits, | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(out), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(next_block_idx), | ||
breeze::utils::make_slice<breeze::utils::GLOBAL>(blocks), | ||
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items); | ||
} | ||
|
||
} // namespace kernels |
Oops, something went wrong.