Skip to content

Commit

Permalink
Buckify breeze (facebookincubator#11512)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: facebookincubator#11512

bypass-github-export-checks

Reviewed By: oerling

Differential Revision: D65797319
  • Loading branch information
Yuhta authored and facebook-github-bot committed Nov 13, 2024
1 parent 01c1264 commit 88c5a78
Show file tree
Hide file tree
Showing 13 changed files with 746 additions and 25 deletions.
2 changes: 1 addition & 1 deletion velox/experimental/breeze/platforms/openmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ struct OpenMPPlatform {
// Returns a bitmask computed as (1u << lane_idx()) - 1, i.e. with every bit
// below position lane_idx() set.
inline unsigned lower_rank_lanemask() {
  static_assert(WARP_THREADS <= sizeof(unsigned) * 8,
                "WARP_THREADS must be less or equal to unsigned bits");
  // Shift an unsigned literal so the whole expression stays in unsigned
  // arithmetic. With a signed `1`, shifting into the sign bit and then
  // subtracting 1 overflows int (assuming 32-bit int and a lane index of 31).
  return (1u << lane_idx()) - 1;
}
inline unsigned higher_rank_lanemask() {
static_assert(WARP_THREADS <= sizeof(unsigned) * 8,
Expand Down
12 changes: 6 additions & 6 deletions velox/experimental/breeze/test/algorithms/algorithm_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@
*/

#if defined(PLATFORM_CUDA)
#include "generated/algorithms/algorithm_test-cuda.cuh"
#include "test/generated/algorithms/algorithm_test-cuda.cuh"
#elif defined(PLATFORM_HIP)
#include "generated/algorithms/algorithm_test-hip.hpp"
#include "test/generated/algorithms/algorithm_test-hip.hpp"
#elif defined(PLATFORM_SYCL)
#include "generated/algorithms/algorithm_test-sycl.hpp"
#include "test/generated/algorithms/algorithm_test-sycl.hpp"
#elif defined(PLATFORM_OPENCL)
#include "generated/algorithms/algorithm_test-opencl.h"
#include "test/generated/algorithms/algorithm_test-opencl.h"
#elif defined(PLATFORM_OPENMP)
#include "generated/algorithms/algorithm_test-openmp.h"
#include "test/generated/algorithms/algorithm_test-openmp.h"
#elif defined(PLATFORM_METAL)
#include "generated/algorithms/algorithm_test-metal.h"
#include "test/generated/algorithms/algorithm_test-metal.h"
#else
#error unsupported platform
#endif
12 changes: 6 additions & 6 deletions velox/experimental/breeze/test/functions/function_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@
*/

#if defined(PLATFORM_CUDA)
#include "generated/functions/function_test-cuda.cuh"
#include "test/generated/functions/function_test-cuda.cuh"
#elif defined(PLATFORM_HIP)
#include "generated/functions/function_test-hip.hpp"
#include "test/generated/functions/function_test-hip.hpp"
#elif defined(PLATFORM_SYCL)
#include "generated/functions/function_test-sycl.hpp"
#include "test/generated/functions/function_test-sycl.hpp"
#elif defined(PLATFORM_OPENCL)
#include "generated/functions/function_test-opencl.h"
#include "test/generated/functions/function_test-opencl.h"
#elif defined(PLATFORM_OPENMP)
#include "generated/functions/function_test-openmp.h"
#include "test/generated/functions/function_test-openmp.h"
#elif defined(PLATFORM_METAL)
#include "generated/functions/function_test-metal.h"
#include "test/generated/functions/function_test-metal.h"
#else
#error unsupported platform
#endif
32 changes: 32 additions & 0 deletions velox/experimental/breeze/test/generate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash

# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command or pipeline stage, disable pathname
# expansion, and trace each command as it is executed.
set -o errexit -o noglob -o xtrace -o pipefail
# Work from the directory that holds this script so the relative paths used
# below resolve regardless of the caller's working directory.
SCRIPTDIR="$(dirname "${BASH_SOURCE[0]}")"
cd "$SCRIPTDIR"

# Generates the kernel header and the test-fixture header for one backend.
#   $1 - backend name, forwarded to both generators via --backend
#   $2 - singular test type; "$2"s is used as the template/output directory
#   $3 - file extension of the generated files
# Output is written below generated/<type>s/, which is created if missing.
function generate {
  # Keep the working variables function-local so repeated calls do not leak
  # or clobber state in the enclosing script.
  local backend=$1
  local type=$2
  local ext=$3
  local dir="${type}s"
  mkdir -p generated/"$dir"
  ./kernel_generator.py --backend="$backend" --template="$dir"/"$type"-kernels.template.h --out=generated/"$dir"/kernels-"$backend"."$ext"
  ./test_fixture_generator.py --backend="$backend" --template="$dir"/"$type"_test.template.h --out=generated/"$dir"/"$type"_test-"$backend"."$ext"
}

# Produce the OpenMP (.h) kernel and fixture headers for the algorithm and
# function test suites.
generate openmp "algorithm" h
generate openmp "function" h
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Copyright (c) 2024 by Rivos Inc.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#include <gtest/gtest.h>
#include <omp.h>

#include <cassert>
#include <vector>

#include "platforms/openmp.h"
#include "test/generated/algorithms/kernels-openmp.h"
#include "test/platforms/openmp_test.h"

/// gtest fixture whose helpers launch the breeze algorithm kernels from
/// namespace `kernels` through OpenMPTestLaunch.
///
/// T is the element type of the input vectors passed to each helper. Every
/// helper derives the algorithm's Scratch type, uses it as the shared-memory
/// type of the launch, and forwards raw pointers plus the input size.
template <typename T>
class AlgorithmTest : public ::testing::Test {
 protected:
  /// Launches kernels::Reduce over `in` with `num_blocks` blocks, writing
  /// through `out`.
  template <typename ReduceOp, int BLOCK_THREADS, int ITEMS_PER_THREAD,
            typename U>
  void Reduce(const std::vector<T>& in, U* out, int num_blocks) {
    // The platform is instantiated with WARP_THREADS == BLOCK_THREADS.
    using OmpPlatformT =
        OpenMPPlatform<BLOCK_THREADS, /*WARP_THREADS=*/BLOCK_THREADS>;
    using ScratchT =
        typename breeze::algorithms::DeviceReduce<OmpPlatformT, U>::Scratch;

    OpenMPTestLaunch<BLOCK_THREADS, ScratchT>(
        num_blocks,
        &kernels::Reduce<ReduceOp, BLOCK_THREADS, ITEMS_PER_THREAD, T, U,
                         ScratchT>,
        in.data(), out, in.size());
  }

  /// Launches kernels::Scan over `in` with `num_blocks` blocks. `out`,
  /// `next_blocks_idx` and `blocks` are handed to the kernel as raw pointers.
  template <typename ScanOp, int BLOCK_THREADS, int ITEMS_PER_THREAD,
            int LOOKBACK_DISTANCE, typename U, typename V>
  void Scan(const std::vector<T>& in, std::vector<U>& out,
            int* next_blocks_idx, std::vector<V>& blocks, int num_blocks) {
    using OmpPlatformT =
        OpenMPPlatform<BLOCK_THREADS, /*WARP_THREADS=*/BLOCK_THREADS>;
    using ScratchT =
        typename breeze::algorithms::DeviceScan<OmpPlatformT, U,
                                                ITEMS_PER_THREAD,
                                                LOOKBACK_DISTANCE>::Scratch;

    OpenMPTestLaunch<BLOCK_THREADS, ScratchT>(
        num_blocks,
        &kernels::Scan<ScanOp, BLOCK_THREADS, ITEMS_PER_THREAD,
                       LOOKBACK_DISTANCE, T, U, V, ScratchT>,
        in.data(), out.data(), next_blocks_idx, blocks.data(), in.size());
  }

  /// Launches kernels::RadixSortHistogram over `in` with `num_blocks` blocks,
  /// writing into `out`.
  template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int TILE_SIZE,
            int RADIX_BITS>
  void RadixSortHistogram(const std::vector<T>& in,
                          std::vector<unsigned>& out, int num_blocks) {
    using ScratchT =
        typename breeze::algorithms::DeviceRadixSortHistogram<RADIX_BITS,
                                                              T>::Scratch;

    OpenMPTestLaunch<BLOCK_THREADS, ScratchT>(
        num_blocks,
        &kernels::RadixSortHistogram<BLOCK_THREADS, ITEMS_PER_THREAD,
                                     TILE_SIZE, RADIX_BITS, T, ScratchT>,
        in.data(), out.data(), in.size());
  }

  /// Launches kernels::RadixSort over `in` with `num_blocks` blocks.
  /// `start_bit` and `num_pass_bits` are passed to the kernel by address, so
  /// the kernel reads them through pointers to these by-value parameters.
  template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int RADIX_BITS>
  void RadixSort(const std::vector<T>& in,
                 const std::vector<unsigned>& in_offsets, int start_bit,
                 int num_pass_bits, std::vector<T>& out,
                 std::vector<int>& next_block_idx,
                 std::vector<unsigned>& blocks, int num_blocks) {
    using OmpPlatformT =
        OpenMPPlatform<BLOCK_THREADS, /*WARP_THREADS=*/BLOCK_THREADS>;
    using ScratchT = typename breeze::algorithms::DeviceRadixSort<
        OmpPlatformT, ITEMS_PER_THREAD, RADIX_BITS, T>::Scratch;

    OpenMPTestLaunch<BLOCK_THREADS, ScratchT>(
        num_blocks,
        &kernels::RadixSort<BLOCK_THREADS, ITEMS_PER_THREAD, RADIX_BITS, T,
                            ScratchT>,
        in.data(), in_offsets.data(), &start_bit, &num_pass_bits, out.data(),
        next_block_idx.data(), blocks.data(), in.size());
  }
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Copyright (c) 2024 by Rivos Inc.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#include "algorithms/reduce.h"
#include "algorithms/scan.h"
#include "algorithms/sort.h"
#include "platforms/openmp.h"
#include "platforms/platform.h"
#include "utils/types.h"

namespace kernels {

template <typename Op, int BLOCK_THREADS, int ITEMS_PER_THREAD, typename T,
typename U, typename SharedMemType,
typename PlatformT = OpenMPPlatform<BLOCK_THREADS, BLOCK_THREADS>>
void Reduce(PlatformT p, SharedMemType* scratch, const T* in, U* out,
int num_items) {
breeze::algorithms::DeviceReduce<PlatformT, U>::template Reduce<
Op, ITEMS_PER_THREAD>(
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in),
breeze::utils::make_slice<breeze::utils::GLOBAL>(out),
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items);
}

template <typename Op, int BLOCK_THREADS, int ITEMS_PER_THREAD,
int LOOKBACK_DISTANCE, typename T, typename U, typename V,
typename SharedMemType,
typename PlatformT = OpenMPPlatform<BLOCK_THREADS, BLOCK_THREADS>>
void Scan(PlatformT p, SharedMemType* scratch, const T* in, U* out,
int* next_block_idx, V* blocks, int num_items) {
breeze::algorithms::DeviceScan<PlatformT, U, ITEMS_PER_THREAD,
LOOKBACK_DISTANCE>::
template Scan<Op>(
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in),
breeze::utils::make_slice<breeze::utils::GLOBAL>(out),
breeze::utils::make_slice<breeze::utils::GLOBAL>(next_block_idx),
breeze::utils::make_slice<breeze::utils::GLOBAL>(blocks),
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items);
}

template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int TILE_SIZE,
int RADIX_BITS, typename T, typename SharedMemType,
typename PlatformT = OpenMPPlatform<BLOCK_THREADS, BLOCK_THREADS>>
void RadixSortHistogram(PlatformT p, SharedMemType* scratch, const T* in,
unsigned* out, int num_items) {
breeze::algorithms::DeviceRadixSortHistogram<RADIX_BITS, T>::template Build<
ITEMS_PER_THREAD, TILE_SIZE>(
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in),
breeze::utils::make_slice<breeze::utils::GLOBAL>(out),
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items);
}

template <int BLOCK_THREADS, int ITEMS_PER_THREAD, int RADIX_BITS, typename T,
typename SharedMemType,
typename PlatformT = OpenMPPlatform<BLOCK_THREADS, BLOCK_THREADS>>
void RadixSort(PlatformT p, SharedMemType* scratch, const T* in,
const unsigned* in_offsets, const int* start_bit,
const int* num_pass_bits, T* out, int* next_block_idx,
unsigned* blocks, int num_items) {
breeze::algorithms::DeviceRadixSort<PlatformT, ITEMS_PER_THREAD, RADIX_BITS,
T>::
template Sort<unsigned>(
p, breeze::utils::make_slice<breeze::utils::GLOBAL>(in),
breeze::utils::make_slice<breeze::utils::GLOBAL>(in_offsets),
*start_bit, *num_pass_bits,
breeze::utils::make_slice<breeze::utils::GLOBAL>(out),
breeze::utils::make_slice<breeze::utils::GLOBAL>(next_block_idx),
breeze::utils::make_slice<breeze::utils::GLOBAL>(blocks),
breeze::utils::make_slice<breeze::utils::SHARED>(scratch), num_items);
}

} // namespace kernels
Loading

0 comments on commit 88c5a78

Please sign in to comment.