Review updates.
Co-authored-by: Thomas Grützmacher <[email protected]>
Co-authored-by: Yu-Hsiang Tsai <[email protected]>
Co-authored-by: Marcel Koch <[email protected]>
4 people committed Jul 31, 2023
1 parent f0ef0fa commit 966a877
Showing 12 changed files with 176 additions and 122 deletions.
80 changes: 45 additions & 35 deletions common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc
@@ -47,10 +47,15 @@ __device__ __forceinline__ void scale(
}

template <typename ValueType, typename Mapping>
__global__
__launch_bounds__(default_block_size, sm_multiplier) void scale_kernel(
const gko::batch_multi_vector::uniform_batch<const ValueType> alpha,
const gko::batch_multi_vector::uniform_batch<ValueType> x, Mapping map)
__global__ __launch_bounds__(
default_block_size,
sm_oversubscription) void scale_kernel(const gko::batch_multi_vector::
uniform_batch<const ValueType>
alpha,
const gko::batch_multi_vector::
uniform_batch<ValueType>
x,
Mapping map)
{
for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries;
batch_id += gridDim.x) {
@@ -78,11 +83,20 @@ __device__ __forceinline__ void add_scaled(
}

template <typename ValueType, typename Mapping>
__global__
__launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel(
const gko::batch_multi_vector::uniform_batch<const ValueType> alpha,
const gko::batch_multi_vector::uniform_batch<const ValueType> x,
const gko::batch_multi_vector::uniform_batch<ValueType> y, Mapping map)
__global__ __launch_bounds__(
default_block_size,
sm_oversubscription) void add_scaled_kernel(const gko::batch_multi_vector::
uniform_batch<
const ValueType>
alpha,
const gko::batch_multi_vector::
uniform_batch<
const ValueType>
x,
const gko::batch_multi_vector::
uniform_batch<ValueType>
y,
Mapping map)
{
for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries;
batch_id += gridDim.x) {
@@ -139,24 +153,12 @@ __device__ __forceinline__ void compute_gen_dot_product(


template <typename ValueType, typename Mapping>
__global__ __launch_bounds__(
default_block_size,
sm_multiplier) void compute_gen_dot_product_kernel(const gko::
batch_multi_vector::
uniform_batch<
const ValueType>
x,
const gko::
batch_multi_vector::
uniform_batch<
const ValueType>
y,
const gko::
batch_multi_vector::
uniform_batch<
ValueType>
result,
Mapping map)
__global__
__launch_bounds__(default_block_size, sm_oversubscription) void compute_gen_dot_product_kernel(
const gko::batch_multi_vector::uniform_batch<const ValueType> x,
const gko::batch_multi_vector::uniform_batch<const ValueType> y,
const gko::batch_multi_vector::uniform_batch<ValueType> result,
Mapping map)
{
for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries;
batch_id += gridDim.x) {
@@ -218,11 +220,19 @@ __device__ __forceinline__ void compute_norm2(


template <typename ValueType>
__global__
__launch_bounds__(default_block_size, sm_multiplier) void compute_norm2_kernel(
const gko::batch_multi_vector::uniform_batch<const ValueType> x,
const gko::batch_multi_vector::uniform_batch<remove_complex<ValueType>>
result)
__global__ __launch_bounds__(
default_block_size,
sm_oversubscription) void compute_norm2_kernel(const gko::
batch_multi_vector::
uniform_batch<
const ValueType>
x,
const gko::
batch_multi_vector::
uniform_batch<
remove_complex<
ValueType>>
result)
{
for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries;
batch_id += gridDim.x) {
@@ -255,9 +265,9 @@ __device__ __forceinline__ void copy(

template <typename ValueType>
__global__
__launch_bounds__(default_block_size, sm_multiplier) void copy_kernel(
const gko::batch_multi_vector::uniform_batch<const ValueType> src,
const gko::batch_multi_vector::uniform_batch<ValueType> dst)
__launch_bounds__(default_block_size, sm_oversubscription) void copy_kernel(
const gko::batch_multi_vector::uniform_batch<const ValueType> src,
const gko::batch_multi_vector::uniform_batch<ValueType> dst)
{
for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_entries;
batch_id += gridDim.x) {
80 changes: 80 additions & 0 deletions core/base/batch_multi_vector.cpp
@@ -65,6 +65,86 @@ GKO_REGISTER_OPERATION(copy, batch_multi_vector::copy);
} // namespace
} // namespace batch_multi_vector

namespace detail {


template <typename ValueType>
batch_dim<2> compute_batch_size(
const std::vector<matrix::Dense<ValueType>*>& matrices)
{
auto common_size = matrices[0]->get_size();
for (size_type i = 1; i < matrices.size(); ++i) {
GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size());
}
return batch_dim<2>{matrices.size(), common_size};
}


} // namespace detail


template <typename ValueType>
BatchMultiVector<ValueType>::BatchMultiVector(
std::shared_ptr<const Executor> exec, const batch_dim<2>& size)
: EnablePolymorphicObject<BatchMultiVector<ValueType>>(exec),
batch_size_(size),
values_(exec, compute_num_elems(size))
{}


template <typename ValueType>
BatchMultiVector<ValueType>::BatchMultiVector(
std::shared_ptr<const Executor> exec,
const std::vector<matrix::Dense<ValueType>*>& matrices)
: EnablePolymorphicObject<BatchMultiVector<ValueType>>(exec),
batch_size_{detail::compute_batch_size(matrices)},
values_(exec, compute_num_elems(batch_size_))
{
for (size_type i = 0; i < this->get_num_batch_entries(); ++i) {
auto local_exec = matrices[i]->get_executor();
exec->copy_from(
local_exec.get(), matrices[i]->get_num_stored_elements(),
matrices[i]->get_const_values(),
this->get_values() + this->get_size().get_cumulative_offset(i));
}
}


template <typename ValueType>
BatchMultiVector<ValueType>::BatchMultiVector(
std::shared_ptr<const Executor> exec, size_type num_duplications,
const matrix::Dense<value_type>* input)
: BatchMultiVector<ValueType>(
exec, gko::batch_dim<2>(num_duplications, input->get_size()))
{
size_type offset = 0;
for (size_type i = 0; i < num_duplications; ++i) {
exec->copy_from(input->get_executor().get(),
input->get_num_stored_elements(),
input->get_const_values(), this->get_values() + offset);
offset += input->get_num_stored_elements();
}
}


template <typename ValueType>
BatchMultiVector<ValueType>::BatchMultiVector(
std::shared_ptr<const Executor> exec, size_type num_duplications,
const BatchMultiVector<value_type>* input)
: BatchMultiVector<ValueType>(
exec,
gko::batch_dim<2>(input->get_num_batch_entries() * num_duplications,
input->get_common_size()))
{
size_type offset = 0;
for (size_type i = 0; i < num_duplications; ++i) {
exec->copy_from(input->get_executor().get(),
input->get_num_stored_elements(),
input->get_const_values(), this->get_values() + offset);
offset += input->get_num_stored_elements();
}
}


template <typename ValueType>
std::unique_ptr<BatchMultiVector<ValueType>>
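For orientation, the following is a standalone sketch of the value layout the two duplication constructors above produce; it uses std::vector<double> in place of the executor-allocated values array, and duplicate_values is a made-up name, not Ginkgo API.

#include <algorithm>
#include <cstddef>
#include <vector>

// Mirrors the loop in the duplication constructors above: each copy of the
// input values is written at an offset that advances by the number of stored
// elements per copy, so all duplicates end up contiguous in one flat array.
std::vector<double> duplicate_values(const std::vector<double>& input,
                                     std::size_t num_duplications)
{
    std::vector<double> values(input.size() * num_duplications);
    std::size_t offset = 0;
    for (std::size_t i = 0; i < num_duplications; ++i) {
        std::copy(input.begin(), input.end(), values.begin() + offset);
        offset += input.size();
    }
    return values;
}

Duplicating a whole BatchMultiVector works the same way, except that the input already stores all of its batch entries back to back, so each pass copies the complete batch.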
13 changes: 13 additions & 0 deletions core/test/base/batch_multi_vector.cpp
@@ -97,13 +97,15 @@ TYPED_TEST_SUITE(BatchMultiVector, gko::test::ValueTypes);
TYPED_TEST(BatchMultiVector, CanBeEmpty)
{
auto empty = gko::BatchMultiVector<TypeParam>::create(this->exec);

this->assert_empty(empty.get());
}


TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues)
{
ASSERT_NE(this->mtx->get_const_values(), nullptr);

this->assert_equal_to_original_mtx(this->mtx.get());
}

@@ -119,7 +121,9 @@ TYPED_TEST(BatchMultiVector, CanGetValuesForEntry)
TYPED_TEST(BatchMultiVector, CanBeCopied)
{
auto mtx_copy = gko::BatchMultiVector<TypeParam>::create(this->exec);

mtx_copy->copy_from(this->mtx.get());

this->assert_equal_to_original_mtx(this->mtx.get());
this->mtx->at(0, 0, 0) = 7;
this->mtx->at(0, 1) = 7;
@@ -130,14 +134,17 @@ TYPED_TEST(BatchMultiVector, CanBeMoved)
TYPED_TEST(BatchMultiVector, CanBeMoved)
{
auto mtx_copy = gko::BatchMultiVector<TypeParam>::create(this->exec);

this->mtx->move_to(mtx_copy.get());

this->assert_equal_to_original_mtx(mtx_copy.get());
}


TYPED_TEST(BatchMultiVector, CanBeCloned)
{
auto mtx_clone = this->mtx->clone();

this->assert_equal_to_original_mtx(
dynamic_cast<decltype(this->mtx.get())>(mtx_clone.get()));
}
@@ -146,13 +153,15 @@ TYPED_TEST(BatchMultiVector, CanBeCloned)
TYPED_TEST(BatchMultiVector, CanBeCleared)
{
this->mtx->clear();

this->assert_empty(this->mtx.get());
}


TYPED_TEST(BatchMultiVector, CanBeConstructedWithSize)
{
using size_type = gko::size_type;

auto m = gko::BatchMultiVector<TypeParam>::create(
this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 4)));

@@ -281,6 +290,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromBatchMultiVectorMatrices)
TYPED_TEST(BatchMultiVector, CanBeListConstructed)
{
using value_type = typename TestFixture::value_type;

auto m = gko::batch_initialize<gko::BatchMultiVector<TypeParam>>(
{{1.0, 2.0}, {1.0, 3.0}}, this->exec);

@@ -296,6 +306,7 @@ TYPED_TEST(BatchMultiVector, CanBeListConstructedByCopies)
TYPED_TEST(BatchMultiVector, CanBeListConstructedByCopies)
{
using value_type = typename TestFixture::value_type;

auto m = gko::batch_initialize<gko::BatchMultiVector<TypeParam>>(
2, I<value_type>({1.0, 2.0}), this->exec);

Expand All @@ -312,6 +323,7 @@ TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructed)
{
using value_type = typename TestFixture::value_type;
using T = value_type;

auto m = gko::batch_initialize<gko::BatchMultiVector<TypeParam>>(
{{I<T>{1.0, 1.0, 0.0}, I<T>{2.0, 4.0, 3.0}, I<T>{3.0, 6.0, 1.0}},
{I<T>{1.0, 2.0, -1.0}, I<T>{3.0, 4.0, -2.0}, I<T>{5.0, 6.0, -3.0}}},
@@ -401,6 +413,7 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromSparseMatrixData)
{
using value_type = typename TestFixture::value_type;
auto m = gko::BatchMultiVector<TypeParam>::create(this->exec);

// clang-format off
m->read({gko::matrix_data<TypeParam>{{2, 2},
{{0, 0, 1.0},
2 changes: 1 addition & 1 deletion cuda/base/batch_multi_vector_kernels.cu
@@ -65,7 +65,7 @@ namespace batch_multi_vector {


constexpr auto default_block_size = 256;
constexpr int sm_multiplier = 4;
constexpr int sm_oversubscription = 4;

// clang-format off

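For reference, the second argument of __launch_bounds__ is (by standard CUDA semantics, not anything introduced in this commit) the minimum number of resident blocks per multiprocessor that the compiler should budget registers for, which the new name sm_oversubscription reflects better than sm_multiplier. A minimal standalone sketch with made-up constant and kernel names:

constexpr int example_block_size = 256;
constexpr int example_oversubscription = 4;

// At most example_block_size threads per block, and keep register usage low
// enough that at least example_oversubscription blocks can be resident on
// each streaming multiprocessor.
__global__ __launch_bounds__(example_block_size,
                             example_oversubscription) void scale_by_two(
    double* data, int num_elems)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < num_elems) {
        data[idx] *= 2.0;
    }
}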
2 changes: 1 addition & 1 deletion cuda/base/batch_struct.hpp
@@ -51,7 +51,7 @@ namespace cuda {
/** @file batch_struct.hpp
*
* Helper functions to generate a batch struct from a batch LinOp,
* while also shallow-casting to the requried CUDA scalar type.
* while also shallow-casting to the required CUDA scalar type.
*
* A specialization is needed for every format of every kind of linear algebra
* object. These are intended to be called on the host.
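As a rough illustration of the "shallow cast" this comment describes (the function name below is made up, the reinterpret_cast assumes the usual layout compatibility of std::complex<float> and thrust::complex<float>, and the actual helpers in this file are not shown in the diff):

#include <complex>

#include <thrust/complex.h>

// Reinterpret host-facing complex storage as the CUDA-side scalar type
// without copying any data; real-valued data needs no conversion at all.
inline thrust::complex<float>* to_cuda_scalar(std::complex<float>* ptr)
{
    return reinterpret_cast<thrust::complex<float>*>(ptr);
}

inline double* to_cuda_scalar(double* ptr) { return ptr; }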
2 changes: 1 addition & 1 deletion dpcpp/base/batch_struct.hpp
@@ -50,7 +50,7 @@ namespace dpcpp {
/** @file batch_struct.hpp
*
* Helper functions to generate a batch struct from a batch LinOp,
* while also shallow-casting to the requried DPCPP scalar type.
* while also shallow-casting to the required DPCPP scalar type.
*
* A specialization is needed for every format of every kind of linear algebra
* object. These are intended to be called on the host.
2 changes: 1 addition & 1 deletion hip/base/batch_multi_vector_kernels.hip.cpp
@@ -66,7 +66,7 @@ namespace batch_multi_vector {


constexpr auto default_block_size = 256;
constexpr int sm_multiplier = 4;
constexpr int sm_oversubscription = 4;


// clang-format off
2 changes: 1 addition & 1 deletion hip/base/batch_struct.hip.hpp
@@ -51,7 +51,7 @@ namespace hip {
/** @file batch_struct.hpp
*
* Helper functions to generate a batch struct from a batch LinOp,
* while also shallow-casting to the requried Hip scalar type.
* while also shallow-casting to the required Hip scalar type.
*
* A specialization is needed for every format of every kind of linear algebra
* object. These are intended to be called on the host.
1 change: 1 addition & 0 deletions include/ginkgo/core/base/batch_lin_op_helpers.hpp
@@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>


#include <ginkgo/core/base/abstract_factory.hpp>