Skip to content

Commit

Permalink
Working refactored BDDC
Browse files Browse the repository at this point in the history
  • Loading branch information
fritzgoebel committed Dec 8, 2023
1 parent a206ab6 commit 8077436
Show file tree
Hide file tree
Showing 17 changed files with 1,565 additions and 51 deletions.
17 changes: 17 additions & 0 deletions common/cuda_hip/base/device_matrix_data_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,20 @@ void sort_row_major(std::shared_ptr<const DefaultExecutor> exec,

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DEVICE_MATRIX_DATA_SORT_ROW_MAJOR_KERNEL);


/**
 * Launches the device `kernel::scale` kernel on all entries of `values`.
 *
 * The launch uses blocks of `config::warp_size` threads and enough blocks to
 * provide at least one thread per entry. The kernel body is not visible here;
 * presumably it multiplies every entry by `s` in place (confirm against
 * `kernel::scale`).
 *
 * @param exec  the executor this kernel is dispatched for (currently unused
 *              by the launch itself)
 * @param s  the scaling factor passed to the kernel
 * @param values  the array whose device data is handed to the kernel
 */
template <typename ValueType>
void scale(std::shared_ptr<const DefaultExecutor> exec, ValueType s,
           array<ValueType>& values)
{
    const auto nnz = values.get_num_elems();
    // An empty array would give a grid dimension of zero, which is an invalid
    // launch configuration; there is nothing to scale in that case anyway.
    if (nnz == 0) {
        return;
    }
    auto vals = as_device_type(values.get_data());

    const auto grid_dim = ceildiv(nnz, config::warp_size);
    // NOTE(review): the launch goes to the default stream; if the executor
    // exposes its own stream, the kernel should probably be launched on it.
    kernel::scale<<<grid_dim, config::warp_size>>>(nnz, as_device_type(s),
                                                   vals);
}


// Instantiates the scale kernel (declared through
// GKO_DECLARE_DEVICE_MATRIX_DATA_SCALE_KERNEL) via the per-value-type
// instantiation macro; the kernel depends on the value type only.
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
GKO_DECLARE_DEVICE_MATRIX_DATA_SCALE_KERNEL);
1 change: 1 addition & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ if(GINKGO_BUILD_MPI)
distributed/matrix.cpp
distributed/partition_helpers.cpp
distributed/vector.cpp
distributed/preconditioner/bddc.cpp
distributed/preconditioner/schwarz.cpp)
endif()

Expand Down
102 changes: 79 additions & 23 deletions core/base/composition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@


#include <ginkgo/core/base/precision_dispatch.hpp>
#include <ginkgo/core/distributed/matrix.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/dense.hpp>


Expand Down Expand Up @@ -191,36 +193,90 @@ std::unique_ptr<LinOp> Composition<ValueType>::conj_transpose() const
/**
 * Applies the composition to `b` and stores the result in `x`.
 *
 * Two code paths exist:
 *  - Distributed path: taken when `x` is convertible to a distributed vector
 *    and the composition consists of exactly three operators. The operators
 *    are applied right to left (operators_[2], operators_[1], operators_[0])
 *    through the two cached intermediate vectors INTERM1 and INTERM2.
 *  - Generic path: everything else is dispatched through
 *    precision_dispatch_real_complex, as before.
 *
 * The distributed path indexes operators_[0..2] directly. It is therefore
 * restricted to exactly three operators: fewer would read out of bounds, more
 * would silently skip the trailing operators.
 *
 * @param b  the input operand
 * @param x  the output operand
 */
template <typename ValueType>
void Composition<ValueType>::apply_impl(const LinOp* b, LinOp* x) const
{
    using dist_vector = experimental::distributed::Vector<ValueType>;
    using dist_matrix = experimental::distributed::Matrix<ValueType, int, int>;

    const bool distributed_target =
        dynamic_cast<
            const ConvertibleTo<experimental::distributed::Vector<>>*>(x) !=
        nullptr;

    if (distributed_target && operators_.size() == 3) {
        auto exec = x->get_executor();
        auto comm = as<dist_vector>(x)->get_communicator();
        // The intermediates are allocated on first use only and reused
        // afterwards.
        // NOTE(review): they are sized with a single column and never
        // re-checked against later inputs - confirm that callers only pass
        // single-column vectors.
        if (!dynamic_cast<dist_vector*>(INTERM1.get())) {
            // NOTE(review): index types are fixed to <int, int> here.
            const auto innermost = as<dist_matrix>(operators_[2]);
            INTERM1 = share(dist_vector::create(
                exec, comm, dim<2>{innermost->get_size()[0], 1},
                dim<2>{innermost->get_local_matrix()->get_size()[0], 1}));
            INTERM2 = share(clone(INTERM1));
        }
        operators_[2]->apply(b, INTERM1);
        operators_[1]->apply(INTERM1, INTERM2);
        operators_[0]->apply(INTERM2, x);
    } else {
        precision_dispatch_real_complex<ValueType>(
            [this](auto dense_b, auto dense_x) {
                if (operators_.size() > 1) {
                    operators_[0]->apply(
                        apply_inner_operators(operators_, storage_, dense_b),
                        dense_x);
                } else {
                    operators_[0]->apply(dense_b, dense_x);
                }
            },
            b, x);
    }
}


/**
 * Computes x = alpha * op(b) + beta * x for the composed operator.
 *
 * Two code paths exist:
 *  - Distributed path: taken when `x` is convertible to a distributed vector
 *    and the composition consists of exactly three operators. operators_[2]
 *    and operators_[1] are applied plainly through the cached intermediate
 *    vectors INTERM1 and INTERM2; only the outermost operators_[0] performs
 *    the scaled (alpha/beta) apply into `x`.
 *  - Generic path: everything else is dispatched through
 *    precision_dispatch_real_complex, as before.
 *
 * The distributed path indexes operators_[0..2] directly. It is therefore
 * restricted to exactly three operators: fewer would read out of bounds, more
 * would silently skip the trailing operators.
 *
 * @param alpha  scaling factor of the operator application
 * @param b  the input operand
 * @param beta  scaling factor of the previous content of `x`
 * @param x  the input/output operand
 */
template <typename ValueType>
void Composition<ValueType>::apply_impl(const LinOp* alpha, const LinOp* b,
                                        const LinOp* beta, LinOp* x) const
{
    using dist_vector = experimental::distributed::Vector<ValueType>;
    using dist_matrix = experimental::distributed::Matrix<ValueType, int, int>;

    const bool distributed_target =
        dynamic_cast<
            const ConvertibleTo<experimental::distributed::Vector<>>*>(x) !=
        nullptr;

    if (distributed_target && operators_.size() == 3) {
        auto exec = x->get_executor();
        auto comm = as<dist_vector>(x)->get_communicator();
        // The intermediates are allocated on first use only and reused
        // afterwards.
        // NOTE(review): they are sized with a single column and never
        // re-checked against later inputs - confirm that callers only pass
        // single-column vectors.
        if (!dynamic_cast<dist_vector*>(INTERM1.get())) {
            // NOTE(review): index types are fixed to <int, int> here.
            const auto innermost = as<dist_matrix>(operators_[2]);
            INTERM1 = share(dist_vector::create(
                exec, comm, dim<2>{innermost->get_size()[0], 1},
                dim<2>{innermost->get_local_matrix()->get_size()[0], 1}));
            INTERM2 = share(clone(INTERM1));
        }
        operators_[2]->apply(b, INTERM1);
        operators_[1]->apply(INTERM1, INTERM2);
        operators_[0]->apply(alpha, INTERM2, beta, x);
    } else {
        precision_dispatch_real_complex<ValueType>(
            [this](auto dense_alpha, auto dense_b, auto dense_beta,
                   auto dense_x) {
                if (operators_.size() > 1) {
                    operators_[0]->apply(
                        dense_alpha,
                        apply_inner_operators(operators_, storage_, dense_b),
                        dense_beta, dense_x);
                } else {
                    operators_[0]->apply(dense_alpha, dense_b, dense_beta,
                                         dense_x);
                }
            },
            alpha, b, beta, x);
    }
}


Expand Down
8 changes: 8 additions & 0 deletions core/base/device_matrix_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ GKO_REGISTER_OPERATION(soa_to_aos, components::soa_to_aos);
GKO_REGISTER_OPERATION(remove_zeros, components::remove_zeros);
GKO_REGISTER_OPERATION(sum_duplicates, components::sum_duplicates);
GKO_REGISTER_OPERATION(sort_row_major, components::sort_row_major);
GKO_REGISTER_OPERATION(scale, components::scale);


} // anonymous namespace
Expand Down Expand Up @@ -133,6 +134,13 @@ device_matrix_data<ValueType, IndexType>::empty_out()
}


/**
 * Runs the registered `scale` operation with factor `s` on the stored values,
 * using the executor that owns the value array.
 *
 * @param s  the factor handed to the scale operation
 */
template <typename ValueType, typename IndexType>
void device_matrix_data<ValueType, IndexType>::scale(ValueType s)
{
    auto value_exec = this->values_.get_executor();
    value_exec->run(components::make_scale(s, this->values_));
}


#define GKO_DECLARE_DEVICE_MATRIX_DATA(ValueType, IndexType) \
struct device_matrix_data<ValueType, IndexType>
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DEVICE_MATRIX_DATA);
Expand Down
9 changes: 8 additions & 1 deletion core/base/device_matrix_data_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ namespace kernels {
void sort_row_major(std::shared_ptr<const DefaultExecutor> exec, \
device_matrix_data<ValueType, IndexType>& data)

// Declares the signature of the `scale` kernel for the given ValueType: it
// takes the executor, a scalar `s` and the value array to operate on. Unlike
// the other kernels declared in this file, it takes no IndexType parameter.
#define GKO_DECLARE_DEVICE_MATRIX_DATA_SCALE_KERNEL(ValueType) \
void scale(std::shared_ptr<const DefaultExecutor> exec, ValueType s, \
array<ValueType>& values)


#define GKO_DECLARE_ALL_AS_TEMPLATES \
template <typename ValueType, typename IndexType> \
Expand All @@ -63,7 +67,10 @@ namespace kernels {
GKO_DECLARE_DEVICE_MATRIX_DATA_SUM_DUPLICATES_KERNEL(ValueType, \
IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_DEVICE_MATRIX_DATA_SORT_ROW_MAJOR_KERNEL(ValueType, IndexType)
GKO_DECLARE_DEVICE_MATRIX_DATA_SORT_ROW_MAJOR_KERNEL(ValueType, \
IndexType); \
template <typename ValueType> \
GKO_DECLARE_DEVICE_MATRIX_DATA_SCALE_KERNEL(ValueType)


GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(components,
Expand Down
1 change: 1 addition & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DEVICE_MATRIX_DATA_SORT_ROW_MAJOR_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DEVICE_MATRIX_DATA_AOS_TO_SOA_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DEVICE_MATRIX_DATA_SOA_TO_AOS_KERNEL);
GKO_STUB_VALUE_TYPE(GKO_DECLARE_DEVICE_MATRIX_DATA_SCALE_KERNEL);

template <typename IndexType, typename RowPtrType>
GKO_DECLARE_CONVERT_PTRS_TO_IDXS(IndexType, RowPtrType)
Expand Down
Loading

0 comments on commit 8077436

Please sign in to comment.