diff --git a/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc new file mode 100644 index 00000000000..23ae8ebd5f0 --- /dev/null +++ b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Dense* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + simple_apply_kernel<<get_stream()>>>(mat_ub, b_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Dense* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + advanced_apply_kernel<<get_stream()>>>(alpha_ub, mat_ub, b_ub, + beta_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); diff --git a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc new file mode 100644 index 00000000000..7a38cfea215 --- /dev/null +++ b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc @@ -0,0 +1,164 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +template +__device__ __forceinline__ void simple_apply( + const gko::batch::matrix::dense::batch_item& mat, + const ValueType* const __restrict__ b, ValueType* const __restrict__ x) +{ + constexpr auto tile_size = config::warp_size; + + auto thread_block = group::this_thread_block(); + auto subgroup = group::tiled_partition(thread_block); + const auto subgroup_id = static_cast(threadIdx.x / tile_size); + const int num_subgroups_per_block = ceildiv(blockDim.x, tile_size); + + for (int row = subgroup_id; row < mat.num_rows; + row += num_subgroups_per_block) { + ValueType temp = zero(); + for (int j = subgroup.thread_rank(); j < mat.num_cols; + j += subgroup.size()) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += val * b[j]; + } + + // subgroup level reduction + temp = reduce(subgroup, temp, thrust::plus{}); + + if (subgroup.thread_rank() == 0) { + x[row] = temp; + } + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: + dense::uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + simple_apply(mat_b, b_b.values, x_b.values); + } +} + + +template +__device__ __forceinline__ void advanced_apply( + const ValueType alpha, + const gko::batch::matrix::dense::batch_item& mat, + const ValueType* const __restrict__ b, const ValueType beta, + ValueType* const __restrict__ x) +{ + constexpr auto tile_size = config::warp_size; + + auto thread_block = 
group::this_thread_block(); + auto subgroup = group::tiled_partition(thread_block); + const auto subgroup_id = static_cast(threadIdx.x / tile_size); + const int num_subgroups_per_block = ceildiv(blockDim.x, tile_size); + + for (int row = subgroup_id; row < mat.num_rows; + row += num_subgroups_per_block) { + ValueType temp = zero(); + for (int j = subgroup.thread_rank(); j < mat.num_cols; + j += subgroup.size()) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += alpha * val * b[j]; + } + + // subgroup level reduction + temp = reduce(subgroup, temp, thrust::plus{}); + + if (subgroup.thread_rank() == 0) { + x[row] = temp + beta * x[row]; + } + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void advanced_apply_kernel(const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + alpha, + const gko::batch::matrix:: + dense::uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + beta, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id); + const auto beta_b = gko::batch::extract_batch_item(beta, batch_id); + advanced_apply(alpha_b.values[0], mat_b, b_b.values, beta_b.values[0], + x_b.values); + } +} diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 7932976d6c9..46ea67abc65 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -39,6 +39,7 @@ target_sources(ginkgo log/vtune.cpp log/record.cpp log/stream.cpp + matrix/batch_dense.cpp matrix/coo.cpp 
matrix/csr.cpp matrix/dense.cpp diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 23591cd1ffe..6a14919bf2f 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include "core/base/batch_multi_vector_kernels.hpp" @@ -72,7 +73,7 @@ namespace detail { template batch_dim<2> compute_batch_size( - const std::vector*>& matrices) + const std::vector*>& matrices) { auto common_size = matrices[0]->get_size(); for (size_type i = 1; i < matrices.size(); ++i) { @@ -86,7 +87,7 @@ batch_dim<2> compute_batch_size( template -std::unique_ptr> +std::unique_ptr> MultiVector::create_view_for_item(size_type item_id) { auto exec = this->get_executor(); @@ -102,7 +103,7 @@ MultiVector::create_view_for_item(size_type item_id) template -std::unique_ptr> +std::unique_ptr> MultiVector::create_const_view_for_item(size_type item_id) const { auto exec = this->get_executor(); @@ -290,6 +291,27 @@ void MultiVector::move_to( } +template +void MultiVector::convert_to(matrix::Dense* result) const +{ + auto exec = result->get_executor() == nullptr ? 
this->get_executor() + : result->get_executor(); + auto tmp = gko::batch::matrix::Dense::create_const( + exec, this->get_size(), + make_const_array_view(this->get_executor(), + this->get_num_stored_elements(), + this->get_const_values())); + result->copy_from(tmp); +} + + +template +void MultiVector::move_to(matrix::Dense* result) +{ + this->convert_to(result); +} + + #define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class MultiVector<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR); diff --git a/core/base/batch_multi_vector_kernels.hpp b/core/base/batch_multi_vector_kernels.hpp index 8603a2b9055..5a39567f470 100644 --- a/core/base/batch_multi_vector_kernels.hpp +++ b/core/base/batch_multi_vector_kernels.hpp @@ -39,7 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include #include "core/base/kernel_declaration.hpp" diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index caca4577cf7..71445550b87 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -51,9 +51,9 @@ template struct batch_item { using value_type = ValueType; ValueType* values; - int stride; - int num_rows; - int num_rhs; + int32 stride; + int32 num_rows; + int32 num_rhs; }; @@ -67,9 +67,9 @@ struct uniform_batch { ValueType* values; size_type num_batch_items; - int stride; - int num_rows; - int num_rhs; + int32 stride; + int32 num_rows; + int32 num_rhs; size_type get_entry_storage() const { @@ -117,8 +117,8 @@ extract_batch_item(const multi_vector::uniform_batch& batch, template GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item -extract_batch_item(ValueType* const batch_values, const int stride, - const int num_rows, const int num_rhs, +extract_batch_item(ValueType* const batch_values, const int32 stride, + const int32 num_rows, const int32 num_rhs, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, diff --git a/core/base/batch_utilities.hpp 
b/core/base/batch_utilities.hpp index e5dc22faeda..834e89c8358 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -51,16 +51,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { namespace batch { -namespace multivector { -template -std::unique_ptr> duplicate( - std::shared_ptr exec, size_type num_duplications, - const batch::MultiVector* input) +template +std::unique_ptr duplicate(std::shared_ptr exec, + size_type num_duplications, + const OutputType* input) { auto num_batch_items = input->get_num_batch_items(); - auto tmp = batch::MultiVector::create( + auto tmp = OutputType::create( exec, batch_dim<2>(num_batch_items * num_duplications, input->get_common_size())); @@ -75,13 +74,13 @@ std::unique_ptr> duplicate( } -template -std::unique_ptr> create_from_dense( +template +std::unique_ptr create_from_item( std::shared_ptr exec, const size_type num_duplications, - const matrix::Dense* input) + const typename OutputType::unbatch_type* input) { auto num_batch_items = num_duplications; - auto tmp = batch::MultiVector::create( + auto tmp = OutputType::create( exec, batch_dim<2>(num_batch_items, input->get_size())); for (size_type b = 0; b < num_batch_items; ++b) { @@ -92,13 +91,13 @@ std::unique_ptr> create_from_dense( } -template -std::unique_ptr> create_from_dense( +template +std::unique_ptr create_from_item( std::shared_ptr exec, - const std::vector*>& input) + const std::vector& input) { auto num_batch_items = input.size(); - auto tmp = batch::MultiVector::create( + auto tmp = OutputType::create( exec, batch_dim<2>(num_batch_items, input[0]->get_size())); for (size_type b = 0; b < num_batch_items; ++b) { @@ -109,29 +108,27 @@ std::unique_ptr> create_from_dense( } -template -std::vector>> unbatch( - const batch::MultiVector* batch_multivec) +template +auto unbatch(const InputType* batch_object) { - auto exec = batch_multivec->get_executor(); auto unbatched_mats = - std::vector>>{}; - for (size_type b 
= 0; b < batch_multivec->get_num_batch_items(); ++b) { + std::vector>{}; + for (size_type b = 0; b < batch_object->get_num_batch_items(); ++b) { unbatched_mats.emplace_back( - batch_multivec->create_const_view_for_item(b)->clone()); + batch_object->create_const_view_for_item(b)->clone()); } return unbatched_mats; } -template -std::unique_ptr> read( +template +std::unique_ptr read( std::shared_ptr exec, const std::vector>& data) { auto num_batch_items = data.size(); - auto tmp = MultiVector::create( - exec, batch_dim<2>(num_batch_items, data[0].size)); + auto tmp = + OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size)); for (size_type b = 0; b < num_batch_items; ++b) { tmp->create_view_for_item(b)->read(data[b]); @@ -141,9 +138,9 @@ std::unique_ptr> read( } -template +template std::vector> write( - const MultiVector* mvec) + const OutputType* mvec) { auto data = std::vector>( mvec->get_num_batch_items()); @@ -157,7 +154,6 @@ std::vector> write( } -} // namespace multivector } // namespace batch } // namespace gko diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index c8bbd2e0a31..87cab3dcf0b 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -57,6 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/factorization/par_ict_kernels.hpp" #include "core/factorization/par_ilu_kernels.hpp" #include "core/factorization/par_ilut_kernels.hpp" +#include "core/matrix/batch_dense_kernels.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/matrix/dense_kernels.hpp" @@ -299,6 +300,16 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); } // namespace batch_multi_vector +namespace batch_dense { + + +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense + + namespace dense { diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp new file mode 100644 index 00000000000..758635cea7f --- /dev/null +++ b/core/matrix/batch_dense.cpp @@ -0,0 +1,170 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_dense_kernels.hpp" + + +namespace gko { +namespace batch { +namespace matrix { +namespace dense { +namespace { + + +GKO_REGISTER_OPERATION(simple_apply, batch_dense::simple_apply); +GKO_REGISTER_OPERATION(advanced_apply, batch_dense::advanced_apply); + + +} // namespace +} // namespace dense + + +template +std::unique_ptr> +Dense::create_view_for_item(size_type item_id) +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create( + exec, this->get_common_size(), + make_array_view(exec, num_rows * stride, + this->get_values_for_item(item_id)), + stride); + return mat; +} + + +template +std::unique_ptr> +Dense::create_const_view_for_item(size_type item_id) const +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create_const( + exec, this->get_common_size(), + make_const_array_view(exec, num_rows * stride, + this->get_const_values_for_item(item_id)), + stride); + return mat; +} + + +template +std::unique_ptr> Dense::create_with_config_of( + ptr_param> other) +{ + return Dense::create(other->get_executor(), other->get_size()); +} + + +template 
+std::unique_ptr> Dense::create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values) +{ + // cast const-ness away, but return a const object afterwards, + // so we can ensure that no modifications take place. + return std::unique_ptr(new Dense{ + exec, sizes, gko::detail::array_const_cast(std::move(values))}); +} + + +template +Dense::Dense(std::shared_ptr exec, + const batch_dim<2>& size) + : EnableBatchLinOp>(exec, size), + values_(exec, compute_num_elems(size)) +{} + + +template +void Dense::apply_impl(const MultiVector* b, + MultiVector* x) const +{ + this->validate_application_parameters(b, x); + this->get_executor()->run(dense::make_simple_apply(this, b, x)); +} + + +template +void Dense::apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const +{ + this->validate_application_parameters(alpha, b, beta, x); + this->get_executor()->run( + dense::make_advanced_apply(alpha, this, b, beta, x)); +} + + +template +void Dense::convert_to( + Dense>* result) const +{ + result->values_ = this->values_; + result->set_size(this->get_size()); +} + + +template +void Dense::move_to(Dense>* result) +{ + this->convert_to(result); +} + + +#define GKO_DECLARE_BATCH_DENSE_MATRIX(_type) class Dense<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_MATRIX); + + +} // namespace matrix +} // namespace batch +} // namespace gko diff --git a/core/matrix/batch_dense_kernels.hpp b/core/matrix/batch_dense_kernels.hpp new file mode 100644 index 00000000000..ef59ff3e9cc --- /dev/null +++ b/core/matrix/batch_dense_kernels.hpp @@ -0,0 +1,83 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ +#define GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ + + +#include + + +#include +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(_type) \ + void simple_apply(std::shared_ptr exec, \ + const batch::matrix::Dense<_type>* a, \ + const batch::MultiVector<_type>* b, \ + batch::MultiVector<_type>* c) + +#define GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(_type) \ + void advanced_apply(std::shared_ptr exec, \ + const batch::MultiVector<_type>* alpha, \ + const batch::matrix::Dense<_type>* a, \ + const batch::MultiVector<_type>* b, \ + const batch::MultiVector<_type>* beta, \ + batch::MultiVector<_type>* c) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(ValueType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_dense, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp new file mode 100644 index 00000000000..0bbfde40cc9 --- /dev/null +++ b/core/matrix/batch_struct.hpp @@ -0,0 +1,124 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { +namespace dense { + + +/** + * Encapsulates one matrix from a batch of dense matrices. + */ +template +struct batch_item { + using value_type = ValueType; + value_type* values; + int32 stride; + int32 num_rows; + int32 num_cols; +}; + + +/** + * A 'simple' structure to store a global uniform batch of dense matrices. 
+ */ +template +struct uniform_batch { + using value_type = ValueType; + using entry_type = batch_item; + + ValueType* values; + size_type num_batch_items; + int32 stride; + int32 num_rows; + int32 num_cols; + + size_type get_entry_storage() const + { + return num_rows * stride * sizeof(value_type); + } +}; + + +} // namespace dense + + +template +GKO_ATTRIBUTES GKO_INLINE dense::batch_item to_const( + const dense::batch_item& b) +{ + return {b.values, b.stride, b.num_rows, b.num_cols}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE dense::uniform_batch to_const( + const dense::uniform_batch& ub) +{ + return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_cols}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( + const dense::uniform_batch& batch, const size_type batch_idx) +{ + return {batch.values + batch_idx * batch.stride * batch.num_rows, + batch.stride, batch.num_rows, batch.num_cols}; +} + +template +GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( + ValueType* const batch_values, const int32 stride, const int32 num_rows, + const int32 num_cols, const size_type batch_idx) +{ + return {batch_values + batch_idx * stride * num_rows, stride, num_rows, + num_cols}; +} + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ diff --git a/core/test/base/batch_dim.cpp b/core/test/base/batch_dim.cpp index 7914eb4d15e..e8722530fba 100644 --- a/core/test/base/batch_dim.cpp +++ b/core/test/base/batch_dim.cpp @@ -85,16 +85,6 @@ TEST(BatchDim, NotEqualWorks) } -TEST(BatchDim, CanGetCumulativeOffsets) -{ - auto d = gko::batch_dim<2>(3, gko::dim<2>(4, 2)); - - ASSERT_EQ(d.get_cumulative_offset(0), 0); - ASSERT_EQ(d.get_cumulative_offset(1), 8); - ASSERT_EQ(d.get_cumulative_offset(2), 16); -} - - TEST(BatchDim, TransposesBatchDimensions) { ASSERT_EQ(gko::transpose(gko::batch_dim<2>(2, gko::dim<2>{4, 2})), diff --git a/core/test/base/batch_multi_vector.cpp 
b/core/test/base/batch_multi_vector.cpp index 85168a406cc..8390a6c4327 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -188,11 +188,11 @@ TYPED_TEST(MultiVector, CanBeConstructedFromExistingData) using size_type = gko::size_type; // clang-format off value_type data[] = { - 1.0, 2.0, - -1.0,3.0, + 1.0, 2.0, + -1.0, 3.0, 4.0, -1.0, - 3.0, 5.0, - 1.0, 5.0, + 3.0, 5.0, + 1.0, 5.0, 6.0, -3.0}; // clang-format on @@ -218,11 +218,11 @@ TYPED_TEST(MultiVector, CanBeConstructedFromExistingConstData) using size_type = gko::size_type; // clang-format off value_type data[] = { - 1.0, 2.0, - -1.0,3.0, + 1.0, 2.0, + -1.0, 3.0, 4.0, -1.0, - 3.0, 5.0, - 1.0, 5.0, + 3.0, 5.0, + 1.0, 5.0, 6.0, -3.0}; // clang-format on @@ -252,7 +252,7 @@ TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::multivector::create_from_dense( + auto m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); @@ -269,10 +269,12 @@ TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatricesByDuplication) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::batch::multivector::create_from_dense( - this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); - auto m = - gko::batch::multivector::create_from_dense(this->exec, 3, mat1.get()); + auto bat_m = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = gko::batch::create_from_item>( + this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } @@ -287,14 +289,16 @@ TYPED_TEST(MultiVector, CanBeConstructedByDuplicatingMultiVectors) this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::multivector::create_from_dense( + auto m = 
gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::batch::multivector::create_from_dense( - this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), - mat2.get(), mat1.get(), mat2.get()}); + auto m_ref = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); - auto m2 = - gko::batch::multivector::duplicate(this->exec, 3, m.get()); + auto m2 = gko::batch::duplicate>( + this->exec, 3, m.get()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } @@ -385,7 +389,8 @@ TYPED_TEST(MultiVector, CanBeUnbatchedIntoDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto dense_mats = gko::batch::multivector::unbatch(this->mtx.get()); + auto dense_mats = gko::batch::unbatch>( + this->mtx.get()); ASSERT_EQ(dense_mats.size(), 2); GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); @@ -404,11 +409,12 @@ TYPED_TEST(MultiVector, CanBeReadFromMatrixData) vec_data.emplace_back(gko::matrix_data( {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); - auto m = gko::batch::multivector::read(this->exec, + auto m = gko::batch::read>(this->exec, vec_data); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); @@ -429,7 +435,8 @@ TYPED_TEST(MultiVector, CanBeReadFromSparseMatrixData) vec_data.emplace_back(gko::matrix_data( {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); - auto m = gko::batch::multivector::read(this->exec, + auto m = gko::batch::read>(this->exec, vec_data); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); @@ -451,7 +458,8 @@ TYPED_TEST(MultiVector, GeneratesCorrectMatrixData) using tpl = typename gko::matrix_data::nonzero_type; auto data = - 
gko::batch::multivector::write(this->mtx.get()); + gko::batch::write>(this->mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); ASSERT_EQ(data[0].nonzeros.size(), 6); diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 433361a054f..cca4b8da1c0 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_test(batch_dense) ginkgo_create_test(coo) ginkgo_create_test(coo_builder) ginkgo_create_test(csr) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp new file mode 100644 index 00000000000..8e64c913a6a --- /dev/null +++ b/core/test/matrix/batch_dense.cpp @@ -0,0 +1,480 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/batch_helpers.hpp" + + +template +class Dense : public ::testing::Test { +protected: + using value_type = T; + using DenseMtx = gko::matrix::Dense; + using size_type = gko::size_type; + Dense() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::batch::initialize>( + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), + mvec(gko::batch::initialize>( + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), + dense_mtx(gko::initialize>( + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) + {} + + + static void assert_equal_to_original_mtx( + gko::batch::matrix::Dense* m) + { + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3)); + EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{-1.5}); + EXPECT_EQ(m->at(0, 1, 1), value_type{2.5}); + ASSERT_EQ(m->at(0, 1, 2), value_type{3.5}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.5}); + EXPECT_EQ(m->at(1, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(1, 1, 
0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{2.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + } + + static void assert_empty(gko::batch::matrix::Dense* m) + { + ASSERT_EQ(m->get_num_batch_items(), 0); + ASSERT_EQ(m->get_num_stored_elements(), 0); + } + + std::shared_ptr exec; + std::unique_ptr> mtx; + std::unique_ptr> mvec; + std::unique_ptr> dense_mtx; +}; + +TYPED_TEST_SUITE(Dense, gko::test::ValueTypes); + + +TYPED_TEST(Dense, KnowsItsSizeAndValues) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(Dense, CanBeEmpty) +{ + auto empty = gko::batch::matrix::Dense::create(this->exec); + this->assert_empty(empty.get()); +} + + +TYPED_TEST(Dense, ReturnsNullValuesArrayWhenEmpty) +{ + auto empty = gko::batch::matrix::Dense::create(this->exec); + ASSERT_EQ(empty->get_const_values(), nullptr); +} + + +TYPED_TEST(Dense, CanGetValuesForEntry) +{ + using value_type = typename TestFixture::value_type; + + ASSERT_EQ(this->mtx->get_values_for_item(1)[0], value_type{1.0}); +} + + +TYPED_TEST(Dense, CanCreateDenseItemView) +{ + GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->dense_mtx, + 0.0); +} + + +TYPED_TEST(Dense, CanBeCopied) +{ + auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); + + mtx_copy->copy_from(this->mtx.get()); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->at(0, 0, 0) = 7; + this->mtx->at(0, 1) = 7; + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(Dense, CanBeMoved) +{ + auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); + + this->mtx->move_to(mtx_copy); + + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(Dense, CanBeCloned) +{ + auto mtx_clone = this->mtx->clone(); + + this->assert_equal_to_original_mtx( + dynamic_castmtx.get())>(mtx_clone.get())); +} + + +TYPED_TEST(Dense, CanBeCleared) +{ + this->mtx->clear(); + + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(Dense, CanBeConstructedWithSize) +{ 
+ using size_type = gko::size_type; + + auto m = gko::batch::matrix::Dense::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3})); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 30); +} + + +TYPED_TEST(Dense, CanBeConstructedFromExistingData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + value_type data[] = { + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; + // clang-format on + + auto m = gko::batch::matrix::Dense::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::view(this->exec, 8, data)); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); +} + + +TYPED_TEST(Dense, CanBeConstructedFromExistingConstData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + const value_type data[] = { + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; + // clang-format on + + auto m = gko::batch::matrix::Dense::create_const( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::const_view(this->exec, 8, data)); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 
1), value_type{5.0}); +} + + +TYPED_TEST(Dense, CanBeConstructedFromDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto m = + gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(Dense, CanBeConstructedFromDenseMatricesByDuplication) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + auto bat_m = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat1.get(), mat1.get()}); + + auto m = + gko::batch::create_from_item>( + this->exec, 3, mat1.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 0); +} + + +TYPED_TEST(Dense, CanBeConstructedByDuplicatingDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + auto m = + gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); + + auto m2 = gko::batch::duplicate>( + this->exec, 3, m.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 0); +} + + +TYPED_TEST(Dense, CanBeUnbatchedIntoDenseMatrices) +{ + using 
value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto dense_mats = + gko::batch::unbatch>( + this->mtx.get()); + + GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); + GKO_ASSERT_MTX_NEAR(dense_mats[1].get(), mat2.get(), 0.); +} + + +TYPED_TEST(Dense, CanBeListConstructed) +{ + using value_type = typename TestFixture::value_type; + + auto m = gko::batch::initialize>( + {{1.0, 2.0}, {1.0, 3.0}}, this->exec); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); + EXPECT_EQ(m->at(0, 0), value_type{1}); + EXPECT_EQ(m->at(0, 1), value_type{2}); + EXPECT_EQ(m->at(1, 0), value_type{1}); + EXPECT_EQ(m->at(1, 1), value_type{3}); +} + + +TYPED_TEST(Dense, CanBeListConstructedByCopies) +{ + using value_type = typename TestFixture::value_type; + + auto m = gko::batch::initialize>( + 2, I({1.0, 2.0}), this->exec); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.0}); +} + + +TYPED_TEST(Dense, CanBeDoubleListConstructed) +{ + using value_type = typename TestFixture::value_type; + using T = value_type; + + auto m = gko::batch::initialize>( + {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, + {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, + this->exec); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3)); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{1.0}); + EXPECT_EQ(m->at(0, 2), value_type{0.0}); + EXPECT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 4), value_type{4.0}); + 
EXPECT_EQ(m->at(0, 5), value_type{3.0}); + EXPECT_EQ(m->at(0, 6), value_type{3.0}); + EXPECT_EQ(m->at(0, 7), value_type{6.0}); + EXPECT_EQ(m->at(0, 8), value_type{1.0}); + EXPECT_EQ(m->at(1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 2), value_type{-1.0}); + EXPECT_EQ(m->at(1, 3), value_type{3.0}); + EXPECT_EQ(m->at(1, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 5), value_type{-2.0}); + EXPECT_EQ(m->at(1, 6), value_type{5.0}); + EXPECT_EQ(m->at(1, 7), value_type{6.0}); + EXPECT_EQ(m->at(1, 8), value_type{-3.0}); +} + + +TYPED_TEST(Dense, CanBeReadFromMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = int; + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); + + auto m = gko::batch::read>(this->exec, + vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); +} + + +TYPED_TEST(Dense, CanBeReadFromSparseMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = int; + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); + + auto m = gko::batch::read>(this->exec, + vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + 
EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); +} + + +TYPED_TEST(Dense, GeneratesCorrectMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = int; + using tpl = typename gko::matrix_data::nonzero_type; + + auto data = gko::batch::write>( + this->mtx.get()); + + ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[0].nonzeros.size(), 6); + EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0})); + EXPECT_EQ(data[0].nonzeros[1], tpl(0, 1, value_type{2.0})); + EXPECT_EQ(data[0].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[0].nonzeros[3], tpl(1, 0, value_type{-1.5})); + EXPECT_EQ(data[0].nonzeros[4], tpl(1, 1, value_type{2.5})); + EXPECT_EQ(data[0].nonzeros[5], tpl(1, 2, value_type{3.5})); + ASSERT_EQ(data[1].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[1].nonzeros.size(), 6); + EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[1], tpl(0, 1, value_type{2.5})); + EXPECT_EQ(data[1].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[1].nonzeros[3], tpl(1, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); + EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); +} diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index d723d5a8964..40034883078 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -720,8 +720,8 @@ ::testing::AssertionResult batch_matrices_near( using value_type1 = typename Mat1::value_type; using value_type2 = typename Mat2::value_type; - auto first_data = gko::batch::multivector::write(first); - auto second_data = gko::batch::multivector::write(second); + auto first_data = gko::batch::write(first); + auto second_data = 
gko::batch::write(second); if (first_data.size() != second_data.size()) { return ::testing::AssertionFailure() diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 4c972d2a584..dfa1b2177ee 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -38,6 +38,7 @@ target_sources(ginkgo_cuda factorization/par_ilut_select_kernel.cu factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu + matrix/batch_dense_kernels.cu matrix/coo_kernels.cu ${CSR_INSTANTIATE} matrix/dense_kernels.cu diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index 7729d006b75..5c4d1f5bdc5 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -78,6 +78,7 @@ constexpr int sm_oversubscription = 4; // clang-format on + } // namespace batch_multi_vector } // namespace cuda } // namespace kernels diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 715332418fb..14b300c9204 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -66,9 +66,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct(const batch::MultiVector* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } /** @@ -79,9 +79,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct(batch::MultiVector* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/cuda/matrix/batch_dense_kernels.cu 
b/cuda/matrix/batch_dense_kernels.cu new file mode 100644 index 00000000000..dd82e15b8cc --- /dev/null +++ b/cuda/matrix/batch_dense_kernels.cu @@ -0,0 +1,84 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/thrust.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The Dense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" + + +// clang-format on + + +} // namespace batch_dense +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp new file mode 100644 index 00000000000..73712a7b81b --- /dev/null +++ b/cuda/matrix/batch_struct.hpp @@ -0,0 +1,95 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ + + +#include "core/matrix/batch_struct.hpp" + + +#include + + +#include "core/base/batch_struct.hpp" +#include "cuda/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the required CUDA scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices. 
+ */ +template +inline batch::matrix::dense::uniform_batch> +get_batch_struct(const batch::matrix::Dense* const op) +{ + return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of dense matrices. + */ +template +inline batch::matrix::dense::uniform_batch> +get_batch_struct(batch::matrix::Dense* const op) +{ + return {as_cuda_type(op->get_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko + + +#endif // GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index dd0d7c4cdfb..4099bb603a3 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -35,6 +35,7 @@ target_sources(ginkgo_dpcpp factorization/par_ilut_select_kernel.dp.cpp factorization/par_ilut_spgeam_kernel.dp.cpp factorization/par_ilut_sweep_kernel.dp.cpp + matrix/batch_dense_kernels.dp.cpp matrix/coo_kernels.dp.cpp matrix/csr_kernels.dp.cpp matrix/fbcsr_kernels.dp.cpp diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 10e47ba080e..e0bc15fdc61 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -37,11 +37,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include -#include +#include "core/base/batch_struct.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "dpcpp/base/batch_struct.hpp" #include "dpcpp/base/config.hpp" diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 9c752a94b4f..dc8301ecb2e 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -65,9 +65,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( const batch::MultiVector* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -79,9 +79,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( batch::MultiVector* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp new file mode 100644 index 00000000000..a6fba2df8e3 --- /dev/null +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -0,0 +1,176 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/components/prefix_sum_kernels.hpp" +#include "core/matrix/batch_struct.hpp" +#include "dpcpp/base/batch_struct.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/intrinsics.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Dense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +#include "dpcpp/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Dense* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const size_type num_rows = mat->get_common_size()[0]; + const size_type num_cols = mat->get_common_size()[1]; + + const auto num_batch_items = mat->get_num_batch_items(); + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + const auto x_ub = get_batch_struct(x); + const auto b_ub = get_batch_struct(b); + const auto mat_ub = get_batch_struct(mat); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + // Launch a kernel that has nbatches blocks, each block has max group size + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + 
batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Dense* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto mat_ub = get_batch_struct(mat); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + const auto num_batch_items = mat_ub.num_batch_items; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + + // Launch a kernel that has nbatches blocks, each block has max group size + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace dpcpp +} // namespace kernels +} // 
namespace gko diff --git a/dpcpp/matrix/batch_dense_kernels.hpp.inc b/dpcpp/matrix/batch_dense_kernels.hpp.inc new file mode 100644 index 00000000000..88ef5f54764 --- /dev/null +++ b/dpcpp/matrix/batch_dense_kernels.hpp.inc @@ -0,0 +1,98 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +template +__dpct_inline__ void simple_apply_kernel( + const gko::batch::matrix::dense::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + constexpr auto tile_size = config::warp_size; + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto subgroup = static_cast(subg); + const int subgroup_id = subgroup.get_group_id(); + const int subgroup_size = subgroup.get_local_range().size(); + const int num_subgroups = subgroup.get_group_range().size(); + + for (int row = subgroup_id; row < mat.num_rows; row += num_subgroups) { + ValueType temp = zero(); + for (int j = subgroup.get_local_id(); j < mat.num_cols; + j += subgroup_size) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += val * b.values[j]; + } + + temp = ::gko::kernels::dpcpp::reduce( + subg, temp, [](ValueType a, ValueType b) { return a + b; }); + + if (subgroup.get_local_id() == 0) { + x.values[row] = temp; + } + } +} + + +template +__dpct_inline__ void advanced_apply_kernel( + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::matrix::dense::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + constexpr auto tile_size = config::warp_size; + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto subgroup = static_cast(subg); + const int subgroup_id = subgroup.get_group_id(); + const int subgroup_size = subgroup.get_local_range().size(); + const int num_subgroup = subgroup.get_group_range().size(); + + for (int row = subgroup_id; row < mat.num_rows; row += num_subgroup) { + ValueType temp = zero(); + for (int j = subgroup.get_local_id(); j < mat.num_cols; + j += subgroup_size) { + const ValueType val = 
mat.values[row * mat.stride + j]; + temp += alpha.values[0] * val * b.values[j]; + } + + temp = ::gko::kernels::dpcpp::reduce( + subg, temp, [](ValueType a, ValueType b) { return a + b; }); + + if (subgroup.get_local_id() == 0) { + x.values[row] = temp + beta.values[0] * x.values[row]; + } + } +} diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp new file mode 100644 index 00000000000..b0393daf55d --- /dev/null +++ b/dpcpp/matrix/batch_struct.hpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ + + +#include "core/matrix/batch_struct.hpp" + + +#include + + +#include "core/base/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the required DPCPP scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices. + */ +template +inline batch::matrix::dense::uniform_batch get_batch_struct( + const batch::matrix::Dense* const op) +{ + return {op->get_const_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of dense matrices. 
+ */ +template +inline batch::matrix::dense::uniform_batch get_batch_struct( + batch::matrix::Dense* const op) +{ + return {op->get_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 779db13d36a..21b573b6cd0 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -35,6 +35,7 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_select_kernel.hip.cpp factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp + matrix/batch_dense_kernels.hip.cpp matrix/coo_kernels.hip.cpp ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index 442260e50e6..5747e202fb7 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -66,9 +66,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct(const batch::MultiVector* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } /** @@ -79,9 +79,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct( batch::MultiVector* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp new file 
mode 100644 index 00000000000..eb3da83760a --- /dev/null +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "hip/base/batch_struct.hip.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/thrust.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The Dense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" + + +// clang-format on + + +} // namespace batch_dense +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp new file mode 100644 index 00000000000..4670cf0988b --- /dev/null +++ b/hip/matrix/batch_struct.hip.hpp @@ -0,0 +1,95 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ +#define GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ + + +#include "core/matrix/batch_struct.hpp" + + +#include + + +#include "core/base/batch_struct.hpp" +#include "hip/base/types.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the required HIP scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices. 
+ */ +template +inline batch::matrix::dense::uniform_batch> +get_batch_struct(const batch::matrix::Dense* const op) +{ + return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of dense matrices. + */ +template +inline batch::matrix::dense::uniform_batch> +get_batch_struct(batch::matrix::Dense* const op) +{ + return {as_hip_type(op->get_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace hip +} // namespace kernels +} // namespace gko + + +#endif // GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ diff --git a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp index 3bda352fb9d..e0ade2c872f 100644 --- a/include/ginkgo/core/base/batch_dim.hpp +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -74,18 +74,6 @@ struct batch_dim { return common_size_; } - /** - * Get the cumulative storage size offset - * - * @param batch_id the batch id - * - * @return the cumulative offset - */ - size_type get_cumulative_offset(size_type batch_id) const - { - return batch_id * common_size_[0] * common_size_[1]; - } - /** * Checks if two batch_dim objects are equal. * diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index 78ce4f4a942..a0efb2ea324 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include #include @@ -110,6 +111,45 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { */ const batch_dim<2>& get_size() const noexcept { return size_; } + /** + * Validates the sizes for the apply(b,x) operation in the + * concrete BatchLinOp. + * + */ + template + void validate_application_parameters(const MultiVector* b, + MultiVector* x) const + { + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + } + + /** + * Validates the sizes for the apply(alpha, b , beta, x) operation in the + * concrete BatchLinOp. + * + */ + template + void validate_application_parameters(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const + { + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), + gko::dim<2>(1, 1)); + GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); + } + protected: /** * Sets the size of the batch operator. diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index d91274526d3..61dffba3193 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -52,6 +52,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { namespace batch { +namespace matrix { + + +template +class Dense; + + +} + /** * MultiVector stores multiple vectors in a batched fashion and is useful @@ -81,21 +90,25 @@ class MultiVector : public EnablePolymorphicObject>, public EnablePolymorphicAssignment>, public EnableCreateMethod>, - public ConvertibleTo>> { + public ConvertibleTo>>, + public ConvertibleTo> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class MultiVector>; friend class MultiVector>; + friend class matrix::Dense; public: using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; using ConvertibleTo>>::convert_to; using ConvertibleTo>>::move_to; + using ConvertibleTo>::convert_to; + using ConvertibleTo>::move_to; using value_type = ValueType; using index_type = int32; - using unbatch_type = matrix::Dense; + using unbatch_type = gko::matrix::Dense; using absolute_type = remove_complex>; using complex_type = to_complex>; @@ -113,6 +126,10 @@ class MultiVector void move_to(MultiVector>* result) override; + void convert_to(matrix::Dense* result) const override; + + void move_to(matrix::Dense* result) override; + /** * Creates a mutable view (of matrix::Dense type) of one item of the Batch * MultiVector object. 
Does not perform any deep copies, but only returns a @@ -185,8 +202,7 @@ class MultiVector value_type* get_values_for_item(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_data() + this->get_cumulative_offset(batch_id); } /** @@ -200,8 +216,7 @@ class MultiVector size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_const_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_const_data() + this->get_cumulative_offset(batch_id); } /** @@ -216,6 +231,19 @@ class MultiVector return values_.get_num_elems(); } + /** + * Get the cumulative storage size offset + * + * @param batch_id the batch id + * + * @return the cumulative offset + */ + size_type get_cumulative_offset(size_type batch_id) const + { + return batch_id * this->get_common_size()[0] * + this->get_common_size()[1]; + } + /** * Returns a single element for a particular batch item. * @@ -358,7 +386,8 @@ class MultiVector private: inline size_type compute_num_elems(const batch_dim<2>& size) { - return size.get_cumulative_offset(size.get_num_batch_items()); + return size.get_num_batch_items() * size.get_common_size()[0] * + size.get_common_size()[1]; } protected: @@ -417,7 +446,7 @@ class MultiVector size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept { - return batch_size_.get_cumulative_offset(batch) + + return this->get_cumulative_offset(batch) + row * batch_size_.get_common_size()[1] + col; } diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp new file mode 100644 index 00000000000..7f3ce5890e4 --- /dev/null +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -0,0 +1,375 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { + + +/** + * Dense is a batch matrix format which explicitly stores all values of the + * matrix in each of the batches. 
+ * + * The values in each of the batches are stored in row-major format (values + * belonging to the same row appear consecutive in the memory and the values of + * each batch item are also stored consecutively in memory). + * + * @note Though the storage layout is the same as the multi-vector object, the + * class semantics and the operations it aims to provide are different. Hence it + * is recommended to create multi-vector objects if the user means to view the + * data as a set of vectors. + * + * @tparam ValueType precision of matrix elements + * + * @ingroup batch_dense + * @ingroup mat_formats + * @ingroup BatchLinOp + */ +template +class Dense final : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class Dense>; + friend class Dense>; + +public: + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; + + using value_type = ValueType; + using index_type = int32; + using transposed_type = Dense; + using unbatch_type = gko::matrix::Dense; + using absolute_type = remove_complex; + using complex_type = to_complex; + + /** + * Creates a Dense matrix with the configuration of another Dense + * matrix. + * + * @param other The other matrix whose configuration needs to copied. + */ + static std::unique_ptr create_with_config_of( + ptr_param other); + + void convert_to(Dense>* result) const override; + + void move_to(Dense>* result) override; + + /** + * Creates a mutable view (of gko::matrix::Dense type) of one item of the + * batch::matrix::Dense object. Does not perform any deep + * copies, but only returns a view of the data. + * + * @param item_id The index of the batch item + * + * @return a gko::matrix::Dense object with the data from the batch item + * at the given index. 
+ */ + std::unique_ptr create_view_for_item(size_type item_id); + + /** + * @copydoc create_view_for_item(size_type) + */ + std::unique_ptr create_const_view_for_item( + size_type item_id) const; + + /** + * Get the cumulative storage size offset + * + * @param batch_id the batch id + * + * @return the cumulative offset + */ + size_type get_cumulative_offset(size_type batch_id) const + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return batch_id * this->get_common_size()[0] * + this->get_common_size()[1]; + } + + /** + * Returns a pointer to the array of values of the multi-vector + * + * @return the pointer to the array of values + */ + value_type* get_values() noexcept { return values_.get_data(); } + + /** + * @copydoc get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * Returns a single element for a particular batch item. + * + * @param batch_id the batch item index to be queried + * @param row the row of the requested element + * @param col the column of the requested element + * + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. 
trying to call this method on a GPU Dense object + * from the OMP may result in incorrect behaviour) + */ + value_type& at(size_type batch_id, size_type row, size_type col) + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data()[linearize_index(batch_id, row, col)]; + } + + /** + * @copydoc Dense::at(size_type, size_type, size_type) + */ + value_type at(size_type batch_id, size_type row, size_type col) const + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data()[linearize_index(batch_id, row, col)]; + } + + /** + * Returns a single element for a particular batch item. + * + * Useful for iterating across all elements of the matrix. + * However, it is less efficient than the two-parameter variant of this + * method. + * + * @param batch_id the batch item index to be queried + * @param idx a linear index of the requested element + * + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. trying to call this method on a GPU Dense object + * from the OMP may result in incorrect behaviour) + */ + ValueType& at(size_type batch_id, size_type idx) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data()[linearize_index(batch_id, idx)]; + } + + /** + * @copydoc Dense::at(size_type, size_type, size_type) + */ + ValueType at(size_type batch_id, size_type idx) const noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data()[linearize_index(batch_id, idx)]; + } + + /** + * Returns a pointer to the array of values of the matrix for a + * specific batch item. + * + * @param batch_id the id of the batch item. 
+ * + * @return the pointer to the array of values + */ + value_type* get_values_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data() + this->get_cumulative_offset(batch_id); + } + + /** + * @copydoc get_values_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data() + this->get_cumulative_offset(batch_id); + } + + /** + * Returns the number of elements explicitly stored in the batch matrix, + * cumulative across all the batch items. + * + * @return the number of elements explicitly stored in the vector, + * cumulative across all the batch items + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_num_elems(); + } + + /** + * Creates a constant (immutable) batch dense matrix from a constant + * array. + * + * @param exec the executor to create the matrix on + * @param size the dimensions of the matrix + * @param values the value array of the matrix + * + * @return A smart pointer to the constant matrix wrapping the input + * array (if it resides on the same executor as the matrix) or a copy of the + * array on the correct executor. + */ + static std::unique_ptr> create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values); + + /** + * Apply the matrix to a multi-vector. Represents the matrix vector + * multiplication, x = A * b, where x and b are both multi-vectors. 
+ * + * @param b the multi-vector to be applied to + * @param x the output multi-vector + */ + void apply(const MultiVector* b, + MultiVector* x) const + { + this->apply_impl(b, x); + } + + /** + * Apply the matrix to a multi-vector with a linear combination of the given + * input vector. Represents the matrix vector multiplication, x = alpha * A + * * b + beta * x, where x and b are both multi-vectors. + * + * @param alpha the scalar to scale the matrix-vector product with + * @param b the multi-vector to be applied to + * @param beta the scalar to scale the x vector with + * @param x the output multi-vector + */ + void apply(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const + { + this->apply_impl(alpha, b, beta, x); + } + +private: + inline size_type compute_num_elems(const batch_dim<2>& size) + { + return size.get_num_batch_items() * size.get_common_size()[0] * + size.get_common_size()[1]; + } + +protected: + /** + * Creates an uninitialized Dense matrix of the specified size. + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + */ + Dense(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}); + + /** + * Creates a Dense matrix from an already allocated (and initialized) + * array. + * + * @tparam ValuesArray type of array of values + * + * @param exec Executor associated to the matrix + * @param size sizes of the batch matrices in a batch_dim object + * @param values array of matrix values + * + * @note If `values` is not an rvalue, not an array of ValueType, or is on + * the wrong executor, an internal copy will be created, and the + * original array data will not be used in the matrix. 
+ */ + template + Dense(std::shared_ptr exec, const batch_dim<2>& size, + ValuesArray&& values) + : EnableBatchLinOp(exec, size), + values_{exec, std::forward(values)} + { + // Ensure that the values array has the correct size + auto num_elems = compute_num_elems(size); + GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); + } + + void apply_impl(const MultiVector* b, + MultiVector* x) const; + + void apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const; + + size_type linearize_index(size_type batch, size_type row, + size_type col) const noexcept + { + return this->get_cumulative_offset(batch) + + row * this->get_size().get_common_size()[1] + col; + } + + size_type linearize_index(size_type batch, size_type idx) const noexcept + { + return linearize_index(batch, idx / this->get_common_size()[1], + idx % this->get_common_size()[1]); + } + +private: + array values_; +}; + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index aed3b5f3572..8bb29242e88 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -108,6 +108,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include #include #include diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 47259feeac0..d87399492f5 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -23,6 +23,7 @@ target_sources(ginkgo_omp factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_dense_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..2d0b7ed4d40 --- /dev/null +++ b/omp/matrix/batch_dense_kernels.cpp @@ -0,0 +1,117 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +/** + * @brief The Dense matrix format namespace. + * @ref Dense + * @ingroup batch_dense + */ +namespace batch_dense { + + +#include "reference/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Dense* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Dense* mat, + const batch::MultiVector* b, + const 
batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index dd54e3fb52f..37498588ca7 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -25,6 +25,7 @@ target_sources(ginkgo_reference factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_dense_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index ce7c7af5605..0a3dbf37493 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -67,9 +67,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( const batch::MultiVector* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + 
static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -81,9 +81,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( batch::MultiVector* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..3d7ef03a3bd --- /dev/null +++ b/reference/matrix/batch_dense_kernels.cpp @@ -0,0 +1,116 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The Dense matrix format namespace. + * @ref Dense + * @ingroup batch_dense + */ +namespace batch_dense { + + +#include "reference/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Dense* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Dense* mat, + const batch::MultiVector* b, + const batch::MultiVector* 
beta, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc new file mode 100644 index 00000000000..17144267af1 --- /dev/null +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -0,0 +1,88 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+template <typename ValueType>
+inline void simple_apply_kernel(
+    const gko::batch::matrix::dense::batch_item<const ValueType>& a,
+    const gko::batch::multi_vector::batch_item<const ValueType>& b,
+    const gko::batch::multi_vector::batch_item<ValueType>& c)
+{
+    for (int row = 0; row < c.num_rows; ++row) {
+        for (int col = 0; col < c.num_rhs; ++col) {
+            c.values[row * c.stride + col] = gko::zero<ValueType>();
+        }
+    }
+
+    for (int row = 0; row < c.num_rows; ++row) {
+        for (int inner = 0; inner < a.num_cols; ++inner) {
+            for (int col = 0; col < c.num_rhs; ++col) {
+                c.values[row * c.stride + col] +=
+                    a.values[row * a.stride + inner] *
+                    b.values[inner * b.stride + col];
+            }
+        }
+    }
+}
+
+
+template <typename ValueType>
+inline void advanced_apply_kernel(
+    const ValueType alpha,
+    const gko::batch::matrix::dense::batch_item<const ValueType>& a,
+    const gko::batch::multi_vector::batch_item<const ValueType>& b,
+    const ValueType beta,
+    const gko::batch::multi_vector::batch_item<ValueType>& c)
+{
+    if (beta != gko::zero<ValueType>()) {
+        for (int row = 0; row < c.num_rows; ++row) {
+            for (int col = 0; col < c.num_rhs; ++col) {
+                c.values[row * c.stride + col] *= beta;
+            }
+        }
+    } else {
+        for (int row = 0; row < c.num_rows; ++row) {
+            for (int col = 0; col < c.num_rhs; ++col) {
+                c.values[row * c.stride + col] = gko::zero<ValueType>();
+            }
+        }
+    }
+
+    for (int row = 0; row < c.num_rows; ++row) {
+        for (int inner = 0; inner < a.num_cols; ++inner) {
+            for (int col = 0; col < c.num_rhs; ++col) {
+                c.values[row * c.stride + col] +=
+                    alpha * a.values[row * a.stride + inner] *
+                    b.values[inner * b.stride + col];
+            }
+        }
+    }
+}
diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp
new file mode 100644
index 00000000000..483d7717718
--- /dev/null
+++ b/reference/matrix/batch_struct.hpp
@@ -0,0 +1,98 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ + + +#include "core/matrix/batch_struct.hpp" + + +#include +#include + + +#include "core/base/batch_struct.hpp" + + +namespace gko { +namespace kernels { +/** + * @brief A namespace for shared functionality between omp and reference + * executors. + */ +namespace host { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices. + */ +template +inline batch::matrix::dense::uniform_batch get_batch_struct( + const batch::matrix::Dense* const op) +{ + return {op->get_const_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of dense matrices. 
+ */ +template +inline batch::matrix::dense::uniform_batch get_batch_struct( + batch::matrix::Dense* const op) +{ + return {op->get_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace host +} // namespace kernels +} // namespace gko + + +#endif // GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index 4f922c37703..a49168dc24e 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -137,13 +137,14 @@ TYPED_TEST(MultiVector, ScalesData) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{3.0, -1.0, 0.25}}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_0->scale(alpha.get()); + this->mtx_00->scale(ualpha[0].get()); this->mtx_01->scale(ualpha[1].get()); - - auto res = gko::batch::multivector::unbatch(this->mtx_0.get()); + auto res = + gko::batch::unbatch>(this->mtx_0.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_01.get(), 0.); } @@ -154,13 +155,14 @@ TYPED_TEST(MultiVector, ScalesDataWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->scale(alpha.get()); + this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); 
GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -172,13 +174,14 @@ TYPED_TEST(MultiVector, ScalesDataWithMultipleScalars) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, -1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->scale(alpha.get()); this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -190,13 +193,14 @@ TYPED_TEST(MultiVector, AddsScaled) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -207,13 +211,14 @@ TYPED_TEST(MultiVector, AddsScaledWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - - 
auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -236,13 +241,13 @@ TYPED_TEST(MultiVector, ComputesDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = gko::batch::multivector::unbatch(result.get()); + auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_dot(this->mtx_1.get(), result.get()); + this->mtx_00->compute_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_dot(this->mtx_11.get(), ures[1].get()); - - auto res = gko::batch::multivector::unbatch(result.get()); + auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); } @@ -251,6 +256,7 @@ TYPED_TEST(MultiVector, ComputesDot) TYPED_TEST(MultiVector, ComputeDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; + auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); @@ -277,13 +283,13 @@ TYPED_TEST(MultiVector, ComputesConjDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = gko::batch::multivector::unbatch(result.get()); + auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()); + this->mtx_00->compute_conj_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_conj_dot(this->mtx_11.get(), ures[1].get()); - - auto res = gko::batch::multivector::unbatch(result.get()); + auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); } @@ -292,6 +298,7 @@ TYPED_TEST(MultiVector, ComputesConjDot) TYPED_TEST(MultiVector, 
ComputeConjDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; + auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); @@ -359,8 +366,9 @@ TYPED_TEST(MultiVector, ConvertsToPrecision) this->mtx_1->convert_to(tmp.get()); tmp->convert_to(res.get()); - auto ures = gko::batch::multivector::unbatch(res.get()); - auto umtx = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto ures = gko::batch::unbatch>(res.get()); + auto umtx = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); } @@ -382,8 +390,9 @@ TYPED_TEST(MultiVector, MovesToPrecision) this->mtx_1->move_to(tmp.get()); tmp->move_to(res.get()); - auto ures = gko::batch::multivector::unbatch(res.get()); - auto umtx = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto ures = gko::batch::unbatch>(res.get()); + auto umtx = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); } diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 9670a5df80c..18634de662d 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_test(batch_dense_kernels) ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..6a23374f7cb --- /dev/null +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -0,0 +1,218 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_dense_kernels.hpp" +#include "core/test/utils.hpp" + + +template +class Dense : public ::testing::Test { +protected: + using value_type = T; + using size_type = gko::size_type; + using BMtx = gko::batch::matrix::Dense; + using BMVec = gko::batch::MultiVector; + using DenseMtx = gko::matrix::Dense; + Dense() + : exec(gko::ReferenceExecutor::create()), + mtx_0(gko::batch::initialize( + {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, + {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, + exec)), + mtx_00(gko::initialize( + {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), + mtx_01(gko::initialize( + {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), + b_0(gko::batch::initialize( + {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}}, + exec)), + b_00(gko::initialize( + {I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + b_01(gko::initialize( + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + x_0(gko::batch::initialize( + {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, + exec)), + x_00(gko::initialize( + {I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, exec)), + x_01(gko::initialize( + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}, exec)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx_0; + std::unique_ptr mtx_00; + std::unique_ptr mtx_01; + std::unique_ptr b_0; + std::unique_ptr b_00; + std::unique_ptr b_01; + std::unique_ptr x_0; + std::unique_ptr x_00; + std::unique_ptr x_01; + + std::default_random_engine rand_engine; +}; + + +TYPED_TEST_SUITE(Dense, gko::test::ValueTypes); + + +TYPED_TEST(Dense, AppliesToBatchMultiVector) +{ + using T = typename TestFixture::value_type; + + 
this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); + this->mtx_01->apply(this->b_01.get(), this->x_01.get()); + auto res = gko::batch::unbatch>(this->x_0.get()); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) +{ + using BMtx = typename TestFixture::BMtx; + using BMVec = typename TestFixture::BMVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({-1.0}, this->exec); + auto beta0 = gko::initialize({2.5}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + auto res = gko::batch::unbatch>(this->x_0.get()); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultCols) +{ + using BMVec = typename TestFixture::BMVec; + + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultRows) +{ + using BMVec = typename TestFixture::BMVec; + + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, 
ApplyFailsOnWrongInnerDimension) +{ + using BMVec = typename TestFixture::BMVec; + + auto res = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + + ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, AdvancedApplyFailsOnWrongInnerDimension) +{ + using BMVec = typename TestFixture::BMVec; + auto res = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + auto alpha = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + auto beta = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, AdvancedApplyFailsOnWrongAlphaDimension) +{ + using BMVec = typename TestFixture::BMVec; + auto res = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + auto alpha = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + auto beta = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index a9cf267a3c8..9f3b17cd858 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_common_test(batch_dense_kernels) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..a243d51f3c1 --- /dev/null +++ b/test/matrix/batch_dense_kernels.cpp @@ -0,0 +1,128 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/assertions.hpp" +#include "core/test/utils/batch_helpers.hpp" +#include "test/utils/executor.hpp" + + +class Dense : public CommonTestFixture { +protected: + using BMtx = gko::batch::matrix::Dense; + using BMVec = gko::batch::MultiVector; + + Dense() : rand_engine(15) {} + + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) + { + return gko::test::generate_random_batch_matrix( + num_batch_items, num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(gko::size_type num_vecs = 1) + { + const int num_rows = 252; + const int num_cols = 32; + mat = gen_mtx(batch_size, num_rows, num_cols); + y = gen_mtx(batch_size, num_cols, num_vecs); + alpha = gen_mtx(batch_size, 1, 1); + beta = gen_mtx(batch_size, 1, 1); + dmat = gko::clone(exec, mat); + dy = gko::clone(exec, y); + dalpha = gko::clone(exec, alpha); + dbeta = gko::clone(exec, beta); + expected = BMVec::create( + ref, + gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); + expected->fill(gko::one()); + dresult = gko::clone(exec, expected); + } + + std::default_random_engine rand_engine; + + const size_t batch_size = 11; + std::unique_ptr mat; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dmat; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(Dense, SingleVectorApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + mat->apply(y.get(), expected.get()); + dmat->apply(dy.get(), dresult.get()); + + 
GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Dense, SingleVectorAdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + mat->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmat->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); +} diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index d442647a985..7e53ea8f165 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -219,6 +219,13 @@ int main() auto test = batch_multi_vector_type::create(exec); } + // core/base/batch_dense.hpp + { + using type1 = float; + using batch_dense_type = gko::batch::matrix::Dense; + auto test = batch_dense_type::create(exec); + } + // core/base/combination.hpp { using type1 = int;