Skip to content

Commit

Permalink
clean hmatrix linalg and fix std::complex operationd
Browse files Browse the repository at this point in the history
  • Loading branch information
PierreMarchand20 committed Jul 23, 2024
1 parent ee8dbd3 commit 1fe6650
Show file tree
Hide file tree
Showing 26 changed files with 529 additions and 2,229 deletions.
1,843 changes: 0 additions & 1,843 deletions include/htool/hmatrix/hmatrix.hpp

Large diffs are not rendered by default.

9 changes: 8 additions & 1 deletion include/htool/hmatrix/linalg/add_hmatrix_matrix_product.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@ void add_hmatrix_matrix_product(char transa, char transb, CoefficientPrecision a
} else {
Matrix<CoefficientPrecision> transposed_C(C.nb_cols(), C.nb_rows());
transpose(C, transposed_C);
sequential_add_hmatrix_matrix_product_row_major(transa, 'N', alpha, A, B.data(), beta, transposed_C.data(), transposed_C.nb_rows());
std::vector<CoefficientPrecision> buffer_B(transb == 'C' ? B.nb_cols() * B.nb_rows() : 0);

if (transb == 'C') {
std::copy(B.data(), B.data() + B.nb_cols() * B.nb_rows(), buffer_B.data());
conj_if_complex(buffer_B.data(), buffer_B.size());
}

sequential_add_hmatrix_matrix_product_row_major(transa, 'N', alpha, A, transb == 'C' ? buffer_B.data() : B.data(), beta, transposed_C.data(), transposed_C.nb_rows());
transpose(transposed_C, C);
}
}
Expand Down
186 changes: 104 additions & 82 deletions include/htool/hmatrix/linalg/add_hmatrix_matrix_product_row_major.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,50 @@
#define HTOOL_HMATRIX_LINALG_ADD_HMATRIX_MATRIX_PRODUCT_ROW_MAJOR_HPP

#include "../hmatrix.hpp"
#include "../lrmat/linalg/add_lrmat_matrix_product_row_major.hpp"

namespace htool {

template <typename CoefficientPrecision, typename CoordinatePrecision>
void add_hmatrix_matrix_product_row_major(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out, int mu) {
switch (A.get_storage_type()) {
case HMatrix<CoefficientPrecision, CoordinatePrecision>::StorageType::Dense:
if (A.get_symmetry() == 'N') {
add_matrix_matrix_product_row_major(transa, transb, alpha, *A.get_dense_data(), in, beta, out, mu);
} else if (A.get_symmetry() == 'S') {
add_symmetric_matrix_matrix_product_row_major('L', A.get_UPLO(), alpha, *A.get_dense_data(), in, beta, out, mu);
}
break;
case HMatrix<CoefficientPrecision, CoordinatePrecision>::StorageType::LowRank:
add_lrmat_matrix_product_row_major(transa, transb, alpha, *A.get_low_rank_data(), in, beta, out, mu);
break;
default:
sequential_add_hmatrix_matrix_product_row_major(transa, transb, alpha, A, in, beta, out, mu);
break;
}
}

template <typename CoefficientPrecision, typename CoordinatePrecision>
void add_hmatrix_matrix_product_row_major(char transa, char transb, std::complex<CoefficientPrecision> alpha, const HMatrix<std::complex<CoefficientPrecision>, CoordinatePrecision> &A, const std::complex<CoefficientPrecision> *in, std::complex<CoefficientPrecision> beta, std::complex<CoefficientPrecision> *out, int mu) {
switch (A.get_storage_type()) {
case HMatrix<std::complex<CoefficientPrecision>, CoordinatePrecision>::StorageType::Dense:
if (A.get_symmetry() == 'N') {
add_matrix_matrix_product_row_major(transa, transb, alpha, *A.get_dense_data(), in, beta, out, mu);
} else if (A.get_symmetry() == 'S') {
add_symmetric_matrix_matrix_product_row_major('L', A.get_UPLO(), alpha, *A.get_dense_data(), in, beta, out, mu);
} else {
add_hermitian_matrix_matrix_product_row_major('L', A.get_UPLO(), alpha, *A.get_dense_data(), in, beta, out, mu);
}
break;
case HMatrix<std::complex<CoefficientPrecision>, CoordinatePrecision>::StorageType::LowRank:
add_lrmat_matrix_product_row_major(transa, transb, alpha, *A.get_low_rank_data(), in, beta, out, mu);
break;
default:
sequential_add_hmatrix_matrix_product_row_major(transa, transb, alpha, A, in, beta, out, mu);
break;
}
}

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void sequential_add_hmatrix_matrix_product_row_major(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *B, CoefficientPrecision beta, CoefficientPrecision *C, int mu) {
auto &leaves = A.get_leaves();
Expand Down Expand Up @@ -129,88 +170,69 @@ void openmp_add_hmatrix_matrix_product_row_major(char transa, char transb, Coeff
}
}

template <typename CoefficientPrecision, typename CoordinatePrecision>
void add_hmatrix_matrix_product_row_major(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out, int mu) {
switch (A.get_storage_type()) {
case HMatrix<CoefficientPrecision, CoordinatePrecision>::StorageType::Dense:
if (A.get_symmetry() == 'N') {
A.get_dense_data()->add_matrix_product_row_major(transa, alpha, in, beta, out, mu);
} else {
A.get_dense_data()->add_matrix_product_symmetric_row_major(transa, alpha, in, beta, out, mu, A.get_UPLO(), A.get_symmetry());
}
break;
case HMatrix<CoefficientPrecision, CoordinatePrecision>::StorageType::LowRank:
A.get_low_rank_data()->add_matrix_product_row_major(transa, alpha, in, beta, out, mu);
break;
default:
sequential_add_hmatrix_matrix_product_row_major(transa, transb, alpha, A, in, beta, out, mu);
break;
}
}

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void sequential_add_symmetric_hmatrix_matrix_product_row_major(char side, char UPLO, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *B, CoefficientPrecision beta, CoefficientPrecision *C, int mu) {
auto &leaves = A.get_leaves();
auto &leaves_for_symmetry = A.get_leaves_for_symmetry();

if (side != 'L') {
htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for sequential_add_symmetric_hmatrix_matrix_product_row_major (side=" + std::string(1, side) + ")"); // LCOV_EXCL_LINE
}

int out_size(A.get_target_cluster().get_size() * mu);
auto get_output_cluster{&HMatrix<CoefficientPrecision, CoordinatePrecision>::get_target_cluster};
auto get_input_cluster{&HMatrix<CoefficientPrecision, CoordinatePrecision>::get_source_cluster};
int local_output_offset = A.get_target_cluster().get_offset();
int local_input_offset = A.get_source_cluster().get_offset();
char trans_sym = 'T';

int incx(1), incy(1);
if (CoefficientPrecision(beta) != CoefficientPrecision(1)) {
// TODO: use blas
std::transform(C, C + out_size, C, [&beta](CoefficientPrecision &c) { return c * beta; });
}

// Contribution champ lointain
std::vector<CoefficientPrecision> temp(out_size, 0);
for (int b = 0; b < leaves.size(); b++) {
int input_offset = (leaves[b]->*get_input_cluster)().get_offset();
int output_offset = (leaves[b]->*get_output_cluster)().get_offset();
if (input_offset == output_offset) {
add_symmetric_hmatrix_matrix_product_row_major(side, UPLO, CoefficientPrecision(1), *leaves[b], B + (input_offset - local_input_offset) * mu, CoefficientPrecision(1), temp.data() + (output_offset - local_output_offset) * mu, mu);
} else {
add_hmatrix_matrix_product_row_major('N', 'N', CoefficientPrecision(1), *leaves[b], B + (input_offset - local_input_offset) * mu, CoefficientPrecision(1), temp.data() + (output_offset - local_output_offset) * mu, mu);
}
}

// Symmetry part of the diagonal part
for (int b = 0; b < leaves_for_symmetry.size(); b++) {
int input_offset = (leaves_for_symmetry[b]->*get_input_cluster)().get_offset();
int output_offset = (leaves_for_symmetry[b]->*get_output_cluster)().get_offset();
add_hmatrix_matrix_product_row_major(trans_sym, 'N', CoefficientPrecision(1), *leaves_for_symmetry[b], B + (output_offset - local_input_offset) * mu, CoefficientPrecision(1), temp.data() + (input_offset - local_output_offset) * mu, mu);
}
Blas<CoefficientPrecision>::axpy(&out_size, &alpha, temp.data(), &incx, C, &incy);
}

template <typename CoefficientPrecision, typename CoordinatePrecision>
void add_symmetric_hmatrix_matrix_product_row_major(char side, char UPLO, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out, int mu) {
switch (A.get_storage_type()) {
case HMatrix<CoefficientPrecision, CoordinatePrecision>::StorageType::Dense:
add_symmetric_matrix_matrix_product_row_major(side, UPLO, alpha, *A.get_dense_data(), in, beta, out, mu);
break;
case HMatrix<CoefficientPrecision, CoordinatePrecision>::StorageType::LowRank:
htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_symmetric_hmatrix_matrix_product_row_major (symmetric lrmat?)"); // LCOV_EXCL_LINE
// add_symmetric_matrix_product_row_major(side, UPLO, alpha, *A.get_low_rank_data(), in, beta, out, mu);
break;
default:
if (side == 'L') {
sequential_add_symmetric_hmatrix_matrix_product_row_major(side, UPLO, alpha, A, in, beta, out, mu);
} else {
//
sequential_add_symmetric_hmatrix_matrix_product_row_major(side, UPLO, alpha, A, in, beta, out, mu);
}
break;
}
}
// template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
// void sequential_add_symmetric_hmatrix_matrix_product_row_major(char side, char UPLO, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *B, CoefficientPrecision beta, CoefficientPrecision *C, int mu) {
// auto &leaves = A.get_leaves();
// auto &leaves_for_symmetry = A.get_leaves_for_symmetry();

// if (side != 'L') {
// htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for sequential_add_symmetric_hmatrix_matrix_product_row_major (side=" + std::string(1, side) + ")"); // LCOV_EXCL_LINE
// }

// int out_size(A.get_target_cluster().get_size() * mu);
// auto get_output_cluster{&HMatrix<CoefficientPrecision, CoordinatePrecision>::get_target_cluster};
// auto get_input_cluster{&HMatrix<CoefficientPrecision, CoordinatePrecision>::get_source_cluster};
// int local_output_offset = A.get_target_cluster().get_offset();
// int local_input_offset = A.get_source_cluster().get_offset();
// char trans_sym = 'T';

// int incx(1), incy(1);
// if (CoefficientPrecision(beta) != CoefficientPrecision(1)) {
// // TODO: use blas
// std::transform(C, C + out_size, C, [&beta](CoefficientPrecision &c) { return c * beta; });
// }

// // Contribution champ lointain
// std::vector<CoefficientPrecision> temp(out_size, 0);
// for (int b = 0; b < leaves.size(); b++) {
// int input_offset = (leaves[b]->*get_input_cluster)().get_offset();
// int output_offset = (leaves[b]->*get_output_cluster)().get_offset();
// if (input_offset == output_offset) {
// add_symmetric_hmatrix_matrix_product_row_major(side, UPLO, CoefficientPrecision(1), *leaves[b], B + (input_offset - local_input_offset) * mu, CoefficientPrecision(1), temp.data() + (output_offset - local_output_offset) * mu, mu);
// } else {
// add_hmatrix_matrix_product_row_major('N', 'N', CoefficientPrecision(1), *leaves[b], B + (input_offset - local_input_offset) * mu, CoefficientPrecision(1), temp.data() + (output_offset - local_output_offset) * mu, mu);
// }
// }

// // Symmetry part of the diagonal part
// for (int b = 0; b < leaves_for_symmetry.size(); b++) {
// int input_offset = (leaves_for_symmetry[b]->*get_input_cluster)().get_offset();
// int output_offset = (leaves_for_symmetry[b]->*get_output_cluster)().get_offset();
// add_hmatrix_matrix_product_row_major(trans_sym, 'N', CoefficientPrecision(1), *leaves_for_symmetry[b], B + (output_offset - local_input_offset) * mu, CoefficientPrecision(1), temp.data() + (input_offset - local_output_offset) * mu, mu);
// }
// Blas<CoefficientPrecision>::axpy(&out_size, &alpha, temp.data(), &incx, C, &incy);
// }

// template <typename CoefficientPrecision, typename CoordinatePrecision>
// void add_symmetric_hmatrix_matrix_product_row_major(char side, char UPLO, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out, int mu) {
// switch (A.get_storage_type()) {
// case HMatrix<CoefficientPrecision, CoordinatePrecision>::StorageType::Dense:
// add_symmetric_matrix_matrix_product_row_major(side, UPLO, alpha, *A.get_dense_data(), in, beta, out, mu);
// break;
// case HMatrix<CoefficientPrecision, CoordinatePrecision>::StorageType::LowRank:
// htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_symmetric_hmatrix_matrix_product_row_major (symmetric lrmat?)"); // LCOV_EXCL_LINE
// // add_symmetric_matrix_product_row_major(side, UPLO, alpha, *A.get_low_rank_data(), in, beta, out, mu);
// break;
// default:
// if (side == 'L') {
// sequential_add_symmetric_hmatrix_matrix_product_row_major(side, UPLO, alpha, A, in, beta, out, mu);
// } else {
// //
// sequential_add_symmetric_hmatrix_matrix_product_row_major(side, UPLO, alpha, A, in, beta, out, mu);
// }
// break;
// }
// }

} // namespace htool

Expand Down
Loading

0 comments on commit 1fe6650

Please sign in to comment.