From 29f4a79d351e7f8dd6339ff07788635a01a62e9e Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Sun, 26 Jan 2020 16:22:12 -0500 Subject: [PATCH 01/66] Move linear algebra from randomized into linear algebra specific file. --- btas/generic/linear_algebra.h | 167 ++++++++++++++++++++++++++++++++++ btas/generic/randomized.h | 136 --------------------------- 2 files changed, 167 insertions(+), 136 deletions(-) create mode 100644 btas/generic/linear_algebra.h diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h new file mode 100644 index 00000000..5f936df9 --- /dev/null +++ b/btas/generic/linear_algebra.h @@ -0,0 +1,167 @@ +// +// Created by Karl Pierce on 1/26/20. +// + +#ifndef BTAS_LINEAR_ALGEBRA_H +#define BTAS_LINEAR_ALGEBRA_H +#include + +namespace btas{ + /// Computes L of the LU decomposition of tensor \c A +/// \param[in, out] A In: A reference matrix to be LU decomposed. Out: +/// The L of an LU decomposition of \c A. + + template void LU_decomp(Tensor &A) { + +#ifndef LAPACKE_ENABLED + BTAS_EXCEPTION("Using this function requires LAPACKE"); +#endif // LAPACKE_ENABLED + + if(A.rank() > 2){ + BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); + } + + btas::Tensor piv(std::min(A.extent(0), A.extent(1))); + Tensor L(A.range()); + Tensor P(A.extent(0), A.extent(0)); + P.fill(0.0); + L.fill(0.0); + + // LAPACKE LU decomposition gives back dense L and U to be + // restored into lower and upper triangular form, and a pivoting + // matrix for L + auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1), + A.data(), A.extent(1), piv.data()); + + // This means there was a problem with the LU that must be dealt with, + // The decomposition cannot be continued. + if (info < 0) { + BTAS_EXCEPTION("LU_decomp: LAPACKE_dgetrf received an invalid input parameter"); + } + + // This means that part of the LU is singular which may cause a problem in + // ones QR decomposition but LU can be computed fine. + if (info != 0) { + } + + // indexing the pivot matrix + for (auto &j : piv) + j -= 1; + + int pivsize = piv.extent(0); + piv.resize(Range{Range1{A.extent(0)}}); + + // Walk through the full pivot array and + // put the correct index values throughout + for (int i = 0; i < piv.extent(0); i++) { + if (i == piv(i) || i >= pivsize) { + for (int j = 0; j < i; j++) { + if (i == piv(j)) { + piv(i) = j; + break; + } + } + } + if (i >= pivsize) { + piv(i) = i; + for (int j = 0; j < i; j++) + if (i == piv(j)) { + piv(i) = j; + break; + } + } + } + + // generating the pivot matrix from the correct indices found above + for (int i = 0; i < piv.extent(0); i++) + P(piv(i), i) = 1; + + // Use the output of LAPACKE to make a lower triangular matrix, L + for (int i = 0; i < L.extent(0); i++) { + for (int j = 0; j < i && j < L.extent(1); j++) { + L(i, j) = A(i, j); + } + if (i < L.extent(1)) + L(i, i) = 1; + } + + // contracting the pivoting matrix with L to put in correct order + gemm(CblasNoTrans, CblasNoTrans, 1.0, P, L, 0.0, A); + } + +/// Computes the QR decomposition of matrix \c A +/// \param[in, out] A In: A Reference matrix to be QR decomposed. Out: +/// The Q of a QR decomposition of \c A. + + template bool QR_decomp(Tensor &A) { + +#ifndef LAPACKE_ENABLED + BTAS_EXCEPTION("Using this function requires LAPACKE"); +#endif // LAPACKE_ENABLED + + if(A.rank() > 2){ + BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); + } + + int Qm = A.extent(0); + int Qn = A.extent(1); + Tensor B(1, std::min(Qm, Qn)); + + // LAPACKE doesn't directly calculate Q. Must first call this function to + // generate precursors to Q + auto info = LAPACKE_dgeqrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1), + A.data(), A.extent(1), B.data()); + + if (info == 0) { + // This function generates Q. + info = LAPACKE_dorgqr(LAPACK_ROW_MAJOR, Qm, Qn, Qn, A.data(), A.extent(1), + B.data()); + + // If there was some problem generating Q, i.e. it is singular, the + // randomized decompsoition will fail. There is an exception thrown if + // there is a problem to stop the randomized decomposition + if (info != 0) { + return false; + } + return true; + } else { + return false; + } + } + +/// Computes the inverse of a matrix \c A using a pivoted LU decomposition +/// \param[in, out] A In: A reference matrix to be inverted. Out: +/// The inverse of A, computed using LU decomposition. + template + bool Inverse_Matrix(Tensor & A){ + +#ifndef LAPACKE_ENABLED + BTAS_EXCEPTION("Using this function requires LAPACKE"); +#endif // LAPACKE_ENABLED + + if(A.rank() > 2){ + BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); + } + + btas::Tensor piv(std::min(A.extent(0), A.extent(1))); + piv.fill(0); + + // LAPACKE LU decomposition gives back dense L and U to be + // restored into lower and upper triangular form, and a pivoting + // matrix for L + auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1), + A.data(), A.extent(1), piv.data()); + if(info != 0){ + A = Tensor(); + return false; + } + info = LAPACKE_dgetri(CblasRowMajor, A.extent(0), A.data(), A.extent(0), piv.data()); + if(info != 0){ + A = Tensor(); + return false; + } + return true; + } +} + +#endif //BTAS_LINEAR_ALGEBRA_H diff --git a/btas/generic/randomized.h b/btas/generic/randomized.h index 2b4cc811..4fdb2525 100644 --- a/btas/generic/randomized.h +++ b/btas/generic/randomized.h @@ -9,9 +9,6 @@ #include #include -#ifdef _HAS_INTEL_MKL -#include - namespace btas { /// \param[in,out] A In: An empty matrix of size column dimension of the nth @@ -109,139 +106,6 @@ void randomized_decomposition(Tensor &A, std::vector &transforms, core_contract(A, transforms[n], n); } } - -/// Computes L of the LU decomposition of tensor \c A -/// \param[in, out] A In: A reference matrix to be LU decomposed. Out: -/// The L of an LU decomposition of \c A. - -template void LU_decomp(Tensor &A) { - - btas::Tensor piv(std::min(A.extent(0), A.extent(1))); - Tensor L(A.range()); - Tensor P(A.extent(0), A.extent(0)); - P.fill(0.0); - L.fill(0.0); - - // LAPACKE LU decomposition gives back dense L and U to be - // restored into lower and upper triangular form, and a pivoting - // matrix for L - auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1), - A.data(), A.extent(1), piv.data()); - - // This means there was a problem with the LU that must be dealt with, - // The decomposition cannot be continued. - if (info < 0) { - BTAS_EXCEPTION("LU_decomp: LAPACKE_dgetrf received an invalid input parameter"); - } - - // This means that part of the LU is singular which may cause a problem in - // ones QR decomposition but LU can be computed fine. - if (info != 0) { - } - - // indexing the pivot matrix - for (auto &j : piv) - j -= 1; - - int pivsize = piv.extent(0); - piv.resize(Range{Range1{A.extent(0)}}); - - // Walk through the full pivot array and - // put the correct index values throughout - for (int i = 0; i < piv.extent(0); i++) { - if (i == piv(i) || i >= pivsize) { - for (int j = 0; j < i; j++) { - if (i == piv(j)) { - piv(i) = j; - break; - } - } - } - if (i >= pivsize) { - piv(i) = i; - for (int j = 0; j < i; j++) - if (i == piv(j)) { - piv(i) = j; - break; - } - } - } - - // generating the pivot matrix from the correct indices found above - for (int i = 0; i < piv.extent(0); i++) - P(piv(i), i) = 1; - - // Use the output of LAPACKE to make a lower triangular matrix, L - for (int i = 0; i < L.extent(0); i++) { - for (int j = 0; j < i && j < L.extent(1); j++) { - L(i, j) = A(i, j); - } - if (i < L.extent(1)) - L(i, i) = 1; - } - - // contracting the pivoting matrix with L to put in correct order - gemm(CblasNoTrans, CblasNoTrans, 1.0, P, L, 0.0, A); -} - -// Computes the QR decomposition of matrix \c A -/// \param[in, out] A In: A Reference matrix to be QR decomposed. Out: -/// The Q of a QR decomposition of \c A. - -template bool QR_decomp(Tensor &A) { - - int Qm = A.extent(0); - int Qn = A.extent(1); - Tensor B(1, std::min(Qm, Qn)); - - // LAPACKE doesn't directly calculate Q. Must first call this function to - // generate precursors to Q - auto info = LAPACKE_dgeqrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1), - A.data(), A.extent(1), B.data()); - - if (info == 0) { - // This function generates Q. - info = LAPACKE_dorgqr(LAPACK_ROW_MAJOR, Qm, Qn, Qn, A.data(), A.extent(1), - B.data()); - - // If there was some problem generating Q, i.e. it is singular, the - // randomized decompsoition will fail. There is an exception thrown if - // there is a problem to stop the randomized decomposition - if (info != 0) { - return false; - } - return true; - } else { - return false; - } -} - -template -bool Inverse_Matrix(Tensor & A){ - if(A.rank() > 2){ - //Return exception - } - btas::Tensor piv(std::min(A.extent(0), A.extent(1))); - piv.fill(0); - - // LAPACKE LU decomposition gives back dense L and U to be - // restored into lower and upper triangular form, and a pivoting - // matrix for L - auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1), - A.data(), A.extent(1), piv.data()); - if(info != 0){ - A = Tensor(); - return false; - } - info = LAPACKE_dgetri(CblasRowMajor, A.extent(0), A.data(), A.extent(0), piv.data()); - if(info != 0){ - A = Tensor(); - return false; - } - return true; -} } // namespace btas -#endif // HAS_INTEL_MKL - #endif // BTAS_RANDOMIZED_DECOMP_H From ed67cb575fced40d5297b2c9303b7860ab034c92 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Sun, 26 Jan 2020 16:22:41 -0500 Subject: [PATCH 02/66] Update fit check functions' documentation --- btas/generic/converge_class.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/btas/generic/converge_class.h b/btas/generic/converge_class.h index 178c0e0a..a550c503 100644 --- a/btas/generic/converge_class.h +++ b/btas/generic/converge_class.h @@ -61,10 +61,13 @@ namespace btas { int rank_; // Rank of the CP problem }; - // From Tensor Toolbox : - // The "fit" is defined as 1 - norm(X-full(M))/norm(X) and is - // loosely the proportion of the data described by the CP model, i.e., a - // fit of 1 is perfect. + /** + \brief Class used to decide when ALS problem is converged + The "fit" is defined as \f$ 1 - \frac{\|X-full(M)\|}{\|X\|} \leq \epsilon\f$ + where X is the exact tensor and M is the reconstructed CP tensor. + This fit is loosely the proportion of the data described by the + CP model, i.e., a fit of 1 is perfect. + **/ template class FitCheck{ public: @@ -172,6 +175,15 @@ namespace btas { } }; + /** + \brief Class used to decide when ALS problem is converged + The "fit" is defined as \f$ 1 - \frac{\|X_1-full(M_1)\|}{\|X_1\|} - + \frac{\|X_2-full(M_2)\|}{\|X_2\|}\leq \epsilon\f$ + where \f$ X_1 \f$ and \f$ X_2 \f$ are tensors coupled by a single mode + \f$ M_1 \f$ and \f$ M_2 \f$ are the coupled reconstructed CP tensors. + This fit is loosely the proportion of the data described by the + CP model, i.e., a fit of 1 is perfect. + **/ template class CoupledFitCheck{ public: From 2f9e0c4bf34752a0458dc39f5cb47fb7dfc27499 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Sun, 26 Jan 2020 16:23:06 -0500 Subject: [PATCH 03/66] Format types.h and add LAPACKE_ENABLED definition --- btas/types.h | 74 ++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/btas/types.h b/btas/types.h index 0875971b..900225c2 100644 --- a/btas/types.h +++ b/btas/types.h @@ -12,43 +12,43 @@ extern "C" { #endif // __cplusplus #ifdef BTAS_HAS_CBLAS - -#if not defined(_CBLAS_HEADER) && not defined(_LAPACKE_HEADER) - -#ifdef _HAS_INTEL_MKL - -#include -#include - -#else // _HAS_INTEL_MKL - -#include -// see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I -#include -#ifndef lapack_complex_float -# define lapack_complex_float std::complex -#endif -#ifndef lapack_complex_double -# define lapack_complex_double std::complex -#endif -#include - -#endif // _HAS_INTEL_MKL - -#else // _CBLAS_HEADER - -#include _CBLAS_HEADER -// see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I -#include -#ifndef lapack_complex_float -# define lapack_complex_float std::complex -#endif -#ifndef lapack_complex_double -# define lapack_complex_double std::complex -#endif -#include _LAPACKE_HEADER - -#endif // _CBLAS_HEADER +# define LAPACKE_ENABLED +# if not defined(_CBLAS_HEADER) && not defined(_LAPACKE_HEADER) + +# ifdef _HAS_INTEL_MKL + +# include +# include + +# else // _HAS_INTEL_MKL + +# include + // see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I +# include +# ifndef lapack_complex_float +# define lapack_complex_float std::complex +# endif // lapack_complex_float +# ifndef lapack_complex_double +# define lapack_complex_double std::complex +# endif // lapack_complex_double +# include + +# endif // _HAS_INTEL_MKL + +# else // _CBLAS_HEADER + +# include _CBLAS_HEADER + // see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I +# include +# ifndef lapack_complex_float +# define lapack_complex_float std::complex +# endif // lapack_complex_float +# ifndef lapack_complex_double +# define lapack_complex_double std::complex +# endif // lapack_complex_double +# include _LAPACKE_HEADER + +# endif // _CBLAS_HEADER #else // BTAS_HAS_CBLAS From 6fd9391e4c0b33f6963b7e6061da60ba70a03845 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Sun, 26 Jan 2020 16:23:24 -0500 Subject: [PATCH 04/66] Formatting types.h --- btas/types.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/btas/types.h b/btas/types.h index 900225c2..4ed6d798 100644 --- a/btas/types.h +++ b/btas/types.h @@ -77,15 +77,15 @@ enum CBLAS_SIDE { CblasLeft, CblasRight }; #ifndef HAVE_INTEL_MKL #ifndef lapack_complex_float # define lapack_complex_float std::complex -#else +#else // lapack_complex_float static_assert(sizeof(std::complex)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex do not match"); -#endif +#endif // lapack_complex_float #ifndef lapack_complex_double # define lapack_complex_double std::complex -#else +#else // lapack_complex_double static_assert(sizeof(std::complex)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex do not match"); -#endif -#else +#endif // lapack_complex_double +#else // HAVE_INTEL_MKL // if calling direct need to cast to the MKL complex types # ifdef MKL_DIRECT_CALL # include @@ -96,15 +96,15 @@ static_assert(sizeof(std::complex)==sizeof(lapack_complex_double), "size # define lapack_complex_double MKL_Complex16 # endif // else can call via F77 prototypes which don't need type conversion -# else +# else // MKL_DIRECT_CALL # ifndef lapack_complex_float # define lapack_complex_float std::complex # endif # ifndef lapack_complex_double # define lapack_complex_double std::complex # endif -# endif -#endif +# endif // MKL_DIRECT_CALL +#endif // HAVE_INTEL_MKL namespace btas { From ea0fd4018197728546ae087d8fe6589600b167ea Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Sun, 26 Jan 2020 18:10:53 -0500 Subject: [PATCH 05/66] Move generic pseudo inverse code to linear algebra file. (Needs slight update to get rid of rank variable) --- btas/generic/cp.h | 69 ------------------------- btas/generic/linear_algebra.h | 97 +++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 69 deletions(-) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index f9fe9c3f..b142448f 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -648,75 +648,6 @@ namespace btas{ BTAS_EXCEPTION("Pseudo inverse failed" ); } } - - /// SVD referencing code from - /// http://www.netlib.org/lapack/explore-html/de/ddd/lapacke_8h_af31b3cb47f7cc3b9f6541303a2968c9f.html - /// Fast pseudo-inverse algorithm described in - /// https://arxiv.org/pdf/0804.4809.pdf - - /// \param[in] a matrix to be inverted. - /// \return a^{\dagger} The pseudoinverse of the matrix a. - Tensor pseudoInverse(Tensor & a){ - bool matlab = false; - auto R = A[0].extent(1); - Tensor s(Range{Range1{R}}); - Tensor U(Range{Range1{R}, Range1{R}}); - Tensor Vt(Range{Range1{R}, Range1{R}}); - - if(! matlab) { - -// btas has no generic SVD for MKL LAPACKE -// time1 = std::chrono::high_resolution_clock::now(); -#ifdef _HAS_INTEL_MKL - double worksize; - double *work = &worksize; - lapack_int lwork = -1; - lapack_int info = 0; - - char A = 'A'; - - // Call dgesvd with lwork = -1 to query optimal workspace size: - - info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, - &worksize, lwork); - if (info != 0) - BTAS_EXCEPTION("SVD pseudo inverse failed"); - - lwork = (lapack_int) worksize; - work = (double *) malloc(sizeof(double) * lwork); - - info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, work, - lwork); - if (info != 0) - BTAS_EXCEPTION("SVD pseudo inverse failed"); - - free(work); -#else // BTAS_HAS_CBLAS - - gesvd('A', 'A', a, s, U, Vt); - -#endif - - // Inverse the Singular values with threshold 1e-13 = 0 - double lr_thresh = 1e-13; - Tensor s_inv(Range{Range1{R}, Range1{R}}); - s_inv.fill(0.0); - for (auto i = 0; i < R; ++i) { - if (s(i) > lr_thresh) - s_inv(i, i) = 1 / s(i); - else - s_inv(i, i) = s(i); - } - s.resize(Range{Range1{R}, Range1{R}}); - - // Compute the matrix A^-1 from the inverted singular values and the U and - // V^T provided by the SVD - gemm(CblasNoTrans, CblasNoTrans, 1.0, U, s_inv, 0.0, s); - gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U); - - } - return U; - } }; };// namespace btas diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index 5f936df9..973c625b 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -162,6 +162,103 @@ namespace btas{ } return true; } + +/// Computes the eigenvalue decomposition of a matrix \c A and +/// \param[in, out] A In: A reference matrix to be decomposed. Out: +/// The eigenvectors of the matrix \c A. +/// \param[in, out] lambda In: An empty vector with length greater than +/// or equal to the largest mode of \c A. Out: The eigenvalues of the +/// matrix \c A + template + void eigenvalue_decomp(Tensor & A, Tensor & lambda){ +#ifndef LAPACKE_ENABLED + BTAS_EXCEPTION("Using eigenvalue decomposition requires LAPACKE"); +#endif // LAPACKE_ENABLED + if(A.rank() > 2){ + BTAS_EXCEPTION("Tensor rank > 2. Tensor A must be a matrix."); + } + auto lambda_length = lambda.size(); + auto largest_mode_A = (A.extent(0) > A.extent(1) ? A.extent(0) : A.extent(1)); + if(lambda_length < largest_mode_A){ + BTAS_EXCEPTION("Volume of lambda must be greater than or equal to the largest mode of A"); + } + + auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', largest_mode_A, + A.data(), largest_mode_A, lambda.data()); + if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); + } + +/// SVD referencing code from +/// http://www.netlib.org/lapack/explore-html/de/ddd/lapacke_8h_af31b3cb47f7cc3b9f6541303a2968c9f.html +/// Fast pseudo-inverse algorithm described in +/// https://arxiv.org/pdf/0804.4809.pdf + +/// \param[in] a matrix to be inverted. +/// \return a^{\dagger} The pseudoinverse of the matrix a. +template +Tensor pseudoInverse(Tensor & a, int R) { +#ifndef LAPACKE_ENABLED + BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE"); +#endif // LAPACKE_ENABLED + bool Cholesky = true; + bool fast = false; + Tensor s(Range{Range1{R}}); + Tensor U(Range{Range1{R}, Range1{R}}); + Tensor Vt(Range{Range1{R}, Range1{R}}); + + if(Cholesky){ + Cholesky = false; + } + if(fast){ + + } + if (!Cholesky && !fast) { + double worksize; + double *work = &worksize; + lapack_int lwork = -1; + lapack_int info = 0; + + char A = 'A'; + + // Call dgesvd with lwork = -1 to query optimal workspace size: + + info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, + &worksize, lwork); + if (info != 0) + BTAS_EXCEPTION("SVD pseudo inverse failed"); + + lwork = (lapack_int) worksize; + work = (double *) malloc(sizeof(double) * lwork); + + info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, work, + lwork); + if (info != 0) + BTAS_EXCEPTION("SVD pseudo inverse failed"); + + free(work); + + //gesvd('A', 'A', a, s, U, Vt); + + // Inverse the Singular values with threshold 1e-13 = 0 + double lr_thresh = 1e-13; + Tensor s_inv(Range{Range1{R}, Range1{R}}); + s_inv.fill(0.0); + for (auto i = 0; i < R; ++i) { + if (s(i) > lr_thresh) + s_inv(i, i) = 1 / s(i); + else + s_inv(i, i) = s(i); + } + s.resize(Range{Range1{R}, Range1{R}}); + + // Compute the matrix A^-1 from the inverted singular values and the U and + // V^T provided by the SVD + gemm(CblasNoTrans, CblasNoTrans, 1.0, U, s_inv, 0.0, s); + gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U); + + } + return U; } +} // namespace btas #endif //BTAS_LINEAR_ALGEBRA_H From 1842a332e1b1aa40e3b67b33c80a4687101d924a Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Sun, 26 Jan 2020 18:11:06 -0500 Subject: [PATCH 06/66] Linear algebra need cblas definitions --- btas/generic/linear_algebra.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index 973c625b..a5cff42d 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -6,6 +6,8 @@ #define BTAS_LINEAR_ALGEBRA_H #include +#ifdef BTAS_HAS_CBLAS + namespace btas{ /// Computes L of the LU decomposition of tensor \c A /// \param[in, out] A In: A reference matrix to be LU decomposed. Out: @@ -261,4 +263,5 @@ Tensor pseudoInverse(Tensor & a, int R) { } } // namespace btas +#endif // BTAS_HAS_CBLAS #endif //BTAS_LINEAR_ALGEBRA_H From 809b289dc2903192a38a72044827593af88fef59 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Sun, 26 Jan 2020 18:11:13 -0500 Subject: [PATCH 07/66] Formatting --- btas/generic/linear_algebra.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index a5cff42d..782f258e 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -138,7 +138,7 @@ namespace btas{ bool Inverse_Matrix(Tensor & A){ #ifndef LAPACKE_ENABLED - BTAS_EXCEPTION("Using this function requires LAPACKE"); + BTAS_EXCEPTION("Using LU matrix inversion requires LAPACKE"); #endif // LAPACKE_ENABLED if(A.rank() > 2){ From 7ad154ec888975cda3aa664a4193eb7177491671 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Sun, 26 Jan 2020 18:12:15 -0500 Subject: [PATCH 08/66] Use eig value decomposition from linear algebra. Use Pseudoinverse from linear algebra --- btas/generic/coupled_cp_als.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/btas/generic/coupled_cp_als.h b/btas/generic/coupled_cp_als.h index 3304bb8c..f6dcdea6 100644 --- a/btas/generic/coupled_cp_als.h +++ b/btas/generic/coupled_cp_als.h @@ -23,6 +23,7 @@ #include #include #include +#include namespace btas{ @@ -78,7 +79,6 @@ namespace btas{ public: using CP::A; using CP::ndim; - using CP::pseudoInverse; using CP::normCol; using CP::generate_KRP; using CP::generate_V; @@ -183,7 +183,6 @@ namespace btas{ // If its the first time into build and SVD_initial_guess // build and optimize the initial guess based on the left // singular vectors of the reference tensor. -#ifdef _HAS_INTEL_MKL if (A.empty() && SVD_initial_guess) { if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD"); @@ -220,8 +219,7 @@ namespace btas{ gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S); // Find the Singular vectors of the matrix using eigenvalue decomposition - auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data()); - if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); + eigenvalue_decomp(S, lambda); // Fill a factor matrix with the singular vectors with the largest corresponding singular // values @@ -267,9 +265,6 @@ namespace btas{ // Optimize this initial guess. ALS(SVD_rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI); } -#else // _HAS_INTEL_MKL - if (SVD_initial_guess) BTAS_EXCEPTION("Computing the SVD requires LAPACK"); -#endif // _HAS_INTEL_MKL // This loop keeps track of column dimension for (auto i = (A.empty()) ? 0 : A.at(0).extent(1); i < rank; i += step) { // This loop walks through the factor matrices @@ -480,7 +475,7 @@ namespace btas{ } // Finally Form the product of K * J^\dagger Tensor a0(coupled_dim, rank); - gemm(CblasNoTrans, CblasNoTrans, 1.0, K, pseudoInverse(J1), 0.0, a0); + gemm(CblasNoTrans, CblasNoTrans, 1.0, K, pseudoInverse(J1, rank), 0.0, a0); this->normCol(a0); A[0] = a0; } @@ -556,7 +551,7 @@ namespace btas{ else if(n == this->ndim - 1) detail::set_MtKRPR(converge_test, contract_tensor); Tensor an(skip_dim, rank); - gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(G), 0.0, an); + gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(G, rank), 0.0, an); this->normCol(an); A[n] = an; } From 9abaf983357b937f395498aa201b8a4c49e9c6ec Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 13:42:24 -0500 Subject: [PATCH 09/66] Add linear algebra to cp.h --- btas/generic/cp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index b142448f..0be610a6 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef _HAS_INTEL_MKL #include From 9aa87f1ef81dd5c35cdcceb8bee302e7ea53be60 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 13:43:37 -0500 Subject: [PATCH 10/66] Simplify the pseudoInverse function. Moving all pseudoInverse implementations to linear algebra file --- btas/generic/cp.h | 46 ++-------------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index 0be610a6..8aea42d1 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -569,58 +569,16 @@ namespace btas{ /// \return The pseudoinverse of the matrix V. Tensor pseudoInverse(int n, int R, bool & fast_pI, double lambda = 0.0) { // CP_ALS method requires the pseudoinverse of matrix V -#ifdef _HAS_INTEL_MKL - if(fast_pI) { - auto a = this->generate_V(n, R, lambda); - Tensor temp(R, R), inv(R, R); - // compute V^{\dag} = (A^T A) ^{-1} A^T - gemm(CblasTrans, CblasNoTrans, 1.0, a, a, 0.0, temp); - fast_pI = Inverse_Matrix(temp); - if(fast_pI) { - gemm(CblasNoTrans, CblasTrans, 1.0, temp, a, 0.0, inv); - return inv; - } - else{ - std::cout << "Fast pseudo-inverse failed reverting to normal pseudo-inverse" << std::endl; - } - } -#else - fast_pI = false; -#endif // _HAS_INTEL_MKL + auto a = this->generate_V(n, R, lambda); if(!fast_pI) { - auto a = this->generate_V(n, R, lambda); Tensor s(Range{Range1{R}}); Tensor U(Range{Range1{R}, Range1{R}}); Tensor Vt(Range{Range1{R}, Range1{R}}); // btas has no generic SVD for MKL LAPACKE // time1 = std::chrono::high_resolution_clock::now(); -#ifdef _HAS_INTEL_MKL - double worksize; - double *work = &worksize; - lapack_int lwork = -1; - lapack_int info = 0; - - char A = 'A'; - - // Call dgesvd with lwork = -1 to query optimal workspace size: - - info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, - &worksize, lwork); - if (info != 0) - BTAS_EXCEPTION("SVD pseudo inverse failed"); - - lwork = (lapack_int) worksize; - work = (double *) malloc(sizeof(double) * lwork); - - info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, work, - lwork); - if (info != 0) - BTAS_EXCEPTION("SVD pseudo inverse failed"); - - free(work); -#else // BTAS_HAS_CBLAS +#ifdef LAPACKE_ENABLED gesvd('A', 'A', a, s, U, Vt); From 88207cf040cbcbd65017f5393a2d5296aca20adc Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 13:44:44 -0500 Subject: [PATCH 11/66] Pseudoinverse helper function that first calls Cholesky (Ax=B) solver. If that fails calls LU inversion function then SVD. --- btas/generic/cp.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index 8aea42d1..4fc2bdc9 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -607,8 +607,29 @@ namespace btas{ BTAS_EXCEPTION("Pseudo inverse failed" ); } } - }; + /// Trying to solve Ax = B + /// First try Cholesky to solve this problem directly + /// second tryfast pseudo-inverse algorithm described in + /// https://arxiv.org/pdf/0804.4809.pdf + /// If all else fails use SVD + Tensor psuedoinverse_helper(int mode_of_A, bool & fast_pI, + bool & cholesky, Tensor B = Tensor(), + double lambda = 0.0){ + if(cholesky && B.empty()){ + BTAS_EXCEPTION("To compute the Cholesky, one must provide a LHS tensor (Ax = B)"); + } + + auto rank = A[0].extent(1); + auto a = this->generate_V(mode_of_A, rank, lambda); + + if(cholesky) { + cholesky = cholesky_inverse(a, B); + return B; + } + return pseudoInverse_impl(a, fast_pI); + } + }; };// namespace btas #endif //BTAS_GENERIC_CP_H From 790b673032d1813fd9b754282e98519467662d64 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 13:45:26 -0500 Subject: [PATCH 12/66] Rank is defined as the smallest mode in a matrix not the largest --- btas/generic/linear_algebra.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index 782f258e..a2847531 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -180,13 +180,13 @@ namespace btas{ BTAS_EXCEPTION("Tensor rank > 2. Tensor A must be a matrix."); } auto lambda_length = lambda.size(); - auto largest_mode_A = (A.extent(0) > A.extent(1) ? A.extent(0) : A.extent(1)); - if(lambda_length < largest_mode_A){ + auto smallest_mode_A = (A.extent(0) < A.extent(1) ? A.extent(0) : A.extent(1)); + if(lambda_length < smallest_mode_A){ BTAS_EXCEPTION("Volume of lambda must be greater than or equal to the largest mode of A"); } - auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', largest_mode_A, - A.data(), largest_mode_A, lambda.data()); + auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', smallest_mode_A, + A.data(), smallest_mode_A, lambda.data()); if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); } From 65d1a19c033ae6a7a3b016e4b098bac57e0a083e Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 13:46:14 -0500 Subject: [PATCH 13/66] Created Cholesky inverse function which computes inverse of A to solve Ax = B. --- btas/generic/linear_algebra.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index a2847531..fb02c977 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -190,6 +190,34 @@ namespace btas{ if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); } + /// Solving Ax = B using a Cholesky decomposition + /// \param[in] A The right-hand side of the linear equation + /// to be inverted using Cholesky + /// \param[in, out] B In: The left-hand side of the linear equation + /// out: The solution x = A^{-1}B +template +bool cholesky_inverse(Tensor & A, Tensor & B){ +#ifndef LAPACKE_ENABLED + BTAS_EXCEPTION("Cholesky inverse function requires LAPACKE") +#endif + // This method computes the inverse quickly for a square matrix + // based on MATLAB's implementation of A / B operator. + auto rank = B.extent(1); + int LDB = B.extent(0); + + btas::Tensor > piv(rank); + piv.fill(0); + auto info = LAPACKE_dgesv(CblasColMajor, rank, LDB, A.data(), rank, piv.data(), B.data(), rank); + if (info == 0) { + return true; + } + else{ + // If inverse fails resort to the pseudoinverse + std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl; + return false; + } +} + /// SVD referencing code from /// http://www.netlib.org/lapack/explore-html/de/ddd/lapacke_8h_af31b3cb47f7cc3b9f6541303a2968c9f.html /// Fast pseudo-inverse algorithm described in From cfb11551fe9c83cb1dd9b59b9623b2ca10383455 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 13:49:47 -0500 Subject: [PATCH 14/66] Simplify pseudoinverse function. First try LU inversion and if that fails use SVD. --- btas/generic/linear_algebra.h | 68 ++++++++++++++--------------------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index fb02c977..0b0cb7f9 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -226,69 +226,55 @@ bool cholesky_inverse(Tensor & A, Tensor & B){ /// \param[in] a matrix to be inverted. /// \return a^{\dagger} The pseudoinverse of the matrix a. template -Tensor pseudoInverse(Tensor & a, int R) { +Tensor pseudoInverse_impl(Tensor & a, bool & fast_pI) { #ifndef LAPACKE_ENABLED BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE"); #endif // LAPACKE_ENABLED - bool Cholesky = true; - bool fast = false; - Tensor s(Range{Range1{R}}); - Tensor U(Range{Range1{R}, Range1{R}}); - Tensor Vt(Range{Range1{R}, Range1{R}}); - - if(Cholesky){ - Cholesky = false; - } - if(fast){ - - } - if (!Cholesky && !fast) { - double worksize; - double *work = &worksize; - lapack_int lwork = -1; - lapack_int info = 0; - - char A = 'A'; - - // Call dgesvd with lwork = -1 to query optimal workspace size: - - info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, - &worksize, lwork); - if (info != 0) - BTAS_EXCEPTION("SVD pseudo inverse failed"); - lwork = (lapack_int) worksize; - work = (double *) malloc(sizeof(double) * lwork); - - info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, work, - lwork); - if (info != 0) - BTAS_EXCEPTION("SVD pseudo inverse failed"); + if (a.rank() > 2) { + BTAS_EXCEPTION("PseudoInverse can only be computed on a matrix"); + } - free(work); + auto row = a.extent(0), col = a.extent(1); + auto rank = (row < col ? row : col); + + if (fast_pI) { + Tensor temp(col, col), inv(col, row); + // compute V^{\dag} = (A^T A) ^{-1} A^T + gemm(CblasTrans, CblasNoTrans, 1.0, a, a, 0.0, temp); + fast_pI = Inverse_Matrix(temp); + if (fast_pI) { + gemm(CblasNoTrans, CblasTrans, 1.0, temp, a, 0.0, inv); + return inv; + } else { + std::cout << "Fast pseudo-inverse failed reverting to normal pseudo-inverse" << std::endl; + } + } + Tensor s(Range{Range1{rank}}); + Tensor U(Range{Range1{row}, Range1{row}}); + Tensor Vt(Range{Range1{col}, Range1{col}}); - //gesvd('A', 'A', a, s, U, Vt); + gesvd('A', 'A', a, s, U, Vt); // Inverse the Singular values with threshold 1e-13 = 0 double lr_thresh = 1e-13; - Tensor s_inv(Range{Range1{R}, Range1{R}}); + Tensor s_inv(Range{Range1{row}, Range1{col}}); s_inv.fill(0.0); - for (auto i = 0; i < R; ++i) { + for (auto i = 0; i < rank; ++i) { if (s(i) > lr_thresh) s_inv(i, i) = 1 / s(i); else s_inv(i, i) = s(i); } - s.resize(Range{Range1{R}, Range1{R}}); + s.resize(Range{Range1{row}, Range1{col}}); // Compute the matrix A^-1 from the inverted singular values and the U and // V^T provided by the SVD gemm(CblasNoTrans, CblasNoTrans, 1.0, U, s_inv, 0.0, s); gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U); + return U; } - return U; -} } // namespace btas #endif // BTAS_HAS_CBLAS From c4412f49b84783b6cb5a86a649d703f8ed26a406 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 14:01:17 -0500 Subject: [PATCH 15/66] Use eigenvalue decomposition function from linear algebra file --- btas/generic/cp_als.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index a7086926..972a177e 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -401,7 +401,6 @@ namespace btas{ // If its the first time into build and SVD_initial_guess // build and optimize the initial guess based on the left // singular vectors of the reference tensor. -#ifdef _HAS_INTEL_MKL if (A.empty() && SVD_initial_guess) { if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD"); @@ -437,8 +436,9 @@ namespace btas{ gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S); // Find the Singular vectors of the matrix using eigenvalue decomposition - auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data()); - if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); + //auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data()); + //if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); + eigenvalue_decomp(S, lambda); // Fill a factor matrix with the singular vectors with the largest corresponding singular // values @@ -486,9 +486,6 @@ namespace btas{ // Optimize this initial guess. ALS(SVD_rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI); } -#else // _HAS_INTEL_MKL - if (SVD_initial_guess) BTAS_EXCEPTION("Computing the SVD requires LAPACK"); -#endif // _HAS_INTEL_MKL // This loop keeps track of column dimension bool opt_in_for_loop = false; for (auto i = (A.empty()) ? 0 : A.at(0).extent(1); i < rank; i += step) { From aabdf5f3a75544c08f9488a4da204966d4c135e7 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 14:02:17 -0500 Subject: [PATCH 16/66] Use the new pseudoinverse function from linear algebra in the conventional ALS --- btas/generic/cp_als.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index 972a177e..1eca6b24 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -633,7 +633,7 @@ namespace btas{ } else if (dir) { direct(i, rank, fast_pI, matlab, converge_test); } else { - update_w_KRP(i, rank, fast_pI, converge_test); + update_w_KRP(i, rank, fast_pI, matlab, converge_test); } } //std::cout << count << "\t"; @@ -662,7 +662,8 @@ namespace btas{ /// \param[in, out] matlab If \c fast_pI = true then try to solve VA = B instead of taking pseudoinverse /// in the same manner that matlab would compute the inverse. /// \param[in] converge_test test to see if the ALS is converged - void update_w_KRP(int n, int rank, bool & fast_pI, ConvClass & converge_test) { + void update_w_KRP(int n, int rank, bool & fast_pI, + bool & matlab, ConvClass & converge_test) { Tensor temp(A[n].extent(0), rank); Tensor an(A[n].range()); @@ -699,23 +700,25 @@ namespace btas{ swap_to_first(tensor_ref, n, true); #else // BTAS_HAS_CBLAS - // without MKL program cannot perform the swapping algorithm, must compute // flattened intermediate gemm(CblasNoTrans, CblasNoTrans, 1.0, flatten(tensor_ref, n), this->generate_KRP(n, rank, true), 0.0, temp); #endif detail::set_MtKRP(converge_test, temp); -// if(typeid(converge_test) == typeid(btas::FitCheck)) { -// converge_test.set_MtKRP(temp); -// } + // contract the product from above with the pseudoinverse of the Hadamard // produce an optimize factor matrix - gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, this->pseudoInverse(n, rank, fast_pI), 0.0, an); + auto pInv = this->psuedoinverse_helper(n, fast_pI, matlab, temp); + if(!matlab) { + gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, + pInv, 0.0, an); + } else{ + an = pInv; + } // compute the difference between this new factor matrix and the previous // iteration - //for (auto l = 0; l < rank; ++l) A[ndim](l) = normCol(an, l); this->normCol(an); // Replace the old factor matrix with the new optimized result From b7973a6eb4ae0e78f9b98f03785bdbb43dcc979b Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 14:06:24 -0500 Subject: [PATCH 17/66] Use the new pseudoinverse function from linear algebra --- btas/generic/cp_als.h | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index 1eca6b24..9f8032de 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -874,36 +874,13 @@ namespace btas{ n = last_dim ? ndim - 1: n; // multiply resulting matrix temp by pseudoinverse to calculate optimized // factor matrix - detail::set_MtKRP(converge_test, temp); -#ifdef _HAS_INTEL_MKL - if(fast_pI && matlab) { - // This method computes the inverse quickly for a square matrix - // based on MATLAB's implementation of A / B operator. - btas::Tensor > piv(rank); - piv.fill(0); - - auto a = this->generate_V(n, rank); - int LDB = temp.extent(0); - auto info = LAPACKE_dgesv(CblasColMajor, rank, LDB, a.data(), rank, piv.data(), temp.data(), rank); - if (info == 0) { - an = temp; - } - else{ - // If inverse fails resort to the pseudoinverse - std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl; - matlab = false; - } - } - if(!fast_pI || ! matlab){ - gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, this->pseudoInverse(n, rank, fast_pI), 0.0, an); - } -#else - matlab = false; - if(!fast_pI || !matlab){ - gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, this->pseudoInverse(n, rank, fast_pI), 0.0, an); + auto pInv = this->psuedoinverse_helper(n, fast_pI, matlab, temp); + if(!matlab) { + gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pInv, 0.0, an); + } else{ + an = pInv; } -#endif // Normalize the columns of the new factor matrix and update this->normCol(an); From 4b2ad6d03adc5cebb484fce90e8f264aca931c63 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 14:07:13 -0500 Subject: [PATCH 18/66] PALS uses build function which has lapacke gaurd in eigenvalue function, so no longer need _HAS_INTEL_MKL --- btas/generic/cp_als.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index 9f8032de..f0685e13 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -120,8 +120,6 @@ namespace btas{ ~CP_ALS() = default; -#ifdef _HAS_INTEL_MKL - /// \brief Computes decomposition of the order-N tensor \c tensor /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor) /// Initial guess for factor matrices start at rank = max_dim(reference_tensor) @@ -210,10 +208,10 @@ namespace btas{ } count++; } - //std::cout << "Number of ALS iterations was " << this->num_ALS << std::endl; return epsilon; } +#ifdef _HAS_INTEL_MKL /// \brief Computes an approximate core tensor using /// Tucker decomposition, e.g. /// \f$ T(I_1 \dots I_N) \approx T(R_1 \dots R_N) U^{(1)} (R_1, I_1) \dots U^{(N)} (R_N, I_N) \f$ From e190a2770eea0ad77d0a2fdf0a2d4a69052c8db0 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 14:41:25 -0500 Subject: [PATCH 19/66] Reminder to write documentation for pseudoinverse function --- btas/generic/cp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index 4fc2bdc9..a0590078 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -613,8 +613,9 @@ namespace btas{ /// second tryfast pseudo-inverse algorithm described in /// https://arxiv.org/pdf/0804.4809.pdf /// If all else fails use SVD - Tensor psuedoinverse_helper(int mode_of_A, bool & fast_pI, - bool & cholesky, Tensor B = Tensor(), + // TODO write docs here + void psuedoinverse_helper(int mode_of_A, bool & fast_pI, + bool & cholesky, Tensor & B = Tensor(), double lambda = 0.0){ if(cholesky && B.empty()){ BTAS_EXCEPTION("To compute the Cholesky, one must provide a LHS tensor (Ax = B)"); From 907d22aec9d4f9852484fcdbb53f3d33c57809f3 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 14:42:08 -0500 Subject: [PATCH 20/66] Now both cholesky and other compute A^{-1}B and the output is written into the B tensor --- btas/generic/cp.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index a0590078..d6009674 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -626,9 +626,12 @@ namespace btas{ if(cholesky) { cholesky = cholesky_inverse(a, B); - return B; + return; } - return pseudoInverse_impl(a, fast_pI); + auto pInv = pseudoInverse_impl(a, fast_pI); + Tensor an(B.extent(0), rank); + gemm(CblasNoTrans, CblasNoTrans, 1.0, B, pInv, 0.0, an); + B = an; } }; };// namespace btas From a373c1c4151e51aa05fadb277c868a21556e900c Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 14:42:29 -0500 Subject: [PATCH 21/66] Remove unused code --- btas/generic/cp_als.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index f0685e13..0e0ff59e 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -434,8 +434,6 @@ namespace btas{ gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S); // Find the Singular vectors of the matrix using eigenvalue decomposition - //auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data()); - //if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); eigenvalue_decomp(S, lambda); // Fill a factor matrix with the singular vectors with the largest corresponding singular From da68deceb0d2d3ff1531ea659edaf246a34becb0 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 14:43:01 -0500 Subject: [PATCH 22/66] Pseudoinverse now returns A^{-1}B so no need to compute it here. --- btas/generic/cp_als.h | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index 0e0ff59e..a50c845c 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -705,20 +705,14 @@ namespace btas{ // contract the product from above with the pseudoinverse of the Hadamard // produce an optimize factor matrix - auto pInv = this->psuedoinverse_helper(n, fast_pI, matlab, temp); - if(!matlab) { - gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, - pInv, 0.0, an); - } else{ - an = pInv; - } + this->psuedoinverse_helper(n, fast_pI, matlab, temp); // compute the difference between this new factor matrix and the previous // iteration - this->normCol(an); + this->normCol(temp); // Replace the old factor matrix with the new optimized result - A[n] = an; + A[n] = temp; } @@ -871,16 +865,12 @@ namespace btas{ // multiply resulting matrix temp by pseudoinverse to calculate optimized // factor matrix detail::set_MtKRP(converge_test, temp); - auto pInv = this->psuedoinverse_helper(n, fast_pI, matlab, temp); - if(!matlab) { - gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pInv, 0.0, an); - } else{ - an = pInv; - } + // Temp is then rewritten with unnormalized new A[n] matrix + this->psuedoinverse_helper(n, fast_pI, matlab, temp); // Normalize the columns of the new factor matrix and update - this->normCol(an); - A[n] = an; + this->normCol(temp); + A[n] = temp; } }; From 7e1d574bf9cf2df7804e5df99f71bf8626e6fc33 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:05:33 -0500 Subject: [PATCH 23/66] Remove _HAS_INTEL_MKL and use the eigenvalue decomposition function from linear algebra --- btas/generic/cp_rals.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h index e1ffa309..1279ec74 100644 --- a/btas/generic/cp_rals.h +++ b/btas/generic/cp_rals.h @@ -416,7 +416,6 @@ namespace btas{ // If its the first time into build and SVD_initial_guess // build and optimize the initial guess based on the left // singular vectors of the reference tensor. -#ifdef _HAS_INTEL_MKL if (A.empty() && SVD_initial_guess) { if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD"); @@ -452,8 +451,7 @@ namespace btas{ gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S); // Find the Singular vectors of the matrix using eigenvalue decomposition - auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data()); - if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); + eigenvalue_decomp(S, lambda); // Fill a factor matrix with the singular vectors with the largest corresponding singular // values @@ -503,9 +501,6 @@ namespace btas{ // Optimize this initial guess. ALS(SVD_rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI); } -#else // _HAS_INTEL_MKL - if (SVD_initial_guess) BTAS_EXCEPTION("Computing the SVD requires LAPACK"); -#endif // _HAS_INTEL_MKL // This loop keeps track of column dimension bool opt_in_for_loop = false; for (auto i = (A.empty()) ? 0 : A.at(0).extent(1); i < rank; i += step) { From 9124c580aa6794168b04f961041258e1c6bb981b Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:06:20 -0500 Subject: [PATCH 24/66] Use new pseudoinverse function and add cholesky decomposition method to conventional ALS --- btas/generic/cp_rals.h | 52 ++++++++++-------------------------------- 1 file changed, 12 insertions(+), 40 deletions(-) diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h index 1279ec74..2590674e 100644 --- a/btas/generic/cp_rals.h +++ b/btas/generic/cp_rals.h @@ -657,7 +657,7 @@ namespace btas{ if (dir) { direct(i, rank, fast_pI, matlab, lambda[i], s, converge_test); } else { - update_w_KRP(i, rank, fast_pI, lambda[i], s, converge_test); + update_w_KRP(i, rank, fast_pI, matlab, lambda[i], s, converge_test); } lambda[i] = (lambda[i] * (s * s) / (s0 * s0)) * alpha + (1 - alpha) * lambda[i]; } @@ -687,7 +687,8 @@ namespace btas{ /// \param[in] lambda regularization parameter /// \param[in, out] s regularization step size, returns updated stepsize based on ALS iteration /// \param[in] converge_test test to see if the ALS is converged - void update_w_KRP(int n, int rank, bool & fast_pI, double lambda, double & s, ConvClass & converge_test) { + void update_w_KRP(int n, int rank, bool & fast_pI, bool & matlab, + double lambda, double & s, ConvClass & converge_test) { Tensor temp(A[n].extent(0), rank); Tensor an(A[n].range()); @@ -740,18 +741,17 @@ namespace btas{ } // contract the product from above with the pseudoinverse of the Hadamard // produce an optimize factor matrix - gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pseudoInverse(n, rank, fast_pI, lambda), 0.0, an); + this->psuedoinverse_helper(n, fast_pI, matlab, temp, lambda); // compute the difference between this new factor matrix and the previous // iteration - //for (auto l = 0; l < rank; ++l) A[ndim](l) = normCol(an, l); - normCol(an); + normCol(temp); // Compute the value s after normalizing the columns - s = helper(n, an); + s = helper(n, temp); // Replace the old factor matrix with the new optimized result - A[n] = an; + A[n] = temp; } /// Computes an optimized factor matrix holding all others constant. @@ -910,44 +910,16 @@ namespace btas{ if(typeid(converge_test) == typeid(btas::FitCheck)) { converge_test.set_MtKRP(temp); } - -#ifdef _HAS_INTEL_MKL - if(fast_pI && matlab) { - // This method computes the inverse quickly for a square matrix - // based on MATLAB's implementation of A / B operator. - btas::Tensor > piv(rank); - piv.fill(0); - - auto a = generate_V(n, rank, lambda); - int LDB = temp.extent(0); - auto info = LAPACKE_dgesv(CblasColMajor, rank, LDB, a.data(), rank, piv.data(), temp.data(), rank); - if (info == 0) { - an = temp; - } - else{ - // If inverse fails resort to the pseudoinverse - std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl; - matlab = false; - } - } - if(!fast_pI || ! matlab){ - gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pseudoInverse(n, rank, fast_pI, lambda), 0.0, an); - } -#else - matlab = false; - if(!fast_pI || !matlab){ - gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pseudoInverse(n, rank, fast_pI, lambda), 0.0, an); - } -#endif - + + this->psuedoinverse_helper(n, fast_pI, matlab, temp, lambda); // Normalize the columns of the new factor matrix and update - normCol(an); + normCol(temp); // Compute S after column normalization for RALS - s = helper(n, an); + s = helper(n, temp); - A[n] = an; + A[n] = temp; } }; From 5451e31fd232eb507c36bd80143b1d283d19f375 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:27:52 -0500 Subject: [PATCH 25/66] Remove _HAS_INTEL_MKL guard from PALS --- btas/generic/cp.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index d6009674..8f2781a5 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -302,7 +302,6 @@ namespace btas{ return epsilon; } -#ifdef _HAS_INTEL_MKL /// virtual function implemented in solver /// Computes decomposition of the order-N tensor \c tensor /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor) @@ -326,7 +325,6 @@ namespace btas{ /// false && ConvClass != FitCheck. virtual double compute_PALS(std::vector & converge_list, double RankStep = 0.5, int panels = 4, int max_als = 20,bool fast_pI = false, bool calculate_epsilon = false, bool direct = true) = 0; -#endif // _HAS_INTEL_MKL /// returns the rank \c rank optimized factor matrices /// \return Factor matrices stored in a vector. For example, a order-3 From 9486efe0ce25554f39ea41f811df9e66f99527e5 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:28:07 -0500 Subject: [PATCH 26/66] Formatting --- btas/generic/cp_df_als.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h index fd580b39..ebd8c044 100644 --- a/btas/generic/cp_df_als.h +++ b/btas/generic/cp_df_als.h @@ -131,7 +131,6 @@ namespace btas{ ~CP_DF_ALS() = default; -#ifdef _HAS_INTEL_MKL /// \brief Computes decomposition of the order-N tensor \c tensor /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor) /// Initial guess for factor matrices start at rank = max_dim(reference_tensor) @@ -223,12 +222,9 @@ namespace btas{ ALS(rank_new, converge_test, max_als, calculate_epsilon, epsilon, fast_pI); } count++; - //if (count + 1 == panels) max_als = 1000; } - //std::cout << "Number of ALS iterations was " << this->num_ALS << std::endl; return epsilon; } -#endif // _HAS_INTEL_MKL protected: Tensor &tensor_ref_left; // Left connected tensor From 3c1583e276bdaa91563b58e77179059ac6d74722 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:28:30 -0500 Subject: [PATCH 27/66] Use the Eigenvalue decomposition from linear algebra in build --- btas/generic/cp_df_als.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h index ebd8c044..857c9bfd 100644 --- a/btas/generic/cp_df_als.h +++ b/btas/generic/cp_df_als.h @@ -267,7 +267,6 @@ namespace btas{ // If its the first time into build and SVD_initial_guess // build and optimize the initial guess based on the left // singular vectors of the reference tensor. -#ifdef _HAS_INTEL_MKL if (A.empty() && SVD_initial_guess) { if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD"); @@ -336,8 +335,7 @@ namespace btas{ gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S); // Find the Singular vectors of the matrix using eigenvalue decomposition - auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data()); - if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); + eigenvalue_decomp(S, lambda); // Fill a factor matrix with the singular vectors with the largest corresponding singular // values @@ -379,9 +377,6 @@ namespace btas{ // Optimize this initial guess. ALS(SVD_rank, converge_test, max_als, calculate_epsilon, epsilon, fast_pI); } -#else // _HAS_INTEL_MKL - if (SVD_initial_guess) BTAS_EXCEPTION("Computing the SVD requires LAPACK"); -#endif // _HAS_INTEL_MKL // This loop keeps track of column dimension bool opt_in_for_loop = false; for (auto i = (A.empty()) ? 0 : A.at(0).extent(1); i < rank; i += step) { From d4dfb8cd0598452ed2f9480f7aa7c6ee3d6c5749 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:28:40 -0500 Subject: [PATCH 28/66] Remove comments --- btas/generic/cp_df_als.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h index 857c9bfd..c3d4f856 100644 --- a/btas/generic/cp_df_als.h +++ b/btas/generic/cp_df_als.h @@ -475,7 +475,6 @@ namespace btas{ for(int i = 1; i < ndimL; ++i){ auto & tensor_ref = tensor_ref_left; Tensor a(Range{Range1{tensor_ref.extent(i)}, Range1{rank}}); - //std::uniform_int_distribution distribution(0, std::numeric_limits::max() - 1); for(auto iter = a.begin(); iter != a.end(); ++iter){ *(iter) = distribution(generator); } @@ -485,7 +484,6 @@ namespace btas{ auto & tensor_ref = tensor_ref_right; Tensor a(tensor_ref.extent(i), rank); - //std::uniform_int_distribution distribution(0, std::numeric_limits::max() - 1); for(auto iter = a.begin(); iter != a.end(); ++iter){ *(iter) = distribution(generator); } @@ -692,7 +690,6 @@ namespace btas{ auto a_dim = leftTensor ? contract_dim : ndim - 1; auto offset = 0; - // TODO fix the hadamard contraction loop // go through hadamard contract on all dimensions excluding rank (will skip one dimension) for(int i = 0; i < ndimCurr - 2; ++i, --contract_dim, --a_dim){ auto contract_size = dims[contract_dim]; From 4cd469f06de9b9b6455eb965b5c0c57963133b6b Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:30:15 -0500 Subject: [PATCH 29/66] Add fast_pI variable to direct function and use new pseudoinverse function --- btas/generic/cp_df_als.h | 39 ++++++--------------------------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h index c3d4f856..ff8faf0a 100644 --- a/btas/generic/cp_df_als.h +++ b/btas/generic/cp_df_als.h @@ -533,7 +533,7 @@ namespace btas{ for (auto i = 0; i < ndim; i++) { auto tmp = symmetries[i]; if(tmp == i) { - direct(i, rank, matlab, converge_test); + direct(i, rank, fast_pI, matlab, converge_test); } else if(tmp < i){ A[i] = A[tmp]; @@ -574,7 +574,7 @@ namespace btas{ /// in the same manner that matlab would compute the inverse. /// return if computing the inverse in this was was successful /// \param[in] converge_test test to see if the ALS is converged - void direct(int n, int rank, bool & matlab, ConvClass & converge_test) { + void direct(int n, int rank, bool & fast_pI, bool & matlab, ConvClass & converge_test) { // Determine if n is in the left or the right tensor bool leftTensor = n < (ndimL - 1); @@ -774,43 +774,16 @@ namespace btas{ // multiply resulting matrix temp by pseudoinverse to calculate optimized // factor matrix //t1 = std::chrono::high_resolution_clock::now(); -#ifdef _HAS_INTEL_MKL - if(matlab) { - // This method computes the inverse quickly for a square matrix - // based on MATLAB's implementation of A / B operator. - btas::Tensor > piv(rank); - piv.fill(0); - - auto a = generate_V(n, rank); - int LDB = contract_tensor.extent(0); - auto info = LAPACKE_dgesv(CblasColMajor, rank, LDB, a.data(), rank, piv.data(), contract_tensor.data(), rank); - if (info == 0) { - an = contract_tensor; - } - else{ - // If inverse fails resort to the pseudoinverse - std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl; - matlab = false; - } - } - if(! matlab){ - bool fast = false; - gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(n, rank, fast), 0.0, an); - } -#else - matlab = false; - if( !matlab){ - gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(n, rank, matlab), 0.0, an); - } -#endif + + this->psuedoinverse_helper(n, fast_pI, matlab, contract_tensor); //t2 = std::chrono::high_resolution_clock::now(); //time = t2 - t1; //gemm_wPI += time.count(); // Normalize the columns of the new factor matrix and update - normCol(an); - A[n] = an; + normCol(contract_tensor); + A[n] = contract_tensor; } }; From c63ce9ae0a92924e22b9ac3845d36c657a7ac845 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:32:28 -0500 Subject: [PATCH 30/66] Remove old pseudoinverse function and rename new function --- btas/generic/coupled_cp_als.h | 4 ++-- btas/generic/cp.h | 42 +---------------------------------- btas/generic/cp_df_als.h | 1 - btas/generic/cp_rals.h | 3 +-- btas/generic/linear_algebra.h | 2 +- 5 files changed, 5 insertions(+), 47 deletions(-) diff --git a/btas/generic/coupled_cp_als.h b/btas/generic/coupled_cp_als.h index f6dcdea6..c337c22d 100644 --- a/btas/generic/coupled_cp_als.h +++ b/btas/generic/coupled_cp_als.h @@ -475,7 +475,7 @@ namespace btas{ } // Finally Form the product of K * J^\dagger Tensor a0(coupled_dim, rank); - gemm(CblasNoTrans, CblasNoTrans, 1.0, K, pseudoInverse(J1, rank), 0.0, a0); + gemm(CblasNoTrans, CblasNoTrans, 1.0, K, pseudoInverse(J1, fast_pI), 0.0, a0); this->normCol(a0); A[0] = a0; } @@ -551,7 +551,7 @@ namespace btas{ else if(n == this->ndim - 1) detail::set_MtKRPR(converge_test, contract_tensor); Tensor an(skip_dim, rank); - gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(G, rank), 0.0, an); + gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(G, fast_pI), 0.0, an); this->normCol(an); A[n] = an; } diff --git a/btas/generic/cp.h b/btas/generic/cp.h index 8f2781a5..daef9c2b 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -565,46 +565,6 @@ namespace btas{ /// on return reports whether the fast route was used. /// \param[in] lambda Regularization parameter lambda is added to the diagonal of V /// \return The pseudoinverse of the matrix V. - Tensor pseudoInverse(int n, int R, bool & fast_pI, double lambda = 0.0) { - // CP_ALS method requires the pseudoinverse of matrix V - auto a = this->generate_V(n, R, lambda); - - if(!fast_pI) { - Tensor s(Range{Range1{R}}); - Tensor U(Range{Range1{R}, Range1{R}}); - Tensor Vt(Range{Range1{R}, Range1{R}}); - -// btas has no generic SVD for MKL LAPACKE -// time1 = std::chrono::high_resolution_clock::now(); -#ifdef LAPACKE_ENABLED - - gesvd('A', 'A', a, s, U, Vt); - -#endif - - // Inverse the Singular values with threshold 1e-13 = 0 - double lr_thresh = 1e-13; - Tensor s_inv(Range{Range1{R}, Range1{R}}); - s_inv.fill(0.0); - for (auto i = 0; i < R; ++i) { - if (s(i) > lr_thresh) - s_inv(i, i) = 1 / s(i); - else - s_inv(i, i) = s(i); - } - s.resize(Range{Range1{R}, Range1{R}}); - - // Compute the matrix A^-1 from the inverted singular values and the U and - // V^T provided by the SVD - gemm(CblasNoTrans, CblasNoTrans, 1.0, U, s_inv, 0.0, s); - gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U); - - return U; - } - else{ - BTAS_EXCEPTION("Pseudo inverse failed" ); - } - } /// Trying to solve Ax = B /// First try Cholesky to solve this problem directly @@ -626,7 +586,7 @@ namespace btas{ cholesky = cholesky_inverse(a, B); return; } - auto pInv = pseudoInverse_impl(a, fast_pI); + auto pInv = pseudoInverse(a, fast_pI); Tensor an(B.extent(0), rank); gemm(CblasNoTrans, CblasNoTrans, 1.0, B, pInv, 0.0, an); B = an; diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h index ff8faf0a..77f34a8e 100644 --- a/btas/generic/cp_df_als.h +++ b/btas/generic/cp_df_als.h @@ -81,7 +81,6 @@ namespace btas{ using CP::A; using CP::ndim; - using CP::pseudoInverse; using CP::normCol; using CP::generate_KRP; using CP::generate_V; diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h index 2590674e..ac26ece5 100644 --- a/btas/generic/cp_rals.h +++ b/btas/generic/cp_rals.h @@ -95,7 +95,6 @@ namespace btas{ public: using CP::A; using CP::ndim; - using CP::pseudoInverse; using CP::normCol; using CP::generate_KRP; using CP::generate_V; @@ -910,7 +909,7 @@ namespace btas{ if(typeid(converge_test) == typeid(btas::FitCheck)) { converge_test.set_MtKRP(temp); } - + this->psuedoinverse_helper(n, fast_pI, matlab, temp, lambda); // Normalize the columns of the new factor matrix and update diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index 0b0cb7f9..444dfccf 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -226,7 +226,7 @@ bool cholesky_inverse(Tensor & A, Tensor & B){ /// \param[in] a matrix to be inverted. /// \return a^{\dagger} The pseudoinverse of the matrix a. template -Tensor pseudoInverse_impl(Tensor & a, bool & fast_pI) { +Tensor pseudoInverse(Tensor & a, bool & fast_pI) { #ifndef LAPACKE_ENABLED BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE"); #endif // LAPACKE_ENABLED From f3b3b7fa31f8cafd160923eab8d3c4100f16d92d Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:35:28 -0500 Subject: [PATCH 31/66] Add description --- btas/generic/default_random_seed.h | 1 + 1 file changed, 1 insertion(+) diff --git a/btas/generic/default_random_seed.h b/btas/generic/default_random_seed.h index 7a779a03..ecbecf5a 100644 --- a/btas/generic/default_random_seed.h +++ b/btas/generic/default_random_seed.h @@ -5,6 +5,7 @@ #ifndef BTAS_GENERIC_DEFAULT_RANDOM_SEED_H #define BTAS_GENERIC_DEFAULT_RANDOM_SEED_H namespace btas{ + // A seed for the random number generator. static inline unsigned int& random_seed_accessor(){ static unsigned int value = 3; return value; From 8b850e7814d53139b9342da13754433e3bbe8a71 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:37:16 -0500 Subject: [PATCH 32/66] Fix typo --- btas/generic/flatten.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/btas/generic/flatten.h b/btas/generic/flatten.h index c4438893..47951447 100644 --- a/btas/generic/flatten.h +++ b/btas/generic/flatten.h @@ -51,7 +51,7 @@ template Tensor flatten(const Tensor &A, int mode) { /// matrix X \param[in] indexj The column index of matrix X \param[in] J The /// step size for the row dimension of X \param[in] tensor_itr An iterator of \c A. /// The value of the iterator is placed in the correct position of X using -/// recursive calls of fill()m. +/// recursive calls of fill(). template void fill(const Tensor &A, int depth, Tensor &X, int mode, int indexi, int indexj, From e58880d82277f21c9826bc0fc1d5a165e158be6b Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:38:21 -0500 Subject: [PATCH 33/66] Remove old todo --- btas/generic/rals_helper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/btas/generic/rals_helper.h b/btas/generic/rals_helper.h index 3fe616d8..e4b869de 100644 --- a/btas/generic/rals_helper.h +++ b/btas/generic/rals_helper.h @@ -30,7 +30,6 @@ namespace btas{ /// \param[in] mode which mode of the actual tensor /// is being updated /// \param[in] An the updated factor matrix - // TODO fix the dot function double operator() (int mode, const Tensor& An){ auto size = An.size(); auto change = An - prev_[mode]; From b408049b4d7d576e1acd19979c5f7906337c8bc2 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:39:50 -0500 Subject: [PATCH 34/66] Randomized needs linear algebra for QR --- btas/generic/randomized.h | 1 + 1 file changed, 1 insertion(+) diff --git a/btas/generic/randomized.h b/btas/generic/randomized.h index e64e7976..6f18792a 100644 --- a/btas/generic/randomized.h +++ b/btas/generic/randomized.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include From ad9eabf769281fe5d5c903302184a015100401b9 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:42:57 -0500 Subject: [PATCH 35/66] Swaps here require MKL. If they don't have it throw exception --- btas/generic/swap.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/btas/generic/swap.h b/btas/generic/swap.h index f4b84d66..28734c0e 100644 --- a/btas/generic/swap.h +++ b/btas/generic/swap.h @@ -32,6 +32,9 @@ namespace btas { template void swap_to_first(Tensor &A, int mode, bool is_in_front = false, bool for_ALS_update = true) { +#ifndef _HAS_INTEL_MKL + BTAS_EXCEPTION("In place tranposes require MKL"); +#endif // If the mode of interest is the the first mode you are done. if(mode > A.rank()){ BTAS_EXCEPTION("Mode index is greater than tensor rank"); @@ -118,6 +121,9 @@ void swap_to_first(Tensor &A, int mode, bool is_in_front = false, template void swap_to_back(Tensor &A, int mode, bool is_in_back = false) { +#ifndef _HAS_INTEL_MKL + BTAS_EXCEPTION("In place tranposes require MKL"); +#endif if (mode > A.rank()) BTAS_EXCEPTION_MESSAGE(__FILE__, __LINE__, "mode > A.rank(), mode out of range"); From 6b5f2404703f547490dfd3b394e1d8f11a034e9f Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:47:40 -0500 Subject: [PATCH 36/66] Remove unnecessary square root --- btas/generic/tucker.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h index c0515f00..f4cc129e 100644 --- a/btas/generic/tucker.h +++ b/btas/generic/tucker.h @@ -21,8 +21,8 @@ namespace btas { template void tucker_compression(Tensor &A, double epsilon_svd, std::vector &transforms) { - double norm2 = norm(A); - norm2 *= norm2; + double norm2 = dot(A,A); + //norm2 *= norm2; auto ndim = A.rank(); for (int i = 0; i < ndim; i++) { From b647faf875cc4fbf5b01b3be69f0d963d4403518 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 15:57:30 -0500 Subject: [PATCH 37/66] remove unused norm function --- btas/generic/tucker.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h index f4cc129e..9b60c3d4 100644 --- a/btas/generic/tucker.h +++ b/btas/generic/tucker.h @@ -64,13 +64,6 @@ void tucker_compression(Tensor &A, double epsilon_svd, core_contract(A, lambda, i); } } - -/// calculates the 2-norm of a matrix Mat -/// \param[in] Mat The matrix who's 2-norm is caclulated - -template double norm(const Tensor &Mat) { - return sqrt(dot(Mat, Mat)); -} } // namespace btas #endif //_HAS_INTEL_MKL #endif // BTAS_TUCKER_DECOMP_H From c626f8ea017b7dec7b31e4300d3450d895e0aee2 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 16:16:01 -0500 Subject: [PATCH 38/66] Was using ALS for RALS tests, updated results to match --- unittest/cp_test_results.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unittest/cp_test_results.txt b/unittest/cp_test_results.txt index 2e34e038..a6650b58 100644 --- a/unittest/cp_test_results.txt +++ b/unittest/cp_test_results.txt @@ -12,16 +12,16 @@ 0.5092600887442312 0.8023893427056535 0.9998165185189172 -0.7909930183444412 -0.5365678558201658 +0.7965146841534007 +0.5365998545867845 0.5946792918362551 0.9902479480481341 -0.6040159648938821 -0.5862586400695924 +0.6077571914361457 +0.5862884447076786 0.04914517954652209 0.1829273308737376 -0.0008026970681476175 -0.5092600887442312 +0.002141341851659817 +0.509270575828507 0.8857484066143582 0.9928149635593394 0.7671766452773272 From ecb937863b54bc61b4881ab9bb22b15c92d1eace Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Mon, 27 Jan 2020 16:17:41 -0500 Subject: [PATCH 39/66] Reduce the tightness of the test. Remove unnecessary ifdef with _HAS_INTEL_MKL and use RALS in tucker and random RALS tests --- unittest/tensor_cp_test.cc | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/unittest/tensor_cp_test.cc b/unittest/tensor_cp_test.cc index dbd8484d..f0b360f2 100644 --- a/unittest/tensor_cp_test.cc +++ b/unittest/tensor_cp_test.cc @@ -21,7 +21,8 @@ TEST_CASE("CP") using btas::CP_DF_ALS; using btas::COUPLED_CP_ALS; - double epsilon = fmax(1e-10, std::numeric_limits::epsilon()); + //double epsilon = fmax(1e-10, std::numeric_limits::epsilon()); + double epsilon = 1e-7; // TEST_CASE("CP_ALS"){ tensor D3(5, 2, 9); std::ifstream in3(__dirname + "/mat3D.txt"); @@ -89,7 +90,6 @@ TEST_CASE("CP") double diff = A1.compute_error(conv, 1e-2, 1, 100); CHECK((diff - results(1,0)) <= epsilon); } -#ifdef _HAS_INTEL_MKL SECTION("ALS MODE = 3, Tucker + CP"){ auto d = D3; CP_ALS A1(d); @@ -106,7 +106,6 @@ TEST_CASE("CP") A1.compress_compute_rand(2, conv, 0, 2, false, 1e-2, 5); CHECK((diff - results(3,0)) <= epsilon); } -#endif SECTION("ALS MODE = 4, Finite rank"){ CP_ALS A1(D4); @@ -121,7 +120,6 @@ TEST_CASE("CP") double diff = A1.compute_error(conv, 1e-2, 1, 100); CHECK((diff - results(5,0)) <= epsilon); } -#ifdef _HAS_INTEL_MKL SECTION("ALS MODE = 4, Tucker + CP"){ auto d = D4; CP_ALS A1(d); @@ -136,8 +134,6 @@ TEST_CASE("CP") double diff = A1.compress_compute_rand(3, conv, 0, 2, true, 1e-2, 0, true, false, 1, 20, 100); CHECK((diff - results(7,0)) <= epsilon); } -#endif - SECTION("ALS MODE = 5, Finite rank"){ CP_ALS A1(D5); conv.set_norm(norm5); @@ -150,7 +146,6 @@ TEST_CASE("CP") double diff = A1.compute_error(conv, 1e-2, 10, 200); CHECK((diff - results(9,0)) <= epsilon); } -#ifdef _HAS_INTEL_MKL SECTION("ALS MODE = 5, Tucker + CP"){ auto d = D5; CP_ALS A1(d); @@ -165,7 +160,6 @@ TEST_CASE("CP") double diff = A1.compress_compute_rand(1, conv, 0, 2, false, 1e-2, 5); CHECK((diff - results(11,0)) <= epsilon); } -#endif } // RALS tests { @@ -182,10 +176,9 @@ TEST_CASE("CP") double diff = A1.compute_error(conv, 1e-2, 1, 100); CHECK((diff - results(13,0)) <= epsilon); } -#ifdef _HAS_INTEL_MKL SECTION("RALS MODE = 3, Tucker + CP"){ auto d = D3; - CP_ALS A1(d); + CP_RALS A1(d); conv.set_norm(norm3); double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false); @@ -193,14 +186,12 @@ TEST_CASE("CP") } SECTION("RALS MODE = 3, Random + CP"){ auto d = D3; - CP_ALS A1(d); + CP_RALS A1(d); conv.set_norm(norm3); double diff = A1.compress_compute_rand(2, conv, 0, 2, false, 1e-2, 5); CHECK((diff - results(15,0)) <= epsilon); } -#endif - SECTION("RALS MODE = 4, Finite rank"){ CP_RALS A1(D4); conv.set_norm(norm4); @@ -214,23 +205,20 @@ TEST_CASE("CP") double diff = A1.compute_error(conv, 1e-2, 1, 100); CHECK((diff - results(17,0)) <= epsilon); } -#ifdef _HAS_INTEL_MKL SECTION("RALS MODE = 4, Tucker + CP"){ auto d = D4; - CP_ALS A1(d); + CP_RALS A1(d); conv.set_norm(norm4); double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5); CHECK((diff - results(18,0)) <= epsilon); } SECTION("RALS MODE = 4, Random + CP"){ auto d = D4; - CP_ALS A1(d); + CP_RALS A1(d); conv.set_norm(norm4); double diff = A1.compress_compute_rand(3, conv, 0, 2, true, 1e-2, 0, true, false, 1, 20, 100); CHECK((diff - results(19,0)) <= epsilon); } -#endif - SECTION("RALS MODE = 5, Finite rank"){ CP_RALS A1(D5); conv.set_norm(norm5); @@ -243,22 +231,20 @@ TEST_CASE("CP") double diff = A1.compute_error(conv, 1e-2, 1, 20); CHECK((diff - results(21,0)) <= epsilon); } -#ifdef _HAS_INTEL_MKL SECTION("RALS MODE = 5, Tucker + CP"){ auto d = D5; - CP_ALS A1(d); + CP_RALS A1(d); conv.set_norm(norm5); double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false); CHECK((diff - results(22,0)) <= epsilon); } SECTION("RALS MODE = 5, Random + CP"){ auto d = D5; - CP_ALS A1(d); + CP_RALS A1(d); conv.set_norm(norm5); double diff = A1.compress_compute_rand(1, conv, 0, 2, false, 1e-2, 5); CHECK((diff - results(23,0)) <= epsilon); } -#endif } // CP-DF-ALS tests { From 51e9c2f35772d7ebf5bcff9d127fe50a92a26988 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 28 Jan 2020 09:06:52 -0500 Subject: [PATCH 40/66] reorganized macro and cmake variable names that control linear algebra ... will impact downstream users --- CMakeLists.txt | 26 +++++----- bin/travisci_build_linux.sh | 8 ++-- btas/generic/coupled_cp_als.h | 4 +- btas/generic/cp.h | 6 +-- btas/generic/cp_als.h | 8 ++-- btas/generic/cp_rals.h | 8 ++-- btas/generic/gemm_impl.h | 4 +- btas/generic/linear_algebra.h | 22 ++++----- btas/generic/swap.h | 8 ++-- btas/generic/tucker.h | 4 +- btas/types.h | 64 ++++++++----------------- cmake/modules/RedefaultableOption.cmake | 12 +++++ 12 files changed, 82 insertions(+), 92 deletions(-) create mode 100644 cmake/modules/RedefaultableOption.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ae3a571..373c3aa8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,21 +4,25 @@ cmake_minimum_required (VERSION 3.1.0) project (BTAS) include(FeatureSummary) +include(RedefaultableOption) # Configure options -option(BTAS_BUILD_UNITTEST "Causes building BTAS unit tests" ON) -option(BTAS_EXPERT "BTAS Expert mode: disables automatically downloading or building dependencies" OFF) -option(BTAS_ASSERT_THROWS "Whether BTAS_ASSERT should throw" OFF) +redefaultable_option(BTAS_BUILD_UNITTEST "Whether to build unit tests" OFF) +add_feature_info(BUILD_UNITTEST BTAS_BUILD_UNITTEST "Will build unit tests") +redefaultable_option(BTAS_ASSERT_THROWS "Whether BTAS_ASSERT should throw; enable if BTAS_BUILD_UNITTEST=ON" OFF) +add_feature_info(ASSERT_THROWS BTAS_ASSERT_THROWS "BTAS_ASSERT(x) will throw if x is false, and not be affected by NDEBUG") +redefaultable_option(BTAS_USE_CBLAS_LAPACKE "Whether to use BLAS+LAPACK via CBLAS+LAPACKE" ON) +add_feature_info(USE_CBLAS_LAPACKE BTAS_USE_CBLAS_LAPACKE "Will use BLAS and LAPACK linear algebra libraries via their CBLAS and LAPACKE interfaces") set(TARGET_MAX_INDEX_RANK 6 CACHE STRING "Determines the rank for which the default BTAS index type will use stack (default: 6); this requires Boost.Container") add_feature_info("TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}" TRUE "default BTAS index type will use stack for rank<=${TARGET_MAX_INDEX_RANK}") # Set BTAS version -set(BTAS_MAJOR_VERSION 0) -set(BTAS_MINOR_VERSION 1) +set(BTAS_MAJOR_VERSION 1) +set(BTAS_MINOR_VERSION 0) set(BTAS_MICRO_VERSION 0) -set(BTAS_BUILDID alpha) -set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}-${BTAS_BUILDID}") +set(BTAS_PRERELEASE alpha) +set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}-${BTAS_PRERELEASE}") set(TARGET_ARCH "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") # extra cmake files are shipped with BTAS @@ -77,10 +81,10 @@ check_type_size("long long" BTAS_HAS_LONG_LONG) add_custom_target(External) if (BTAS_BUILD_UNITTEST) - if (USE_CBLAS) + if (BTAS_USE_CBLAS_LAPACKE) find_package(CBLAS REQUIRED) find_package(LAPACKE REQUIRED) - add_definitions(-D_CBLAS_HEADER="${CBLAS_INCLUDE_FILE}" -D_LAPACKE_HEADER="${LAPACKE_INCLUDE_FILE}" -DBTAS_HAS_CBLAS=1) + add_definitions(-DBTAS_CBLAS_HEADER="${CBLAS_INCLUDE_FILE}" -DBTAS_LAPACKE_HEADER="${LAPACKE_INCLUDE_FILE}" -DBTAS_HAS_CBLAS=1 -DBTAS_HAS_LAPACKE=1) message(STATUS "** CBLAS_LIBRARIES = ${CBLAS_LIBRARIES}") message(STATUS "** CBLAS_INCLUDE_DIR = ${CBLAS_INCLUDE_DIR}") message(STATUS "** CBLAS_INCLUDE_FILE = ${CBLAS_INCLUDE_FILE}") @@ -89,9 +93,9 @@ if (BTAS_BUILD_UNITTEST) message(STATUS "** LAPACKE_INCLUDE_FILE = ${LAPACKE_INCLUDE_FILE}") include_directories(${CBLAS_INCLUDE_DIR} ${LAPACKE_INCLUDE_DIR}) if (MKL_FOUND) - add_definitions(-D_HAS_INTEL_MKL=1) + add_definitions(-DBTAS_HAS_INTEL_MKL=1) endif(MKL_FOUND) - endif(USE_CBLAS) + endif(BTAS_USE_CBLAS_LAPACKE) include(external/boost.cmake) endif() diff --git a/bin/travisci_build_linux.sh b/bin/travisci_build_linux.sh index 61083d5a..49f8bd29 100755 --- a/bin/travisci_build_linux.sh +++ b/bin/travisci_build_linux.sh @@ -14,18 +14,18 @@ fi cd ${BUILD_PREFIX} -########## test with cblas ########## +########## test with blas+lapack ########## mkdir build_cblas cd build_cblas -cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DUSE_CBLAS=ON -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON +cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON make VERBOSE=1 make check VERBOSE=1 cd .. -########## test without cblas ########## +########## test without blas+lapack ########## mkdir build cd build -cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON +cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_USE_CBLAS_LAPACKE=OFF -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON make VERBOSE=1 make check VERBOSE=1 cd .. diff --git a/btas/generic/coupled_cp_als.h b/btas/generic/coupled_cp_als.h index c337c22d..f23c0ca0 100644 --- a/btas/generic/coupled_cp_als.h +++ b/btas/generic/coupled_cp_als.h @@ -122,7 +122,7 @@ namespace btas{ "Tensor describing symmetries must be defined for all dimensions"); } -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL /// \brief Computes decomposition of the order-N tensor \c tensor /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor) /// Initial guess for factor matrices start at rank = max_dim(reference_tensor) @@ -147,7 +147,7 @@ namespace btas{ int max_als = 20,bool fast_pI = false, bool calculate_epsilon = false, bool direct = true) override{ BTAS_EXCEPTION("Function not yet implemented"); } -#endif //_HAS_INTEL_MKL +#endif //BTAS_HAS_INTEL_MKL protected: Tensor& tensor_ref_left; // Tensor in first term of the loss function diff --git a/btas/generic/cp.h b/btas/generic/cp.h index daef9c2b..934c2ec5 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -23,7 +23,7 @@ #include #include -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL #include #endif @@ -126,8 +126,8 @@ namespace btas{ /// tensor /// \param[in] ndim number of modes in the reference tensor. CP(int dims) : num_ALS(0) { -#if not defined(BTAS_HAS_CBLAS) || not defined(_HAS_INTEL_MKL) - BTAS_EXCEPTION_MESSAGE(__FILE__, __LINE__, "CP decompositions requires LAPACKE or mkl_lapack"); +#if not defined(BTAS_HAS_LAPACKE) + BTAS_EXCEPTION_MESSAGE(__FILE__, __LINE__, "CP decompositions requires LAPACKE"); #endif ndim = dims; } diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index a50c845c..a120c4be 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -12,7 +12,7 @@ #include #include -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL #include #endif @@ -211,7 +211,7 @@ namespace btas{ return epsilon; } -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL /// \brief Computes an approximate core tensor using /// Tucker decomposition, e.g. /// \f$ T(I_1 \dots I_N) \approx T(R_1 \dots R_N) U^{(1)} (R_1, I_1) \dots U^{(N)} (R_N, I_N) \f$ @@ -362,7 +362,7 @@ namespace btas{ return epsilon; } -#endif //_HAS_INTEL_MKL +#endif //BTAS_HAS_INTEL_MKL protected: @@ -663,7 +663,7 @@ namespace btas{ Tensor temp(A[n].extent(0), rank); Tensor an(A[n].range()); -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL // Computes the Khatri-Rao product intermediate auto KhatriRao = this->generate_KRP(n, rank, true); diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h index ac26ece5..efea1c05 100644 --- a/btas/generic/cp_rals.h +++ b/btas/generic/cp_rals.h @@ -22,7 +22,7 @@ #include #include -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL #include #endif @@ -134,7 +134,7 @@ namespace btas{ ~CP_RALS() = default; -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL /// \brief Computes decomposition of the order-N tensor \c tensor /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor) @@ -379,7 +379,7 @@ namespace btas{ return epsilon; } -#endif //_HAS_INTEL_MKL +#endif //BTAS_HAS_INTEL_MKL protected: int size; // number of elements in tensor_ref @@ -691,7 +691,7 @@ namespace btas{ Tensor temp(A[n].extent(0), rank); Tensor an(A[n].range()); -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL // Computes the Khatri-Rao product intermediate auto KhatriRao = generate_KRP(n, rank, true); diff --git a/btas/generic/gemm_impl.h b/btas/generic/gemm_impl.h index 3a5513f3..e7f4d18c 100644 --- a/btas/generic/gemm_impl.h +++ b/btas/generic/gemm_impl.h @@ -289,7 +289,7 @@ template<> struct gemm_impl { const lapack_complex_float alphac = to_lapack_val(std::move(alpha)); const lapack_complex_float betac = to_lapack_val(std::move(beta)); -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL cblas_cgemm3m(order, transA, transB, Msize, Nsize, Ksize, &alphac, to_lapack_cptr(itrA), LDA, to_lapack_cptr(itrB), LDB, &betac, to_lapack_cptr(itrC), LDC); #else cblas_cgemm(order, transA, transB, Msize, Nsize, Ksize, &alphac, to_lapack_cptr(itrA), LDA, to_lapack_cptr(itrB), LDB, &betac, to_lapack_cptr(itrC), LDC); @@ -315,7 +315,7 @@ template<> struct gemm_impl { const lapack_complex_double alphac = to_lapack_val(std::move(alpha)); const lapack_complex_double betac = to_lapack_val(std::move(beta)); -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL cblas_zgemm3m(order, transA, transB, Msize, Nsize, Ksize, &alphac, to_lapack_zptr(itrA), LDA, to_lapack_zptr(itrB), LDB, &betac, to_lapack_zptr(itrC), LDC); #else cblas_zgemm(order, transA, transB, Msize, Nsize, Ksize, &alphac, to_lapack_zptr(itrA), LDA, to_lapack_zptr(itrB), LDB, &betac, to_lapack_zptr(itrC), LDC); diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index 444dfccf..3d6fe4e4 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -15,9 +15,9 @@ namespace btas{ template void LU_decomp(Tensor &A) { -#ifndef LAPACKE_ENABLED +#ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Using this function requires LAPACKE"); -#endif // LAPACKE_ENABLED +#endif // BTAS_HAS_LAPACKE if(A.rank() > 2){ BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); @@ -97,9 +97,9 @@ namespace btas{ template bool QR_decomp(Tensor &A) { -#ifndef LAPACKE_ENABLED +#ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Using this function requires LAPACKE"); -#endif // LAPACKE_ENABLED +#endif // BTAS_HAS_LAPACKE if(A.rank() > 2){ BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); @@ -137,9 +137,9 @@ namespace btas{ template bool Inverse_Matrix(Tensor & A){ -#ifndef LAPACKE_ENABLED +#ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Using LU matrix inversion requires LAPACKE"); -#endif // LAPACKE_ENABLED +#endif // BTAS_HAS_LAPACKE if(A.rank() > 2){ BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); @@ -173,9 +173,9 @@ namespace btas{ /// matrix \c A template void eigenvalue_decomp(Tensor & A, Tensor & lambda){ -#ifndef LAPACKE_ENABLED +#ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Using eigenvalue decomposition requires LAPACKE"); -#endif // LAPACKE_ENABLED +#endif // BTAS_HAS_LAPACKE if(A.rank() > 2){ BTAS_EXCEPTION("Tensor rank > 2. Tensor A must be a matrix."); } @@ -197,7 +197,7 @@ namespace btas{ /// out: The solution x = A^{-1}B template bool cholesky_inverse(Tensor & A, Tensor & B){ -#ifndef LAPACKE_ENABLED +#ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Cholesky inverse function requires LAPACKE") #endif // This method computes the inverse quickly for a square matrix @@ -227,9 +227,9 @@ bool cholesky_inverse(Tensor & A, Tensor & B){ /// \return a^{\dagger} The pseudoinverse of the matrix a. template Tensor pseudoInverse(Tensor & a, bool & fast_pI) { -#ifndef LAPACKE_ENABLED +#ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE"); -#endif // LAPACKE_ENABLED +#endif // BTAS_HAS_LAPACKE if (a.rank() > 2) { BTAS_EXCEPTION("PseudoInverse can only be computed on a matrix"); diff --git a/btas/generic/swap.h b/btas/generic/swap.h index 28734c0e..b8f29cec 100644 --- a/btas/generic/swap.h +++ b/btas/generic/swap.h @@ -1,7 +1,7 @@ #ifndef BTAS_SWAP_H #define BTAS_SWAP_H -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL #include @@ -32,7 +32,7 @@ namespace btas { template void swap_to_first(Tensor &A, int mode, bool is_in_front = false, bool for_ALS_update = true) { -#ifndef _HAS_INTEL_MKL +#ifndef BTAS_HAS_INTEL_MKL BTAS_EXCEPTION("In place tranposes require MKL"); #endif // If the mode of interest is the the first mode you are done. @@ -121,7 +121,7 @@ void swap_to_first(Tensor &A, int mode, bool is_in_front = false, template void swap_to_back(Tensor &A, int mode, bool is_in_back = false) { -#ifndef _HAS_INTEL_MKL +#ifndef BTAS_HAS_INTEL_MKL BTAS_EXCEPTION("In place tranposes require MKL"); #endif if (mode > A.rank()) @@ -159,6 +159,6 @@ void swap_to_back(Tensor &A, int mode, bool is_in_back = false) { } // namespace btas -#endif //_HAS_INTEL_MKL +#endif //BTAS_HAS_INTEL_MKL #endif // BTAS_SWAP_H diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h index 9b60c3d4..36704d93 100644 --- a/btas/generic/tucker.h +++ b/btas/generic/tucker.h @@ -1,7 +1,7 @@ #ifndef BTAS_TUCKER_DECOMP_H #define BTAS_TUCKER_DECOMP_H -#ifdef _HAS_INTEL_MKL +#ifdef BTAS_HAS_INTEL_MKL #include #include @@ -65,5 +65,5 @@ void tucker_compression(Tensor &A, double epsilon_svd, } } } // namespace btas -#endif //_HAS_INTEL_MKL +#endif //BTAS_HAS_INTEL_MKL #endif // BTAS_TUCKER_DECOMP_H diff --git a/btas/types.h b/btas/types.h index 4ed6d798..4e13a5f9 100644 --- a/btas/types.h +++ b/btas/types.h @@ -11,46 +11,53 @@ extern "C" { #endif // __cplusplus -#ifdef BTAS_HAS_CBLAS -# define LAPACKE_ENABLED -# if not defined(_CBLAS_HEADER) && not defined(_LAPACKE_HEADER) +#if defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE) +# if not defined(BTAS_CBLAS_HEADER) && not defined(BTAS_LAPACKE_HEADER) -# ifdef _HAS_INTEL_MKL +# ifdef BTAS_HAS_INTEL_MKL # include # include -# else // _HAS_INTEL_MKL +# else // BTAS_HAS_INTEL_MKL # include // see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I # include # ifndef lapack_complex_float # define lapack_complex_float std::complex +# else // lapack_complex_float + static_assert(sizeof(std::complex)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex do not match"); # endif // lapack_complex_float # ifndef lapack_complex_double # define lapack_complex_double std::complex +# else // lapack_complex_double + static_assert(sizeof(std::complex)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex do not match"); # endif // lapack_complex_double # include -# endif // _HAS_INTEL_MKL +# endif // BTAS_HAS_INTEL_MKL -# else // _CBLAS_HEADER +# else // BTAS_CBLAS_HEADER -# include _CBLAS_HEADER +# include BTAS_CBLAS_HEADER // see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I # include # ifndef lapack_complex_float # define lapack_complex_float std::complex +# else // lapack_complex_float + static_assert(sizeof(std::complex)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex do not match"); # endif // lapack_complex_float # ifndef lapack_complex_double # define lapack_complex_double std::complex +# else // lapack_complex_double + static_assert(sizeof(std::complex)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex do not match"); # endif // lapack_complex_double -# include _LAPACKE_HEADER +# include BTAS_LAPACKE_HEADER -# endif // _CBLAS_HEADER +# endif // BTAS_CBLAS_HEADER -#else // BTAS_HAS_CBLAS +#else // defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE) /// major order directive enum CBLAS_ORDER { CblasRowMajor, CblasColMajor }; @@ -67,45 +74,12 @@ enum CBLAS_DIAG { CblasNonUnit, CblasUnit }; /// transposition directive for symmetric matrix (not used) enum CBLAS_SIDE { CblasLeft, CblasRight }; -#endif // BTAS_HAS_CBLAS +#endif // defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE) #ifdef __cplusplus } #endif // __cplusplus -// some BLAS libraries define their own types for complex data -#ifndef HAVE_INTEL_MKL -#ifndef lapack_complex_float -# define lapack_complex_float std::complex -#else // lapack_complex_float -static_assert(sizeof(std::complex)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex do not match"); -#endif // lapack_complex_float -#ifndef lapack_complex_double -# define lapack_complex_double std::complex -#else // lapack_complex_double -static_assert(sizeof(std::complex)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex do not match"); -#endif // lapack_complex_double -#else // HAVE_INTEL_MKL -// if calling direct need to cast to the MKL complex types -# ifdef MKL_DIRECT_CALL -# include -# ifndef lapack_complex_float -# define lapack_complex_float MKL_Complex8 -# endif -# ifndef lapack_complex_double -# define lapack_complex_double MKL_Complex16 -# endif -// else can call via F77 prototypes which don't need type conversion -# else // MKL_DIRECT_CALL -# ifndef lapack_complex_float -# define lapack_complex_float std::complex -# endif -# ifndef lapack_complex_double -# define lapack_complex_double std::complex -# endif -# endif // MKL_DIRECT_CALL -#endif // HAVE_INTEL_MKL - namespace btas { template diff --git a/cmake/modules/RedefaultableOption.cmake b/cmake/modules/RedefaultableOption.cmake new file mode 100644 index 00000000..addec791 --- /dev/null +++ b/cmake/modules/RedefaultableOption.cmake @@ -0,0 +1,12 @@ +# if local variable is defined, use its value as the default, otherwise use _default +# this is consistent with cmake 3.13 and later (see policy CMP0077) +macro(redefaultable_option _name _descr _default) + + if (DEFINED ${_name}) + set(${_name}_DEFAULT ${${_name}}) + else() + set(${_name}_DEFAULT ${_default}) + endif() + option(${_name} ${_descr} ${${_name}_DEFAULT}) + +endmacro() From eb20233754fe27334f6c6c99d22b040e0754ce1f Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 09:42:48 -0500 Subject: [PATCH 41/66] Need preprocessor else so lapacke calls don't try to compile when its not included. --- btas/generic/linear_algebra.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index 3d6fe4e4..78ccbef1 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -17,7 +17,7 @@ namespace btas{ #ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Using this function requires LAPACKE"); -#endif // BTAS_HAS_LAPACKE +#else //BTAS_HAS_LAPACKE if(A.rank() > 2){ BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); @@ -89,6 +89,7 @@ namespace btas{ // contracting the pivoting matrix with L to put in correct order gemm(CblasNoTrans, CblasNoTrans, 1.0, P, L, 0.0, A); +#endif // BTAS_HAS_LAPACKE } /// Computes the QR decomposition of matrix \c A @@ -99,7 +100,7 @@ namespace btas{ #ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Using this function requires LAPACKE"); -#endif // BTAS_HAS_LAPACKE +#else //BTAS_HAS_LAPACKE if(A.rank() > 2){ BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); @@ -129,6 +130,7 @@ namespace btas{ } else { return false; } +#endif // BTAS_HAS_LAPACKE } /// Computes the inverse of a matrix \c A using a pivoted LU decomposition @@ -139,7 +141,7 @@ namespace btas{ #ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Using LU matrix inversion requires LAPACKE"); -#endif // BTAS_HAS_LAPACKE +#else //BTAS_HAS_LAPACKE if(A.rank() > 2){ BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); @@ -163,6 +165,7 @@ namespace btas{ return false; } return true; +#endif // BTAS_HAS_LAPACKE } /// Computes the eigenvalue decomposition of a matrix \c A and @@ -175,7 +178,7 @@ namespace btas{ void eigenvalue_decomp(Tensor & A, Tensor & lambda){ #ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Using eigenvalue decomposition requires LAPACKE"); -#endif // BTAS_HAS_LAPACKE +#else //BTAS_HAS_LAPACKE if(A.rank() > 2){ BTAS_EXCEPTION("Tensor rank > 2. Tensor A must be a matrix."); } @@ -188,6 +191,7 @@ namespace btas{ auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', smallest_mode_A, A.data(), smallest_mode_A, lambda.data()); if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess"); +#endif // BTAS_HAS_LAPACKE } /// Solving Ax = B using a Cholesky decomposition @@ -198,8 +202,8 @@ namespace btas{ template bool cholesky_inverse(Tensor & A, Tensor & B){ #ifndef BTAS_HAS_LAPACKE - BTAS_EXCEPTION("Cholesky inverse function requires LAPACKE") -#endif + BTAS_EXCEPTION("Cholesky inverse function requires LAPACKE"); +#else //BTAS_HAS_LAPACKE // This method computes the inverse quickly for a square matrix // based on MATLAB's implementation of A / B operator. auto rank = B.extent(1); @@ -216,6 +220,7 @@ bool cholesky_inverse(Tensor & A, Tensor & B){ std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl; return false; } +#endif //BTAS_HAS_LAPACKE } /// SVD referencing code from @@ -229,7 +234,7 @@ template Tensor pseudoInverse(Tensor & a, bool & fast_pI) { #ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE"); -#endif // BTAS_HAS_LAPACKE +#else //BTAS_HAS_LAPACKE if (a.rank() > 2) { BTAS_EXCEPTION("PseudoInverse can only be computed on a matrix"); @@ -274,6 +279,7 @@ Tensor pseudoInverse(Tensor & a, bool & fast_pI) { gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U); return U; +#endif // BTAS_HAS_LAPACKE } } // namespace btas From 4c6b312eef806d147e735619df58860a303facf8 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 10:37:13 -0500 Subject: [PATCH 42/66] Include path to RedefaultableOption --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 373c3aa8..d5d797d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required (VERSION 3.1.0) project (BTAS) include(FeatureSummary) -include(RedefaultableOption) +include(cmake/modules/RedefaultableOption.cmake) # Configure options redefaultable_option(BTAS_BUILD_UNITTEST "Whether to build unit tests" OFF) From 624f77f2e532cd412253fc50a97f9575b76956c9 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 10:37:59 -0500 Subject: [PATCH 43/66] Option needs quotations around description --- cmake/modules/RedefaultableOption.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/modules/RedefaultableOption.cmake b/cmake/modules/RedefaultableOption.cmake index addec791..ead06273 100644 --- a/cmake/modules/RedefaultableOption.cmake +++ b/cmake/modules/RedefaultableOption.cmake @@ -8,5 +8,6 @@ macro(redefaultable_option _name _descr _default) set(${_name}_DEFAULT ${_default}) endif() option(${_name} ${_descr} ${${_name}_DEFAULT}) + option(${_name} "${_descr}" ${OPTION}) endmacro() From 11727dd57043ee1ff1649efebde0e17345771507 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 10:39:06 -0500 Subject: [PATCH 44/66] Fixed a typo --- cmake/modules/RedefaultableOption.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cmake/modules/RedefaultableOption.cmake b/cmake/modules/RedefaultableOption.cmake index ead06273..52b572d2 100644 --- a/cmake/modules/RedefaultableOption.cmake +++ b/cmake/modules/RedefaultableOption.cmake @@ -7,7 +7,6 @@ macro(redefaultable_option _name _descr _default) else() set(${_name}_DEFAULT ${_default}) endif() - option(${_name} ${_descr} ${${_name}_DEFAULT}) - option(${_name} "${_descr}" ${OPTION}) + option(${_name} "${_descr}" ${${_name}_DEFAULT}) endmacro() From 4be74c5848045fe263ce9c341ac722987b3161bd Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 28 Jan 2020 11:16:44 -0500 Subject: [PATCH 45/66] fixup lapack type defs to be present even if not using cblas/lapack --- CMakeLists.txt | 8 +++---- btas/types.h | 58 ++++++++++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d5d797d9..35d2d5ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,8 +3,11 @@ cmake_minimum_required (VERSION 3.1.0) project (BTAS) +# extra cmake files are shipped with BTAS +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules") + include(FeatureSummary) -include(cmake/modules/RedefaultableOption.cmake) +include(RedefaultableOption) # Configure options redefaultable_option(BTAS_BUILD_UNITTEST "Whether to build unit tests" OFF) @@ -25,9 +28,6 @@ set(BTAS_PRERELEASE alpha) set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}-${BTAS_PRERELEASE}") set(TARGET_ARCH "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") -# extra cmake files are shipped with BTAS -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules") - include(CMakePushCheckState) include(GNUInstallDirs) include(AppendFlags) diff --git a/btas/types.h b/btas/types.h index 4e13a5f9..656ddd28 100644 --- a/btas/types.h +++ b/btas/types.h @@ -7,10 +7,6 @@ #include -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - #if defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE) # if not defined(BTAS_CBLAS_HEADER) && not defined(BTAS_LAPACKE_HEADER) @@ -76,9 +72,17 @@ enum CBLAS_SIDE { CblasLeft, CblasRight }; #endif // defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE) -#ifdef __cplusplus -} -#endif // __cplusplus +// if lapack types are not defined define them EVEN if not using CBLAS/LAPACKE to make writing generic API easier +#ifndef lapack_complex_float +# define lapack_complex_float std::complex +#else // lapack_complex_float +static_assert(sizeof(std::complex)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex do not match"); +# endif // lapack_complex_float +#ifndef lapack_complex_double +# define lapack_complex_double std::complex +#else // lapack_complex_double +static_assert(sizeof(std::complex)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex do not match"); +#endif // lapack_complex_double namespace btas { @@ -98,15 +102,15 @@ namespace btas { return *reinterpret_cast*>(&val); } template - const lapack_complex_float* - to_lapack_cptr(const T* ptr) { - static_assert(sizeof(T)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and T given to btas::to_lapack_cptr do not match"); + const lapack_complex_float* to_lapack_cptr(const T* ptr) { + static_assert(sizeof(T) == sizeof(lapack_complex_float), + "sizes of lapack_complex_float and T given to btas::to_lapack_cptr do not match"); return reinterpret_cast(ptr); } template - typename std::enable_if::value, lapack_complex_float*>::type - to_lapack_cptr(T* ptr) { - static_assert(sizeof(T)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and T given to btas::to_lapack_cptr do not match"); + typename std::enable_if::value, lapack_complex_float*>::type to_lapack_cptr(T* ptr) { + static_assert(sizeof(T) == sizeof(lapack_complex_float), + "sizes of lapack_complex_float and T given to btas::to_lapack_cptr do not match"); return reinterpret_cast(ptr); } @@ -117,27 +121,29 @@ namespace btas { return *reinterpret_cast*>(&val); } template - const lapack_complex_double* - to_lapack_zptr(const T* ptr) { - static_assert(sizeof(T)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and T given to btas::to_lapack_zptr do not match"); + const lapack_complex_double* to_lapack_zptr(const T* ptr) { + static_assert(sizeof(T) == sizeof(lapack_complex_double), + "sizes of lapack_complex_double and T given to btas::to_lapack_zptr do not match"); return reinterpret_cast(ptr); } template - typename std::enable_if::value, lapack_complex_double*>::type - to_lapack_zptr(T* ptr) { - static_assert(sizeof(T)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and T given to btas::to_lapack_zptr do not match"); + typename std::enable_if::value, lapack_complex_double*>::type to_lapack_zptr(T* ptr) { + static_assert(sizeof(T) == sizeof(lapack_complex_double), + "sizes of lapack_complex_double and T given to btas::to_lapack_zptr do not match"); return reinterpret_cast(ptr); } -// -// Other aliases for convenience -// + // + // Other aliases for convenience + // -/// default size type -typedef unsigned long size_type; + /// default size type + typedef unsigned long size_type; -/// null deleter -struct nulldeleter { void operator() (void const*) { } }; + /// null deleter + struct nulldeleter { + void operator()(void const*) {} + }; } // namespace btas From b25e9f971a2c222456ddb4de89b4aec2bd90dfe7 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 12:03:44 -0500 Subject: [PATCH 46/66] Remove unnecessary instances of BTAS_HAS_INTEL_MKL --- btas/generic/coupled_cp_als.h | 2 -- btas/generic/cp.h | 4 ---- btas/generic/cp_als.h | 9 --------- btas/generic/cp_rals.h | 6 ------ btas/generic/swap.h | 8 +------- 5 files changed, 1 insertion(+), 28 deletions(-) diff --git a/btas/generic/coupled_cp_als.h b/btas/generic/coupled_cp_als.h index f23c0ca0..66845d23 100644 --- a/btas/generic/coupled_cp_als.h +++ b/btas/generic/coupled_cp_als.h @@ -122,7 +122,6 @@ namespace btas{ "Tensor describing symmetries must be defined for all dimensions"); } -#ifdef BTAS_HAS_INTEL_MKL /// \brief Computes decomposition of the order-N tensor \c tensor /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor) /// Initial guess for factor matrices start at rank = max_dim(reference_tensor) @@ -147,7 +146,6 @@ namespace btas{ int max_als = 20,bool fast_pI = false, bool calculate_epsilon = false, bool direct = true) override{ BTAS_EXCEPTION("Function not yet implemented"); } -#endif //BTAS_HAS_INTEL_MKL protected: Tensor& tensor_ref_left; // Tensor in first term of the loss function diff --git a/btas/generic/cp.h b/btas/generic/cp.h index 934c2ec5..2ca02d03 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -23,10 +23,6 @@ #include #include -#ifdef BTAS_HAS_INTEL_MKL -#include -#endif - namespace btas{ namespace detail{ diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index a120c4be..62e9e6a7 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -12,10 +12,6 @@ #include #include -#ifdef BTAS_HAS_INTEL_MKL -#include -#endif - namespace btas{ /** \brief Computes the Canonical Product (CP) decomposition of an order-N tensor using alternating least squares (ALS). @@ -211,7 +207,6 @@ namespace btas{ return epsilon; } -#ifdef BTAS_HAS_INTEL_MKL /// \brief Computes an approximate core tensor using /// Tucker decomposition, e.g. /// \f$ T(I_1 \dots I_N) \approx T(R_1 \dots R_N) U^{(1)} (R_1, I_1) \dots U^{(N)} (R_N, I_N) \f$ @@ -221,7 +216,6 @@ namespace btas{ /// computed to either finite error or finite rank. Default settings /// calculate to finite error. Factor matrices from get_factor_matrices() are /// scaled by the Tucker transformations. - /// \note This requires Intel MKL. /// \param[in] tcutSVD Truncation threshold for SVD of each mode in Tucker /// decomposition. @@ -295,7 +289,6 @@ namespace btas{ /// either finite error or finite rank. /// Default settings calculate to finite error. /// Factor matrices are scaled by randomized transformation. - /// \note This requires Intel MKL. /// \param[in] desired_compression_rank The new dimension of each mode after /// randomized compression. @@ -362,8 +355,6 @@ namespace btas{ return epsilon; } -#endif //BTAS_HAS_INTEL_MKL - protected: Tensor& tensor_ref; // Tensor to be decomposed diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h index efea1c05..7a92f2e8 100644 --- a/btas/generic/cp_rals.h +++ b/btas/generic/cp_rals.h @@ -134,8 +134,6 @@ namespace btas{ ~CP_RALS() = default; -#ifdef BTAS_HAS_INTEL_MKL - /// \brief Computes decomposition of the order-N tensor \c tensor /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor) /// Initial guess for factor matrices start at rank = max_dim(reference_tensor) @@ -236,7 +234,6 @@ namespace btas{ /// computed to either finite error or finite rank. Default settings /// calculate to finite error. Factor matrices from get_factor_matrices() are /// scaled by the Tucker transformations. - /// \note This requires Intel MKL. /// \param[in] tcutSVD Truncation threshold for SVD of each mode in Tucker /// decomposition. @@ -311,7 +308,6 @@ namespace btas{ /// either finite error or finite rank. /// Default settings calculate to finite error. /// Factor matrices are scaled by randomized transformation. - /// \note This requires Intel MKL. /// \param[in] desired_compression_rank The new dimension of each mode after /// randomized compression. @@ -379,8 +375,6 @@ namespace btas{ return epsilon; } -#endif //BTAS_HAS_INTEL_MKL - protected: int size; // number of elements in tensor_ref Tensor & tensor_ref; // Tensor to be decomposed diff --git a/btas/generic/swap.h b/btas/generic/swap.h index b8f29cec..e2dc2742 100644 --- a/btas/generic/swap.h +++ b/btas/generic/swap.h @@ -32,9 +32,6 @@ namespace btas { template void swap_to_first(Tensor &A, int mode, bool is_in_front = false, bool for_ALS_update = true) { -#ifndef BTAS_HAS_INTEL_MKL - BTAS_EXCEPTION("In place tranposes require MKL"); -#endif // If the mode of interest is the the first mode you are done. if(mode > A.rank()){ BTAS_EXCEPTION("Mode index is greater than tensor rank"); @@ -120,10 +117,7 @@ void swap_to_first(Tensor &A, int mode, bool is_in_front = false, /// back. Default = false. template -void swap_to_back(Tensor &A, int mode, bool is_in_back = false) { -#ifndef BTAS_HAS_INTEL_MKL - BTAS_EXCEPTION("In place tranposes require MKL"); -#endif +void swap_to_back(Tensor &A, int mode, bool is_in_back = false){ if (mode > A.rank()) BTAS_EXCEPTION_MESSAGE(__FILE__, __LINE__, "mode > A.rank(), mode out of range"); From 45d8e4237207892d5faf9d7d4d28203bed9b29ba Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 28 Jan 2020 12:09:46 -0500 Subject: [PATCH 47/66] added exportable BTAS target (in BTAS:: namespace) --- CMakeLists.txt | 144 ++++++++++++++++++++++++++---------- bin/travisci_build_linux.sh | 4 +- cmake/btas-config.cmake.in | 53 +++++++++++++ external/boost.cmake | 30 +++++--- external/lapack.cmake | 68 ----------------- unittest/CMakeLists.txt | 8 +- 6 files changed, 183 insertions(+), 124 deletions(-) create mode 100644 cmake/btas-config.cmake.in delete mode 100644 external/lapack.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 35d2d5ef..7e2db5f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,13 +1,15 @@ #; -*-CMake-*- -cmake_minimum_required (VERSION 3.1.0) +cmake_minimum_required (VERSION 3.8.0) # cxx_std_11 compile feature project (BTAS) +enable_language(CXX) # extra cmake files are shipped with BTAS list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules") include(FeatureSummary) include(RedefaultableOption) +include(CMakePackageConfigHelpers) # Configure options redefaultable_option(BTAS_BUILD_UNITTEST "Whether to build unit tests" OFF) @@ -24,14 +26,37 @@ add_feature_info("TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}" TRUE "default set(BTAS_MAJOR_VERSION 1) set(BTAS_MINOR_VERSION 0) set(BTAS_MICRO_VERSION 0) -set(BTAS_PRERELEASE alpha) -set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}-${BTAS_PRERELEASE}") +set(BTAS_PRERELEASE_ID alpha) +set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}") +if (BTAS_PRERELEASE_ID) + set(BTAS_EXT_VERSION "${BTAS_VERSION}-${BTAS_PRERELEASE_ID}") +else(BTAS_PRERELEASE_ID) + set(BTAS_EXT_VERSION "${BTAS_VERSION}") +endif(BTAS_PRERELEASE_ID) set(TARGET_ARCH "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") include(CMakePushCheckState) include(GNUInstallDirs) include(AppendFlags) +########################## +# Standard build variables +########################## +set(BTAS_INSTALL_BINDIR "bin" + CACHE PATH "BTAS BIN install directory") +set(BTAS_INSTALL_INCLUDEDIR "include" + CACHE PATH "BTAS INCLUDE install directory") +set(BTAS_INSTALL_LIBDIR "lib" + CACHE PATH "BTAS LIB install directory") +set(BTAS_INSTALL_SHAREDIR "share/BTAS/${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}" + CACHE PATH "BTAS SHARE install directory") +set(BTAS_INSTALL_DATADIR "${BTAS_INSTALL_SHAREDIR}/data" + CACHE PATH "BTAS DATA install directory") +set(BTAS_INSTALL_DOCDIR "${BTAS_INSTALL_SHAREDIR}/doc" + CACHE PATH "BTAS DOC install directory") +set(BTAS_INSTALL_CMAKEDIR "lib/cmake/BTAS" + CACHE PATH "BTAS CMAKE install directory") + ########################## # Standard build variables ########################## @@ -48,69 +73,81 @@ endif() if(NOT CMAKE_EXE_LINKER_FLAGS OR NOT DEFINED CMAKE_EXE_LINKER_FLAGS) set(CMAKE_EXE_LINKER_FLAGS "$ENV{LDFLAGS}") endif() - -enable_language (CXX) if (NOT CMAKE_CXX_COMPILER) message(FATAL_ERROR "C++ compiler not found") endif() -# Set the default fortran integer type. -set(INTEGER4 "FALSE" CACHE BOOL "Set the default Fortran integer type to integer*4") -mark_as_advanced(INTEGER4) - set(CMAKE_SKIP_RPATH FALSE) -set(BUILD_TESTING FALSE CACHE BOOL "BUILD_TESTING") -set(BUILD_TESTING_STATIC FALSE CACHE BOOL "BUILD_TESTING_STATIC") -set(BUILD_TESTING_SHARED FALSE CACHE BOOL "BUILD_TESTING_SHARED") ########################## # We use C++11 features ########################## -set(CMAKE_CXX_STANDARD 11) +# but insist on strict standard +set(CMAKE_CXX_STANDARD 11 CACHE STRING "C++ ISO Standard version") +if (NOT(CMAKE_CXX_STANDARD EQUAL 11 OR CMAKE_CXX_STANDARD EQUAL 14 OR CMAKE_CXX_STANDARD EQUAL 17 OR CMAKE_CXX_STANDARD EQUAL 20)) + message(FATAL_ERROR "C++ 2011 ISO Standard or higher is required to compile BTAS") +endif() +# C++20 is only configurable via compile features with cmake 3.12 and older +if (CMAKE_CXX_STANDARD EQUAL 20 AND CMAKE_VERSION VERSION_LESS 3.12.0) + cmake_minimum_required (VERSION 3.12.0) +endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Whether to use extensions of C++ ISO Standard version") # Check type support include(CheckTypeSize) check_type_size("long double" BTAS_HAS_LONG_DOUBLE) check_type_size("long long" BTAS_HAS_LONG_LONG) +####################################### +# create exportable BTAS library target +####################################### +add_library(BTAS INTERFACE) +install(TARGETS BTAS EXPORT btas COMPONENT BTAS) +target_compile_features(BTAS INTERFACE "cxx_std_11") +target_include_directories(BTAS INTERFACE $ + $) +install(DIRECTORY btas + COMPONENT BTAS + DESTINATION "${BTAS_INSTALL_INCLUDEDIR}" + FILES_MATCHING PATTERN "*.h" + PATTERN "*.h.in" EXCLUDE + ) + ########################## # external dependencies ########################## add_custom_target(External) -if (BTAS_BUILD_UNITTEST) - if (BTAS_USE_CBLAS_LAPACKE) - find_package(CBLAS REQUIRED) - find_package(LAPACKE REQUIRED) - add_definitions(-DBTAS_CBLAS_HEADER="${CBLAS_INCLUDE_FILE}" -DBTAS_LAPACKE_HEADER="${LAPACKE_INCLUDE_FILE}" -DBTAS_HAS_CBLAS=1 -DBTAS_HAS_LAPACKE=1) - message(STATUS "** CBLAS_LIBRARIES = ${CBLAS_LIBRARIES}") - message(STATUS "** CBLAS_INCLUDE_DIR = ${CBLAS_INCLUDE_DIR}") - message(STATUS "** CBLAS_INCLUDE_FILE = ${CBLAS_INCLUDE_FILE}") - message(STATUS "** LAPACKE_LIBRARIES = ${LAPACKE_LIBRARIES}") - message(STATUS "** LAPACKE_INCLUDE_DIR = ${LAPACKE_INCLUDE_DIR}") - message(STATUS "** LAPACKE_INCLUDE_FILE = ${LAPACKE_INCLUDE_FILE}") - include_directories(${CBLAS_INCLUDE_DIR} ${LAPACKE_INCLUDE_DIR}) - if (MKL_FOUND) - add_definitions(-DBTAS_HAS_INTEL_MKL=1) - endif(MKL_FOUND) - endif(BTAS_USE_CBLAS_LAPACKE) - include(external/boost.cmake) -endif() +if (BTAS_USE_CBLAS_LAPACKE) + find_package(CBLAS REQUIRED) + find_package(LAPACKE REQUIRED) + target_compile_definitions(BTAS INTERFACE -DBTAS_CBLAS_HEADER="${CBLAS_INCLUDE_FILE}" -DBTAS_LAPACKE_HEADER="${LAPACKE_INCLUDE_FILE}" -DBTAS_HAS_CBLAS=1 -DBTAS_HAS_LAPACKE=1) + message(STATUS "** CBLAS_LIBRARIES = ${CBLAS_LIBRARIES}") + message(STATUS "** CBLAS_INCLUDE_DIR = ${CBLAS_INCLUDE_DIR}") + message(STATUS "** CBLAS_INCLUDE_FILE = ${CBLAS_INCLUDE_FILE}") + message(STATUS "** LAPACKE_LIBRARIES = ${LAPACKE_LIBRARIES}") + message(STATUS "** LAPACKE_INCLUDE_DIR = ${LAPACKE_INCLUDE_DIR}") + message(STATUS "** LAPACKE_INCLUDE_FILE = ${LAPACKE_INCLUDE_FILE}") + target_include_directories(BTAS INTERFACE ${CBLAS_INCLUDE_DIR} ${LAPACKE_INCLUDE_DIR}) + if (MKL_FOUND) + target_compile_definitions(BTAS INTERFACE -DBTAS_HAS_INTEL_MKL=1) + endif(MKL_FOUND) + target_link_libraries(BTAS INTERFACE ${LAPACKE_LIBRARIES} ${CBLAS_LIBRARIES}) +endif(BTAS_USE_CBLAS_LAPACKE) + +include(external/boost.cmake) ########################## # configure BTAS_ASSERT ########################## if (BTAS_ASSERT_THROWS) - add_definitions(-DBTAS_ASSERT_THROWS=1) + target_compile_definitions(BTAS INTERFACE -DBTAS_ASSERT_THROWS=1) endif(BTAS_ASSERT_THROWS) ########################## -# sources +# dox ########################## - -include_directories(${PROJECT_SOURCE_DIR}) add_subdirectory(doc) ########################## @@ -138,6 +175,37 @@ ADD_CUSTOM_TARGET(release COMMENT "Switch CMAKE_BUILD_TYPE to Release" ) +# Create the version file +write_basic_package_version_file(btas-config-version.cmake + VERSION ${BTAS_VERSION} COMPATIBILITY AnyNewerVersion) + +## Create the configure file +configure_package_config_file(cmake/btas-config.cmake.in + "${PROJECT_BINARY_DIR}/btas-config.cmake" + INSTALL_DESTINATION "${BTAS_INSTALL_CMAKEDIR}" + PATH_VARS CMAKE_INSTALL_PREFIX BTAS_INSTALL_BINDIR + BTAS_INSTALL_INCLUDEDIR BTAS_INSTALL_LIBDIR + BTAS_INSTALL_DOCDIR BTAS_INSTALL_CMAKEDIR) + +## Install config, version, and target files +install(EXPORT btas + FILE "btas-targets.cmake" + DESTINATION "${BTAS_INSTALL_CMAKEDIR}" + NAMESPACE BTAS:: + COMPONENT btas-config) +install(FILES + "${PROJECT_BINARY_DIR}/btas-config.cmake" + "${PROJECT_BINARY_DIR}/btas-config-version.cmake" + DESTINATION "${BTAS_INSTALL_CMAKEDIR}" + COMPONENT btas-config) +add_custom_target(install-config + COMMAND ${CMAKE_COMMAND} -DCOMPONENT=btas-config -P ${CMAKE_BINARY_DIR}/cmake_install.cmake + COMMENT "Installing BTAS config components") + feature_summary(WHAT ALL - DESCRIPTION "=== BTAS Feature Info ===") - + DESCRIPTION "=== BTAS Package/Feature Info ===") + +############################################################################### +# appendix: misc details +############################################################################### +SET(CMAKE_COLOR_MAKEFILE ON) diff --git a/bin/travisci_build_linux.sh b/bin/travisci_build_linux.sh index 49f8bd29..43f9e8b8 100755 --- a/bin/travisci_build_linux.sh +++ b/bin/travisci_build_linux.sh @@ -17,7 +17,7 @@ cd ${BUILD_PREFIX} ########## test with blas+lapack ########## mkdir build_cblas cd build_cblas -cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON +cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON make VERBOSE=1 make check VERBOSE=1 cd .. @@ -25,7 +25,7 @@ cd .. ########## test without blas+lapack ########## mkdir build cd build -cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_USE_CBLAS_LAPACKE=OFF -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON +cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_USE_CBLAS_LAPACKE=OFF make VERBOSE=1 make check VERBOSE=1 cd .. diff --git a/cmake/btas-config.cmake.in b/cmake/btas-config.cmake.in new file mode 100644 index 00000000..bfa37a3f --- /dev/null +++ b/cmake/btas-config.cmake.in @@ -0,0 +1,53 @@ +# - CMAKE Config file for the BTAS package +# This will define the following CMake cache variables +# +# BTAS_FOUND - true if BTAS library were found +# BTAS_VERSION - the libint2 version +# BTAS_EXT_VERSION - the libint2 version including the (optional) buildid, such as beta.3 +# +# and the following imported targets +# +# BTAS::BTAS - the BTAS library +# + +# Set package version +set(BTAS_VERSION "@BTAS_VERSION@") +set(BTAS_EXT_VERSION "@BTAS_EXT_VERSION@") + +@PACKAGE_INIT@ + +if(NOT TARGET Boost::boost) + set(Boost_USE_CONFIG @Boost_USE_CONFIG@) + set(Boost_BTAS_DEPS_LIBRARIES @Boost_BTAS_DEPS_LIBRARIES@) + if (Boost_USE_CONFIG) + set(Boost_CONFIG @Boost_CONFIG@) + if (NOT Boost_CONFIG OR NOT EXISTS ${Boost_CONFIG}) + message(FATAL_ERROR "Expected Boost config file at ${Boost_CONFIG}; directory moved since BTAS configuration?") + endif() + get_filename_component(Boost_DIR ${Boost_CONFIG} DIRECTORY) + find_package(Boost CONFIG QUIET REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES} PATHS ${Boost_DIR} NO_DEFAULT_PATH) + else (Boost_USE_CONFIG) + set(BOOST_INCLUDEDIR @Boost_INCLUDE_DIR@) + set(BOOST_LIBRARYDIR @Boost_LIBRARY_DIR_RELEASE@) + if (NOT BOOST_LIBRARYDIR OR NOT EXISTS ${BOOST_LIBRARYDIR}) + set(BOOST_LIBRARYDIR @Boost_LIBRARY_DIR_DEBUG@) + endif() + set(Boost_NO_SYSTEM_PATHS OFF) + if (BOOST_LIBRARYDIR AND BOOST_INCLUDEDIR) + if (EXISTS ${BOOST_LIBRARYDIR} AND EXISTS ${BOOST_INCLUDEDIR}) + set(Boost_NO_SYSTEM_PATHS ON) + endif() + endif() + find_package(Boost QUIET REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES}) + endif (Boost_USE_CONFIG) +endif() + +# Include library IMPORT targets +if(NOT TARGET BTAS::BTAS) + include("${CMAKE_CURRENT_LIST_DIR}/btas-targets.cmake") + if(NOT TARGET BTAS::BTAS) + message(FATAL_ERROR "expected BTAS::BTAS among imported BTAS targets") + endif() +endif() + +set(BTAS_FOUND TRUE) diff --git a/external/boost.cmake b/external/boost.cmake index ef63f49d..65bf0777 100644 --- a/external/boost.cmake +++ b/external/boost.cmake @@ -4,19 +4,27 @@ if (BOOST_ROOT OR BOOST_INCLUDEDIR) set(Boost_NO_SYSTEM_PATHS TRUE) endif() -#set(Boost_DEBUG TRUE) -# Check for Boost, unless told otherwise (then must set Boost_FOUND, Boost_INCLUDE_DIRS, Boost_LIBRARIES) -if (NOT SKIP_BOOST_SEARCH) - find_package(Boost 1.33 REQUIRED OPTIONAL_COMPONENTS serialization container) -endif() +if (NOT TARGET Boost::boost) + # Boost::boost is defined since 3.5 + cmake_minimum_required(VERSION 3.5.0) + # try config first + set(Boost_BTAS_DEPS_LIBRARIES serialization container) + find_package(Boost CONFIG COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES}) + if (NOT TARGET Boost::boost) + find_package(Boost REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES}) + set(Boost_USE_CONFIG FALSE) + else() + set(Boost_USE_CONFIG TRUE) + endif() +endif (NOT TARGET Boost::boost) # Perform a compile check with Boost list(APPEND CMAKE_REQUIRED_INCLUDES ${Boost_INCLUDE_DIRS}) list(APPEND CMAKE_REQUIRED_LIBRARIES ${Boost_LIBRARIES}) -add_definitions(-DBTAS_HAS_BOOST_ITERATOR=1) +target_compile_definitions(BTAS INTERFACE -DBTAS_HAS_BOOST_ITERATOR=1) if (Boost_CONTAINER_FOUND) - add_definitions(-DBTAS_HAS_BOOST_CONTAINER=1 -DBTAS_TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}) + target_compile_definitions(BTAS INTERFACE -DBTAS_HAS_BOOST_CONTAINER=1 -DBTAS_TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}) endif() include(CheckCXXSourceRuns) @@ -86,14 +94,12 @@ CHECK_CXX_SOURCE_RUNS( " BOOST_COMPILES_AND_RUNS) if (BOOST_COMPILES_AND_RUNS) - add_definitions(-DBTAS_HAS_BOOST_SERIALIZATION=1) + target_compile_definitions(BTAS INTERFACE -DBTAS_HAS_BOOST_SERIALIZATION=1) else () message(STATUS "Boost found at ${BOOST_ROOT}, but could not compile and/or run test program") message(WARNING "To obtain usable Boost, use your system package manager (HomeBrew, apt, etc.) OR download at www.boost.org and compile (unpacking alone is not enough)") message(WARNING "** !! due to missing Boost.Serialization the corresponding unit tests will be disabled !!") endif(BOOST_COMPILES_AND_RUNS) -if (NOT BOOST_COMPILES_AND_RUNS) -endif() - -include_directories(${Boost_INCLUDE_DIRS}) +target_include_directories(BTAS INTERFACE ${Boost_INCLUDE_DIRS}) +target_link_libraries(BTAS INTERFACE ${Boost_LIBRARIES}) diff --git a/external/lapack.cmake b/external/lapack.cmake deleted file mode 100644 index 8446d9e2..00000000 --- a/external/lapack.cmake +++ /dev/null @@ -1,68 +0,0 @@ -# Find BLAS and LAPACK. -include(CheckCFortranFunctionExists) -include(CMakePushCheckState) - -if(LAPACK_LIBRARIES OR BLAS_LIBRARIES OR LAPACK_LINKER_FLAGS OR BLAS_LINKER_FLAGS) - # Here we verify that the we can link against LAPACK and BLAS based on the - # given library and linker flags. If BLAS_FOUND and/or LAPACK_FOUND are true, - # we assume that these values have been verified. - - if(NOT BLAS_FOUND) - # Verify that we can link against BLAS - - cmake_push_check_state() - - if(UNIX AND BLA_STATIC AND BLAS_LIBRARIES) - set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LINKER_FLAGS} "-Wl,-start-group" - ${BLAS_LIBRARIES} "-Wl,-end-group" ${CMAKE_THREAD_LIBS_INIT} - ${CMAKE_REQUIRED_LIBRARIES}) - else() - set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LINKER_FLAGS} ${BLAS_LIBRARIES} - ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_REQUIRED_LIBRARIES}) - endif() - - check_c_fortran_function_exists(sgemm BLAS_FOUND) - - if(BLAS_FOUND) - message(STATUS "A library with BLAS API found.") - else() - message(FATAL_ERROR "The user specified BLAS libraries do not support the BLAS API.\nRerun cmake with BLAS_LIBRARIES and/or BLAS_LINKER_FLAGS.") - endif() - - cmake_pop_check_state() - endif() - - if(NOT LAPACK_FOUND) - # Verify that we can link against LAPACK - - cmake_push_check_state() - - if(UNIX AND BLA_STATIC AND LAPACK_LIBRARIES) - set(CMAKE_REQUIRED_LIBRARIES ${LAPACK_LINKER_FLAGS} "-Wl,--start-group" - ${LAPACK_LIBRARIES} "-Wl,--end-group" ${CMAKE_THREAD_LIBS_INIT} - ${CMAKE_REQUIRED_LIBRARIES}) - else() - set(CMAKE_REQUIRED_LIBRARIES ${LAPACK_LINKER_FLAGS} ${LAPACK_LIBRARIES} - ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_REQUIRED_LIBRARIES}) - endif() - - check_c_fortran_function_exists(cheev LAPACK_FOUND) - - if(LAPACK_FOUND) - message(STATUS "A library with LAPACK API found.") - else(LAPACK_FOUND) - message(FATAL_ERROR "The user specified LAPACK libraries do not support the LAPACK API.\nRerun cmake with LAPACK_LIBRARIES and/or LAPACK_LINKER_FLAGS.") - endif(LAPACK_FOUND) - - cmake_pop_check_state() - - endif() -else() - # Try to find BLAS and LAPACK - find_package(BLAS REQUIRED) - find_package(LAPACK REQUIRED) -endif() - -append_flags(CMAKE_EXE_LINKER_FLAGS "${BLAS_LINKER_FLAGS}") -append_flags(CMAKE_EXE_LINKER_FLAGS "${LAPACK_LINKER_FLAGS}") - diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index 806dbf68..be62f6e9 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -12,12 +12,12 @@ set(btas_test_src_files ) add_executable(${executable} EXCLUDE_FROM_ALL ${btas_test_src_files}) # Add include directories and compiler flags for ta_test -include_directories( +target_include_directories(${executable} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} - ${Boost_INCLUDE_DIRS}) -target_link_libraries(${executable} ${Boost_LIBRARIES} ${LAPACKE_LIBRARIES} ${CBLAS_LIBRARIES}) -append_flags(CMAKE_EXE_LINKER_FLAGS "${BLAS_LINKER_FLAGS}") +) +target_link_libraries(${executable} BTAS) +#append_flags(CMAKE_EXE_LINKER_FLAGS "${BLAS_LINKER_FLAGS}") # Add targets add_dependencies(${executable} External) From ed75bf8ae1e1da1eef6ec4ff439a2131c4e68b8f Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 12:09:59 -0500 Subject: [PATCH 48/66] Update linear algebra documentation --- btas/generic/linear_algebra.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index 78ccbef1..8bfbf980 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -95,6 +95,7 @@ namespace btas{ /// Computes the QR decomposition of matrix \c A /// \param[in, out] A In: A Reference matrix to be QR decomposed. Out: /// The Q of a QR decomposition of \c A. +/// \return bool true if QR was successful false if failed. template bool QR_decomp(Tensor &A) { @@ -136,6 +137,7 @@ namespace btas{ /// Computes the inverse of a matrix \c A using a pivoted LU decomposition /// \param[in, out] A In: A reference matrix to be inverted. Out: /// The inverse of A, computed using LU decomposition. +/// \return bool true if inversion was successful false if failed template bool Inverse_Matrix(Tensor & A){ @@ -195,10 +197,13 @@ namespace btas{ } /// Solving Ax = B using a Cholesky decomposition - /// \param[in] A The right-hand side of the linear equation - /// to be inverted using Cholesky + /// \param[in, out] A In: The right-hand side of the linear equation + /// to be inverted using Cholesky. Out: + /// the factors L and U from the factorization A = P*L*U; + /// the unit diagonal elements of L are not stored. /// \param[in, out] B In: The left-hand side of the linear equation /// out: The solution x = A^{-1}B + /// \return bool true if inversion was successful false if failed. template bool cholesky_inverse(Tensor & A, Tensor & B){ #ifndef BTAS_HAS_LAPACKE @@ -228,28 +233,28 @@ bool cholesky_inverse(Tensor & A, Tensor & B){ /// Fast pseudo-inverse algorithm described in /// https://arxiv.org/pdf/0804.4809.pdf -/// \param[in] a matrix to be inverted. -/// \return a^{\dagger} The pseudoinverse of the matrix a. +/// \param[in] A In: A reference to the matrix to be inverted. +/// \return \f$ A^{\dagger} \f$ The pseudoinverse of the matrix A. template -Tensor pseudoInverse(Tensor & a, bool & fast_pI) { +Tensor pseudoInverse(Tensor & A, bool & fast_pI) { #ifndef BTAS_HAS_LAPACKE BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE"); #else //BTAS_HAS_LAPACKE - if (a.rank() > 2) { + if (A.rank() > 2) { BTAS_EXCEPTION("PseudoInverse can only be computed on a matrix"); } - auto row = a.extent(0), col = a.extent(1); + auto row = A.extent(0), col = A.extent(1); auto rank = (row < col ? row : col); if (fast_pI) { Tensor temp(col, col), inv(col, row); // compute V^{\dag} = (A^T A) ^{-1} A^T - gemm(CblasTrans, CblasNoTrans, 1.0, a, a, 0.0, temp); + gemm(CblasTrans, CblasNoTrans, 1.0, A, A, 0.0, temp); fast_pI = Inverse_Matrix(temp); if (fast_pI) { - gemm(CblasNoTrans, CblasTrans, 1.0, temp, a, 0.0, inv); + gemm(CblasNoTrans, CblasTrans, 1.0, temp, A, 0.0, inv); return inv; } else { std::cout << "Fast pseudo-inverse failed reverting to normal pseudo-inverse" << std::endl; @@ -259,7 +264,7 @@ Tensor pseudoInverse(Tensor & a, bool & fast_pI) { Tensor U(Range{Range1{row}, Range1{row}}); Tensor Vt(Range{Range1{col}, Range1{col}}); - gesvd('A', 'A', a, s, U, Vt); + gesvd('A', 'A', A, s, U, Vt); // Inverse the Singular values with threshold 1e-13 = 0 double lr_thresh = 1e-13; From dc029456ecaa6c5871642b788b4308be9626e764 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 28 Jan 2020 12:10:22 -0500 Subject: [PATCH 49/66] dox cleanup [skip ci] --- cmake/btas-config.cmake.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/btas-config.cmake.in b/cmake/btas-config.cmake.in index bfa37a3f..f6469ed1 100644 --- a/cmake/btas-config.cmake.in +++ b/cmake/btas-config.cmake.in @@ -2,8 +2,8 @@ # This will define the following CMake cache variables # # BTAS_FOUND - true if BTAS library were found -# BTAS_VERSION - the libint2 version -# BTAS_EXT_VERSION - the libint2 version including the (optional) buildid, such as beta.3 +# BTAS_VERSION - the BTAS version +# BTAS_EXT_VERSION - the BTAS version including the (optional) buildid, such as beta.3 # # and the following imported targets # From f9345229b0e534953b5efed417dd7df7d2edec32 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 12:16:05 -0500 Subject: [PATCH 50/66] Update pseudoinverse_helper documentation --- btas/generic/cp.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index 2ca02d03..50a8a901 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -555,24 +555,26 @@ namespace btas{ /// Fast pseudo-inverse algorithm described in /// https://arxiv.org/pdf/0804.4809.pdf - /// \param[in] n The mode being optimized, all other modes held constant - /// \param[in] R The current rank, column dimension of the factor matrices - /// \param[in,out] fast_pI If true, try to compute the pseudo inverse via fast Cholesky decomposition, else use SVD; - /// on return reports whether the fast route was used. - /// \param[in] lambda Regularization parameter lambda is added to the diagonal of V - /// \return The pseudoinverse of the matrix V. - /// Trying to solve Ax = B /// First try Cholesky to solve this problem directly /// second tryfast pseudo-inverse algorithm described in /// https://arxiv.org/pdf/0804.4809.pdf /// If all else fails use SVD - // TODO write docs here + + /// \param[in] mode_of_A The mode being optimized used to compute hadamard LHS (V) of ALS problem (Vx = B) + /// \param[in,out] fast_pI If true, try to compute the pseudo inverse via fast LU decomposition, else use SVD; + /// on return reports whether the fast route was used. + /// \param[in, out] cholesky If true, try to solve the linear equation Vx = B (the ALS problem) + /// using a Cholesky decomposition (lapacke subroutine) on return reports if + /// inversion was successful. + /// \param[in, out] B In: The RHS of the ALS problem ( Vx = B ). Out: The solved linear equation + /// \f$ V^{-1} B \f$ + /// \param[in] lambda Regularization parameter lambda is added to the diagonal of V void psuedoinverse_helper(int mode_of_A, bool & fast_pI, - bool & cholesky, Tensor & B = Tensor(), + bool & cholesky, Tensor & B, double lambda = 0.0){ - if(cholesky && B.empty()){ - BTAS_EXCEPTION("To compute the Cholesky, one must provide a LHS tensor (Ax = B)"); + if(B.empty()){ + BTAS_EXCEPTION("pseudoinverse helper solves Ax = B. B cannot be an empty tensor"); } auto rank = A[0].extent(1); From 6f3d1374b434cdcb87173922fe9771b6e8101b50 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 28 Jan 2020 12:22:14 -0500 Subject: [PATCH 51/66] no container boost component, it's part of Boost::boost --- external/boost.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/boost.cmake b/external/boost.cmake index 65bf0777..7da8b6cb 100644 --- a/external/boost.cmake +++ b/external/boost.cmake @@ -9,7 +9,7 @@ if (NOT TARGET Boost::boost) # Boost::boost is defined since 3.5 cmake_minimum_required(VERSION 3.5.0) # try config first - set(Boost_BTAS_DEPS_LIBRARIES serialization container) + set(Boost_BTAS_DEPS_LIBRARIES boost serialization) find_package(Boost CONFIG COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES}) if (NOT TARGET Boost::boost) find_package(Boost REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES}) From c966011c19b30fc6c926606827e726a78b25c868 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 28 Jan 2020 12:25:32 -0500 Subject: [PATCH 52/66] don't search for Boost::boost --- external/boost.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/boost.cmake b/external/boost.cmake index 7da8b6cb..f63e7d99 100644 --- a/external/boost.cmake +++ b/external/boost.cmake @@ -9,7 +9,7 @@ if (NOT TARGET Boost::boost) # Boost::boost is defined since 3.5 cmake_minimum_required(VERSION 3.5.0) # try config first - set(Boost_BTAS_DEPS_LIBRARIES boost serialization) + set(Boost_BTAS_DEPS_LIBRARIES serialization) find_package(Boost CONFIG COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES}) if (NOT TARGET Boost::boost) find_package(Boost REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES}) From e1efc87fe363db17ff42fb1615acdf5be8bc75ff Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 28 Jan 2020 12:34:28 -0500 Subject: [PATCH 53/66] [travis] DO build unit tests --- bin/travisci_build_linux.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/travisci_build_linux.sh b/bin/travisci_build_linux.sh index 43f9e8b8..17621fe0 100755 --- a/bin/travisci_build_linux.sh +++ b/bin/travisci_build_linux.sh @@ -17,7 +17,7 @@ cd ${BUILD_PREFIX} ########## test with blas+lapack ########## mkdir build_cblas cd build_cblas -cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON +cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_BUILD_UNITTEST=ON make VERBOSE=1 make check VERBOSE=1 cd .. @@ -25,7 +25,7 @@ cd .. ########## test without blas+lapack ########## mkdir build cd build -cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_USE_CBLAS_LAPACKE=OFF +cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_BUILD_UNITTEST=ON -DBTAS_USE_CBLAS_LAPACKE=OFF make VERBOSE=1 make check VERBOSE=1 cd .. From 5ec5eab392d5681845db22cea9013c1329aec715 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 13:28:12 -0500 Subject: [PATCH 54/66] Allow tucker to be used without MKL. Use contract (unoptimized swapping) instead of core_contract engine (in place transposes) --- btas/generic/tucker.h | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h index 36704d93..bd6086c0 100644 --- a/btas/generic/tucker.h +++ b/btas/generic/tucker.h @@ -1,10 +1,9 @@ #ifndef BTAS_TUCKER_DECOMP_H #define BTAS_TUCKER_DECOMP_H -#ifdef BTAS_HAS_INTEL_MKL - #include #include +#include #include @@ -24,6 +23,10 @@ void tucker_compression(Tensor &A, double epsilon_svd, double norm2 = dot(A,A); //norm2 *= norm2; auto ndim = A.rank(); + std::vector A_modes; + for(int i = 0; i< ndim;++i){ + A_modes.push_back(i); + } for (int i = 0; i < ndim; i++) { // Determine the threshold epsilon_SVD. @@ -37,10 +40,7 @@ void tucker_compression(Tensor &A, double epsilon_svd, gemm(CblasNoTrans, CblasTrans, 1.0, flat, flat, 0.0, S); // Calculate SVD of smaller object. - auto info = LAPACKE_dsyev(LAPACK_ROW_MAJOR, 'V', 'L', R, S.data(), R, - lambda.data()); - if (info) - BTAS_EXCEPTION("Error in computing the tucker SVD"); + eigenvalue_decomp(S, lambda); // Find the truncation rank based on the threshold. int rank = 0; @@ -60,10 +60,27 @@ void tucker_compression(Tensor &A, double epsilon_svd, // Push the factor matrix back as a transformation. transforms.push_back(lambda); +#ifdef BTAS_HAS_INTEL_MKL // Contract the factor matrix with the reference tensor, A. core_contract(A, lambda, i); +#else + std::vector contract_modes; + contract_modes.push_back(i); contract_modes.push_back(ndim); + std::vector final_modes; + std::vector final_dims; + for(int j = 0; j < ndim; ++j){ + if(j == i){ + final_modes.push_back(ndim); + final_dims.push_back(rank); + } else{ + final_modes.push_back(j); + final_dims.push_back(A.extent(j)); + } + } + Tensor final(final_dims); + btas::contract(1.0, A, A_modes, lambda, {i, ndim}, 0.0, final, final_dims); +#endif //BTAS_HAS_INTEL_MKL } } } // namespace btas -#endif //BTAS_HAS_INTEL_MKL #endif // BTAS_TUCKER_DECOMP_H From f9031aceeae9d1cfb959d2da8d6a3e703b4f9a64 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 14:20:12 -0500 Subject: [PATCH 55/66] Added non-MKL core contraction to randomized.h --- btas/generic/randomized.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/btas/generic/randomized.h b/btas/generic/randomized.h index 6f18792a..3f70bd10 100644 --- a/btas/generic/randomized.h +++ b/btas/generic/randomized.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,11 @@ void randomized_decomposition(Tensor &A, std::vector &transforms, // Add the oversampling to the desired rank auto ndim = A.rank(); auto rank = des_rank + oversampl; + std::vector A_modes; + for(int i = 0; i< ndim;++i){ + A_modes.push_back(i); + } + std::vector final_modes(A_modes); // Walk through all the modes of A for (int n = 0; n < ndim; n++) { @@ -102,8 +108,28 @@ void randomized_decomposition(Tensor &A, std::vector &transforms, transforms.push_back(Y); } + std::vector contract_modes; + contract_modes.push_back(0); contract_modes.push_back(ndim); for (int n = 0; n < ndim; n++) { +#ifdef BTAS_HAS_INTEL_MKL core_contract(A, transforms[n], n); +#else + std::vector final_dims; + for(int j = 0; j < ndim; ++j){ + if(j == n){ + final_dims.push_back(transforms[n].extent(1)); + } else { + final_dims.push_back(A.extent(j)); + } + } + contract_modes[0] = n; + final_modes[n] = ndim; + btas::Range final_range(final_dims); + Tensor final(final_range); + contract(1.0, A, A_modes, transforms[n], contract_modes, 0.0, final, final_modes); + final_modes[n] = n; + A = final; +#endif //BTAS_HAS_INTEL_MKL } } } // namespace btas From 5a95ae22416a34fded4be1993eb2bb869b1dd005 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 14:27:42 -0500 Subject: [PATCH 56/66] Fix non-mkl core contract in tucker.h --- btas/generic/tucker.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h index bd6086c0..06646f9b 100644 --- a/btas/generic/tucker.h +++ b/btas/generic/tucker.h @@ -50,7 +50,8 @@ void tucker_compression(Tensor &A, double epsilon_svd, } // Truncate the column space of the unitary factor matrix. - lambda = Tensor(R, R - rank); + auto kept_evecs = R - rank; + lambda = Tensor(R, kept_evecs); auto lower_bound = {0, rank}; auto upper_bound = {R, R}; auto view = @@ -71,14 +72,16 @@ void tucker_compression(Tensor &A, double epsilon_svd, for(int j = 0; j < ndim; ++j){ if(j == i){ final_modes.push_back(ndim); - final_dims.push_back(rank); + final_dims.push_back(kept_evecs); } else{ final_modes.push_back(j); final_dims.push_back(A.extent(j)); } } - Tensor final(final_dims); - btas::contract(1.0, A, A_modes, lambda, {i, ndim}, 0.0, final, final_dims); + btas::Range final_range(final_dims); + Tensor final(final_range); + btas::contract(1.0, A, A_modes, lambda, contract_modes, 0.0, final, final_modes); + A = final; #endif //BTAS_HAS_INTEL_MKL } } From c1b502c53f04c2244861624c4ddd235e265cb590 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 14:27:54 -0500 Subject: [PATCH 57/66] Update documentation --- btas/generic/cp.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index 50a8a901..2c3dc617 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -419,8 +419,6 @@ namespace btas{ /// on CP-ALS. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ . - /// \param[in] step - /// CP_ALS built from r =1 to r = rank. r increments by step. /// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in] SVD_initial_guess build inital guess from left singular vectors @@ -455,9 +453,9 @@ namespace btas{ return V; } - // Keep track of the Left hand Khatri-Rao product of matrices and - // Continues to multiply be right hand products, skipping - // the matrix at index n. + /// Keep track of the Left hand Khatri-Rao product of matrices and + /// Continues to multiply be right hand products, skipping + /// the matrix at index n. /// \param[in] n The mode being optimized, all other modes held constant /// \param[in] rank The current rank, column dimension of the factor matrices /// \param[in] forward Should the Khatri-Rao product move through the factor From a7b2294524f3a2a30fa75b31013c44c38bd7171a Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Tue, 28 Jan 2020 14:57:36 -0500 Subject: [PATCH 58/66] tucker.h uses row major eigenvalue decomposition. --- btas/generic/tucker.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h index 06646f9b..648ecd34 100644 --- a/btas/generic/tucker.h +++ b/btas/generic/tucker.h @@ -40,7 +40,14 @@ void tucker_compression(Tensor &A, double epsilon_svd, gemm(CblasNoTrans, CblasTrans, 1.0, flat, flat, 0.0, S); // Calculate SVD of smaller object. - eigenvalue_decomp(S, lambda); +#ifdef BTAS_HAS_LAPACKE + auto info = LAPACKE_dsyev(LAPACK_ROW_MAJOR, 'V', 'L', R, S.data(), R, + lambda.data()); + if (info) + BTAS_EXCEPTION("Error in computing the tucker SVD"); +#else + BTAS_EXCEPTION("Tucker decomposition requires LAPACKE"); +#endif // Find the truncation rank based on the threshold. int rank = 0; From 67fe44c7966a57f8b005f85768a39c710caf5314 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 28 Jan 2020 15:13:34 -0500 Subject: [PATCH 59/66] search for MKL can be disabled --- CMakeLists.txt | 2 ++ cmake/modules/FindCBLAS.cmake | 18 ++++++++++-------- cmake/modules/FindLAPACKE.cmake | 22 ++++++++++++---------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e2db5f4..8f4aa7a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,8 @@ redefaultable_option(BTAS_ASSERT_THROWS "Whether BTAS_ASSERT should throw; enabl add_feature_info(ASSERT_THROWS BTAS_ASSERT_THROWS "BTAS_ASSERT(x) will throw if x is false, and not be affected by NDEBUG") redefaultable_option(BTAS_USE_CBLAS_LAPACKE "Whether to use BLAS+LAPACK via CBLAS+LAPACKE" ON) add_feature_info(USE_CBLAS_LAPACKE BTAS_USE_CBLAS_LAPACKE "Will use BLAS and LAPACK linear algebra libraries via their CBLAS and LAPACKE interfaces") +redefaultable_option(BTAS_ENABLE_MKL "Whether to look for Intel MKL" ON) +add_feature_info(ENABLE_MKL BTAS_ENABLE_MKL "Will look for Intel MKL library") set(TARGET_MAX_INDEX_RANK 6 CACHE STRING "Determines the rank for which the default BTAS index type will use stack (default: 6); this requires Boost.Container") add_feature_info("TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}" TRUE "default BTAS index type will use stack for rank<=${TARGET_MAX_INDEX_RANK}") diff --git a/cmake/modules/FindCBLAS.cmake b/cmake/modules/FindCBLAS.cmake index c279e14f..7ea74823 100644 --- a/cmake/modules/FindCBLAS.cmake +++ b/cmake/modules/FindCBLAS.cmake @@ -18,13 +18,15 @@ SET(CBLAS_INCLUDE_DIR) SET(CBLAS_INCLUDE_FILE) # CBLAS in Intel MKL -FIND_PACKAGE(MKL) -IF (MKL_FOUND AND NOT CBLAS_LIBRARIES) - SET(CBLAS_LIBRARIES ${MKL_LIBRARIES}) - SET(CBLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR}) - SET(CBLAS_INCLUDE_FILE "mkl_cblas.h") - RETURN () -ENDIF (MKL_FOUND AND NOT CBLAS_LIBRARIES) +IF (BTAS_ENABLE_MKL) + FIND_PACKAGE(MKL) + IF (MKL_FOUND AND NOT CBLAS_LIBRARIES) + SET(CBLAS_LIBRARIES ${MKL_LIBRARIES}) + SET(CBLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR}) + SET(CBLAS_INCLUDE_FILE "mkl_cblas.h") + RETURN () + ENDIF (MKL_FOUND AND NOT CBLAS_LIBRARIES) +ENDIF(BTAS_ENABLE_MKL) # Old CBLAS search INCLUDE(CheckLibraryList) @@ -99,7 +101,7 @@ IF(NOT CBLAS_FOUND AND CBLAS_FIND_REQUIRED) ENDIF(NOT CBLAS_FOUND AND CBLAS_FIND_REQUIRED) IF(NOT CBLAS_FIND_QUIETLY) IF(CBLAS_FOUND) - MESSAGE(STATUS "CBLAS library found") + MESSAGE(STATUS "CBLAS library found: CBLAS_LIBRARIES=${CBLAS_LIBRARIES}") ELSE(CBLAS_FOUND) MESSAGE(STATUS "CBLAS library not found.") ENDIF(CBLAS_FOUND) diff --git a/cmake/modules/FindLAPACKE.cmake b/cmake/modules/FindLAPACKE.cmake index 1501f7a7..0c3fc66c 100644 --- a/cmake/modules/FindLAPACKE.cmake +++ b/cmake/modules/FindLAPACKE.cmake @@ -17,15 +17,17 @@ SET(LAPACKE_INCLUDE_DIR) SET(LAPACKE_INCLUDE_FILE) # Unless already found look for LAPACKE in Intel mkl -IF (NOT CBLAS_FOUND) - FIND_PACKAGE(MKL) -ENDIF (NOT CBLAS_FOUND) -IF (MKL_FOUND AND NOT LAPACKE_LIBRARIES) - SET(LAPACKE_LIBRARIES ${MKL_LIBRARIES}) - SET(LAPACKE_INCLUDE_DIR ${MKL_INCLUDE_DIR}) - SET(LAPACKE_INCLUDE_FILE "mkl_lapacke.h") - RETURN () -ENDIF (MKL_FOUND AND NOT LAPACKE_LIBRARIES) +IF (BTAS_ENABLE_MKL) + IF (NOT CBLAS_FOUND) + FIND_PACKAGE(MKL) + ENDIF (NOT CBLAS_FOUND) + IF (MKL_FOUND AND NOT LAPACKE_LIBRARIES) + SET(LAPACKE_LIBRARIES ${MKL_LIBRARIES}) + SET(LAPACKE_INCLUDE_DIR ${MKL_INCLUDE_DIR}) + SET(LAPACKE_INCLUDE_FILE "mkl_lapacke.h") + RETURN () + ENDIF (MKL_FOUND AND NOT LAPACKE_LIBRARIES) +ENDIF(BTAS_ENABLE_MKL) INCLUDE(CheckLibraryList) @@ -66,7 +68,7 @@ IF(NOT LAPACKE_FOUND AND LAPACKE_FIND_REQUIRED) ENDIF(NOT LAPACKE_FOUND AND LAPACKE_FIND_REQUIRED) IF(NOT LAPACKE_FIND_QUIETLY) IF(LAPACKE_FOUND) - MESSAGE(STATUS "LAPACKE library found") + MESSAGE(STATUS "LAPACKE library found: LAPACKE_LIBRARIES=${LAPACKE_LIBRARIES}") ELSE(LAPACKE_FOUND) MESSAGE(STATUS "LAPACKE library not found.") ENDIF(LAPACKE_FOUND) From c8da39389e392fe3801d39a242ed13ccc171e35c Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 30 Jan 2020 12:53:46 -0500 Subject: [PATCH 60/66] Update documentation and use compute_rand in tucker and random compression --- btas/generic/cp_als.h | 61 ++++++---------------------------------- btas/generic/cp_rals.h | 64 ++++++++---------------------------------- 2 files changed, 20 insertions(+), 105 deletions(-) diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index 62e9e6a7..22aeecbb 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -220,10 +220,6 @@ namespace btas{ /// \param[in] tcutSVD Truncation threshold for SVD of each mode in Tucker /// decomposition. /// \param[in] converge_test Test to see if ALS is converged - /// \param[in] opt_rank Should the CP decomposition of tucker - /// core tensor find the optimal rank with error < tcutCP? Default = true. - /// \param[in] tcutCP How small epsilon must be to consider the CP - /// decomposition converged. Default = 1e-2. /// \param[in] rank If finding CP /// decomposition to finite rank, define CP rank. Default 0 will throw error /// for compute_rank. @@ -231,32 +227,18 @@ namespace btas{ /// without calculating the Khatri-Rao product? Default = true. /// \param[in] /// calculate_epsilon Should the 2-norm error be calculated \f$ ||T_{exact} - - /// T_{approx}|| = \epsilon \f$ . Default = true. - /// \param[in] step CP_ALS built - /// from r =1 to r = \c rank. r increments by \c step; default = 1. - /// \param[in] - /// max_rank The highest rank approximation computed before giving up on - /// CP-ALS. Default = 1e4. + /// T_{approx}|| = \epsilon \f$ . Default = false. /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. Default = 1e4. - /// \param[in] tcutALS How small difference in - /// factor matrices must be to consider ALS of a single rank converged. - /// Default = 0.1. - /// \param[in] SVD_initial_guess Should the initial factor matrices be - /// approximated with left singular values? default = false - /// \param[in] SVD_rank if \c - /// SVD_initial_guess is true specify the rank of the initial guess such that - /// SVD_rank <= rank. default = true /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = false /// \returns 2-norm error, \f$ \epsilon \f$, /// between exact and approximate tensor, -1.0 if calculate_epsilon = false && /// ConvClass != FitCheck. - double compress_compute_tucker(double tcutSVD, ConvClass & converge_test, bool opt_rank = true, double tcutCP = 1e-2, int rank = 0, - bool direct = true, bool calculate_epsilon = true, int step = 1, int max_rank = 1e4, - double max_als = 1e4, bool SVD_initial_guess = false, - int SVD_rank = 0, bool fast_pI = false) { + double compress_compute_tucker(double tcutSVD, ConvClass & converge_test, int rank = 0, + bool direct = true, bool calculate_epsilon = false, + double max_als = 1e4, bool fast_pI = false) { // Tensor compression std::vector transforms; tucker_compression(tensor_ref, tcutSVD, transforms); @@ -264,10 +246,7 @@ namespace btas{ double epsilon = -1.0; // CP decomposition - if (opt_rank) - epsilon = this->compute_error(converge_test, tcutCP, step, max_rank, SVD_initial_guess, SVD_rank, max_als, fast_pI, direct); - else - epsilon = this->compute_rank(rank, converge_test, step, SVD_initial_guess, SVD_rank, max_als, fast_pI, calculate_epsilon, direct); + epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct); // scale factor matrices for (int i = 0; i < ndim; i++) { @@ -299,51 +278,29 @@ namespace btas{ /// \param[in] powerit Number of power iterations, /// as specified in the literature, to scale the spectrum of each mode. /// Default = suggested = 2. - /// \param[in] opt_rank Should the CP decomposition - /// of tucker core tensor find the optimal rank with error < tcutCP? Default = - /// true. - /// \param[in] tcutCP How small epsilon must be to consider the CP - /// decomposition converged. Default = 1e-2. /// \param[in] rank If finding CP /// decomposition to finite rank, define CP rank. Default 0 will throw error /// for compute_rank. /// \param[in] direct Should the CP decomposition be /// computed without calculating the Khatri-Rao product? Default = true. /// \param[in] calculate_epsilon Should the 2-norm error be calculated - /// \f$ ||T_exact - T_approx|| = \epsilon \f$. Default = true. - /// \param[in] step - /// CP_ALS built from r =1 to r = rank. r increments by step; default = 1. - /// \param[in] max_rank The highest rank approximation computed before giving - /// up on CP-ALS. Default = 1e5. + /// \f$ ||T_exact - T_approx|| = \epsilon \f$. Default = false. /// \param[in] max_als If CP decomposition is to /// finite error, max_als is the highest rank approximation computed before /// giving up on CP-ALS. Default = 1e5. - /// \param[in] tcutALS How small - /// difference in factor matrices must be to consider ALS of a single rank - /// converged. Default = 0.1. - /// \param[in] SVD_initial_guess Should the initial factor matrices be - /// approximated with left singular values? default = false - /// \param[in] SVD_rank if \c - /// SVD_initial_guess is true specify the rank of the initial guess such that - /// SVD_rank <= rank. default = true /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = false /// \returns 2-norm error, \f$ \epsilon \f$, /// between exact and approximate tensor, -1.0 if calculate_epsilon = false && /// ConvClass != FitCheck. double compress_compute_rand(int desired_compression_rank, ConvClass & converge_test, int oversampl = 10, int powerit = 2, - bool opt_rank = true, double tcutCP = 1e-2, int rank = 0, bool direct = true, - bool calculate_epsilon = false, int step = 1, int max_rank = 1e5, double max_als = 1e5, - bool SVD_initial_guess = false, int SVD_rank = 0, bool fast_pI = false) { + int rank = 0, bool direct = true, bool calculate_epsilon = false, double max_als = 1e5, + bool fast_pI = false) { std::vector transforms; randomized_decomposition(tensor_ref, transforms, desired_compression_rank, oversampl, powerit); size = tensor_ref.size(); - double epsilon = -1.0; - if (opt_rank) - epsilon = this->compute_error(converge_test, tcutCP, step, max_rank, SVD_initial_guess, SVD_rank, max_als, fast_pI, direct); - else - epsilon = this->compute_rank(rank, converge_test, step, SVD_initial_guess, SVD_rank, max_als, fast_pI, calculate_epsilon, direct); + auto epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct); // scale factor matrices for (int i = 0; i < ndim; i++) { diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h index 7a92f2e8..eebe4e5c 100644 --- a/btas/generic/cp_rals.h +++ b/btas/generic/cp_rals.h @@ -238,10 +238,6 @@ namespace btas{ /// \param[in] tcutSVD Truncation threshold for SVD of each mode in Tucker /// decomposition. /// \param[in] converge_test Test to see if ALS is converged - /// \param[in] opt_rank Should the CP decomposition of tucker - /// core tensor find the optimal rank with error < tcutCP? Default = true. - /// \param[in] tcutCP How small epsilon must be to consider the CP - /// decomposition converged. Default = 1e-2. /// \param[in] rank If finding CP /// decomposition to finite rank, define CP rank. Default 0 will throw error /// for compute_rank. @@ -249,32 +245,18 @@ namespace btas{ /// without calculating the Khatri-Rao product? Default = true. /// \param[in] /// calculate_epsilon Should the 2-norm error be calculated \f$ ||T_{exact} - - /// T_{approx}|| = \epsilon \f$ . Default = true. - /// \param[in] step CP_ALS built - /// from r =1 to r = \c rank. r increments by \c step; default = 1. - /// \param[in] - /// max_rank The highest rank approximation computed before giving up on - /// CP-ALS. Default = 1e4. + /// T_{approx}|| = \epsilon \f$ . Default = false. /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. Default = 1e4. - /// \param[in] tcutALS How small difference in - /// factor matrices must be to consider ALS of a single rank converged. - /// Default = 0.1. - /// \param[in] SVD_initial_guess Should the initial factor matrices be - /// approximated with left singular values? default = false - /// \param[in] SVD_rank if \c - /// SVD_initial_guess is true specify the rank of the initial guess such that - /// SVD_rank <= rank. default = true /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = false /// \returns 2-norm error, \f$ \epsilon \f$, /// between exact and approximate tensor, -1.0 if calculate_epsilon = false && - // ConvClass != FitCheck. - double compress_compute_tucker(double tcutSVD, ConvClass & converge_test, bool opt_rank = true, double tcutCP = 1e-2, int rank = 0, - bool direct = true, bool calculate_epsilon = true, int step = 1, int max_rank = 1e4, - double max_als = 1e4, bool SVD_initial_guess = false, - int SVD_rank = 0, bool fast_pI = false) { + /// ConvClass != FitCheck. + double compress_compute_tucker(double tcutSVD, ConvClass & converge_test, int rank = 0, + bool direct = true, bool calculate_epsilon = false, double max_als = 1e4, + bool fast_pI = false) { // Tensor compression std::vector transforms; tucker_compression(tensor_ref, tcutSVD, transforms); @@ -282,10 +264,7 @@ namespace btas{ double epsilon = -1.0; // CP decomposition - if (opt_rank) - epsilon = this->compute_error(converge_test, tcutCP, step, max_rank, SVD_initial_guess, SVD_rank, max_als, fast_pI, direct); - else - epsilon = this->compute_rank(rank, converge_test, step, SVD_initial_guess, SVD_rank, max_als, fast_pI, calculate_epsilon, direct); + epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct); // scale factor matrices @@ -318,52 +297,31 @@ namespace btas{ /// \param[in] powerit Number of power iterations, /// as specified in the literature, to scale the spectrum of each mode. /// Default = suggested = 2. - /// \param[in] opt_rank Should the CP decomposition - /// of tucker core tensor find the optimal rank with error < tcutCP? Default = - /// true. - /// \param[in] tcutCP How small epsilon must be to consider the CP - /// decomposition converged. Default = 1e-2. /// \param[in] rank If finding CP /// decomposition to finite rank, define CP rank. Default 0 will throw error /// for compute_rank. /// \param[in] direct Should the CP decomposition be /// computed without calculating the Khatri-Rao product? Default = true. /// \param[in] calculate_epsilon Should the 2-norm error be calculated - /// \f$ ||T_exact - T_approx|| = \epsilon \f$. Default = true. - /// \param[in] step - /// CP_ALS built from r =1 to r = rank. r increments by step; default = 1. - /// \param[in] max_rank The highest rank approximation computed before giving - /// up on CP-ALS. Default = 1e5. + /// \f$ ||T_exact - T_approx|| = \epsilon \f$. Default = false. /// \param[in] max_als If CP decomposition is to /// finite error, max_als is the highest rank approximation computed before /// giving up on CP-ALS. Default = 1e5. - /// \param[in] tcutALS How small - /// difference in factor matrices must be to consider ALS of a single rank - /// converged. Default = 0.1. - /// \param[in] SVD_initial_guess Should the initial factor matrices be - /// approximated with left singular values? default = false - /// \param[in] SVD_rank if \c - /// SVD_initial_guess is true specify the rank of the initial guess such that - /// SVD_rank <= rank. default = true /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = false /// \returns 2-norm error, \f$ \epsilon \f$, /// between exact and approximate tensor, -1.0 if calculate_epsilon = false && - // ConvClass != FitCheck. + /// ConvClass != FitCheck. double compress_compute_rand(int desired_compression_rank, ConvClass & converge_test, int oversampl = 10, int powerit = 2, - bool opt_rank = true, double tcutCP = 1e-2, int rank = 0, bool direct = true, - bool calculate_epsilon = false, int step = 1, int max_rank = 1e5, double max_als = 1e5, - bool SVD_initial_guess = false, int SVD_rank = 0, bool fast_pI = false) { + int rank = 0, bool direct = true, bool calculate_epsilon = false, + double max_als = 1e5, bool fast_pI = false) { std::vector transforms; randomized_decomposition(tensor_ref, transforms, desired_compression_rank, oversampl, powerit); size = tensor_ref.size(); double epsilon = -1.0; // CP decomposition - if (opt_rank) - epsilon = this->compute_error(converge_test, tcutCP, step, max_rank, SVD_initial_guess, SVD_rank, max_als, fast_pI, direct); - else - epsilon = this->compute_rank(rank, converge_test, step, SVD_initial_guess, SVD_rank, max_als, fast_pI, calculate_epsilon, direct); + epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct); // scale factor matrices for (int i = 0; i < ndim; i++) { From 4ceee1a1d92353cf60abf4811eb144fd6f55693b Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 30 Jan 2020 12:55:48 -0500 Subject: [PATCH 61/66] Update compress_rand and compress_tucker function calls in tensor_cp_test.cc --- unittest/tensor_cp_test.cc | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/unittest/tensor_cp_test.cc b/unittest/tensor_cp_test.cc index f0b360f2..6ee9db3f 100644 --- a/unittest/tensor_cp_test.cc +++ b/unittest/tensor_cp_test.cc @@ -95,7 +95,7 @@ TEST_CASE("CP") CP_ALS A1(d); conv.set_norm(norm3); double diff = - A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false); + A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true); CHECK((diff - results(2,0)) <= epsilon); } SECTION("ALS MODE = 3, Random + CP"){ @@ -103,7 +103,7 @@ TEST_CASE("CP") CP_ALS A1(d); conv.set_norm(norm3); double diff = - A1.compress_compute_rand(2, conv, 0, 2, false, 1e-2, 5); + A1.compress_compute_rand(2, conv, 0, 2, 5, true, false, 100, true); CHECK((diff - results(3,0)) <= epsilon); } @@ -124,14 +124,14 @@ TEST_CASE("CP") auto d = D4; CP_ALS A1(d); conv.set_norm(norm4); - double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5); + double diff = A1.compress_compute_tucker(0.1, conv, 5, true, false, 1e4, true); CHECK((diff - results(6,0)) <= epsilon); } SECTION("ALS MODE = 4, Random + CP"){ auto d = D4; CP_ALS A1(d); conv.set_norm(norm4); - double diff = A1.compress_compute_rand(3, conv, 0, 2, true, 1e-2, 0, true, false, 1, 20, 100); + double diff = A1.compress_compute_rand(3, conv, 0, 2, 1, true, false, 20, true); CHECK((diff - results(7,0)) <= epsilon); } SECTION("ALS MODE = 5, Finite rank"){ @@ -150,14 +150,14 @@ TEST_CASE("CP") auto d = D5; CP_ALS A1(d); conv.set_norm(norm5); - double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false); + double diff = A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true); CHECK((diff - results(10,0)) <= epsilon); } SECTION("ALS MODE = 5, Random + CP"){ auto d = D5; CP_ALS A1(d); conv.set_norm(norm5); - double diff = A1.compress_compute_rand(1, conv, 0, 2, false, 1e-2, 5); + double diff = A1. compress_compute_rand(1, conv, 0, 2, 5, true, false, 100, false); CHECK((diff - results(11,0)) <= epsilon); } } @@ -181,7 +181,7 @@ TEST_CASE("CP") CP_RALS A1(d); conv.set_norm(norm3); double diff = - A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false); + A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true); CHECK((diff - results(14,0)) <= epsilon); } SECTION("RALS MODE = 3, Random + CP"){ @@ -189,7 +189,7 @@ TEST_CASE("CP") CP_RALS A1(d); conv.set_norm(norm3); double diff = - A1.compress_compute_rand(2, conv, 0, 2, false, 1e-2, 5); + A1.compress_compute_rand(2, conv, 0, 2, 5, true, false, 100, true); CHECK((diff - results(15,0)) <= epsilon); } SECTION("RALS MODE = 4, Finite rank"){ @@ -209,14 +209,14 @@ TEST_CASE("CP") auto d = D4; CP_RALS A1(d); conv.set_norm(norm4); - double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5); + double diff = A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true); CHECK((diff - results(18,0)) <= epsilon); } SECTION("RALS MODE = 4, Random + CP"){ auto d = D4; CP_RALS A1(d); conv.set_norm(norm4); - double diff = A1.compress_compute_rand(3, conv, 0, 2, true, 1e-2, 0, true, false, 1, 20, 100); + double diff = A1.compress_compute_rand(3, conv, 0, 2, 1, true, false, 20, true); CHECK((diff - results(19,0)) <= epsilon); } SECTION("RALS MODE = 5, Finite rank"){ @@ -235,14 +235,14 @@ TEST_CASE("CP") auto d = D5; CP_RALS A1(d); conv.set_norm(norm5); - double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false); + double diff = A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true); CHECK((diff - results(22,0)) <= epsilon); } SECTION("RALS MODE = 5, Random + CP"){ auto d = D5; CP_RALS A1(d); conv.set_norm(norm5); - double diff = A1.compress_compute_rand(1, conv, 0, 2, false, 1e-2, 5); + double diff = A1.compress_compute_rand(1, conv, 0, 2, 5, true, false, 100, true); CHECK((diff - results(23,0)) <= epsilon); } } From 8832130621d97d00d0f5cd77305e7d42587a1a95 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 30 Jan 2020 12:57:48 -0500 Subject: [PATCH 62/66] Remove BTAS_HAS_CBLAS guard around linear algebra functions --- btas/generic/linear_algebra.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h index 8bfbf980..c83b274d 100644 --- a/btas/generic/linear_algebra.h +++ b/btas/generic/linear_algebra.h @@ -6,8 +6,6 @@ #define BTAS_LINEAR_ALGEBRA_H #include -#ifdef BTAS_HAS_CBLAS - namespace btas{ /// Computes L of the LU decomposition of tensor \c A /// \param[in, out] A In: A reference matrix to be LU decomposed. Out: @@ -288,5 +286,4 @@ Tensor pseudoInverse(Tensor & A, bool & fast_pI) { } } // namespace btas -#endif // BTAS_HAS_CBLAS #endif //BTAS_LINEAR_ALGEBRA_H From edc63f4bcd94e62b7e44a2ddcd72912934529e8f Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 30 Jan 2020 13:02:33 -0500 Subject: [PATCH 63/66] Update results using different initializtion function --- unittest/cp_test_results.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unittest/cp_test_results.txt b/unittest/cp_test_results.txt index a6650b58..488c07e1 100644 --- a/unittest/cp_test_results.txt +++ b/unittest/cp_test_results.txt @@ -1,6 +1,6 @@ 0.8022121792543945 0.9983681298768465 -0.7909930183444412 +0.8013967619361885 0.5365678558201658 0.6011111112728442 0.9932249481447798 @@ -8,11 +8,11 @@ 0.5862586400695924 0.0562044207525656 0.183409953450329 -0.0008026970682262213 +0.00197202949114994 0.5092600887442312 0.8023893427056535 0.9998165185189172 -0.7965146841534007 +0.8000632869557247 0.5365998545867845 0.5946792918362551 0.9902479480481341 From ca567c2dd53f08564912d861de9f1164ea2e4c11 Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 30 Jan 2020 13:55:54 -0500 Subject: [PATCH 64/66] Add a verbose option to printing in converge check --- btas/generic/converge_class.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/btas/generic/converge_class.h b/btas/generic/converge_class.h index a550c503..ef23b34f 100644 --- a/btas/generic/converge_class.h +++ b/btas/generic/converge_class.h @@ -112,7 +112,9 @@ namespace btas { double fitChange = abs(fitOld_ - fit); fitOld_ = fit; - //std::cout << fit << "\t" << fitChange << std::endl; + if(verbose_) { + std::cout << fit << "\t" << fitChange << std::endl; + } if(fitChange < tol_) { converged_num++; if(converged_num == 2){ @@ -140,6 +142,10 @@ namespace btas { return final_fit_; } + void verbose(bool verb){ + verbose_ = verb; + } + private: double tol_; double fitOld_ = -1.0; @@ -148,6 +154,7 @@ namespace btas { int iter_ = 0; int converged_num = 0; Tensor MtKRP_; + bool verbose_ = false; double norm(const std::vector & btas_factors){ auto rank = btas_factors[0].extent(1); From 6f374b2b575b5beaffd281c65ab50b05cbc7ebef Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 30 Jan 2020 14:29:56 -0500 Subject: [PATCH 65/66] Compute all the tucker transformations before transforming the core tensor. --- btas/generic/tucker.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h index 648ecd34..ed0776b4 100644 --- a/btas/generic/tucker.h +++ b/btas/generic/tucker.h @@ -67,7 +67,11 @@ void tucker_compression(Tensor &A, double epsilon_svd, // Push the factor matrix back as a transformation. transforms.push_back(lambda); + } + for(int i = 0; i < ndim; ++i){ + auto & lambda = transforms[i]; + auto kept_evecs = lambda.extent(1); #ifdef BTAS_HAS_INTEL_MKL // Contract the factor matrix with the reference tensor, A. core_contract(A, lambda, i); From a072fd7eb9078b9009037fba9bda44d449291e18 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Thu, 30 Jan 2020 15:31:30 -0500 Subject: [PATCH 66/66] release id -> alpha.1 [skip ci] --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f4aa7a3..368a9419 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ add_feature_info("TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}" TRUE "default set(BTAS_MAJOR_VERSION 1) set(BTAS_MINOR_VERSION 0) set(BTAS_MICRO_VERSION 0) -set(BTAS_PRERELEASE_ID alpha) +set(BTAS_PRERELEASE_ID alpha.1) set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}") if (BTAS_PRERELEASE_ID) set(BTAS_EXT_VERSION "${BTAS_VERSION}-${BTAS_PRERELEASE_ID}")