From 29f4a79d351e7f8dd6339ff07788635a01a62e9e Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Sun, 26 Jan 2020 16:22:12 -0500
Subject: [PATCH 01/66] Move linear algebra from randomized into linear algebra
 specific file.

---
 btas/generic/linear_algebra.h | 167 ++++++++++++++++++++++++++++++++++
 btas/generic/randomized.h     | 136 ---------------------------
 2 files changed, 167 insertions(+), 136 deletions(-)
 create mode 100644 btas/generic/linear_algebra.h
diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
new file mode 100644
index 00000000..5f936df9
--- /dev/null
+++ b/btas/generic/linear_algebra.h
@@ -0,0 +1,167 @@
+//
+// Created by Karl Pierce on 1/26/20.
+//
+
+#ifndef BTAS_LINEAR_ALGEBRA_H
+#define BTAS_LINEAR_ALGEBRA_H
+#include <btas/error.h>
+
+namespace btas{
+  /// Computes L of the LU decomposition of tensor \c A
+/// \param[in, out] A In: A reference matrix to be LU decomposed.  Out:
+/// The L of an LU decomposition of \c A.
+
+  template <typename Tensor> void LU_decomp(Tensor &A) {
+
+#ifndef LAPACKE_ENABLED
+    BTAS_EXCEPTION("Using this function requires LAPACKE");
+#endif // LAPACKE_ENABLED
+
+    if(A.rank() > 2){
+      BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices.");
+    }
+
+    btas::Tensor<int> piv(std::min(A.extent(0), A.extent(1)));
+    Tensor L(A.range());
+    Tensor P(A.extent(0), A.extent(0));
+    P.fill(0.0);
+    L.fill(0.0);
+
+    // LAPACKE LU decomposition gives back dense L and U to be
+    // restored into lower and upper triangular form, and a pivoting
+    // matrix for L
+    auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1),
+                               A.data(), A.extent(1), piv.data());
+
+    // This means there was a problem with the LU that must be dealt with,
+    // The decomposition cannot be continued.
+    if (info < 0) {
+      BTAS_EXCEPTION("LU_decomp: LAPACKE_dgetrf received an invalid input parameter");
+    }
+
+    // This means that part of the LU is singular which may cause a problem in
+    // ones QR decomposition but LU can be computed fine.
+    if (info != 0) {
+    }
+
+    // indexing the pivot matrix
+    for (auto &j : piv)
+      j -= 1;
+
+    int pivsize = piv.extent(0);
+    piv.resize(Range{Range1{A.extent(0)}});
+
+    // Walk through the full pivot array and
+    // put the correct index values throughout
+    for (int i = 0; i < piv.extent(0); i++) {
+      if (i == piv(i) || i >= pivsize) {
+        for (int j = 0; j < i; j++) {
+          if (i == piv(j)) {
+            piv(i) = j;
+            break;
+          }
+        }
+      }
+      if (i >= pivsize) {
+        piv(i) = i;
+        for (int j = 0; j < i; j++)
+          if (i == piv(j)) {
+            piv(i) = j;
+            break;
+          }
+      }
+    }
+
+    // generating the pivot matrix from the correct indices found above
+    for (int i = 0; i < piv.extent(0); i++)
+      P(piv(i), i) = 1;
+
+    // Use the output of LAPACKE to make a lower triangular matrix, L
+    for (int i = 0; i < L.extent(0); i++) {
+      for (int j = 0; j < i && j < L.extent(1); j++) {
+        L(i, j) = A(i, j);
+      }
+      if (i < L.extent(1))
+        L(i, i) = 1;
+    }
+
+    // contracting the pivoting matrix with L to put in correct order
+    gemm(CblasNoTrans, CblasNoTrans, 1.0, P, L, 0.0, A);
+  }
+
+/// Computes the QR decomposition of matrix \c A
+/// \param[in, out] A In: A Reference matrix to be QR decomposed.  Out:
+/// The Q of a QR decomposition of \c A.
+
+  template <typename Tensor> bool QR_decomp(Tensor &A) {
+
+#ifndef LAPACKE_ENABLED
+    BTAS_EXCEPTION("Using this function requires LAPACKE");
+#endif // LAPACKE_ENABLED
+
+    if(A.rank() > 2){
+      BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices.");
+    }
+
+    int Qm = A.extent(0);
+    int Qn = A.extent(1);
+    Tensor B(1, std::min(Qm, Qn));
+
+    // LAPACKE doesn't directly calculate Q. Must first call this function to
+    // generate precursors to Q
+    auto info = LAPACKE_dgeqrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1),
+                               A.data(), A.extent(1), B.data());
+
+    if (info == 0) {
+      // This function generates Q.
+      info = LAPACKE_dorgqr(LAPACK_ROW_MAJOR, Qm, Qn, Qn, A.data(), A.extent(1),
+                            B.data());
+
+      // If there was some problem generating Q, i.e. it is singular, the
+      // randomized decompsoition will fail.  There is an exception thrown if
+      // there is a problem to stop the randomized decomposition
+      if (info != 0) {
+        return false;
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+/// Computes the inverse of a matrix \c A using a pivoted LU decomposition
+/// \param[in, out] A In: A reference matrix to be inverted. Out:
+/// The inverse of A, computed using LU decomposition.
+  template <typename Tensor>
+  bool Inverse_Matrix(Tensor & A){
+
+#ifndef LAPACKE_ENABLED
+    BTAS_EXCEPTION("Using this function requires LAPACKE");
+#endif // LAPACKE_ENABLED
+
+    if(A.rank() > 2){
+      BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices.");
+    }
+
+    btas::Tensor<int> piv(std::min(A.extent(0), A.extent(1)));
+    piv.fill(0);
+
+    // LAPACKE LU decomposition gives back dense L and U to be
+    // restored into lower and upper triangular form, and a pivoting
+    // matrix for L
+    auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1),
+                               A.data(), A.extent(1), piv.data());
+    if(info != 0){
+      A = Tensor();
+      return false;
+    }
+    info = LAPACKE_dgetri(CblasRowMajor, A.extent(0), A.data(), A.extent(0), piv.data());
+    if(info != 0){
+      A = Tensor();
+      return false;
+    }
+    return true;
+  }
+}
+
+#endif //BTAS_LINEAR_ALGEBRA_H
diff --git a/btas/generic/randomized.h b/btas/generic/randomized.h
index 2b4cc811..4fdb2525 100644
--- a/btas/generic/randomized.h
+++ b/btas/generic/randomized.h
@@ -9,9 +9,6 @@
 #include <stdlib.h>
 #include <vector>
 
-#ifdef _HAS_INTEL_MKL
-#include <mkl.h>
-
 namespace btas {
 
 /// \param[in,out] A In: An empty matrix of size column dimension of the nth
@@ -109,139 +106,6 @@ void randomized_decomposition(Tensor &A, std::vector<Tensor> &transforms,
     core_contract(A, transforms[n], n);
   }
 }
-
-/// Computes L of the LU decomposition of tensor \c A
-/// \param[in, out] A In: A reference matrix to be LU decomposed.  Out:
-/// The L of an LU decomposition of \c A.
-
-template <typename Tensor> void LU_decomp(Tensor &A) {
-
-  btas::Tensor<int> piv(std::min(A.extent(0), A.extent(1)));
-  Tensor L(A.range());
-  Tensor P(A.extent(0), A.extent(0));
-  P.fill(0.0);
-  L.fill(0.0);
-
-  // LAPACKE LU decomposition gives back dense L and U to be
-  // restored into lower and upper triangular form, and a pivoting
-  // matrix for L
-  auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1),
-                             A.data(), A.extent(1), piv.data());
-
-  // This means there was a problem with the LU that must be dealt with,
-  // The decomposition cannot be continued.
-  if (info < 0) {
-    BTAS_EXCEPTION("LU_decomp: LAPACKE_dgetrf received an invalid input parameter");
-  }
-
-  // This means that part of the LU is singular which may cause a problem in
-  // ones QR decomposition but LU can be computed fine.
-  if (info != 0) {
-  }
-
-  // indexing the pivot matrix
-  for (auto &j : piv)
-    j -= 1;
-
-  int pivsize = piv.extent(0);
-  piv.resize(Range{Range1{A.extent(0)}});
-
-  // Walk through the full pivot array and
-  // put the correct index values throughout
-  for (int i = 0; i < piv.extent(0); i++) {
-    if (i == piv(i) || i >= pivsize) {
-      for (int j = 0; j < i; j++) {
-        if (i == piv(j)) {
-          piv(i) = j;
-          break;
-        }
-      }
-    }
-    if (i >= pivsize) {
-      piv(i) = i;
-      for (int j = 0; j < i; j++)
-        if (i == piv(j)) {
-          piv(i) = j;
-          break;
-        }
-    }
-  }
-
-  // generating the pivot matrix from the correct indices found above
-  for (int i = 0; i < piv.extent(0); i++)
-    P(piv(i), i) = 1;
-
-  // Use the output of LAPACKE to make a lower triangular matrix, L
-  for (int i = 0; i < L.extent(0); i++) {
-    for (int j = 0; j < i && j < L.extent(1); j++) {
-      L(i, j) = A(i, j);
-    }
-    if (i < L.extent(1))
-      L(i, i) = 1;
-  }
-
-  // contracting the pivoting matrix with L to put in correct order
-  gemm(CblasNoTrans, CblasNoTrans, 1.0, P, L, 0.0, A);
-}
-
-// Computes the QR decomposition of matrix \c A
-/// \param[in, out] A In: A Reference matrix to be QR decomposed.  Out:
-/// The Q of a QR decomposition of \c A.
-
-template <typename Tensor> bool QR_decomp(Tensor &A) {
-
-  int Qm = A.extent(0);
-  int Qn = A.extent(1);
-  Tensor B(1, std::min(Qm, Qn));
-
-  // LAPACKE doesn't directly calculate Q. Must first call this function to
-  // generate precursors to Q
-  auto info = LAPACKE_dgeqrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1),
-                             A.data(), A.extent(1), B.data());
-
-  if (info == 0) {
-    // This function generates Q.
-    info = LAPACKE_dorgqr(LAPACK_ROW_MAJOR, Qm, Qn, Qn, A.data(), A.extent(1),
-                          B.data());
-
-    // If there was some problem generating Q, i.e. it is singular, the
-    // randomized decompsoition will fail.  There is an exception thrown if
-    // there is a problem to stop the randomized decomposition
-    if (info != 0) {
-      return false;
-    }
-    return true;
-  } else {
-    return false;
-  }
-}
-
-template <typename Tensor>
-bool Inverse_Matrix(Tensor & A){
-  if(A.rank() > 2){
-    //Return exception
-  }
-  btas::Tensor<int> piv(std::min(A.extent(0), A.extent(1)));
-  piv.fill(0);
-
-  // LAPACKE LU decomposition gives back dense L and U to be
-  // restored into lower and upper triangular form, and a pivoting
-  // matrix for L
-  auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, A.extent(0), A.extent(1),
-                             A.data(), A.extent(1), piv.data());
-  if(info != 0){
-    A = Tensor();
-    return false;
-  }
-  info = LAPACKE_dgetri(CblasRowMajor, A.extent(0), A.data(), A.extent(0), piv.data());
-  if(info != 0){
-    A = Tensor();
-    return false;
-  }
-  return true;
-}
 } // namespace btas
 
-#endif // HAS_INTEL_MKL
-
 #endif // BTAS_RANDOMIZED_DECOMP_H

From ed67cb575fced40d5297b2c9303b7860ab034c92 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Sun, 26 Jan 2020 16:22:41 -0500
Subject: [PATCH 02/66] Update fit check functions' documentation

---
 btas/generic/converge_class.h | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/btas/generic/converge_class.h b/btas/generic/converge_class.h
index 178c0e0a..a550c503 100644
--- a/btas/generic/converge_class.h
+++ b/btas/generic/converge_class.h
@@ -61,10 +61,13 @@ namespace btas {
     int rank_;               // Rank of the CP problem
   };
 
-  // From Tensor Toolbox : 
-  // The "fit" is defined as 1 - norm(X-full(M))/norm(X) and is
-  // loosely the proportion of the data described by the CP model, i.e., a
-  // fit of 1 is perfect.
+  /**
+   \brief Class used to decide when ALS problem is converged
+   The "fit" is defined as \f$ 1 - \frac{\|X-full(M)\|}{\|X\|} \leq \epsilon\f$
+   where X is the exact tensor and M is the reconstructed CP tensor.
+   This fit is loosely the proportion of the data described by the
+   CP model, i.e., a fit of 1 is perfect.
+   **/
   template<typename Tensor>
   class FitCheck{
   public:
@@ -172,6 +175,15 @@ namespace btas {
     }
   };
 
+  /**
+   \brief Class used to decide when ALS problem is converged
+   The "fit" is defined as \f$ 1 - \frac{\|X_1-full(M_1)\|}{\|X_1\|} -
+   \frac{\|X_2-full(M_2)\|}{\|X_2\|}\leq \epsilon\f$
+   where \f$ X_1 \f$ and \f$ X_2 \f$ are tensors coupled by a single mode
+   \f$ M_1 \f$ and \f$ M_2 \f$ are the coupled reconstructed CP tensors.
+   This fit is loosely the proportion of the data described by the
+   CP model, i.e., a fit of 1 is perfect.
+   **/
   template <typename Tensor>
   class CoupledFitCheck{
   public:

From 2f9e0c4bf34752a0458dc39f5cb47fb7dfc27499 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Sun, 26 Jan 2020 16:23:06 -0500
Subject: [PATCH 03/66] Format types.h and add LAPACKE_ENABLED definition

---
 btas/types.h | 74 ++++++++++++++++++++++++++--------------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/btas/types.h b/btas/types.h
index 0875971b..900225c2 100644
--- a/btas/types.h
+++ b/btas/types.h
@@ -12,43 +12,43 @@ extern "C" {
 #endif // __cplusplus
 
 #ifdef BTAS_HAS_CBLAS
-
-#if not defined(_CBLAS_HEADER) && not defined(_LAPACKE_HEADER)
-
-#ifdef _HAS_INTEL_MKL
-
-#include <mkl_cblas.h>
-#include <mkl_lapacke.h>
-
-#else  // _HAS_INTEL_MKL
-
-#include <cblas.h>
-// see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I
-#include <complex>
-#ifndef lapack_complex_float
-# define lapack_complex_float std::complex<float>
-#endif
-#ifndef lapack_complex_double
-# define lapack_complex_double std::complex<double>
-#endif
-#include <lapacke.h>
-
-#endif  // _HAS_INTEL_MKL
-
-#else  // _CBLAS_HEADER
-
-#include _CBLAS_HEADER
-// see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I
-#include <complex>
-#ifndef lapack_complex_float
-# define lapack_complex_float std::complex<float>
-#endif
-#ifndef lapack_complex_double
-# define lapack_complex_double std::complex<double>
-#endif
-#include _LAPACKE_HEADER
-
-#endif  // _CBLAS_HEADER
+# define LAPACKE_ENABLED
+# if not defined(_CBLAS_HEADER) && not defined(_LAPACKE_HEADER)
+
+#   ifdef _HAS_INTEL_MKL
+
+#     include <mkl_cblas.h>
+#     include <mkl_lapacke.h>
+
+#   else  // _HAS_INTEL_MKL
+
+#     include <cblas.h>
+      // see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I
+#     include <complex>
+#     ifndef lapack_complex_float
+#       define lapack_complex_float std::complex<float>
+#     endif // lapack_complex_float
+#     ifndef lapack_complex_double
+#       define lapack_complex_double std::complex<double>
+#     endif // lapack_complex_double
+#     include <lapacke.h>
+
+#   endif  // _HAS_INTEL_MKL
+
+# else  // _CBLAS_HEADER
+
+#   include _CBLAS_HEADER
+    // see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I
+#   include <complex>
+#   ifndef lapack_complex_float
+#     define lapack_complex_float std::complex<float>
+#   endif // lapack_complex_float
+#   ifndef lapack_complex_double
+#     define lapack_complex_double std::complex<double>
+#   endif // lapack_complex_double
+#   include _LAPACKE_HEADER
+
+# endif  // _CBLAS_HEADER
 
 #else  // BTAS_HAS_CBLAS
 

From 6fd9391e4c0b33f6963b7e6061da60ba70a03845 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Sun, 26 Jan 2020 16:23:24 -0500
Subject: [PATCH 04/66] Formatting types.h

---
 btas/types.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/btas/types.h b/btas/types.h
index 900225c2..4ed6d798 100644
--- a/btas/types.h
+++ b/btas/types.h
@@ -77,15 +77,15 @@ enum CBLAS_SIDE { CblasLeft, CblasRight };
 #ifndef HAVE_INTEL_MKL
 #ifndef lapack_complex_float
 # define lapack_complex_float  std::complex<float>
-#else
+#else // lapack_complex_float
 static_assert(sizeof(std::complex<float>)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex<float> do not match");
-#endif
+#endif // lapack_complex_float
 #ifndef lapack_complex_double
 # define lapack_complex_double std::complex<double>
-#else
+#else // lapack_complex_double
 static_assert(sizeof(std::complex<double>)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex<double> do not match");
-#endif
-#else
+#endif // lapack_complex_double
+#else // HAVE_INTEL_MKL
 // if calling direct need to cast to the MKL complex types
 # ifdef MKL_DIRECT_CALL
 #  include <mkl_types.h>
@@ -96,15 +96,15 @@ static_assert(sizeof(std::complex<double>)==sizeof(lapack_complex_double), "size
 #   define lapack_complex_double MKL_Complex16
 #  endif
 // else can call via F77 prototypes which don't need type conversion
-# else
+# else // MKL_DIRECT_CALL
 #  ifndef lapack_complex_float
 #   define lapack_complex_float  std::complex<float>
 #  endif
 #  ifndef lapack_complex_double
 #   define lapack_complex_double std::complex<double>
 #  endif
-# endif
-#endif
+# endif // MKL_DIRECT_CALL
+#endif // HAVE_INTEL_MKL
 
 namespace btas {
 

From ea0fd4018197728546ae087d8fe6589600b167ea Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Sun, 26 Jan 2020 18:10:53 -0500
Subject: [PATCH 05/66] Move generic pseudo inverse code to linear algebra
 file. (Needs slight update to get rid of rank variable)

---
 btas/generic/cp.h             | 69 -------------------------
 btas/generic/linear_algebra.h | 97 +++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+), 69 deletions(-)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index f9fe9c3f..b142448f 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -648,75 +648,6 @@ namespace btas{
         BTAS_EXCEPTION("Pseudo inverse failed" );
       }
     }
-
-    /// SVD referencing code from
-    /// http://www.netlib.org/lapack/explore-html/de/ddd/lapacke_8h_af31b3cb47f7cc3b9f6541303a2968c9f.html
-    /// Fast pseudo-inverse algorithm described in
-    /// https://arxiv.org/pdf/0804.4809.pdf
-
-    /// \param[in] a matrix to be inverted.
-    /// \return a^{\dagger} The pseudoinverse of the matrix a.
-    Tensor pseudoInverse(Tensor & a){
-      bool matlab = false;
-      auto R = A[0].extent(1);
-      Tensor s(Range{Range1{R}});
-      Tensor U(Range{Range1{R}, Range1{R}});
-      Tensor Vt(Range{Range1{R}, Range1{R}});
-
-      if(! matlab) {
-
-// btas has no generic SVD for MKL LAPACKE
-//        time1 = std::chrono::high_resolution_clock::now();
-#ifdef _HAS_INTEL_MKL
-        double worksize;
-        double *work = &worksize;
-        lapack_int lwork = -1;
-        lapack_int info = 0;
-
-        char A = 'A';
-
-        // Call dgesvd with lwork = -1 to query optimal workspace size:
-
-        info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R,
-                                   &worksize, lwork);
-        if (info != 0)
-        BTAS_EXCEPTION("SVD pseudo inverse failed");
-
-        lwork = (lapack_int) worksize;
-        work = (double *) malloc(sizeof(double) * lwork);
-
-        info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, work,
-                                   lwork);
-        if (info != 0)
-        BTAS_EXCEPTION("SVD pseudo inverse failed");
-
-        free(work);
-#else  // BTAS_HAS_CBLAS
-
-        gesvd('A', 'A', a, s, U, Vt);
-
-#endif
-
-        // Inverse the Singular values with threshold 1e-13 = 0
-        double lr_thresh = 1e-13;
-        Tensor s_inv(Range{Range1{R}, Range1{R}});
-        s_inv.fill(0.0);
-        for (auto i = 0; i < R; ++i) {
-          if (s(i) > lr_thresh)
-            s_inv(i, i) = 1 / s(i);
-          else
-            s_inv(i, i) = s(i);
-        }
-        s.resize(Range{Range1{R}, Range1{R}});
-
-        // Compute the matrix A^-1 from the inverted singular values and the U and
-        // V^T provided by the SVD
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, U, s_inv, 0.0, s);
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U);
-
-      }
-      return U;
-    }
   };
 
 };// namespace btas
diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index 5f936df9..973c625b 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -162,6 +162,103 @@ namespace btas{
     }
     return true;
   }
+
+/// Computes the eigenvalue decomposition of a matrix \c A and
+/// \param[in, out] A In: A reference matrix to be decomposed. Out:
+/// The eigenvectors of the matrix \c A.
+/// \param[in, out] lambda In: An empty vector with length greater than
+/// or equal to the largest mode of \c A. Out: The eigenvalues of the
+///  matrix \c A
+  template <typename Tensor>
+  void eigenvalue_decomp(Tensor & A, Tensor & lambda){
+#ifndef LAPACKE_ENABLED
+    BTAS_EXCEPTION("Using eigenvalue decomposition requires LAPACKE");
+#endif // LAPACKE_ENABLED
+    if(A.rank() > 2){
+      BTAS_EXCEPTION("Tensor rank > 2. Tensor A must be a matrix.");
+    }
+    auto lambda_length = lambda.size();
+    auto largest_mode_A = (A.extent(0) > A.extent(1) ? A.extent(0) : A.extent(1));
+    if(lambda_length < largest_mode_A){
+      BTAS_EXCEPTION("Volume of lambda must be greater than or equal to the largest mode of A");
+    }
+
+    auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', largest_mode_A,
+            A.data(), largest_mode_A, lambda.data());
+    if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
+  }
+
+/// SVD referencing code from
+/// http://www.netlib.org/lapack/explore-html/de/ddd/lapacke_8h_af31b3cb47f7cc3b9f6541303a2968c9f.html
+/// Fast pseudo-inverse algorithm described in
+/// https://arxiv.org/pdf/0804.4809.pdf
+
+/// \param[in] a matrix to be inverted.
+/// \return a^{\dagger} The pseudoinverse of the matrix a.
+template <typename Tensor>
+Tensor pseudoInverse(Tensor & a, int R) {
+#ifndef LAPACKE_ENABLED
+    BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE");
+#endif // LAPACKE_ENABLED
+  bool Cholesky = true;
+  bool fast = false;
+  Tensor s(Range{Range1{R}});
+  Tensor U(Range{Range1{R}, Range1{R}});
+  Tensor Vt(Range{Range1{R}, Range1{R}});
+
+  if(Cholesky){
+    Cholesky = false;
+  }
+  if(fast){
+
+  }
+  if (!Cholesky && !fast) {
+    double worksize;
+    double *work = &worksize;
+    lapack_int lwork = -1;
+    lapack_int info = 0;
+
+    char A = 'A';
+
+    // Call dgesvd with lwork = -1 to query optimal workspace size:
+
+    info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R,
+                               &worksize, lwork);
+    if (info != 0)
+    BTAS_EXCEPTION("SVD pseudo inverse failed");
+
+    lwork = (lapack_int) worksize;
+    work = (double *) malloc(sizeof(double) * lwork);
+
+    info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, work,
+                               lwork);
+    if (info != 0)
+    BTAS_EXCEPTION("SVD pseudo inverse failed");
+
+    free(work);
+
+    //gesvd('A', 'A', a, s, U, Vt);
+
+    // Inverse the Singular values with threshold 1e-13 = 0
+    double lr_thresh = 1e-13;
+    Tensor s_inv(Range{Range1{R}, Range1{R}});
+    s_inv.fill(0.0);
+    for (auto i = 0; i < R; ++i) {
+      if (s(i) > lr_thresh)
+        s_inv(i, i) = 1 / s(i);
+      else
+        s_inv(i, i) = s(i);
+    }
+    s.resize(Range{Range1{R}, Range1{R}});
+
+    // Compute the matrix A^-1 from the inverted singular values and the U and
+    // V^T provided by the SVD
+    gemm(CblasNoTrans, CblasNoTrans, 1.0, U, s_inv, 0.0, s);
+    gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U);
+
+  }
+  return U;
 }
 
+} // namespace btas
 #endif //BTAS_LINEAR_ALGEBRA_H

From 1842a332e1b1aa40e3b67b33c80a4687101d924a Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Sun, 26 Jan 2020 18:11:06 -0500
Subject: [PATCH 06/66] Linear algebra need cblas definitions

---
 btas/generic/linear_algebra.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index 973c625b..a5cff42d 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -6,6 +6,8 @@
 #define BTAS_LINEAR_ALGEBRA_H
 #include <btas/error.h>
 
+#ifdef BTAS_HAS_CBLAS
+
 namespace btas{
   /// Computes L of the LU decomposition of tensor \c A
 /// \param[in, out] A In: A reference matrix to be LU decomposed.  Out:
@@ -261,4 +263,5 @@ Tensor pseudoInverse(Tensor & a, int R) {
 }
 
 } // namespace btas
+#endif // BTAS_HAS_CBLAS
 #endif //BTAS_LINEAR_ALGEBRA_H

From 809b289dc2903192a38a72044827593af88fef59 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Sun, 26 Jan 2020 18:11:13 -0500
Subject: [PATCH 07/66] Formatting

---
 btas/generic/linear_algebra.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index a5cff42d..782f258e 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -138,7 +138,7 @@ namespace btas{
   bool Inverse_Matrix(Tensor & A){
 
 #ifndef LAPACKE_ENABLED
-    BTAS_EXCEPTION("Using this function requires LAPACKE");
+    BTAS_EXCEPTION("Using LU matrix inversion requires LAPACKE");
 #endif // LAPACKE_ENABLED
 
     if(A.rank() > 2){

From 7ad154ec888975cda3aa664a4193eb7177491671 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Sun, 26 Jan 2020 18:12:15 -0500
Subject: [PATCH 08/66] Use eig value decomposition from linear algebra. Use
 Pseudoinverse from linear algebra

---
 btas/generic/coupled_cp_als.h | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/btas/generic/coupled_cp_als.h b/btas/generic/coupled_cp_als.h
index 3304bb8c..f6dcdea6 100644
--- a/btas/generic/coupled_cp_als.h
+++ b/btas/generic/coupled_cp_als.h
@@ -23,6 +23,7 @@
 #include <btas/generic/converge_class.h>
 #include <btas/generic/rals_helper.h>
 #include <btas/generic/reconstruct.h>
+#include <btas/generic/linear_algebra.h>
 
 namespace btas{
 
@@ -78,7 +79,6 @@ namespace btas{
   public:
     using CP<Tensor,ConvClass>::A;
     using CP<Tensor,ConvClass>::ndim;
-    using CP<Tensor,ConvClass>::pseudoInverse;
     using CP<Tensor,ConvClass>::normCol;
     using CP<Tensor,ConvClass>::generate_KRP;
     using CP<Tensor,ConvClass>::generate_V;
@@ -183,7 +183,6 @@ namespace btas{
       // If its the first time into build and SVD_initial_guess
       // build and optimize the initial guess based on the left
       // singular vectors of the reference tensor.
-#ifdef _HAS_INTEL_MKL
       if (A.empty() && SVD_initial_guess) {
         if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD");
 
@@ -220,8 +219,7 @@ namespace btas{
             gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S);
 
             // Find the Singular vectors of the matrix using eigenvalue decomposition
-            auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data());
-            if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
+            eigenvalue_decomp(S, lambda);
 
             // Fill a factor matrix with the singular vectors with the largest corresponding singular
             // values
@@ -267,9 +265,6 @@ namespace btas{
         // Optimize this initial guess.
         ALS(SVD_rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI);
       }
-#else  // _HAS_INTEL_MKL
-      if (SVD_initial_guess) BTAS_EXCEPTION("Computing the SVD requires LAPACK");
-#endif // _HAS_INTEL_MKL
       // This loop keeps track of column dimension
       for (auto i = (A.empty()) ? 0 : A.at(0).extent(1); i < rank; i += step) {
         // This loop walks through the factor matrices
@@ -480,7 +475,7 @@ namespace btas{
         }
         // Finally Form the product of K * J^\dagger
         Tensor a0(coupled_dim, rank);
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, K, pseudoInverse(J1), 0.0, a0);
+        gemm(CblasNoTrans, CblasNoTrans, 1.0, K, pseudoInverse(J1, rank), 0.0, a0);
         this->normCol(a0);
         A[0] = a0;
       }
@@ -556,7 +551,7 @@ namespace btas{
         else if(n == this->ndim - 1)
           detail::set_MtKRPR(converge_test, contract_tensor);
         Tensor an(skip_dim, rank);
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(G), 0.0, an);
+        gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(G, rank), 0.0, an);
         this->normCol(an);
         A[n] = an;
       }

From 9abaf983357b937f395498aa201b8a4c49e9c6ec Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 13:42:24 -0500
Subject: [PATCH 09/66] Add linear algebra to cp.h

---
 btas/generic/cp.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index b142448f..0be610a6 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -21,6 +21,7 @@
 #include <btas/generic/converge_class.h>
 #include <btas/generic/rals_helper.h>
 #include <btas/generic/reconstruct.h>
+#include <btas/generic/linear_algebra.h>
 
 #ifdef _HAS_INTEL_MKL
 #include <mkl_trans.h>

From 9aa87f1ef81dd5c35cdcceb8bee302e7ea53be60 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 13:43:37 -0500
Subject: [PATCH 10/66] Simplify the pseudoInverse function.  Moving all
 pseudoInverse implementations to linear algebra file

---
 btas/generic/cp.h | 46 ++--------------------------------------------
 1 file changed, 2 insertions(+), 44 deletions(-)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index 0be610a6..8aea42d1 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -569,58 +569,16 @@ namespace btas{
     /// \return The pseudoinverse of the matrix V.
     Tensor pseudoInverse(int n, int R, bool & fast_pI, double lambda = 0.0) {
       // CP_ALS method requires the pseudoinverse of matrix V
-#ifdef _HAS_INTEL_MKL
-      if(fast_pI) {
-        auto a = this->generate_V(n, R, lambda);
-        Tensor temp(R, R), inv(R, R);
-        // compute V^{\dag} = (A^T A) ^{-1} A^T
-        gemm(CblasTrans, CblasNoTrans, 1.0, a, a, 0.0, temp);
-        fast_pI = Inverse_Matrix(temp);
-        if(fast_pI) {
-          gemm(CblasNoTrans, CblasTrans, 1.0, temp, a, 0.0, inv);
-          return inv;
-        }
-        else{
-          std::cout << "Fast pseudo-inverse failed reverting to normal pseudo-inverse" << std::endl;
-        }
-      }
-#else
-      fast_pI = false;
-#endif // _HAS_INTEL_MKL
+      auto a = this->generate_V(n, R, lambda);
 
       if(!fast_pI) {
-        auto a = this->generate_V(n, R, lambda);
         Tensor s(Range{Range1{R}});
         Tensor U(Range{Range1{R}, Range1{R}});
         Tensor Vt(Range{Range1{R}, Range1{R}});
 
 // btas has no generic SVD for MKL LAPACKE
 //        time1 = std::chrono::high_resolution_clock::now();
-#ifdef _HAS_INTEL_MKL
-        double worksize;
-        double *work = &worksize;
-        lapack_int lwork = -1;
-        lapack_int info = 0;
-
-        char A = 'A';
-
-        // Call dgesvd with lwork = -1 to query optimal workspace size:
-
-        info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R,
-                                   &worksize, lwork);
-        if (info != 0)
-          BTAS_EXCEPTION("SVD pseudo inverse failed");
-
-        lwork = (lapack_int) worksize;
-        work = (double *) malloc(sizeof(double) * lwork);
-
-        info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, work,
-                                   lwork);
-        if (info != 0)
-          BTAS_EXCEPTION("SVD pseudo inverse failed");
-
-        free(work);
-#else  // BTAS_HAS_CBLAS
+#ifdef LAPACKE_ENABLED
 
         gesvd('A', 'A', a, s, U, Vt);
 

From 88207cf040cbcbd65017f5393a2d5296aca20adc Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 13:44:44 -0500
Subject: [PATCH 11/66] Pseudoinverse helper function that first calls Cholesky
 (Ax=B) solver. If that fails calls LU inversion function then SVD.

---
 btas/generic/cp.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index 8aea42d1..4fc2bdc9 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -607,8 +607,29 @@ namespace btas{
         BTAS_EXCEPTION("Pseudo inverse failed" );
       }
     }
-  };
 
+    /// Trying to solve Ax = B
+    /// First try Cholesky to solve this problem directly
+    /// second tryfast pseudo-inverse algorithm described in
+    /// https://arxiv.org/pdf/0804.4809.pdf
+    /// If all else fails use SVD
+    Tensor psuedoinverse_helper(int mode_of_A, bool & fast_pI,
+                                bool & cholesky, Tensor B = Tensor(),
+                                double lambda = 0.0){
+      if(cholesky && B.empty()){
+        BTAS_EXCEPTION("To compute the Cholesky, one must provide a LHS tensor (Ax = B)");
+      }
+
+      auto rank = A[0].extent(1);
+      auto a = this->generate_V(mode_of_A, rank, lambda);
+
+      if(cholesky) {
+        cholesky = cholesky_inverse(a, B);
+        return B;
+      }
+      return pseudoInverse_impl(a, fast_pI);
+    }
+  };
 };// namespace btas
 
 #endif //BTAS_GENERIC_CP_H

From 790b673032d1813fd9b754282e98519467662d64 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 13:45:26 -0500
Subject: [PATCH 12/66] Rank is defined as the smallest mode in a matrix not
 the largest

---
 btas/generic/linear_algebra.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index 782f258e..a2847531 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -180,13 +180,13 @@ namespace btas{
       BTAS_EXCEPTION("Tensor rank > 2. Tensor A must be a matrix.");
     }
     auto lambda_length = lambda.size();
-    auto largest_mode_A = (A.extent(0) > A.extent(1) ? A.extent(0) : A.extent(1));
-    if(lambda_length < largest_mode_A){
+    auto smallest_mode_A = (A.extent(0) < A.extent(1) ? A.extent(0) : A.extent(1));
+    if(lambda_length < smallest_mode_A){
       BTAS_EXCEPTION("Volume of lambda must be greater than or equal to the largest mode of A");
     }
 
-    auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', largest_mode_A,
-            A.data(), largest_mode_A, lambda.data());
+    auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', smallest_mode_A,
+            A.data(), smallest_mode_A, lambda.data());
     if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
   }
 

From 65d1a19c033ae6a7a3b016e4b098bac57e0a083e Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 13:46:14 -0500
Subject: [PATCH 13/66] Created Cholesky inverse function which computes
 inverse of A to solve Ax = B.

---
 btas/generic/linear_algebra.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index a2847531..fb02c977 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -190,6 +190,34 @@ namespace btas{
     if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
   }
 
+  /// Solving Ax = B using a Cholesky decomposition
+  /// \param[in] A The right-hand side of the linear equation
+  /// to be inverted using Cholesky
+  /// \param[in, out] B In: The left-hand side of the linear equation
+  /// out: The solution x = A^{-1}B
+template <typename Tensor>
+bool cholesky_inverse(Tensor & A, Tensor & B){
+#ifndef LAPACKE_ENABLED
+  BTAS_EXCEPTION("Cholesky inverse function requires LAPACKE")
+#endif
+    // This method computes the inverse quickly for a square matrix
+    // based on MATLAB's implementation of A / B operator.
+    auto rank = B.extent(1);
+    int LDB = B.extent(0);
+
+    btas::Tensor<int, DEFAULT::range, varray<int> > piv(rank);
+    piv.fill(0);
+    auto info = LAPACKE_dgesv(CblasColMajor, rank, LDB, A.data(), rank, piv.data(), B.data(), rank);
+    if (info == 0) {
+      return true;
+    }
+    else{
+      // If inverse fails resort to the pseudoinverse
+      std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl;
+      return false;
+    }
+}
+
 /// SVD referencing code from
 /// http://www.netlib.org/lapack/explore-html/de/ddd/lapacke_8h_af31b3cb47f7cc3b9f6541303a2968c9f.html
 /// Fast pseudo-inverse algorithm described in

From cfb11551fe9c83cb1dd9b59b9623b2ca10383455 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 13:49:47 -0500
Subject: [PATCH 14/66] Simplify pseudoinverse function. First try LU inversion
 and if that fails use SVD.

---
 btas/generic/linear_algebra.h | 68 ++++++++++++++---------------------
 1 file changed, 27 insertions(+), 41 deletions(-)

diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index fb02c977..0b0cb7f9 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -226,69 +226,55 @@ bool cholesky_inverse(Tensor & A, Tensor & B){
 /// \param[in] a matrix to be inverted.
 /// \return a^{\dagger} The pseudoinverse of the matrix a.
 template <typename Tensor>
-Tensor pseudoInverse(Tensor & a, int R) {
+Tensor pseudoInverse_impl(Tensor & a, bool & fast_pI) {
 #ifndef LAPACKE_ENABLED
     BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE");
 #endif // LAPACKE_ENABLED
-  bool Cholesky = true;
-  bool fast = false;
-  Tensor s(Range{Range1{R}});
-  Tensor U(Range{Range1{R}, Range1{R}});
-  Tensor Vt(Range{Range1{R}, Range1{R}});
-
-  if(Cholesky){
-    Cholesky = false;
-  }
-  if(fast){
-
-  }
-  if (!Cholesky && !fast) {
-    double worksize;
-    double *work = &worksize;
-    lapack_int lwork = -1;
-    lapack_int info = 0;
-
-    char A = 'A';
-
-    // Call dgesvd with lwork = -1 to query optimal workspace size:
-
-    info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R,
-                               &worksize, lwork);
-    if (info != 0)
-    BTAS_EXCEPTION("SVD pseudo inverse failed");
 
-    lwork = (lapack_int) worksize;
-    work = (double *) malloc(sizeof(double) * lwork);
-
-    info = LAPACKE_dgesvd_work(LAPACK_ROW_MAJOR, A, A, R, R, a.data(), R, s.data(), U.data(), R, Vt.data(), R, work,
-                               lwork);
-    if (info != 0)
-    BTAS_EXCEPTION("SVD pseudo inverse failed");
+    if (a.rank() > 2) {
+      BTAS_EXCEPTION("PseudoInverse can only be computed on a matrix");
+    }
 
-    free(work);
+    auto row = a.extent(0), col = a.extent(1);
+    auto rank = (row < col ? row : col);
+
+    if (fast_pI) {
+      Tensor temp(col, col), inv(col, row);
+      // compute V^{\dag} = (A^T A) ^{-1} A^T
+      gemm(CblasTrans, CblasNoTrans, 1.0, a, a, 0.0, temp);
+      fast_pI = Inverse_Matrix(temp);
+      if (fast_pI) {
+        gemm(CblasNoTrans, CblasTrans, 1.0, temp, a, 0.0, inv);
+        return inv;
+      } else {
+        std::cout << "Fast pseudo-inverse failed reverting to normal pseudo-inverse" << std::endl;
+      }
+    }
+    Tensor s(Range{Range1{rank}});
+    Tensor U(Range{Range1{row}, Range1{row}});
+    Tensor Vt(Range{Range1{col}, Range1{col}});
 
-    //gesvd('A', 'A', a, s, U, Vt);
+    gesvd('A', 'A', a, s, U, Vt);
 
     // Inverse the Singular values with threshold 1e-13 = 0
     double lr_thresh = 1e-13;
-    Tensor s_inv(Range{Range1{R}, Range1{R}});
+    Tensor s_inv(Range{Range1{row}, Range1{col}});
     s_inv.fill(0.0);
-    for (auto i = 0; i < R; ++i) {
+    for (auto i = 0; i < rank; ++i) {
       if (s(i) > lr_thresh)
         s_inv(i, i) = 1 / s(i);
       else
         s_inv(i, i) = s(i);
     }
-    s.resize(Range{Range1{R}, Range1{R}});
+    s.resize(Range{Range1{row}, Range1{col}});
 
     // Compute the matrix A^-1 from the inverted singular values and the U and
     // V^T provided by the SVD
     gemm(CblasNoTrans, CblasNoTrans, 1.0, U, s_inv, 0.0, s);
     gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U);
 
+    return U;
   }
-  return U;
-}
 
 } // namespace btas
 #endif // BTAS_HAS_CBLAS

From c4412f49b84783b6cb5a86a649d703f8ed26a406 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 14:01:17 -0500
Subject: [PATCH 15/66] Use eigenvalue decomposition function from linear
 algebra file

---
 btas/generic/cp_als.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index a7086926..972a177e 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -401,7 +401,6 @@ namespace btas{
       // If its the first time into build and SVD_initial_guess
       // build and optimize the initial guess based on the left
       // singular vectors of the reference tensor.
-#ifdef _HAS_INTEL_MKL
       if (A.empty() && SVD_initial_guess) {
         if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD");
 
@@ -437,8 +436,9 @@ namespace btas{
           gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S);
 
           // Find the Singular vectors of the matrix using eigenvalue decomposition
-          auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data());
-          if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
+          //auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data());
+          //if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
+          eigenvalue_decomp(S, lambda);
 
           // Fill a factor matrix with the singular vectors with the largest corresponding singular
           // values
@@ -486,9 +486,6 @@ namespace btas{
         // Optimize this initial guess.
         ALS(SVD_rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI);
       }
-#else  // _HAS_INTEL_MKL
-      if (SVD_initial_guess) BTAS_EXCEPTION("Computing the SVD requires LAPACK");
-#endif // _HAS_INTEL_MKL
       // This loop keeps track of column dimension
       bool opt_in_for_loop = false;
       for (auto i = (A.empty()) ? 0 : A.at(0).extent(1); i < rank; i += step) {

From aabdf5f3a75544c08f9488a4da204966d4c135e7 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 14:02:17 -0500
Subject: [PATCH 16/66] Use the new pseudoinverse function from linear algebra
 in the conventional ALS

---
 btas/generic/cp_als.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 972a177e..1eca6b24 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -633,7 +633,7 @@ namespace btas{
           } else if (dir) {
             direct(i, rank, fast_pI, matlab, converge_test);
           } else {
-            update_w_KRP(i, rank, fast_pI, converge_test);
+            update_w_KRP(i, rank, fast_pI, matlab, converge_test);
           }
         }
         //std::cout << count << "\t";
@@ -662,7 +662,8 @@ namespace btas{
     /// \param[in, out] matlab If \c fast_pI = true then try to solve VA = B instead of taking pseudoinverse
     /// in the same manner that matlab would compute the inverse.
     /// \param[in] converge_test test to see if the ALS is converged
-    void update_w_KRP(int n, int rank, bool & fast_pI, ConvClass & converge_test) {
+    void update_w_KRP(int n, int rank, bool & fast_pI,
+            bool & matlab, ConvClass & converge_test) {
       Tensor temp(A[n].extent(0), rank);
       Tensor an(A[n].range());
 
@@ -699,23 +700,25 @@ namespace btas{
       swap_to_first(tensor_ref, n, true);
 
 #else  // BTAS_HAS_CBLAS
-
       // without MKL program cannot perform the swapping algorithm, must compute
       // flattened intermediate
       gemm(CblasNoTrans, CblasNoTrans, 1.0, flatten(tensor_ref, n), this->generate_KRP(n, rank, true), 0.0, temp);
 #endif
 
       detail::set_MtKRP(converge_test, temp);
-//      if(typeid(converge_test) == typeid(btas::FitCheck<Tensor>)) {
-//        converge_test.set_MtKRP(temp);
-//      }
+
       // contract the product from above with the pseudoinverse of the Hadamard
       // produce an optimize factor matrix
-      gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, this->pseudoInverse(n, rank, fast_pI), 0.0, an);
+      auto pInv = this->psuedoinverse_helper(n, fast_pI, matlab, temp);
+      if(!matlab) {
+        gemm(CblasNoTrans, CblasNoTrans, 1.0, temp,
+             pInv, 0.0, an);
+      } else{
+        an = pInv;
+      }
 
       // compute the difference between this new factor matrix and the previous
       // iteration
-      //for (auto l = 0; l < rank; ++l) A[ndim](l) = normCol(an, l);
       this->normCol(an);
 
       // Replace the old factor matrix with the new optimized result

From b7973a6eb4ae0e78f9b98f03785bdbb43dcc979b Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 14:06:24 -0500
Subject: [PATCH 17/66] Use the new pseudoinverse function from linear algebra

---
 btas/generic/cp_als.h | 33 +++++----------------------------
 1 file changed, 5 insertions(+), 28 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 1eca6b24..9f8032de 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -874,36 +874,13 @@ namespace btas{
       n = last_dim ? ndim - 1: n;
       // multiply resulting matrix temp by pseudoinverse to calculate optimized
       // factor matrix
-
       detail::set_MtKRP(converge_test, temp);
-#ifdef _HAS_INTEL_MKL
-      if(fast_pI && matlab) {
-        // This method computes the inverse quickly for a square matrix
-        // based on MATLAB's implementation of A / B operator.
-        btas::Tensor<int, DEFAULT::range, varray<int> > piv(rank);
-        piv.fill(0);
-
-        auto a = this->generate_V(n, rank);
-        int LDB = temp.extent(0);
-        auto info = LAPACKE_dgesv(CblasColMajor, rank, LDB, a.data(), rank, piv.data(), temp.data(), rank);
-        if (info == 0) {
-            an = temp;
-        }
-        else{
-          // If inverse fails resort to the pseudoinverse
-          std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl;
-          matlab = false;
-        }
-      }
-      if(!fast_pI || ! matlab){
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, this->pseudoInverse(n, rank, fast_pI), 0.0, an);
-      }
-#else
-      matlab = false;
-      if(!fast_pI || !matlab){
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, this->pseudoInverse(n, rank, fast_pI), 0.0, an);
+      auto pInv = this->psuedoinverse_helper(n, fast_pI, matlab, temp);
+      if(!matlab) {
+        gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pInv, 0.0, an);
+      } else{
+        an = pInv;
       }
-#endif
 
       // Normalize the columns of the new factor matrix and update
       this->normCol(an);

From 4b2ad6d03adc5cebb484fce90e8f264aca931c63 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 14:07:13 -0500
Subject: [PATCH 18/66] PALS uses build function which has lapacke gaurd in
 eigenvalue function, so no longer need _HAS_INTEL_MKL

---
 btas/generic/cp_als.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 9f8032de..f0685e13 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -120,8 +120,6 @@ namespace btas{
 
     ~CP_ALS() = default;
 
-#ifdef _HAS_INTEL_MKL
-
     /// \brief Computes decomposition of the order-N tensor \c tensor
     /// with rank = \c RankStep * \c panels *  max_dim(reference_tensor) + max_dim(reference_tensor)
     /// Initial guess for factor matrices start at rank = max_dim(reference_tensor)
@@ -210,10 +208,10 @@ namespace btas{
         }
         count++;
       }
-      //std::cout << "Number of ALS iterations was " << this->num_ALS << std::endl;
       return epsilon;
     }
 
+#ifdef _HAS_INTEL_MKL
     /// \brief Computes an approximate core tensor using
     /// Tucker decomposition, e.g.
     ///  \f$ T(I_1 \dots I_N) \approx T(R_1 \dots R_N) U^{(1)} (R_1, I_1) \dots U^{(N)} (R_N, I_N) \f$

From e190a2770eea0ad77d0a2fdf0a2d4a69052c8db0 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 14:41:25 -0500
Subject: [PATCH 19/66] Reminder to write documentation for pseudoinverse
 function

---
 btas/generic/cp.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index 4fc2bdc9..a0590078 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -613,8 +613,9 @@ namespace btas{
     /// second tryfast pseudo-inverse algorithm described in
     /// https://arxiv.org/pdf/0804.4809.pdf
     /// If all else fails use SVD
-    Tensor psuedoinverse_helper(int mode_of_A, bool & fast_pI,
-                                bool & cholesky, Tensor B = Tensor(),
+    // TODO write docs here
+    void psuedoinverse_helper(int mode_of_A, bool & fast_pI,
+                                bool & cholesky, Tensor & B = Tensor(),
                                 double lambda = 0.0){
       if(cholesky && B.empty()){
         BTAS_EXCEPTION("To compute the Cholesky, one must provide a LHS tensor (Ax = B)");

From 907d22aec9d4f9852484fcdbb53f3d33c57809f3 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 14:42:08 -0500
Subject: [PATCH 20/66] Now both cholesky and other compute A^{-1}B and the
 output is written into the B tensor

---
 btas/generic/cp.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index a0590078..d6009674 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -626,9 +626,12 @@ namespace btas{
 
       if(cholesky) {
         cholesky = cholesky_inverse(a, B);
-        return B;
+        return;
       }
-      return pseudoInverse_impl(a, fast_pI);
+      auto pInv = pseudoInverse_impl(a, fast_pI);
+      Tensor an(B.extent(0), rank);
+      gemm(CblasNoTrans, CblasNoTrans, 1.0, B, pInv, 0.0, an);
+      B = an;
     }
   };
 };// namespace btas

From a373c1c4151e51aa05fadb277c868a21556e900c Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 14:42:29 -0500
Subject: [PATCH 21/66] Remove unused code

---
 btas/generic/cp_als.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index f0685e13..0e0ff59e 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -434,8 +434,6 @@ namespace btas{
           gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S);
 
           // Find the Singular vectors of the matrix using eigenvalue decomposition
-          //auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data());
-          //if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
           eigenvalue_decomp(S, lambda);
 
           // Fill a factor matrix with the singular vectors with the largest corresponding singular

From da68deceb0d2d3ff1531ea659edaf246a34becb0 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 14:43:01 -0500
Subject: [PATCH 22/66] Pseudoinverse now returns A^{-1}B so no need to compute
 it here.

---
 btas/generic/cp_als.h | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 0e0ff59e..a50c845c 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -705,20 +705,14 @@ namespace btas{
 
       // contract the product from above with the pseudoinverse of the Hadamard
       // produce an optimize factor matrix
-      auto pInv = this->psuedoinverse_helper(n, fast_pI, matlab, temp);
-      if(!matlab) {
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, temp,
-             pInv, 0.0, an);
-      } else{
-        an = pInv;
-      }
+      this->psuedoinverse_helper(n, fast_pI, matlab, temp);
 
       // compute the difference between this new factor matrix and the previous
       // iteration
-      this->normCol(an);
+      this->normCol(temp);
 
       // Replace the old factor matrix with the new optimized result
-      A[n] = an;
+      A[n] = temp;
     }
 
 
@@ -871,16 +865,12 @@ namespace btas{
       // multiply resulting matrix temp by pseudoinverse to calculate optimized
       // factor matrix
       detail::set_MtKRP(converge_test, temp);
-      auto pInv = this->psuedoinverse_helper(n, fast_pI, matlab, temp);
-      if(!matlab) {
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pInv, 0.0, an);
-      } else{
-        an = pInv;
-      }
+      // Temp is then rewritten with unnormalized new A[n] matrix
+      this->psuedoinverse_helper(n, fast_pI, matlab, temp);
 
       // Normalize the columns of the new factor matrix and update
-      this->normCol(an);
-      A[n] = an;
+      this->normCol(temp);
+      A[n] = temp;
     }
 
   };

From 7e1d574bf9cf2df7804e5df99f71bf8626e6fc33 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:05:33 -0500
Subject: [PATCH 23/66] Remove _HAS_INTEL_MKL and use the eigenvalue
 decomposition function from linear algebra

---
 btas/generic/cp_rals.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h
index e1ffa309..1279ec74 100644
--- a/btas/generic/cp_rals.h
+++ b/btas/generic/cp_rals.h
@@ -416,7 +416,6 @@ namespace btas{
       // If its the first time into build and SVD_initial_guess
       // build and optimize the initial guess based on the left
       // singular vectors of the reference tensor.
-#ifdef _HAS_INTEL_MKL
       if (A.empty() && SVD_initial_guess) {
         if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD");
 
@@ -452,8 +451,7 @@ namespace btas{
           gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S);
 
           // Find the Singular vectors of the matrix using eigenvalue decomposition
-          auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data());
-          if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
+          eigenvalue_decomp(S, lambda);
 
           // Fill a factor matrix with the singular vectors with the largest corresponding singular
           // values
@@ -503,9 +501,6 @@ namespace btas{
         // Optimize this initial guess.
         ALS(SVD_rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI);
       }
-#else  // _HAS_INTEL_MKL
-      if (SVD_initial_guess) BTAS_EXCEPTION("Computing the SVD requires LAPACK");
-#endif // _HAS_INTEL_MKL
       // This loop keeps track of column dimension
       bool opt_in_for_loop = false;
       for (auto i = (A.empty()) ? 0 : A.at(0).extent(1); i < rank; i += step) {

From 9124c580aa6794168b04f961041258e1c6bb981b Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:06:20 -0500
Subject: [PATCH 24/66] Use new pseudoinverse function and add cholesky
 decomposition method to conventional ALS

---
 btas/generic/cp_rals.h | 52 ++++++++++--------------------------------
 1 file changed, 12 insertions(+), 40 deletions(-)

diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h
index 1279ec74..2590674e 100644
--- a/btas/generic/cp_rals.h
+++ b/btas/generic/cp_rals.h
@@ -657,7 +657,7 @@ namespace btas{
             if (dir) {
               direct(i, rank, fast_pI, matlab, lambda[i], s, converge_test);
             } else {
-              update_w_KRP(i, rank, fast_pI, lambda[i], s, converge_test);
+              update_w_KRP(i, rank, fast_pI, matlab, lambda[i], s, converge_test);
             }
             lambda[i] = (lambda[i] * (s * s) / (s0 * s0)) * alpha + (1 - alpha) * lambda[i];
           }
@@ -687,7 +687,8 @@ namespace btas{
     /// \param[in] lambda regularization parameter
     /// \param[in, out] s regularization step size, returns updated stepsize based on ALS iteration
     /// \param[in] converge_test test to see if the ALS is converged
-    void update_w_KRP(int n, int rank, bool & fast_pI, double lambda, double & s, ConvClass & converge_test) {
+    void update_w_KRP(int n, int rank, bool & fast_pI, bool & matlab,
+            double lambda, double & s, ConvClass & converge_test) {
       Tensor temp(A[n].extent(0), rank);
       Tensor an(A[n].range());
 
@@ -740,18 +741,17 @@ namespace btas{
       }
       // contract the product from above with the pseudoinverse of the Hadamard
       // produce an optimize factor matrix
-      gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pseudoInverse(n, rank, fast_pI, lambda), 0.0, an);
+      this->psuedoinverse_helper(n, fast_pI, matlab, temp, lambda);
 
       // compute the difference between this new factor matrix and the previous
       // iteration
-      //for (auto l = 0; l < rank; ++l) A[ndim](l) = normCol(an, l);
-      normCol(an);
+      normCol(temp);
 
       // Compute the value s after normalizing the columns
-      s = helper(n, an);
+      s = helper(n, temp);
 
       // Replace the old factor matrix with the new optimized result
-      A[n] = an;
+      A[n] = temp;
     }
 
     /// Computes an optimized factor matrix holding all others constant.
@@ -910,44 +910,16 @@ namespace btas{
       if(typeid(converge_test) == typeid(btas::FitCheck<Tensor>)) {
         converge_test.set_MtKRP(temp);
       }
-
-#ifdef _HAS_INTEL_MKL
-      if(fast_pI && matlab) {
-        // This method computes the inverse quickly for a square matrix
-        // based on MATLAB's implementation of A / B operator.
-        btas::Tensor<int, DEFAULT::range, varray<int> > piv(rank);
-        piv.fill(0);
-
-        auto a = generate_V(n, rank, lambda);
-        int LDB = temp.extent(0);
-        auto info = LAPACKE_dgesv(CblasColMajor, rank, LDB, a.data(), rank, piv.data(), temp.data(), rank);
-        if (info == 0) {
-          an = temp;
-        }
-        else{
-          // If inverse fails resort to the pseudoinverse
-          std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl;
-          matlab = false;
-        }
-      }
-      if(!fast_pI || ! matlab){
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pseudoInverse(n, rank, fast_pI, lambda), 0.0, an);
-      }
-#else
-      matlab = false;
-      if(!fast_pI || !matlab){
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, temp, pseudoInverse(n, rank, fast_pI, lambda), 0.0, an);
-      }
-#endif
-
+      
+      this->psuedoinverse_helper(n, fast_pI, matlab, temp, lambda);
 
       // Normalize the columns of the new factor matrix and update
-      normCol(an);
+      normCol(temp);
 
       // Compute S after column normalization for RALS
-      s = helper(n, an);
+      s = helper(n, temp);
 
-      A[n] = an;
+      A[n] = temp;
     }
 
   };

From 5451e31fd232eb507c36bd80143b1d283d19f375 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:27:52 -0500
Subject: [PATCH 25/66] Remove _HAS_INTEL_MKL guard from PALS

---
 btas/generic/cp.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index d6009674..8f2781a5 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -302,7 +302,6 @@ namespace btas{
       return epsilon;
     }
 
-#ifdef _HAS_INTEL_MKL
     /// virtual function implemented in solver
     /// Computes decomposition of the order-N tensor \c tensor
     /// with rank = \c RankStep * \c panels *  max_dim(reference_tensor) + max_dim(reference_tensor)
@@ -326,7 +325,6 @@ namespace btas{
     /// false && ConvClass != FitCheck.
     virtual double compute_PALS(std::vector<ConvClass> & converge_list, double RankStep = 0.5, int panels = 4,
                          int max_als = 20,bool fast_pI = false, bool calculate_epsilon = false, bool direct = true) = 0;
-#endif // _HAS_INTEL_MKL
 
     /// returns the rank \c rank optimized factor matrices
     /// \return Factor matrices stored in a vector. For example, a order-3

From 9486efe0ce25554f39ea41f811df9e66f99527e5 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:28:07 -0500
Subject: [PATCH 26/66] Formatting

---
 btas/generic/cp_df_als.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h
index fd580b39..ebd8c044 100644
--- a/btas/generic/cp_df_als.h
+++ b/btas/generic/cp_df_als.h
@@ -131,7 +131,6 @@ namespace btas{
 
     ~CP_DF_ALS() = default;
 
-#ifdef _HAS_INTEL_MKL
     /// \brief Computes decomposition of the order-N tensor \c tensor
     /// with rank = \c RankStep * \c panels *  max_dim(reference_tensor) + max_dim(reference_tensor)
     /// Initial guess for factor matrices start at rank = max_dim(reference_tensor)
@@ -223,12 +222,9 @@ namespace btas{
           ALS(rank_new, converge_test, max_als, calculate_epsilon, epsilon, fast_pI);
         }
         count++;
-        //if (count + 1 == panels) max_als = 1000;
       }
-      //std::cout << "Number of ALS iterations was " << this->num_ALS << std::endl;
       return epsilon;
     }
-#endif // _HAS_INTEL_MKL
 
   protected:
     Tensor &tensor_ref_left;        // Left connected tensor

From 3c1583e276bdaa91563b58e77179059ac6d74722 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:28:30 -0500
Subject: [PATCH 27/66] Use the Eigenvalue decomposition from linear algebra in
 build

---
 btas/generic/cp_df_als.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h
index ebd8c044..857c9bfd 100644
--- a/btas/generic/cp_df_als.h
+++ b/btas/generic/cp_df_als.h
@@ -267,7 +267,6 @@ namespace btas{
         // If its the first time into build and SVD_initial_guess
         // build and optimize the initial guess based on the left
         // singular vectors of the reference tensor.
-#ifdef _HAS_INTEL_MKL
         if (A.empty() && SVD_initial_guess) {
           if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD");
 
@@ -336,8 +335,7 @@ namespace btas{
             gemm(CblasNoTrans, CblasTrans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S);
 
             // Find the Singular vectors of the matrix using eigenvalue decomposition
-            auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', R, S.data(), R, lambda.data());
-            if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
+            eigenvalue_decomp(S, lambda);
 
             // Fill a factor matrix with the singular vectors with the largest corresponding singular
             // values
@@ -379,9 +377,6 @@ namespace btas{
           // Optimize this initial guess.
           ALS(SVD_rank, converge_test, max_als, calculate_epsilon, epsilon, fast_pI);
         }
-#else  // _HAS_INTEL_MKL
-        if (SVD_initial_guess) BTAS_EXCEPTION("Computing the SVD requires LAPACK");
-#endif // _HAS_INTEL_MKL
         // This loop keeps track of column dimension
         bool opt_in_for_loop = false;
         for (auto i = (A.empty()) ? 0 : A.at(0).extent(1); i < rank; i += step) {

From d4dfb8cd0598452ed2f9480f7aa7c6ee3d6c5749 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:28:40 -0500
Subject: [PATCH 28/66] Remove comments

---
 btas/generic/cp_df_als.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h
index 857c9bfd..c3d4f856 100644
--- a/btas/generic/cp_df_als.h
+++ b/btas/generic/cp_df_als.h
@@ -475,7 +475,6 @@ namespace btas{
       for(int i = 1; i < ndimL; ++i){
         auto & tensor_ref = tensor_ref_left;
         Tensor a(Range{Range1{tensor_ref.extent(i)}, Range1{rank}});
-        //std::uniform_int_distribution<unsigned int> distribution(0, std::numeric_limits<unsigned int>::max() - 1);
         for(auto iter = a.begin(); iter != a.end(); ++iter){
           *(iter) = distribution(generator);
         }
@@ -485,7 +484,6 @@ namespace btas{
         auto & tensor_ref = tensor_ref_right;
 
         Tensor a(tensor_ref.extent(i), rank);
-        //std::uniform_int_distribution<unsigned int> distribution(0, std::numeric_limits<unsigned int>::max() - 1);
         for(auto iter = a.begin(); iter != a.end(); ++iter){
           *(iter) = distribution(generator);
         }
@@ -692,7 +690,6 @@ namespace btas{
       auto a_dim = leftTensor ? contract_dim : ndim - 1;
       auto offset = 0;
 
-      // TODO fix the hadamard contraction loop
       // go through hadamard contract on all dimensions excluding rank (will skip one dimension)
       for(int i = 0; i < ndimCurr - 2; ++i, --contract_dim, --a_dim){
         auto contract_size = dims[contract_dim];

From 4cd469f06de9b9b6455eb965b5c0c57963133b6b Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:30:15 -0500
Subject: [PATCH 29/66] Add fast_pI variable to direct function and use new
 pseudoinverse function

---
 btas/generic/cp_df_als.h | 39 ++++++---------------------------------
 1 file changed, 6 insertions(+), 33 deletions(-)

diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h
index c3d4f856..ff8faf0a 100644
--- a/btas/generic/cp_df_als.h
+++ b/btas/generic/cp_df_als.h
@@ -533,7 +533,7 @@ namespace btas{
         for (auto i = 0; i < ndim; i++) {
           auto tmp = symmetries[i];
           if(tmp == i) {
-            direct(i, rank, matlab, converge_test);
+            direct(i, rank, fast_pI, matlab, converge_test);
           }
           else if(tmp < i){
             A[i] = A[tmp];
@@ -574,7 +574,7 @@ namespace btas{
     /// in the same manner that matlab would compute the inverse.
     /// return if computing the inverse in this was was successful
     /// \param[in] converge_test test to see if the ALS is converged
-    void direct(int n, int rank, bool & matlab, ConvClass & converge_test) {
+    void direct(int n, int rank, bool & fast_pI, bool & matlab, ConvClass & converge_test) {
 
       // Determine if n is in the left or the right tensor
       bool leftTensor = n < (ndimL - 1);
@@ -774,43 +774,16 @@ namespace btas{
       // multiply resulting matrix temp by pseudoinverse to calculate optimized
       // factor matrix
       //t1 = std::chrono::high_resolution_clock::now();
-#ifdef _HAS_INTEL_MKL
-      if(matlab) {
-        // This method computes the inverse quickly for a square matrix
-        // based on MATLAB's implementation of A / B operator.
-        btas::Tensor<int, DEFAULT::range, varray<int> > piv(rank);
-        piv.fill(0);
-
-        auto a = generate_V(n, rank);
-        int LDB = contract_tensor.extent(0);
-        auto info = LAPACKE_dgesv(CblasColMajor, rank, LDB, a.data(), rank, piv.data(), contract_tensor.data(), rank);
-        if (info == 0) {
-          an = contract_tensor;
-        }
-        else{
-          // If inverse fails resort to the pseudoinverse
-          std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl;
-          matlab = false;
-        }
-      }
-      if(! matlab){
-        bool fast = false;
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(n, rank, fast), 0.0, an);
-      }
-#else
-      matlab = false;
-      if( !matlab){
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(n, rank, matlab), 0.0, an);
-      }
-#endif
+
+      this->psuedoinverse_helper(n, fast_pI, matlab, contract_tensor);
       //t2 = std::chrono::high_resolution_clock::now();
       //time = t2 - t1;
       //gemm_wPI += time.count();
 
 
       // Normalize the columns of the new factor matrix and update
-      normCol(an);
-      A[n] = an;
+      normCol(contract_tensor);
+      A[n] = contract_tensor;
     }
 
   };

From c63ce9ae0a92924e22b9ac3845d36c657a7ac845 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:32:28 -0500
Subject: [PATCH 30/66] Remove old pseudoinverse function and rename new
 function

---
 btas/generic/coupled_cp_als.h |  4 ++--
 btas/generic/cp.h             | 42 +----------------------------------
 btas/generic/cp_df_als.h      |  1 -
 btas/generic/cp_rals.h        |  3 +--
 btas/generic/linear_algebra.h |  2 +-
 5 files changed, 5 insertions(+), 47 deletions(-)

diff --git a/btas/generic/coupled_cp_als.h b/btas/generic/coupled_cp_als.h
index f6dcdea6..c337c22d 100644
--- a/btas/generic/coupled_cp_als.h
+++ b/btas/generic/coupled_cp_als.h
@@ -475,7 +475,7 @@ namespace btas{
         }
         // Finally Form the product of K * J^\dagger
         Tensor a0(coupled_dim, rank);
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, K, pseudoInverse(J1, rank), 0.0, a0);
+        gemm(CblasNoTrans, CblasNoTrans, 1.0, K, pseudoInverse(J1, fast_pI), 0.0, a0);
         this->normCol(a0);
         A[0] = a0;
       }
@@ -551,7 +551,7 @@ namespace btas{
         else if(n == this->ndim - 1)
           detail::set_MtKRPR(converge_test, contract_tensor);
         Tensor an(skip_dim, rank);
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(G, rank), 0.0, an);
+        gemm(CblasNoTrans, CblasNoTrans, 1.0, contract_tensor, pseudoInverse(G, fast_pI), 0.0, an);
         this->normCol(an);
         A[n] = an;
       }
diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index 8f2781a5..daef9c2b 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -565,46 +565,6 @@ namespace btas{
     ///                on return reports whether the fast route was used.
     /// \param[in] lambda Regularization parameter lambda is added to the diagonal of V
     /// \return The pseudoinverse of the matrix V.
-    Tensor pseudoInverse(int n, int R, bool & fast_pI, double lambda = 0.0) {
-      // CP_ALS method requires the pseudoinverse of matrix V
-      auto a = this->generate_V(n, R, lambda);
-
-      if(!fast_pI) {
-        Tensor s(Range{Range1{R}});
-        Tensor U(Range{Range1{R}, Range1{R}});
-        Tensor Vt(Range{Range1{R}, Range1{R}});
-
-// btas has no generic SVD for MKL LAPACKE
-//        time1 = std::chrono::high_resolution_clock::now();
-#ifdef LAPACKE_ENABLED
-
-        gesvd('A', 'A', a, s, U, Vt);
-
-#endif
-
-        // Inverse the Singular values with threshold 1e-13 = 0
-        double lr_thresh = 1e-13;
-        Tensor s_inv(Range{Range1{R}, Range1{R}});
-        s_inv.fill(0.0);
-        for (auto i = 0; i < R; ++i) {
-          if (s(i) > lr_thresh)
-            s_inv(i, i) = 1 / s(i);
-          else
-            s_inv(i, i) = s(i);
-        }
-        s.resize(Range{Range1{R}, Range1{R}});
-
-        // Compute the matrix A^-1 from the inverted singular values and the U and
-        // V^T provided by the SVD
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, U, s_inv, 0.0, s);
-        gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U);
-
-        return U;
-      }
-      else{
-        BTAS_EXCEPTION("Pseudo inverse failed" );
-      }
-    }
 
     /// Trying to solve Ax = B
     /// First try Cholesky to solve this problem directly
@@ -626,7 +586,7 @@ namespace btas{
         cholesky = cholesky_inverse(a, B);
         return;
       }
-      auto pInv = pseudoInverse_impl(a, fast_pI);
+      auto pInv = pseudoInverse(a, fast_pI);
       Tensor an(B.extent(0), rank);
       gemm(CblasNoTrans, CblasNoTrans, 1.0, B, pInv, 0.0, an);
       B = an;
diff --git a/btas/generic/cp_df_als.h b/btas/generic/cp_df_als.h
index ff8faf0a..77f34a8e 100644
--- a/btas/generic/cp_df_als.h
+++ b/btas/generic/cp_df_als.h
@@ -81,7 +81,6 @@ namespace btas{
 
     using CP<Tensor,ConvClass>::A;
     using CP<Tensor,ConvClass>::ndim;
-    using CP<Tensor,ConvClass>::pseudoInverse;
     using CP<Tensor,ConvClass>::normCol;
     using CP<Tensor,ConvClass>::generate_KRP;
     using CP<Tensor,ConvClass>::generate_V;
diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h
index 2590674e..ac26ece5 100644
--- a/btas/generic/cp_rals.h
+++ b/btas/generic/cp_rals.h
@@ -95,7 +95,6 @@ namespace btas{
 public:
     using CP<Tensor,ConvClass>::A;
     using CP<Tensor,ConvClass>::ndim;
-    using CP<Tensor,ConvClass>::pseudoInverse;
     using CP<Tensor,ConvClass>::normCol;
     using CP<Tensor,ConvClass>::generate_KRP;
     using CP<Tensor,ConvClass>::generate_V;
@@ -910,7 +909,7 @@ namespace btas{
       if(typeid(converge_test) == typeid(btas::FitCheck<Tensor>)) {
         converge_test.set_MtKRP(temp);
       }
-      
+
       this->psuedoinverse_helper(n, fast_pI, matlab, temp, lambda);
 
       // Normalize the columns of the new factor matrix and update
diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index 0b0cb7f9..444dfccf 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -226,7 +226,7 @@ bool cholesky_inverse(Tensor & A, Tensor & B){
 /// \param[in] a matrix to be inverted.
 /// \return a^{\dagger} The pseudoinverse of the matrix a.
 template <typename Tensor>
-Tensor pseudoInverse_impl(Tensor & a, bool & fast_pI) {
+Tensor pseudoInverse(Tensor & a, bool & fast_pI) {
 #ifndef LAPACKE_ENABLED
     BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE");
 #endif // LAPACKE_ENABLED

From f3b3b7fa31f8cafd160923eab8d3c4100f16d92d Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:35:28 -0500
Subject: [PATCH 31/66] Add description

---
 btas/generic/default_random_seed.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/btas/generic/default_random_seed.h b/btas/generic/default_random_seed.h
index 7a779a03..ecbecf5a 100644
--- a/btas/generic/default_random_seed.h
+++ b/btas/generic/default_random_seed.h
@@ -5,6 +5,7 @@
 #ifndef BTAS_GENERIC_DEFAULT_RANDOM_SEED_H
 #define BTAS_GENERIC_DEFAULT_RANDOM_SEED_H
 namespace btas{
+  // A seed for the random number generator.
   static inline unsigned int& random_seed_accessor(){
     static unsigned int value = 3;
     return value;

From 8b850e7814d53139b9342da13754433e3bbe8a71 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:37:16 -0500
Subject: [PATCH 32/66] Fix typo

---
 btas/generic/flatten.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/btas/generic/flatten.h b/btas/generic/flatten.h
index c4438893..47951447 100644
--- a/btas/generic/flatten.h
+++ b/btas/generic/flatten.h
@@ -51,7 +51,7 @@ template <typename Tensor> Tensor flatten(const Tensor &A, int mode) {
 /// matrix X \param[in] indexj The column index of matrix X \param[in] J The
 /// step size for the row dimension of X \param[in] tensor_itr An iterator of \c A.
 /// The value of the iterator is placed in the correct position of X using
-/// recursive calls of fill()m.
+/// recursive calls of fill().
 
 template <typename Tensor, typename iterator>
 void fill(const Tensor &A, int depth, Tensor &X, int mode, int indexi, int indexj,

From e58880d82277f21c9826bc0fc1d5a165e158be6b Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:38:21 -0500
Subject: [PATCH 33/66] Remove old todo

---
 btas/generic/rals_helper.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/btas/generic/rals_helper.h b/btas/generic/rals_helper.h
index 3fe616d8..e4b869de 100644
--- a/btas/generic/rals_helper.h
+++ b/btas/generic/rals_helper.h
@@ -30,7 +30,6 @@ namespace btas{
     /// \param[in] mode which mode of the actual tensor
     /// is being updated
     /// \param[in] An the updated factor matrix
-    // TODO fix the dot function
     double operator() (int mode, const Tensor& An){
       auto size = An.size();
       auto change = An - prev_[mode];

From b408049b4d7d576e1acd19979c5f7906337c8bc2 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:39:50 -0500
Subject: [PATCH 34/66] Randomized needs linear algebra for QR

---
 btas/generic/randomized.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/btas/generic/randomized.h b/btas/generic/randomized.h
index e64e7976..6f18792a 100644
--- a/btas/generic/randomized.h
+++ b/btas/generic/randomized.h
@@ -4,6 +4,7 @@
 #include <btas/generic/core_contract.h>
 #include <btas/error.h>
 #include <btas/tensor.h>
+#include <btas/generic/linear_algebra.h>
 
 #include <random>
 #include <stdlib.h>

From ad9eabf769281fe5d5c903302184a015100401b9 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:42:57 -0500
Subject: [PATCH 35/66] Swaps here require MKL. If they don't have it throw
 exception

---
 btas/generic/swap.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/btas/generic/swap.h b/btas/generic/swap.h
index f4b84d66..28734c0e 100644
--- a/btas/generic/swap.h
+++ b/btas/generic/swap.h
@@ -32,6 +32,9 @@ namespace btas {
 template <typename Tensor>
 void swap_to_first(Tensor &A, int mode, bool is_in_front = false,
                    bool for_ALS_update = true) {
+#ifndef _HAS_INTEL_MKL
+  BTAS_EXCEPTION("In place tranposes require MKL");
+#endif
   // If the mode of interest is the the first mode you are done.
   if(mode > A.rank()){
     BTAS_EXCEPTION("Mode index is greater than tensor rank");
@@ -118,6 +121,9 @@ void swap_to_first(Tensor &A, int mode, bool is_in_front = false,
 
 template <typename Tensor>
 void swap_to_back(Tensor &A, int mode, bool is_in_back = false) {
+#ifndef _HAS_INTEL_MKL
+  BTAS_EXCEPTION("In place tranposes require MKL");
+#endif
   if (mode > A.rank())
     BTAS_EXCEPTION_MESSAGE(__FILE__, __LINE__,
                            "mode > A.rank(), mode out of range");

From 6b5f2404703f547490dfd3b394e1d8f11a034e9f Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:47:40 -0500
Subject: [PATCH 36/66] Remove unnecessary square root

---
 btas/generic/tucker.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h
index c0515f00..f4cc129e 100644
--- a/btas/generic/tucker.h
+++ b/btas/generic/tucker.h
@@ -21,8 +21,8 @@ namespace btas {
 template <typename Tensor>
 void tucker_compression(Tensor &A, double epsilon_svd,
                         std::vector<Tensor> &transforms) {
-  double norm2 = norm(A);
-  norm2 *= norm2;
+  double norm2 = dot(A,A);
+  //norm2 *= norm2;
   auto ndim = A.rank();
 
   for (int i = 0; i < ndim; i++) {

From b647faf875cc4fbf5b01b3be69f0d963d4403518 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 15:57:30 -0500
Subject: [PATCH 37/66] remove unused norm function

---
 btas/generic/tucker.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h
index f4cc129e..9b60c3d4 100644
--- a/btas/generic/tucker.h
+++ b/btas/generic/tucker.h
@@ -64,13 +64,6 @@ void tucker_compression(Tensor &A, double epsilon_svd,
     core_contract(A, lambda, i);
   }
 }
-
-/// calculates the 2-norm of a matrix Mat
-/// \param[in] Mat The matrix who's 2-norm is caclulated
-
-template <typename Tensor> double norm(const Tensor &Mat) {
-  return sqrt(dot(Mat, Mat));
-}
 } // namespace btas
 #endif //_HAS_INTEL_MKL
 #endif // BTAS_TUCKER_DECOMP_H

From c626f8ea017b7dec7b31e4300d3450d895e0aee2 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 16:16:01 -0500
Subject: [PATCH 38/66] Was using ALS for RALS tests, updated results to match

---
 unittest/cp_test_results.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/unittest/cp_test_results.txt b/unittest/cp_test_results.txt
index 2e34e038..a6650b58 100644
--- a/unittest/cp_test_results.txt
+++ b/unittest/cp_test_results.txt
@@ -12,16 +12,16 @@
 0.5092600887442312
 0.8023893427056535
 0.9998165185189172
-0.7909930183444412
-0.5365678558201658
+0.7965146841534007
+0.5365998545867845
 0.5946792918362551
 0.9902479480481341
-0.6040159648938821
-0.5862586400695924
+0.6077571914361457
+0.5862884447076786
 0.04914517954652209
 0.1829273308737376
-0.0008026970681476175
-0.5092600887442312
+0.002141341851659817
+0.509270575828507
 0.8857484066143582
 0.9928149635593394
 0.7671766452773272

From ecb937863b54bc61b4881ab9bb22b15c92d1eace Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Mon, 27 Jan 2020 16:17:41 -0500
Subject: [PATCH 39/66] Reduce the tightness of the test.  Remove unnecessary
 ifdef with _HAS_INTEL_MKL and use RALS in tucker and random RALS tests

---
 unittest/tensor_cp_test.cc | 30 ++++++++----------------------
 1 file changed, 8 insertions(+), 22 deletions(-)

diff --git a/unittest/tensor_cp_test.cc b/unittest/tensor_cp_test.cc
index dbd8484d..f0b360f2 100644
--- a/unittest/tensor_cp_test.cc
+++ b/unittest/tensor_cp_test.cc
@@ -21,7 +21,8 @@ TEST_CASE("CP")
   using btas::CP_DF_ALS;
   using btas::COUPLED_CP_ALS;
 
-  double epsilon = fmax(1e-10, std::numeric_limits<double>::epsilon());
+  //double epsilon = fmax(1e-10, std::numeric_limits<double>::epsilon());
+  double epsilon = 1e-7;
   // TEST_CASE("CP_ALS"){
   tensor D3(5, 2, 9);
   std::ifstream in3(__dirname + "/mat3D.txt");
@@ -89,7 +90,6 @@ TEST_CASE("CP")
       double diff = A1.compute_error(conv, 1e-2, 1, 100);
       CHECK((diff - results(1,0)) <= epsilon);
     }
-#ifdef _HAS_INTEL_MKL
     SECTION("ALS MODE = 3, Tucker + CP"){
       auto d = D3;
       CP_ALS<tensor, conv_class> A1(d);
@@ -106,7 +106,6 @@ TEST_CASE("CP")
               A1.compress_compute_rand(2, conv, 0, 2, false, 1e-2, 5);
       CHECK((diff - results(3,0)) <= epsilon);
     }
-#endif
 
     SECTION("ALS MODE = 4, Finite rank"){
       CP_ALS<tensor, conv_class> A1(D4);
@@ -121,7 +120,6 @@ TEST_CASE("CP")
       double diff = A1.compute_error(conv, 1e-2, 1, 100);
       CHECK((diff - results(5,0)) <= epsilon);
     }
-#ifdef _HAS_INTEL_MKL
     SECTION("ALS MODE = 4, Tucker + CP"){
       auto d = D4;
       CP_ALS<tensor, conv_class> A1(d);
@@ -136,8 +134,6 @@ TEST_CASE("CP")
       double diff = A1.compress_compute_rand(3, conv, 0, 2, true, 1e-2, 0, true, false, 1, 20, 100);
       CHECK((diff - results(7,0)) <= epsilon);
     }
-#endif
-
     SECTION("ALS MODE = 5, Finite rank"){
       CP_ALS<tensor, conv_class> A1(D5);
       conv.set_norm(norm5);
@@ -150,7 +146,6 @@ TEST_CASE("CP")
       double diff = A1.compute_error(conv, 1e-2, 10, 200);
       CHECK((diff - results(9,0)) <= epsilon);
     }
-#ifdef _HAS_INTEL_MKL
     SECTION("ALS MODE = 5, Tucker + CP"){
       auto d = D5;
       CP_ALS<tensor, conv_class> A1(d);
@@ -165,7 +160,6 @@ TEST_CASE("CP")
       double diff = A1.compress_compute_rand(1, conv, 0, 2, false, 1e-2, 5);
       CHECK((diff - results(11,0)) <= epsilon);
     }
-#endif
   }
   // RALS tests
   {
@@ -182,10 +176,9 @@ TEST_CASE("CP")
       double diff = A1.compute_error(conv, 1e-2, 1, 100);
       CHECK((diff - results(13,0)) <= epsilon);
     }
-#ifdef _HAS_INTEL_MKL
     SECTION("RALS MODE = 3, Tucker + CP"){
       auto d = D3;
-      CP_ALS<tensor, conv_class> A1(d);
+      CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm3);
       double diff =
               A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false);
@@ -193,14 +186,12 @@ TEST_CASE("CP")
     }
     SECTION("RALS MODE = 3, Random + CP"){
       auto d = D3;
-      CP_ALS<tensor, conv_class> A1(d);
+      CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm3);
       double diff =
               A1.compress_compute_rand(2, conv, 0, 2, false, 1e-2, 5);
       CHECK((diff - results(15,0)) <= epsilon);
     }
-#endif
-
     SECTION("RALS MODE = 4, Finite rank"){
       CP_RALS<tensor, conv_class> A1(D4);
       conv.set_norm(norm4);
@@ -214,23 +205,20 @@ TEST_CASE("CP")
       double diff = A1.compute_error(conv, 1e-2, 1, 100);
       CHECK((diff - results(17,0)) <= epsilon);
     }
-#ifdef _HAS_INTEL_MKL
     SECTION("RALS MODE = 4, Tucker + CP"){
       auto d = D4;
-      CP_ALS<tensor, conv_class> A1(d);
+      CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm4);
       double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5);
       CHECK((diff - results(18,0)) <= epsilon);
     }
     SECTION("RALS MODE = 4, Random + CP"){
       auto d = D4;
-      CP_ALS<tensor, conv_class> A1(d);
+      CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm4);
       double diff = A1.compress_compute_rand(3, conv, 0, 2, true, 1e-2, 0, true, false, 1, 20, 100);
       CHECK((diff - results(19,0)) <= epsilon);
     }
-#endif
-
     SECTION("RALS MODE = 5, Finite rank"){
       CP_RALS<tensor, conv_class> A1(D5);
       conv.set_norm(norm5);
@@ -243,22 +231,20 @@ TEST_CASE("CP")
       double diff = A1.compute_error(conv, 1e-2, 1, 20);
       CHECK((diff - results(21,0)) <= epsilon);
     }
-#ifdef _HAS_INTEL_MKL
     SECTION("RALS MODE = 5, Tucker + CP"){
       auto d = D5;
-      CP_ALS<tensor, conv_class> A1(d);
+      CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm5);
       double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false);
       CHECK((diff - results(22,0)) <= epsilon);
     }
     SECTION("RALS MODE = 5, Random + CP"){
       auto d = D5;
-      CP_ALS<tensor, conv_class> A1(d);
+      CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm5);
       double diff = A1.compress_compute_rand(1, conv, 0, 2, false, 1e-2, 5);
       CHECK((diff - results(23,0)) <= epsilon);
     }
-#endif
   }
   // CP-DF-ALS tests
   {

From 51e9c2f35772d7ebf5bcff9d127fe50a92a26988 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Tue, 28 Jan 2020 09:06:52 -0500
Subject: [PATCH 40/66] reorganized macro and cmake variable names that control
 linear algebra ... will impact downstream users

---
 CMakeLists.txt                          | 26 +++++-----
 bin/travisci_build_linux.sh             |  8 ++--
 btas/generic/coupled_cp_als.h           |  4 +-
 btas/generic/cp.h                       |  6 +--
 btas/generic/cp_als.h                   |  8 ++--
 btas/generic/cp_rals.h                  |  8 ++--
 btas/generic/gemm_impl.h                |  4 +-
 btas/generic/linear_algebra.h           | 22 ++++-----
 btas/generic/swap.h                     |  8 ++--
 btas/generic/tucker.h                   |  4 +-
 btas/types.h                            | 64 ++++++++-----------------
 cmake/modules/RedefaultableOption.cmake | 12 +++++
 12 files changed, 82 insertions(+), 92 deletions(-)
 create mode 100644 cmake/modules/RedefaultableOption.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0ae3a571..373c3aa8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,21 +4,25 @@ cmake_minimum_required (VERSION 3.1.0)
 project (BTAS)
 
 include(FeatureSummary)
+include(RedefaultableOption)
 
 # Configure options
-option(BTAS_BUILD_UNITTEST "Causes building BTAS unit tests" ON)
-option(BTAS_EXPERT "BTAS Expert mode: disables automatically downloading or building dependencies" OFF)
-option(BTAS_ASSERT_THROWS "Whether BTAS_ASSERT should throw" OFF)
+redefaultable_option(BTAS_BUILD_UNITTEST "Whether to build unit tests" OFF)
+add_feature_info(BUILD_UNITTEST BTAS_BUILD_UNITTEST "Will build unit tests")
+redefaultable_option(BTAS_ASSERT_THROWS "Whether BTAS_ASSERT should throw; enable if BTAS_BUILD_UNITTEST=ON" OFF)
+add_feature_info(ASSERT_THROWS BTAS_ASSERT_THROWS "BTAS_ASSERT(x) will throw if x is false, and not be affected by NDEBUG")
+redefaultable_option(BTAS_USE_CBLAS_LAPACKE "Whether to use BLAS+LAPACK via CBLAS+LAPACKE" ON)
+add_feature_info(USE_CBLAS_LAPACKE BTAS_USE_CBLAS_LAPACKE "Will use BLAS and LAPACK linear algebra libraries via their CBLAS and LAPACKE interfaces")
 
 set(TARGET_MAX_INDEX_RANK 6 CACHE STRING "Determines the rank for which the default BTAS index type will use stack (default: 6); this requires Boost.Container")
 add_feature_info("TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}" TRUE "default BTAS index type will use stack for rank<=${TARGET_MAX_INDEX_RANK}")
 
 # Set BTAS version
-set(BTAS_MAJOR_VERSION 0)
-set(BTAS_MINOR_VERSION 1)
+set(BTAS_MAJOR_VERSION 1)
+set(BTAS_MINOR_VERSION 0)
 set(BTAS_MICRO_VERSION 0)
-set(BTAS_BUILDID alpha)
-set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}-${BTAS_BUILDID}")
+set(BTAS_PRERELEASE alpha)
+set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}-${BTAS_PRERELEASE}")
 set(TARGET_ARCH "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
 
 # extra cmake files are shipped with BTAS
@@ -77,10 +81,10 @@ check_type_size("long long" BTAS_HAS_LONG_LONG)
 add_custom_target(External)
 
 if (BTAS_BUILD_UNITTEST)
-  if (USE_CBLAS)
+  if (BTAS_USE_CBLAS_LAPACKE)
     find_package(CBLAS REQUIRED)
     find_package(LAPACKE REQUIRED)
-    add_definitions(-D_CBLAS_HEADER="${CBLAS_INCLUDE_FILE}" -D_LAPACKE_HEADER="${LAPACKE_INCLUDE_FILE}" -DBTAS_HAS_CBLAS=1)
+    add_definitions(-DBTAS_CBLAS_HEADER="${CBLAS_INCLUDE_FILE}" -DBTAS_LAPACKE_HEADER="${LAPACKE_INCLUDE_FILE}" -DBTAS_HAS_CBLAS=1 -DBTAS_HAS_LAPACKE=1)
     message(STATUS "** CBLAS_LIBRARIES = ${CBLAS_LIBRARIES}")
     message(STATUS "** CBLAS_INCLUDE_DIR = ${CBLAS_INCLUDE_DIR}")
     message(STATUS "** CBLAS_INCLUDE_FILE = ${CBLAS_INCLUDE_FILE}")
@@ -89,9 +93,9 @@ if (BTAS_BUILD_UNITTEST)
     message(STATUS "** LAPACKE_INCLUDE_FILE = ${LAPACKE_INCLUDE_FILE}")
     include_directories(${CBLAS_INCLUDE_DIR} ${LAPACKE_INCLUDE_DIR})
     if (MKL_FOUND)
-      add_definitions(-D_HAS_INTEL_MKL=1)
+      add_definitions(-DBTAS_HAS_INTEL_MKL=1)
     endif(MKL_FOUND)
-  endif(USE_CBLAS)
+  endif(BTAS_USE_CBLAS_LAPACKE)
   include(external/boost.cmake)
 endif()
 
diff --git a/bin/travisci_build_linux.sh b/bin/travisci_build_linux.sh
index 61083d5a..49f8bd29 100755
--- a/bin/travisci_build_linux.sh
+++ b/bin/travisci_build_linux.sh
@@ -14,18 +14,18 @@ fi
 
 cd ${BUILD_PREFIX}
 
-##########   test with cblas   ##########
+##########   test with blas+lapack   ##########
 mkdir build_cblas
 cd build_cblas
-cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DUSE_CBLAS=ON -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON
+cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON
 make VERBOSE=1
 make check VERBOSE=1
 cd ..
 
-########## test without cblas ##########
+########## test without blas+lapack   ##########
 mkdir build
 cd build
-cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON
+cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_USE_CBLAS_LAPACKE=OFF  -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON
 make VERBOSE=1
 make check VERBOSE=1
 cd ..
diff --git a/btas/generic/coupled_cp_als.h b/btas/generic/coupled_cp_als.h
index c337c22d..f23c0ca0 100644
--- a/btas/generic/coupled_cp_als.h
+++ b/btas/generic/coupled_cp_als.h
@@ -122,7 +122,7 @@ namespace btas{
               "Tensor describing symmetries must be defined for all dimensions");
     }
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
     /// \brief Computes decomposition of the order-N tensor \c tensor
     /// with rank = \c RankStep * \c panels *  max_dim(reference_tensor) + max_dim(reference_tensor)
     /// Initial guess for factor matrices start at rank = max_dim(reference_tensor)
@@ -147,7 +147,7 @@ namespace btas{
             int max_als = 20,bool fast_pI = false, bool calculate_epsilon = false, bool direct = true) override{
       BTAS_EXCEPTION("Function not yet implemented");
     }
-#endif //_HAS_INTEL_MKL
+#endif //BTAS_HAS_INTEL_MKL
 
   protected:
     Tensor& tensor_ref_left;        // Tensor in first term of the loss function
diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index daef9c2b..934c2ec5 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -23,7 +23,7 @@
 #include <btas/generic/reconstruct.h>
 #include <btas/generic/linear_algebra.h>
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
 #include <mkl_trans.h>
 #endif
 
@@ -126,8 +126,8 @@ namespace btas{
     /// tensor
     /// \param[in] ndim number of modes in the reference tensor.
     CP(int dims) : num_ALS(0) {
-#if not defined(BTAS_HAS_CBLAS) || not defined(_HAS_INTEL_MKL)
-      BTAS_EXCEPTION_MESSAGE(__FILE__, __LINE__, "CP decompositions requires LAPACKE or mkl_lapack");
+#if not defined(BTAS_HAS_LAPACKE)
+      BTAS_EXCEPTION_MESSAGE(__FILE__, __LINE__, "CP decompositions requires LAPACKE");
 #endif
       ndim = dims;
     }
diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index a50c845c..a120c4be 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -12,7 +12,7 @@
 #include <iostream>
 #include <vector>
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
 #include <mkl_trans.h>
 #endif
 
@@ -211,7 +211,7 @@ namespace btas{
       return epsilon;
     }
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
     /// \brief Computes an approximate core tensor using
     /// Tucker decomposition, e.g.
     ///  \f$ T(I_1 \dots I_N) \approx T(R_1 \dots R_N) U^{(1)} (R_1, I_1) \dots U^{(N)} (R_N, I_N) \f$
@@ -362,7 +362,7 @@ namespace btas{
       return epsilon;
     }
 
-#endif  //_HAS_INTEL_MKL
+#endif  //BTAS_HAS_INTEL_MKL
 
 
   protected:
@@ -663,7 +663,7 @@ namespace btas{
       Tensor temp(A[n].extent(0), rank);
       Tensor an(A[n].range());
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
 
       // Computes the Khatri-Rao product intermediate
       auto KhatriRao = this->generate_KRP(n, rank, true);
diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h
index ac26ece5..efea1c05 100644
--- a/btas/generic/cp_rals.h
+++ b/btas/generic/cp_rals.h
@@ -22,7 +22,7 @@
 #include <btas/generic/rals_helper.h>
 #include <btas/generic/reconstruct.h>
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
 #include <mkl_trans.h>
 #endif
 
@@ -134,7 +134,7 @@ namespace btas{
 
     ~CP_RALS() = default;
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
 
     /// \brief Computes decomposition of the order-N tensor \c tensor
     /// with rank = \c RankStep * \c panels *  max_dim(reference_tensor) + max_dim(reference_tensor)
@@ -379,7 +379,7 @@ namespace btas{
       return epsilon;
     }
 
-#endif  //_HAS_INTEL_MKL
+#endif  //BTAS_HAS_INTEL_MKL
 
   protected:
     int size;                   // number of elements in tensor_ref
@@ -691,7 +691,7 @@ namespace btas{
       Tensor temp(A[n].extent(0), rank);
       Tensor an(A[n].range());
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
 
       // Computes the Khatri-Rao product intermediate
       auto KhatriRao = generate_KRP(n, rank, true);
diff --git a/btas/generic/gemm_impl.h b/btas/generic/gemm_impl.h
index 3a5513f3..e7f4d18c 100644
--- a/btas/generic/gemm_impl.h
+++ b/btas/generic/gemm_impl.h
@@ -289,7 +289,7 @@ template<> struct gemm_impl<true>
    {
       const lapack_complex_float alphac = to_lapack_val(std::move(alpha));
       const lapack_complex_float betac  = to_lapack_val(std::move(beta));
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
       cblas_cgemm3m(order, transA, transB, Msize, Nsize, Ksize, &alphac, to_lapack_cptr(itrA), LDA, to_lapack_cptr(itrB), LDB, &betac, to_lapack_cptr(itrC), LDC);
 #else
       cblas_cgemm(order, transA, transB, Msize, Nsize, Ksize, &alphac, to_lapack_cptr(itrA), LDA, to_lapack_cptr(itrB), LDB, &betac, to_lapack_cptr(itrC), LDC);
@@ -315,7 +315,7 @@ template<> struct gemm_impl<true>
    {
       const lapack_complex_double alphac = to_lapack_val(std::move(alpha));
       const lapack_complex_double betac  = to_lapack_val(std::move(beta));
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
       cblas_zgemm3m(order, transA, transB, Msize, Nsize, Ksize, &alphac, to_lapack_zptr(itrA), LDA, to_lapack_zptr(itrB), LDB, &betac, to_lapack_zptr(itrC), LDC);
 #else
       cblas_zgemm(order, transA, transB, Msize, Nsize, Ksize, &alphac, to_lapack_zptr(itrA), LDA, to_lapack_zptr(itrB), LDB, &betac, to_lapack_zptr(itrC), LDC);
diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index 444dfccf..3d6fe4e4 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -15,9 +15,9 @@ namespace btas{
 
   template <typename Tensor> void LU_decomp(Tensor &A) {
 
-#ifndef LAPACKE_ENABLED
+#ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Using this function requires LAPACKE");
-#endif // LAPACKE_ENABLED
+#endif // BTAS_HAS_LAPACKE
 
     if(A.rank() > 2){
       BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices.");
@@ -97,9 +97,9 @@ namespace btas{
 
   template <typename Tensor> bool QR_decomp(Tensor &A) {
 
-#ifndef LAPACKE_ENABLED
+#ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Using this function requires LAPACKE");
-#endif // LAPACKE_ENABLED
+#endif // BTAS_HAS_LAPACKE
 
     if(A.rank() > 2){
       BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices.");
@@ -137,9 +137,9 @@ namespace btas{
   template <typename Tensor>
   bool Inverse_Matrix(Tensor & A){
 
-#ifndef LAPACKE_ENABLED
+#ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Using LU matrix inversion requires LAPACKE");
-#endif // LAPACKE_ENABLED
+#endif // BTAS_HAS_LAPACKE
 
     if(A.rank() > 2){
       BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices.");
@@ -173,9 +173,9 @@ namespace btas{
 ///  matrix \c A
   template <typename Tensor>
   void eigenvalue_decomp(Tensor & A, Tensor & lambda){
-#ifndef LAPACKE_ENABLED
+#ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Using eigenvalue decomposition requires LAPACKE");
-#endif // LAPACKE_ENABLED
+#endif // BTAS_HAS_LAPACKE
     if(A.rank() > 2){
       BTAS_EXCEPTION("Tensor rank > 2. Tensor A must be a matrix.");
     }
@@ -197,7 +197,7 @@ namespace btas{
   /// out: The solution x = A^{-1}B
 template <typename Tensor>
 bool cholesky_inverse(Tensor & A, Tensor & B){
-#ifndef LAPACKE_ENABLED
+#ifndef BTAS_HAS_LAPACKE
   BTAS_EXCEPTION("Cholesky inverse function requires LAPACKE")
 #endif
     // This method computes the inverse quickly for a square matrix
@@ -227,9 +227,9 @@ bool cholesky_inverse(Tensor & A, Tensor & B){
 /// \return a^{\dagger} The pseudoinverse of the matrix a.
 template <typename Tensor>
 Tensor pseudoInverse(Tensor & a, bool & fast_pI) {
-#ifndef LAPACKE_ENABLED
+#ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE");
-#endif // LAPACKE_ENABLED
+#endif // BTAS_HAS_LAPACKE
 
     if (a.rank() > 2) {
       BTAS_EXCEPTION("PseudoInverse can only be computed on a matrix");
diff --git a/btas/generic/swap.h b/btas/generic/swap.h
index 28734c0e..b8f29cec 100644
--- a/btas/generic/swap.h
+++ b/btas/generic/swap.h
@@ -1,7 +1,7 @@
 #ifndef BTAS_SWAP_H
 #define BTAS_SWAP_H
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
 
 #include <vector>
 
@@ -32,7 +32,7 @@ namespace btas {
 template <typename Tensor>
 void swap_to_first(Tensor &A, int mode, bool is_in_front = false,
                    bool for_ALS_update = true) {
-#ifndef _HAS_INTEL_MKL
+#ifndef BTAS_HAS_INTEL_MKL
   BTAS_EXCEPTION("In place tranposes require MKL");
 #endif
   // If the mode of interest is the the first mode you are done.
@@ -121,7 +121,7 @@ void swap_to_first(Tensor &A, int mode, bool is_in_front = false,
 
 template <typename Tensor>
 void swap_to_back(Tensor &A, int mode, bool is_in_back = false) {
-#ifndef _HAS_INTEL_MKL
+#ifndef BTAS_HAS_INTEL_MKL
   BTAS_EXCEPTION("In place tranposes require MKL");
 #endif
   if (mode > A.rank())
@@ -159,6 +159,6 @@ void swap_to_back(Tensor &A, int mode, bool is_in_back = false) {
 
 } // namespace btas
 
-#endif //_HAS_INTEL_MKL
+#endif //BTAS_HAS_INTEL_MKL
 
 #endif // BTAS_SWAP_H
diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h
index 9b60c3d4..36704d93 100644
--- a/btas/generic/tucker.h
+++ b/btas/generic/tucker.h
@@ -1,7 +1,7 @@
 #ifndef BTAS_TUCKER_DECOMP_H
 #define BTAS_TUCKER_DECOMP_H
 
-#ifdef _HAS_INTEL_MKL
+#ifdef BTAS_HAS_INTEL_MKL
 
 #include <btas/generic/core_contract.h>
 #include <btas/generic/flatten.h>
@@ -65,5 +65,5 @@ void tucker_compression(Tensor &A, double epsilon_svd,
   }
 }
 } // namespace btas
-#endif //_HAS_INTEL_MKL
+#endif //BTAS_HAS_INTEL_MKL
 #endif // BTAS_TUCKER_DECOMP_H
diff --git a/btas/types.h b/btas/types.h
index 4ed6d798..4e13a5f9 100644
--- a/btas/types.h
+++ b/btas/types.h
@@ -11,46 +11,53 @@
 extern "C" {
 #endif // __cplusplus
 
-#ifdef BTAS_HAS_CBLAS
-# define LAPACKE_ENABLED
-# if not defined(_CBLAS_HEADER) && not defined(_LAPACKE_HEADER)
+#if defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE)
+# if not defined(BTAS_CBLAS_HEADER) && not defined(BTAS_LAPACKE_HEADER)
 
-#   ifdef _HAS_INTEL_MKL
+#   ifdef BTAS_HAS_INTEL_MKL
 
 #     include <mkl_cblas.h>
 #     include <mkl_lapacke.h>
 
-#   else  // _HAS_INTEL_MKL
+#   else  // BTAS_HAS_INTEL_MKL
 
 #     include <cblas.h>
       // see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I
 #     include <complex>
 #     ifndef lapack_complex_float
 #       define lapack_complex_float std::complex<float>
+#     else // lapack_complex_float
+        static_assert(sizeof(std::complex<float>)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex<float> do not match");
 #     endif // lapack_complex_float
 #     ifndef lapack_complex_double
 #       define lapack_complex_double std::complex<double>
+#     else // lapack_complex_double
+        static_assert(sizeof(std::complex<double>)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex<double> do not match");
 #     endif // lapack_complex_double
 #     include <lapacke.h>
 
-#   endif  // _HAS_INTEL_MKL
+#   endif  // BTAS_HAS_INTEL_MKL
 
-# else  // _CBLAS_HEADER
+# else  // BTAS_CBLAS_HEADER
 
-#   include _CBLAS_HEADER
+#   include BTAS_CBLAS_HEADER
     // see https://github.com/xianyi/OpenBLAS/issues/1992 why this is needed to prevent lapacke.h #define'ing I
 #   include <complex>
 #   ifndef lapack_complex_float
 #     define lapack_complex_float std::complex<float>
+#   else // lapack_complex_float
+      static_assert(sizeof(std::complex<float>)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex<float> do not match");
 #   endif // lapack_complex_float
 #   ifndef lapack_complex_double
 #     define lapack_complex_double std::complex<double>
+#   else // lapack_complex_double
+      static_assert(sizeof(std::complex<double>)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex<double> do not match");
 #   endif // lapack_complex_double
-#   include _LAPACKE_HEADER
+#   include BTAS_LAPACKE_HEADER
 
-# endif  // _CBLAS_HEADER
+# endif  // BTAS_CBLAS_HEADER
 
-#else  // BTAS_HAS_CBLAS
+#else  // defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE)
 
 /// major order directive
 enum CBLAS_ORDER { CblasRowMajor, CblasColMajor };
@@ -67,45 +74,12 @@ enum CBLAS_DIAG { CblasNonUnit, CblasUnit };
 /// transposition directive for symmetric matrix (not used)
 enum CBLAS_SIDE { CblasLeft, CblasRight };
 
-#endif // BTAS_HAS_CBLAS
+#endif // defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE)
 
 #ifdef __cplusplus
 }
 #endif // __cplusplus
 
-// some BLAS libraries define their own types for complex data
-#ifndef HAVE_INTEL_MKL
-#ifndef lapack_complex_float
-# define lapack_complex_float  std::complex<float>
-#else // lapack_complex_float
-static_assert(sizeof(std::complex<float>)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex<float> do not match");
-#endif // lapack_complex_float
-#ifndef lapack_complex_double
-# define lapack_complex_double std::complex<double>
-#else // lapack_complex_double
-static_assert(sizeof(std::complex<double>)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex<double> do not match");
-#endif // lapack_complex_double
-#else // HAVE_INTEL_MKL
-// if calling direct need to cast to the MKL complex types
-# ifdef MKL_DIRECT_CALL
-#  include <mkl_types.h>
-#  ifndef lapack_complex_float
-#   define lapack_complex_float MKL_Complex8
-#  endif
-#  ifndef lapack_complex_double
-#   define lapack_complex_double MKL_Complex16
-#  endif
-// else can call via F77 prototypes which don't need type conversion
-# else // MKL_DIRECT_CALL
-#  ifndef lapack_complex_float
-#   define lapack_complex_float  std::complex<float>
-#  endif
-#  ifndef lapack_complex_double
-#   define lapack_complex_double std::complex<double>
-#  endif
-# endif // MKL_DIRECT_CALL
-#endif // HAVE_INTEL_MKL
-
 namespace btas {
 
   template <typename T>
diff --git a/cmake/modules/RedefaultableOption.cmake b/cmake/modules/RedefaultableOption.cmake
new file mode 100644
index 00000000..addec791
--- /dev/null
+++ b/cmake/modules/RedefaultableOption.cmake
@@ -0,0 +1,12 @@
+# if local variable is defined, use its value as the default, otherwise use _default
+# this is consistent with cmake 3.13 and later (see policy CMP0077)
+macro(redefaultable_option _name _descr _default)
+
+  if (DEFINED ${_name})
+    set(${_name}_DEFAULT ${${_name}})
+  else()
+    set(${_name}_DEFAULT ${_default})
+  endif()
+  option(${_name} ${_descr} ${${_name}_DEFAULT})
+
+endmacro()

From eb20233754fe27334f6c6c99d22b040e0754ce1f Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 09:42:48 -0500
Subject: [PATCH 41/66] Need preprocessor else so lapacke calls don't try to
 compile when its not included.

---
 btas/generic/linear_algebra.h | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index 3d6fe4e4..78ccbef1 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -17,7 +17,7 @@ namespace btas{
 
 #ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Using this function requires LAPACKE");
-#endif // BTAS_HAS_LAPACKE
+#else //BTAS_HAS_LAPACKE
 
     if(A.rank() > 2){
       BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices.");
@@ -89,6 +89,7 @@ namespace btas{
 
     // contracting the pivoting matrix with L to put in correct order
     gemm(CblasNoTrans, CblasNoTrans, 1.0, P, L, 0.0, A);
+#endif // BTAS_HAS_LAPACKE
   }
 
 /// Computes the QR decomposition of matrix \c A
@@ -99,7 +100,7 @@ namespace btas{
 
 #ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Using this function requires LAPACKE");
-#endif // BTAS_HAS_LAPACKE
+#else //BTAS_HAS_LAPACKE
 
     if(A.rank() > 2){
       BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices.");
@@ -129,6 +130,7 @@ namespace btas{
     } else {
       return false;
     }
+#endif // BTAS_HAS_LAPACKE
   }
 
 /// Computes the inverse of a matrix \c A using a pivoted LU decomposition
@@ -139,7 +141,7 @@ namespace btas{
 
 #ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Using LU matrix inversion requires LAPACKE");
-#endif // BTAS_HAS_LAPACKE
+#else //BTAS_HAS_LAPACKE
 
     if(A.rank() > 2){
       BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices.");
@@ -163,6 +165,7 @@ namespace btas{
       return false;
     }
     return true;
+#endif // BTAS_HAS_LAPACKE
   }
 
 /// Computes the eigenvalue decomposition of a matrix \c A and
@@ -175,7 +178,7 @@ namespace btas{
   void eigenvalue_decomp(Tensor & A, Tensor & lambda){
 #ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Using eigenvalue decomposition requires LAPACKE");
-#endif // BTAS_HAS_LAPACKE
+#else //BTAS_HAS_LAPACKE
     if(A.rank() > 2){
       BTAS_EXCEPTION("Tensor rank > 2. Tensor A must be a matrix.");
     }
@@ -188,6 +191,7 @@ namespace btas{
     auto info = LAPACKE_dsyev(LAPACK_COL_MAJOR, 'V', 'U', smallest_mode_A,
             A.data(), smallest_mode_A, lambda.data());
     if (info) BTAS_EXCEPTION("Error in computing the SVD initial guess");
+#endif // BTAS_HAS_LAPACKE
   }
 
   /// Solving Ax = B using a Cholesky decomposition
@@ -198,8 +202,8 @@ namespace btas{
 template <typename Tensor>
 bool cholesky_inverse(Tensor & A, Tensor & B){
 #ifndef BTAS_HAS_LAPACKE
-  BTAS_EXCEPTION("Cholesky inverse function requires LAPACKE")
-#endif
+  BTAS_EXCEPTION("Cholesky inverse function requires LAPACKE");
+#else //BTAS_HAS_LAPACKE
     // This method computes the inverse quickly for a square matrix
     // based on MATLAB's implementation of A / B operator.
     auto rank = B.extent(1);
@@ -216,6 +220,7 @@ bool cholesky_inverse(Tensor & A, Tensor & B){
       std::cout << "Matlab square inverse failed revert to fast inverse" << std::endl;
       return false;
     }
+#endif //BTAS_HAS_LAPACKE
 }
 
 /// SVD referencing code from
@@ -229,7 +234,7 @@ template <typename Tensor>
 Tensor pseudoInverse(Tensor & a, bool & fast_pI) {
 #ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE");
-#endif // BTAS_HAS_LAPACKE
+#else //BTAS_HAS_LAPACKE
 
     if (a.rank() > 2) {
       BTAS_EXCEPTION("PseudoInverse can only be computed on a matrix");
@@ -274,6 +279,7 @@ Tensor pseudoInverse(Tensor & a, bool & fast_pI) {
     gemm(CblasNoTrans, CblasNoTrans, 1.0, s, Vt, 0.0, U);
 
     return U;
+#endif // BTAS_HAS_LAPACKE
   }
 
 } // namespace btas

From 4c6b312eef806d147e735619df58860a303facf8 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 10:37:13 -0500
Subject: [PATCH 42/66] Include path to RedefaultableOption

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 373c3aa8..d5d797d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ cmake_minimum_required (VERSION 3.1.0)
 project (BTAS)
 
 include(FeatureSummary)
-include(RedefaultableOption)
+include(cmake/modules/RedefaultableOption.cmake)
 
 # Configure options
 redefaultable_option(BTAS_BUILD_UNITTEST "Whether to build unit tests" OFF)

From 624f77f2e532cd412253fc50a97f9575b76956c9 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 10:37:59 -0500
Subject: [PATCH 43/66] Option needs quotations around description

---
 cmake/modules/RedefaultableOption.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmake/modules/RedefaultableOption.cmake b/cmake/modules/RedefaultableOption.cmake
index addec791..ead06273 100644
--- a/cmake/modules/RedefaultableOption.cmake
+++ b/cmake/modules/RedefaultableOption.cmake
@@ -8,5 +8,6 @@ macro(redefaultable_option _name _descr _default)
     set(${_name}_DEFAULT ${_default})
   endif()
   option(${_name} ${_descr} ${${_name}_DEFAULT})
+  option(${_name} "${_descr}" ${OPTION})
 
 endmacro()

From 11727dd57043ee1ff1649efebde0e17345771507 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 10:39:06 -0500
Subject: [PATCH 44/66] Fixed a typo

---
 cmake/modules/RedefaultableOption.cmake | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cmake/modules/RedefaultableOption.cmake b/cmake/modules/RedefaultableOption.cmake
index ead06273..52b572d2 100644
--- a/cmake/modules/RedefaultableOption.cmake
+++ b/cmake/modules/RedefaultableOption.cmake
@@ -7,7 +7,6 @@ macro(redefaultable_option _name _descr _default)
   else()
     set(${_name}_DEFAULT ${_default})
   endif()
-  option(${_name} ${_descr} ${${_name}_DEFAULT})
-  option(${_name} "${_descr}" ${OPTION})
+  option(${_name} "${_descr}" ${${_name}_DEFAULT})
 
 endmacro()

From 4be74c5848045fe263ce9c341ac722987b3161bd Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Tue, 28 Jan 2020 11:16:44 -0500
Subject: [PATCH 45/66] fixup lapack type defs to be present even if not using
 cblas/lapack

---
 CMakeLists.txt |  8 +++----
 btas/types.h   | 58 ++++++++++++++++++++++++++++----------------------
 2 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d5d797d9..35d2d5ef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,8 +3,11 @@
 cmake_minimum_required (VERSION 3.1.0)
 project (BTAS)
 
+# extra cmake files are shipped with BTAS
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules")
+
 include(FeatureSummary)
-include(cmake/modules/RedefaultableOption.cmake)
+include(RedefaultableOption)
 
 # Configure options
 redefaultable_option(BTAS_BUILD_UNITTEST "Whether to build unit tests" OFF)
@@ -25,9 +28,6 @@ set(BTAS_PRERELEASE alpha)
 set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}-${BTAS_PRERELEASE}")
 set(TARGET_ARCH "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
 
-# extra cmake files are shipped with BTAS
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules")
-
 include(CMakePushCheckState)
 include(GNUInstallDirs)
 include(AppendFlags)
diff --git a/btas/types.h b/btas/types.h
index 4e13a5f9..656ddd28 100644
--- a/btas/types.h
+++ b/btas/types.h
@@ -7,10 +7,6 @@
 
 #include <complex>
 
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
 #if defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE)
 # if not defined(BTAS_CBLAS_HEADER) && not defined(BTAS_LAPACKE_HEADER)
 
@@ -76,9 +72,17 @@ enum CBLAS_SIDE { CblasLeft, CblasRight };
 
 #endif // defined(BTAS_HAS_CBLAS) && defined(BTAS_HAS_LAPACKE)
 
-#ifdef __cplusplus
-}
-#endif // __cplusplus
+// if lapack types are not defined define them EVEN if not using CBLAS/LAPACKE to make writing generic API easier
+#ifndef lapack_complex_float
+# define lapack_complex_float std::complex<float>
+#else // lapack_complex_float
+static_assert(sizeof(std::complex<float>)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and std::complex<float> do not match");
+# endif // lapack_complex_float
+#ifndef lapack_complex_double
+# define lapack_complex_double std::complex<double>
+#else // lapack_complex_double
+static_assert(sizeof(std::complex<double>)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and std::complex<double> do not match");
+#endif // lapack_complex_double
 
 namespace btas {
 
@@ -98,15 +102,15 @@ namespace btas {
     return *reinterpret_cast<std::complex<float>*>(&val);
   }
   template <typename T>
-  const lapack_complex_float*
-  to_lapack_cptr(const T* ptr) {
-    static_assert(sizeof(T)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and T given to btas::to_lapack_cptr do not match");
+  const lapack_complex_float* to_lapack_cptr(const T* ptr) {
+    static_assert(sizeof(T) == sizeof(lapack_complex_float),
+                  "sizes of lapack_complex_float and T given to btas::to_lapack_cptr do not match");
     return reinterpret_cast<const lapack_complex_float*>(ptr);
   }
   template <typename T>
-  typename std::enable_if<!std::is_const<T>::value, lapack_complex_float*>::type
-  to_lapack_cptr(T* ptr) {
-    static_assert(sizeof(T)==sizeof(lapack_complex_float), "sizes of lapack_complex_float and T given to btas::to_lapack_cptr do not match");
+  typename std::enable_if<!std::is_const<T>::value, lapack_complex_float*>::type to_lapack_cptr(T* ptr) {
+    static_assert(sizeof(T) == sizeof(lapack_complex_float),
+                  "sizes of lapack_complex_float and T given to btas::to_lapack_cptr do not match");
     return reinterpret_cast<lapack_complex_float*>(ptr);
   }
 
@@ -117,27 +121,29 @@ namespace btas {
     return *reinterpret_cast<std::complex<double>*>(&val);
   }
   template <typename T>
-  const lapack_complex_double*
-  to_lapack_zptr(const T* ptr) {
-    static_assert(sizeof(T)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and T given to btas::to_lapack_zptr do not match");
+  const lapack_complex_double* to_lapack_zptr(const T* ptr) {
+    static_assert(sizeof(T) == sizeof(lapack_complex_double),
+                  "sizes of lapack_complex_double and T given to btas::to_lapack_zptr do not match");
     return reinterpret_cast<const lapack_complex_double*>(ptr);
   }
   template <typename T>
-  typename std::enable_if<!std::is_const<T>::value, lapack_complex_double*>::type
-  to_lapack_zptr(T* ptr) {
-    static_assert(sizeof(T)==sizeof(lapack_complex_double), "sizes of lapack_complex_double and T given to btas::to_lapack_zptr do not match");
+  typename std::enable_if<!std::is_const<T>::value, lapack_complex_double*>::type to_lapack_zptr(T* ptr) {
+    static_assert(sizeof(T) == sizeof(lapack_complex_double),
+                  "sizes of lapack_complex_double and T given to btas::to_lapack_zptr do not match");
     return reinterpret_cast<lapack_complex_double*>(ptr);
   }
 
-//
-//  Other aliases for convenience
-//
+  //
+  //  Other aliases for convenience
+  //
 
-/// default size type
-typedef unsigned long size_type;
+  /// default size type
+  typedef unsigned long size_type;
 
-/// null deleter
-struct nulldeleter { void operator() (void const*) { } };
+  /// null deleter
+  struct nulldeleter {
+    void operator()(void const*) {}
+  };
 
 } // namespace btas
 

From b25e9f971a2c222456ddb4de89b4aec2bd90dfe7 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 12:03:44 -0500
Subject: [PATCH 46/66] Remove unnecessary instances of BTAS_HAS_INTEL_MKL

---
 btas/generic/coupled_cp_als.h | 2 --
 btas/generic/cp.h             | 4 ----
 btas/generic/cp_als.h         | 9 ---------
 btas/generic/cp_rals.h        | 6 ------
 btas/generic/swap.h           | 8 +-------
 5 files changed, 1 insertion(+), 28 deletions(-)

diff --git a/btas/generic/coupled_cp_als.h b/btas/generic/coupled_cp_als.h
index f23c0ca0..66845d23 100644
--- a/btas/generic/coupled_cp_als.h
+++ b/btas/generic/coupled_cp_als.h
@@ -122,7 +122,6 @@ namespace btas{
               "Tensor describing symmetries must be defined for all dimensions");
     }
 
-#ifdef BTAS_HAS_INTEL_MKL
     /// \brief Computes decomposition of the order-N tensor \c tensor
     /// with rank = \c RankStep * \c panels *  max_dim(reference_tensor) + max_dim(reference_tensor)
     /// Initial guess for factor matrices start at rank = max_dim(reference_tensor)
@@ -147,7 +146,6 @@ namespace btas{
             int max_als = 20,bool fast_pI = false, bool calculate_epsilon = false, bool direct = true) override{
       BTAS_EXCEPTION("Function not yet implemented");
     }
-#endif //BTAS_HAS_INTEL_MKL
 
   protected:
     Tensor& tensor_ref_left;        // Tensor in first term of the loss function
diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index 934c2ec5..2ca02d03 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -23,10 +23,6 @@
 #include <btas/generic/reconstruct.h>
 #include <btas/generic/linear_algebra.h>
 
-#ifdef BTAS_HAS_INTEL_MKL
-#include <mkl_trans.h>
-#endif
-
 namespace btas{
   namespace detail{
 
diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index a120c4be..62e9e6a7 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -12,10 +12,6 @@
 #include <iostream>
 #include <vector>
 
-#ifdef BTAS_HAS_INTEL_MKL
-#include <mkl_trans.h>
-#endif
-
 namespace btas{
   /** \brief Computes the Canonical Product (CP) decomposition of an order-N
     tensor using alternating least squares (ALS).
@@ -211,7 +207,6 @@ namespace btas{
       return epsilon;
     }
 
-#ifdef BTAS_HAS_INTEL_MKL
     /// \brief Computes an approximate core tensor using
     /// Tucker decomposition, e.g.
     ///  \f$ T(I_1 \dots I_N) \approx T(R_1 \dots R_N) U^{(1)} (R_1, I_1) \dots U^{(N)} (R_N, I_N) \f$
@@ -221,7 +216,6 @@ namespace btas{
     /// computed to either finite error or finite rank. Default settings
     /// calculate to finite error. Factor matrices from get_factor_matrices() are
     /// scaled by the Tucker transformations.
-    /// \note This requires Intel MKL.
 
     /// \param[in] tcutSVD Truncation threshold for SVD of each mode in Tucker
     /// decomposition.
@@ -295,7 +289,6 @@ namespace btas{
     /// either finite error or finite rank.
     /// Default settings calculate to finite error.
     /// Factor matrices are scaled by randomized transformation.
-    /// \note This requires Intel MKL.
 
     /// \param[in] desired_compression_rank The new dimension of each mode after
     /// randomized compression.
@@ -362,8 +355,6 @@ namespace btas{
       return epsilon;
     }
 
-#endif  //BTAS_HAS_INTEL_MKL
-
 
   protected:
     Tensor& tensor_ref;         // Tensor to be decomposed
diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h
index efea1c05..7a92f2e8 100644
--- a/btas/generic/cp_rals.h
+++ b/btas/generic/cp_rals.h
@@ -134,8 +134,6 @@ namespace btas{
 
     ~CP_RALS() = default;
 
-#ifdef BTAS_HAS_INTEL_MKL
-
     /// \brief Computes decomposition of the order-N tensor \c tensor
     /// with rank = \c RankStep * \c panels *  max_dim(reference_tensor) + max_dim(reference_tensor)
     /// Initial guess for factor matrices start at rank = max_dim(reference_tensor)
@@ -236,7 +234,6 @@ namespace btas{
     /// computed to either finite error or finite rank. Default settings
     /// calculate to finite error. Factor matrices from get_factor_matrices() are
     /// scaled by the Tucker transformations.
-    /// \note This requires Intel MKL.
 
     /// \param[in] tcutSVD Truncation threshold for SVD of each mode in Tucker
     /// decomposition.
@@ -311,7 +308,6 @@ namespace btas{
     /// either finite error or finite rank.
     /// Default settings calculate to finite error.
     /// Factor matrices are scaled by randomized transformation.
-    /// \note This requires Intel MKL.
 
     /// \param[in] desired_compression_rank The new dimension of each mode after
     /// randomized compression.
@@ -379,8 +375,6 @@ namespace btas{
       return epsilon;
     }
 
-#endif  //BTAS_HAS_INTEL_MKL
-
   protected:
     int size;                   // number of elements in tensor_ref
     Tensor & tensor_ref;        // Tensor to be decomposed
diff --git a/btas/generic/swap.h b/btas/generic/swap.h
index b8f29cec..e2dc2742 100644
--- a/btas/generic/swap.h
+++ b/btas/generic/swap.h
@@ -32,9 +32,6 @@ namespace btas {
 template <typename Tensor>
 void swap_to_first(Tensor &A, int mode, bool is_in_front = false,
                    bool for_ALS_update = true) {
-#ifndef BTAS_HAS_INTEL_MKL
-  BTAS_EXCEPTION("In place tranposes require MKL");
-#endif
   // If the mode of interest is the the first mode you are done.
   if(mode > A.rank()){
     BTAS_EXCEPTION("Mode index is greater than tensor rank");
@@ -120,10 +117,7 @@ void swap_to_first(Tensor &A, int mode, bool is_in_front = false,
 /// back. Default = false.
 
 template <typename Tensor>
-void swap_to_back(Tensor &A, int mode, bool is_in_back = false) {
-#ifndef BTAS_HAS_INTEL_MKL
-  BTAS_EXCEPTION("In place tranposes require MKL");
-#endif
+void swap_to_back(Tensor &A, int mode, bool is_in_back = false){
   if (mode > A.rank())
     BTAS_EXCEPTION_MESSAGE(__FILE__, __LINE__,
                            "mode > A.rank(), mode out of range");

From 45d8e4237207892d5faf9d7d4d28203bed9b29ba Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Tue, 28 Jan 2020 12:09:46 -0500
Subject: [PATCH 47/66] added exportable BTAS target (in BTAS:: namespace)

---
 CMakeLists.txt              | 144 ++++++++++++++++++++++++++----------
 bin/travisci_build_linux.sh |   4 +-
 cmake/btas-config.cmake.in  |  53 +++++++++++++
 external/boost.cmake        |  30 +++++---
 external/lapack.cmake       |  68 -----------------
 unittest/CMakeLists.txt     |   8 +-
 6 files changed, 183 insertions(+), 124 deletions(-)
 create mode 100644 cmake/btas-config.cmake.in
 delete mode 100644 external/lapack.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 35d2d5ef..7e2db5f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,13 +1,15 @@
 #; -*-CMake-*-
 
-cmake_minimum_required (VERSION 3.1.0)
+cmake_minimum_required (VERSION 3.8.0)  # cxx_std_11 compile feature
 project (BTAS)
+enable_language(CXX)
 
 # extra cmake files are shipped with BTAS
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules")
 
 include(FeatureSummary)
 include(RedefaultableOption)
+include(CMakePackageConfigHelpers)
 
 # Configure options
 redefaultable_option(BTAS_BUILD_UNITTEST "Whether to build unit tests" OFF)
@@ -24,14 +26,37 @@ add_feature_info("TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}" TRUE "default
 set(BTAS_MAJOR_VERSION 1)
 set(BTAS_MINOR_VERSION 0)
 set(BTAS_MICRO_VERSION 0)
-set(BTAS_PRERELEASE alpha)
-set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}-${BTAS_PRERELEASE}")
+set(BTAS_PRERELEASE_ID alpha)
+set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}")
+if (BTAS_PRERELEASE_ID)
+  set(BTAS_EXT_VERSION "${BTAS_VERSION}-${BTAS_PRERELEASE_ID}")
+else(BTAS_PRERELEASE_ID)
+  set(BTAS_EXT_VERSION "${BTAS_VERSION}")
+endif(BTAS_PRERELEASE_ID)
 set(TARGET_ARCH "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
 
 include(CMakePushCheckState)
 include(GNUInstallDirs)
 include(AppendFlags)
 
+##########################
+# Standard build variables
+##########################
+set(BTAS_INSTALL_BINDIR "bin"
+    CACHE PATH "BTAS BIN install directory")
+set(BTAS_INSTALL_INCLUDEDIR "include"
+    CACHE PATH "BTAS INCLUDE install directory")
+set(BTAS_INSTALL_LIBDIR "lib"
+    CACHE PATH "BTAS LIB install directory")
+set(BTAS_INSTALL_SHAREDIR "share/BTAS/${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}"
+    CACHE PATH "BTAS SHARE install directory")
+set(BTAS_INSTALL_DATADIR "${BTAS_INSTALL_SHAREDIR}/data"
+    CACHE PATH "BTAS DATA install directory")
+set(BTAS_INSTALL_DOCDIR "${BTAS_INSTALL_SHAREDIR}/doc"
+    CACHE PATH "BTAS DOC install directory")
+set(BTAS_INSTALL_CMAKEDIR "lib/cmake/BTAS"
+    CACHE PATH "BTAS CMAKE install directory")
+
 ##########################
 # Standard build variables
 ##########################
@@ -48,69 +73,81 @@ endif()
 if(NOT CMAKE_EXE_LINKER_FLAGS OR NOT DEFINED CMAKE_EXE_LINKER_FLAGS)
   set(CMAKE_EXE_LINKER_FLAGS "$ENV{LDFLAGS}")
 endif()
-
-enable_language (CXX)
 if (NOT CMAKE_CXX_COMPILER)
   message(FATAL_ERROR "C++ compiler not found")
 endif()
 
-# Set the default fortran integer type.
-set(INTEGER4 "FALSE" CACHE BOOL "Set the default Fortran integer type to integer*4")
-mark_as_advanced(INTEGER4)
-
 set(CMAKE_SKIP_RPATH FALSE)
-set(BUILD_TESTING FALSE CACHE BOOL "BUILD_TESTING")
-set(BUILD_TESTING_STATIC FALSE CACHE BOOL "BUILD_TESTING_STATIC")
-set(BUILD_TESTING_SHARED FALSE CACHE BOOL "BUILD_TESTING_SHARED")
 
 ##########################
 # We use C++11 features
 ##########################
-set(CMAKE_CXX_STANDARD 11)
+# but insist on strict standard
+set(CMAKE_CXX_STANDARD 11 CACHE STRING "C++ ISO Standard version")
+if (NOT(CMAKE_CXX_STANDARD EQUAL 11 OR CMAKE_CXX_STANDARD EQUAL 14 OR CMAKE_CXX_STANDARD EQUAL 17 OR CMAKE_CXX_STANDARD EQUAL 20))
+  message(FATAL_ERROR "C++ 2011 ISO Standard or higher is required to compile BTAS")
+endif()
+# C++20 is only configurable via compile features with cmake 3.12 and older
+if (CMAKE_CXX_STANDARD EQUAL 20 AND CMAKE_VERSION VERSION_LESS 3.12.0)
+  cmake_minimum_required (VERSION 3.12.0)
+endif()
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL  "Whether to use extensions of C++ ISO Standard version")
 
 # Check type support
 include(CheckTypeSize)
 check_type_size("long double" BTAS_HAS_LONG_DOUBLE)
 check_type_size("long long" BTAS_HAS_LONG_LONG)
 
+#######################################
+# create exportable BTAS library target
+#######################################
+add_library(BTAS INTERFACE)
+install(TARGETS BTAS EXPORT btas COMPONENT BTAS)
+target_compile_features(BTAS INTERFACE "cxx_std_11")
+target_include_directories(BTAS INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_PREFIX}/${BTAS_INSTALL_INCLUDEDIR}>)
+install(DIRECTORY btas
+    COMPONENT BTAS
+    DESTINATION "${BTAS_INSTALL_INCLUDEDIR}"
+    FILES_MATCHING PATTERN "*.h"
+                   PATTERN "*.h.in" EXCLUDE
+    )
+
 ##########################
 # external dependencies
 ##########################
 add_custom_target(External)
 
-if (BTAS_BUILD_UNITTEST)
-  if (BTAS_USE_CBLAS_LAPACKE)
-    find_package(CBLAS REQUIRED)
-    find_package(LAPACKE REQUIRED)
-    add_definitions(-DBTAS_CBLAS_HEADER="${CBLAS_INCLUDE_FILE}" -DBTAS_LAPACKE_HEADER="${LAPACKE_INCLUDE_FILE}" -DBTAS_HAS_CBLAS=1 -DBTAS_HAS_LAPACKE=1)
-    message(STATUS "** CBLAS_LIBRARIES = ${CBLAS_LIBRARIES}")
-    message(STATUS "** CBLAS_INCLUDE_DIR = ${CBLAS_INCLUDE_DIR}")
-    message(STATUS "** CBLAS_INCLUDE_FILE = ${CBLAS_INCLUDE_FILE}")
-    message(STATUS "** LAPACKE_LIBRARIES = ${LAPACKE_LIBRARIES}")
-    message(STATUS "** LAPACKE_INCLUDE_DIR = ${LAPACKE_INCLUDE_DIR}")
-    message(STATUS "** LAPACKE_INCLUDE_FILE = ${LAPACKE_INCLUDE_FILE}")
-    include_directories(${CBLAS_INCLUDE_DIR} ${LAPACKE_INCLUDE_DIR})
-    if (MKL_FOUND)
-      add_definitions(-DBTAS_HAS_INTEL_MKL=1)
-    endif(MKL_FOUND)
-  endif(BTAS_USE_CBLAS_LAPACKE)
-  include(external/boost.cmake)
-endif()
+if (BTAS_USE_CBLAS_LAPACKE)
+  find_package(CBLAS REQUIRED)
+  find_package(LAPACKE REQUIRED)
+  target_compile_definitions(BTAS INTERFACE -DBTAS_CBLAS_HEADER="${CBLAS_INCLUDE_FILE}" -DBTAS_LAPACKE_HEADER="${LAPACKE_INCLUDE_FILE}" -DBTAS_HAS_CBLAS=1 -DBTAS_HAS_LAPACKE=1)
+  message(STATUS "** CBLAS_LIBRARIES = ${CBLAS_LIBRARIES}")
+  message(STATUS "** CBLAS_INCLUDE_DIR = ${CBLAS_INCLUDE_DIR}")
+  message(STATUS "** CBLAS_INCLUDE_FILE = ${CBLAS_INCLUDE_FILE}")
+  message(STATUS "** LAPACKE_LIBRARIES = ${LAPACKE_LIBRARIES}")
+  message(STATUS "** LAPACKE_INCLUDE_DIR = ${LAPACKE_INCLUDE_DIR}")
+  message(STATUS "** LAPACKE_INCLUDE_FILE = ${LAPACKE_INCLUDE_FILE}")
+  target_include_directories(BTAS INTERFACE ${CBLAS_INCLUDE_DIR} ${LAPACKE_INCLUDE_DIR})
+  if (MKL_FOUND)
+    target_compile_definitions(BTAS INTERFACE -DBTAS_HAS_INTEL_MKL=1)
+  endif(MKL_FOUND)
+  target_link_libraries(BTAS INTERFACE ${LAPACKE_LIBRARIES} ${CBLAS_LIBRARIES})
+endif(BTAS_USE_CBLAS_LAPACKE)
+
+include(external/boost.cmake)
 
 ##########################
 # configure BTAS_ASSERT
 ##########################
 if (BTAS_ASSERT_THROWS)
-  add_definitions(-DBTAS_ASSERT_THROWS=1)
+  target_compile_definitions(BTAS INTERFACE -DBTAS_ASSERT_THROWS=1)
 endif(BTAS_ASSERT_THROWS)
 
 ##########################
-# sources
+# dox
 ##########################
-
-include_directories(${PROJECT_SOURCE_DIR})
 add_subdirectory(doc)
 
 ##########################
@@ -138,6 +175,37 @@ ADD_CUSTOM_TARGET(release
   COMMENT "Switch CMAKE_BUILD_TYPE to Release"
   )
 
+# Create the version file
+write_basic_package_version_file(btas-config-version.cmake
+    VERSION ${BTAS_VERSION} COMPATIBILITY AnyNewerVersion)
+
+## Create the configure file
+configure_package_config_file(cmake/btas-config.cmake.in
+    "${PROJECT_BINARY_DIR}/btas-config.cmake"
+    INSTALL_DESTINATION "${BTAS_INSTALL_CMAKEDIR}"
+    PATH_VARS CMAKE_INSTALL_PREFIX BTAS_INSTALL_BINDIR
+    BTAS_INSTALL_INCLUDEDIR BTAS_INSTALL_LIBDIR
+    BTAS_INSTALL_DOCDIR BTAS_INSTALL_CMAKEDIR)
+
+## Install config, version, and target files
+install(EXPORT btas
+    FILE "btas-targets.cmake"
+    DESTINATION "${BTAS_INSTALL_CMAKEDIR}"
+    NAMESPACE BTAS::
+    COMPONENT btas-config)
+install(FILES
+    "${PROJECT_BINARY_DIR}/btas-config.cmake"
+    "${PROJECT_BINARY_DIR}/btas-config-version.cmake"
+    DESTINATION "${BTAS_INSTALL_CMAKEDIR}"
+    COMPONENT btas-config)
+add_custom_target(install-config
+    COMMAND ${CMAKE_COMMAND} -DCOMPONENT=btas-config -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
+    COMMENT "Installing BTAS config components")
+
 feature_summary(WHAT ALL
-                DESCRIPTION "=== BTAS Feature Info ===")
-                
+    DESCRIPTION "=== BTAS Package/Feature Info ===")
+
+###############################################################################
+# appendix: misc details
+###############################################################################
+SET(CMAKE_COLOR_MAKEFILE ON)
diff --git a/bin/travisci_build_linux.sh b/bin/travisci_build_linux.sh
index 49f8bd29..43f9e8b8 100755
--- a/bin/travisci_build_linux.sh
+++ b/bin/travisci_build_linux.sh
@@ -17,7 +17,7 @@ cd ${BUILD_PREFIX}
 ##########   test with blas+lapack   ##########
 mkdir build_cblas
 cd build_cblas
-cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON
+cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON
 make VERBOSE=1
 make check VERBOSE=1
 cd ..
@@ -25,7 +25,7 @@ cd ..
 ########## test without blas+lapack   ##########
 mkdir build
 cd build
-cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_USE_CBLAS_LAPACKE=OFF  -DBoost_FOUND=ON -DBoost_INCLUDE_DIRS=/usr/include/boost -DBoost_LIBRARIES=/usr/lib/x86_64-linux-gnu/libboost_serialization.a -DSKIP_BOOST_SEARCH=ON
+cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_USE_CBLAS_LAPACKE=OFF
 make VERBOSE=1
 make check VERBOSE=1
 cd ..
diff --git a/cmake/btas-config.cmake.in b/cmake/btas-config.cmake.in
new file mode 100644
index 00000000..bfa37a3f
--- /dev/null
+++ b/cmake/btas-config.cmake.in
@@ -0,0 +1,53 @@
+# - CMAKE Config file for the BTAS package
+# This will define the following CMake cache variables
+#
+#    BTAS_FOUND           - true if BTAS library were found
+#    BTAS_VERSION         - the libint2 version
+#    BTAS_EXT_VERSION     - the libint2 version including the (optional) buildid, such as beta.3
+#
+# and the following imported targets
+#
+#     BTAS::BTAS          - the BTAS library
+#
+
+# Set package version
+set(BTAS_VERSION "@BTAS_VERSION@")
+set(BTAS_EXT_VERSION "@BTAS_EXT_VERSION@")
+
+@PACKAGE_INIT@
+
+if(NOT TARGET Boost::boost)
+  set(Boost_USE_CONFIG @Boost_USE_CONFIG@)
+  set(Boost_BTAS_DEPS_LIBRARIES @Boost_BTAS_DEPS_LIBRARIES@)
+  if (Boost_USE_CONFIG)
+    set(Boost_CONFIG @Boost_CONFIG@)
+    if (NOT Boost_CONFIG OR NOT EXISTS ${Boost_CONFIG})
+      message(FATAL_ERROR "Expected Boost config file at ${Boost_CONFIG}; directory moved since BTAS configuration?")
+    endif()
+    get_filename_component(Boost_DIR ${Boost_CONFIG} DIRECTORY)
+    find_package(Boost CONFIG QUIET REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES} PATHS ${Boost_DIR} NO_DEFAULT_PATH)
+  else (Boost_USE_CONFIG)
+    set(BOOST_INCLUDEDIR @Boost_INCLUDE_DIR@)
+    set(BOOST_LIBRARYDIR @Boost_LIBRARY_DIR_RELEASE@)
+    if (NOT BOOST_LIBRARYDIR OR NOT EXISTS ${BOOST_LIBRARYDIR})
+      set(BOOST_LIBRARYDIR @Boost_LIBRARY_DIR_DEBUG@)
+    endif()
+    set(Boost_NO_SYSTEM_PATHS OFF)
+    if (BOOST_LIBRARYDIR AND BOOST_INCLUDEDIR)
+      if (EXISTS ${BOOST_LIBRARYDIR} AND EXISTS ${BOOST_INCLUDEDIR})
+        set(Boost_NO_SYSTEM_PATHS ON)
+      endif()
+    endif()
+    find_package(Boost QUIET REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES})
+  endif (Boost_USE_CONFIG)
+endif()
+
+# Include library IMPORT targets
+if(NOT TARGET BTAS::BTAS)
+  include("${CMAKE_CURRENT_LIST_DIR}/btas-targets.cmake")
+  if(NOT TARGET BTAS::BTAS)
+    message(FATAL_ERROR "expected BTAS::BTAS among imported BTAS targets")
+  endif()
+endif()
+
+set(BTAS_FOUND TRUE)
diff --git a/external/boost.cmake b/external/boost.cmake
index ef63f49d..65bf0777 100644
--- a/external/boost.cmake
+++ b/external/boost.cmake
@@ -4,19 +4,27 @@
 if (BOOST_ROOT OR BOOST_INCLUDEDIR)
   set(Boost_NO_SYSTEM_PATHS TRUE)
 endif()
-#set(Boost_DEBUG TRUE)
 
-# Check for Boost, unless told otherwise (then must set Boost_FOUND, Boost_INCLUDE_DIRS, Boost_LIBRARIES)
-if (NOT SKIP_BOOST_SEARCH)
-  find_package(Boost 1.33 REQUIRED OPTIONAL_COMPONENTS serialization container)
-endif()
+if (NOT TARGET Boost::boost)
+  # Boost::boost is defined since 3.5
+  cmake_minimum_required(VERSION 3.5.0)
+  # try config first
+  set(Boost_BTAS_DEPS_LIBRARIES serialization container)
+  find_package(Boost CONFIG COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES})
+  if (NOT TARGET Boost::boost)
+    find_package(Boost REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES})
+    set(Boost_USE_CONFIG FALSE)
+  else()
+    set(Boost_USE_CONFIG TRUE)
+  endif()
+endif (NOT TARGET Boost::boost)
 
 # Perform a compile check with Boost
 list(APPEND CMAKE_REQUIRED_INCLUDES ${Boost_INCLUDE_DIRS})
 list(APPEND CMAKE_REQUIRED_LIBRARIES ${Boost_LIBRARIES})
-add_definitions(-DBTAS_HAS_BOOST_ITERATOR=1)
+target_compile_definitions(BTAS INTERFACE -DBTAS_HAS_BOOST_ITERATOR=1)
 if (Boost_CONTAINER_FOUND)
-  add_definitions(-DBTAS_HAS_BOOST_CONTAINER=1 -DBTAS_TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK})
+  target_compile_definitions(BTAS INTERFACE -DBTAS_HAS_BOOST_CONTAINER=1 -DBTAS_TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK})
 endif()
 
 include(CheckCXXSourceRuns)
@@ -86,14 +94,12 @@ CHECK_CXX_SOURCE_RUNS(
     "  BOOST_COMPILES_AND_RUNS)
 
 if (BOOST_COMPILES_AND_RUNS)
-    add_definitions(-DBTAS_HAS_BOOST_SERIALIZATION=1)
+    target_compile_definitions(BTAS INTERFACE -DBTAS_HAS_BOOST_SERIALIZATION=1)
 else ()
   message(STATUS "Boost found at ${BOOST_ROOT}, but could not compile and/or run test program")
   message(WARNING "To obtain usable Boost, use your system package manager (HomeBrew, apt, etc.) OR download at www.boost.org and compile (unpacking alone is not enough)")
   message(WARNING "** !! due to missing Boost.Serialization the corresponding unit tests will be disabled !!")
 endif(BOOST_COMPILES_AND_RUNS)
 
-if (NOT BOOST_COMPILES_AND_RUNS)
-endif()
-
-include_directories(${Boost_INCLUDE_DIRS})
+target_include_directories(BTAS INTERFACE ${Boost_INCLUDE_DIRS})
+target_link_libraries(BTAS INTERFACE ${Boost_LIBRARIES})
diff --git a/external/lapack.cmake b/external/lapack.cmake
deleted file mode 100644
index 8446d9e2..00000000
--- a/external/lapack.cmake
+++ /dev/null
@@ -1,68 +0,0 @@
-# Find BLAS and LAPACK.
-include(CheckCFortranFunctionExists)
-include(CMakePushCheckState)
-
-if(LAPACK_LIBRARIES OR BLAS_LIBRARIES OR LAPACK_LINKER_FLAGS OR BLAS_LINKER_FLAGS)
-  # Here we verify that the we can link against LAPACK and BLAS based on the
-  # given library and linker flags. If BLAS_FOUND and/or LAPACK_FOUND are true,
-  # we assume that these values have been verified.
-
-  if(NOT BLAS_FOUND)
-    # Verify that we can link against BLAS
-
-    cmake_push_check_state()
-    
-    if(UNIX AND BLA_STATIC AND BLAS_LIBRARIES)
-      set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LINKER_FLAGS} "-Wl,-start-group"
-          ${BLAS_LIBRARIES} "-Wl,-end-group" ${CMAKE_THREAD_LIBS_INIT}
-          ${CMAKE_REQUIRED_LIBRARIES})
-    else()
-      set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LINKER_FLAGS} ${BLAS_LIBRARIES}
-          ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_REQUIRED_LIBRARIES})
-    endif()
-
-    check_c_fortran_function_exists(sgemm BLAS_FOUND)
-
-    if(BLAS_FOUND)
-      message(STATUS "A library with BLAS API found.")
-    else()
-      message(FATAL_ERROR "The user specified BLAS libraries do not support the BLAS API.\nRerun cmake with BLAS_LIBRARIES and/or BLAS_LINKER_FLAGS.")
-    endif()
-    
-    cmake_pop_check_state()
-  endif()
-  
-  if(NOT LAPACK_FOUND)
-    # Verify that we can link against LAPACK
-
-    cmake_push_check_state()
-
-    if(UNIX AND BLA_STATIC AND LAPACK_LIBRARIES)
-      set(CMAKE_REQUIRED_LIBRARIES ${LAPACK_LINKER_FLAGS} "-Wl,--start-group"
-          ${LAPACK_LIBRARIES} "-Wl,--end-group" ${CMAKE_THREAD_LIBS_INIT}
-          ${CMAKE_REQUIRED_LIBRARIES})
-    else()
-      set(CMAKE_REQUIRED_LIBRARIES ${LAPACK_LINKER_FLAGS} ${LAPACK_LIBRARIES} 
-          ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_REQUIRED_LIBRARIES})
-    endif()
-
-    check_c_fortran_function_exists(cheev LAPACK_FOUND)
-
-    if(LAPACK_FOUND)
-      message(STATUS "A library with LAPACK API found.")
-    else(LAPACK_FOUND)
-      message(FATAL_ERROR "The user specified LAPACK libraries do not support the LAPACK API.\nRerun cmake with LAPACK_LIBRARIES and/or LAPACK_LINKER_FLAGS.")
-    endif(LAPACK_FOUND)
-    
-    cmake_pop_check_state()
-    
-  endif()
-else()
-  # Try to find BLAS and LAPACK
-  find_package(BLAS REQUIRED)
-  find_package(LAPACK REQUIRED)
-endif()
-
-append_flags(CMAKE_EXE_LINKER_FLAGS "${BLAS_LINKER_FLAGS}")
-append_flags(CMAKE_EXE_LINKER_FLAGS "${LAPACK_LINKER_FLAGS}")
-
diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt
index 806dbf68..be62f6e9 100644
--- a/unittest/CMakeLists.txt
+++ b/unittest/CMakeLists.txt
@@ -12,12 +12,12 @@ set(btas_test_src_files
 )
 add_executable(${executable} EXCLUDE_FROM_ALL ${btas_test_src_files})
 # Add include directories and compiler flags for ta_test
-include_directories(
+target_include_directories(${executable} PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}
     ${CMAKE_CURRENT_BINARY_DIR}
-    ${Boost_INCLUDE_DIRS})
-target_link_libraries(${executable} ${Boost_LIBRARIES} ${LAPACKE_LIBRARIES} ${CBLAS_LIBRARIES})
-append_flags(CMAKE_EXE_LINKER_FLAGS "${BLAS_LINKER_FLAGS}")
+)
+target_link_libraries(${executable} BTAS)
+#append_flags(CMAKE_EXE_LINKER_FLAGS "${BLAS_LINKER_FLAGS}")
 
 # Add targets
 add_dependencies(${executable} External)

From ed75bf8ae1e1da1eef6ec4ff439a2131c4e68b8f Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 12:09:59 -0500
Subject: [PATCH 48/66] Update linear algebra documentation

---
 btas/generic/linear_algebra.h | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index 78ccbef1..8bfbf980 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -95,6 +95,7 @@ namespace btas{
 /// Computes the QR decomposition of matrix \c A
 /// \param[in, out] A In: A Reference matrix to be QR decomposed.  Out:
 /// The Q of a QR decomposition of \c A.
+/// \return bool true if QR was successful false if failed.
 
   template <typename Tensor> bool QR_decomp(Tensor &A) {
 
@@ -136,6 +137,7 @@ namespace btas{
 /// Computes the inverse of a matrix \c A using a pivoted LU decomposition
 /// \param[in, out] A In: A reference matrix to be inverted. Out:
 /// The inverse of A, computed using LU decomposition.
+/// \return bool true if inversion was successful false if failed
   template <typename Tensor>
   bool Inverse_Matrix(Tensor & A){
 
@@ -195,10 +197,13 @@ namespace btas{
   }
 
   /// Solving Ax = B using a Cholesky decomposition
-  /// \param[in] A The right-hand side of the linear equation
-  /// to be inverted using Cholesky
+  /// \param[in, out] A In: The right-hand side of the linear equation
+  /// to be inverted using Cholesky. Out:
+  /// the factors L and U from the factorization A = P*L*U;
+  /// the unit diagonal elements of L are not stored.
   /// \param[in, out] B In: The left-hand side of the linear equation
   /// out: The solution x = A^{-1}B
+  /// \return bool true if inversion was successful false if failed.
 template <typename Tensor>
 bool cholesky_inverse(Tensor & A, Tensor & B){
 #ifndef BTAS_HAS_LAPACKE
@@ -228,28 +233,28 @@ bool cholesky_inverse(Tensor & A, Tensor & B){
 /// Fast pseudo-inverse algorithm described in
 /// https://arxiv.org/pdf/0804.4809.pdf
 
-/// \param[in] a matrix to be inverted.
-/// \return a^{\dagger} The pseudoinverse of the matrix a.
+/// \param[in] A In: A reference to the matrix to be inverted.
+/// \return \f$ A^{\dagger} \f$ The pseudoinverse of the matrix A.
 template <typename Tensor>
-Tensor pseudoInverse(Tensor & a, bool & fast_pI) {
+Tensor pseudoInverse(Tensor & A, bool & fast_pI) {
 #ifndef BTAS_HAS_LAPACKE
     BTAS_EXCEPTION("Computing the pseudoinverses requires LAPACKE");
 #else //BTAS_HAS_LAPACKE
 
-    if (a.rank() > 2) {
+    if (A.rank() > 2) {
       BTAS_EXCEPTION("PseudoInverse can only be computed on a matrix");
     }
 
-    auto row = a.extent(0), col = a.extent(1);
+    auto row = A.extent(0), col = A.extent(1);
     auto rank = (row < col ? row : col);
 
     if (fast_pI) {
       Tensor temp(col, col), inv(col, row);
       // compute V^{\dag} = (A^T A) ^{-1} A^T
-      gemm(CblasTrans, CblasNoTrans, 1.0, a, a, 0.0, temp);
+      gemm(CblasTrans, CblasNoTrans, 1.0, A, A, 0.0, temp);
       fast_pI = Inverse_Matrix(temp);
       if (fast_pI) {
-        gemm(CblasNoTrans, CblasTrans, 1.0, temp, a, 0.0, inv);
+        gemm(CblasNoTrans, CblasTrans, 1.0, temp, A, 0.0, inv);
         return inv;
       } else {
         std::cout << "Fast pseudo-inverse failed reverting to normal pseudo-inverse" << std::endl;
@@ -259,7 +264,7 @@ Tensor pseudoInverse(Tensor & a, bool & fast_pI) {
     Tensor U(Range{Range1{row}, Range1{row}});
     Tensor Vt(Range{Range1{col}, Range1{col}});
 
-    gesvd('A', 'A', a, s, U, Vt);
+    gesvd('A', 'A', A, s, U, Vt);
 
     // Inverse the Singular values with threshold 1e-13 = 0
     double lr_thresh = 1e-13;

From dc029456ecaa6c5871642b788b4308be9626e764 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Tue, 28 Jan 2020 12:10:22 -0500
Subject: [PATCH 49/66] dox cleanup [skip ci]

---
 cmake/btas-config.cmake.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmake/btas-config.cmake.in b/cmake/btas-config.cmake.in
index bfa37a3f..f6469ed1 100644
--- a/cmake/btas-config.cmake.in
+++ b/cmake/btas-config.cmake.in
@@ -2,8 +2,8 @@
 # This will define the following CMake cache variables
 #
 #    BTAS_FOUND           - true if BTAS library were found
-#    BTAS_VERSION         - the libint2 version
-#    BTAS_EXT_VERSION     - the libint2 version including the (optional) buildid, such as beta.3
+#    BTAS_VERSION         - the BTAS version
+#    BTAS_EXT_VERSION     - the BTAS version including the (optional) buildid, such as beta.3
 #
 # and the following imported targets
 #

From f9345229b0e534953b5efed417dd7df7d2edec32 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 12:16:05 -0500
Subject: [PATCH 50/66] Update pseudoinverse_helper documentation

---
 btas/generic/cp.h | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index 2ca02d03..50a8a901 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -555,24 +555,26 @@ namespace btas{
     /// Fast pseudo-inverse algorithm described in
     /// https://arxiv.org/pdf/0804.4809.pdf
 
-    /// \param[in] n The mode being optimized, all other modes held constant
-    /// \param[in] R The current rank, column dimension of the factor matrices
-    /// \param[in,out] fast_pI If true, try to compute the pseudo inverse via fast Cholesky decomposition, else use SVD;
-    ///                on return reports whether the fast route was used.
-    /// \param[in] lambda Regularization parameter lambda is added to the diagonal of V
-    /// \return The pseudoinverse of the matrix V.
-
     /// Trying to solve Ax = B
     /// First try Cholesky to solve this problem directly
     /// second tryfast pseudo-inverse algorithm described in
     /// https://arxiv.org/pdf/0804.4809.pdf
     /// If all else fails use SVD
-    // TODO write docs here
+
+    /// \param[in] mode_of_A The mode being optimized used to compute hadamard LHS (V) of ALS problem (Vx = B)
+    /// \param[in,out] fast_pI If true, try to compute the pseudo inverse via fast LU decomposition, else use SVD;
+    ///                on return reports whether the fast route was used.
+    /// \param[in, out] cholesky If true, try to solve the linear equation Vx = B (the ALS problem)
+    ///                using a Cholesky decomposition (lapacke subroutine) on return reports if
+    ///                inversion was successful.
+    /// \param[in, out] B In: The RHS of the ALS problem ( Vx = B ). Out: The solved linear equation
+    ///                     \f$ V^{-1} B \f$
+    /// \param[in] lambda Regularization parameter lambda is added to the diagonal of V
     void psuedoinverse_helper(int mode_of_A, bool & fast_pI,
-                                bool & cholesky, Tensor & B = Tensor(),
+                                bool & cholesky, Tensor & B,
                                 double lambda = 0.0){
-      if(cholesky && B.empty()){
-        BTAS_EXCEPTION("To compute the Cholesky, one must provide a LHS tensor (Ax = B)");
+      if(B.empty()){
+        BTAS_EXCEPTION("pseudoinverse helper solves Ax = B.  B cannot be an empty tensor");
       }
 
       auto rank = A[0].extent(1);

From 6f3d1374b434cdcb87173922fe9771b6e8101b50 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Tue, 28 Jan 2020 12:22:14 -0500
Subject: [PATCH 51/66] no container boost component, it's part of Boost::boost

---
 external/boost.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/boost.cmake b/external/boost.cmake
index 65bf0777..7da8b6cb 100644
--- a/external/boost.cmake
+++ b/external/boost.cmake
@@ -9,7 +9,7 @@ if (NOT TARGET Boost::boost)
   # Boost::boost is defined since 3.5
   cmake_minimum_required(VERSION 3.5.0)
   # try config first
-  set(Boost_BTAS_DEPS_LIBRARIES serialization container)
+  set(Boost_BTAS_DEPS_LIBRARIES boost serialization)
   find_package(Boost CONFIG COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES})
   if (NOT TARGET Boost::boost)
     find_package(Boost REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES})

From c966011c19b30fc6c926606827e726a78b25c868 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Tue, 28 Jan 2020 12:25:32 -0500
Subject: [PATCH 52/66] don't search for Boost::boost

---
 external/boost.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/boost.cmake b/external/boost.cmake
index 7da8b6cb..f63e7d99 100644
--- a/external/boost.cmake
+++ b/external/boost.cmake
@@ -9,7 +9,7 @@ if (NOT TARGET Boost::boost)
   # Boost::boost is defined since 3.5
   cmake_minimum_required(VERSION 3.5.0)
   # try config first
-  set(Boost_BTAS_DEPS_LIBRARIES boost serialization)
+  set(Boost_BTAS_DEPS_LIBRARIES serialization)
   find_package(Boost CONFIG COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES})
   if (NOT TARGET Boost::boost)
     find_package(Boost REQUIRED COMPONENTS ${Boost_BTAS_DEPS_LIBRARIES})

From e1efc87fe363db17ff42fb1615acdf5be8bc75ff Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Tue, 28 Jan 2020 12:34:28 -0500
Subject: [PATCH 53/66] [travis] DO build unit tests

---
 bin/travisci_build_linux.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/travisci_build_linux.sh b/bin/travisci_build_linux.sh
index 43f9e8b8..17621fe0 100755
--- a/bin/travisci_build_linux.sh
+++ b/bin/travisci_build_linux.sh
@@ -17,7 +17,7 @@ cd ${BUILD_PREFIX}
 ##########   test with blas+lapack   ##########
 mkdir build_cblas
 cd build_cblas
-cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON
+cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_BUILD_UNITTEST=ON
 make VERBOSE=1
 make check VERBOSE=1
 cd ..
@@ -25,7 +25,7 @@ cd ..
 ########## test without blas+lapack   ##########
 mkdir build
 cd build
-cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON -DBTAS_USE_CBLAS_LAPACKE=OFF
+cmake ${TRAVIS_BUILD_DIR} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBTAS_ASSERT_THROWS=ON  -DBTAS_BUILD_UNITTEST=ON -DBTAS_USE_CBLAS_LAPACKE=OFF
 make VERBOSE=1
 make check VERBOSE=1
 cd ..

From 5ec5eab392d5681845db22cea9013c1329aec715 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 13:28:12 -0500
Subject: [PATCH 54/66] Allow tucker to be used without MKL. Use contract
 (unoptimized swapping) instead of core_contract engine (in place transposes)

---
 btas/generic/tucker.h | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h
index 36704d93..bd6086c0 100644
--- a/btas/generic/tucker.h
+++ b/btas/generic/tucker.h
@@ -1,10 +1,9 @@
 #ifndef BTAS_TUCKER_DECOMP_H
 #define BTAS_TUCKER_DECOMP_H
 
-#ifdef BTAS_HAS_INTEL_MKL
-
 #include <btas/generic/core_contract.h>
 #include <btas/generic/flatten.h>
+#include <btas/generic/contract.h>
 
 #include <cstdlib>
 
@@ -24,6 +23,10 @@ void tucker_compression(Tensor &A, double epsilon_svd,
   double norm2 = dot(A,A);
   //norm2 *= norm2;
   auto ndim = A.rank();
+  std::vector<int> A_modes;
+  for(int i = 0; i< ndim;++i){
+    A_modes.push_back(i);
+  }
 
   for (int i = 0; i < ndim; i++) {
     // Determine the threshold epsilon_SVD.
@@ -37,10 +40,7 @@ void tucker_compression(Tensor &A, double epsilon_svd,
     gemm(CblasNoTrans, CblasTrans, 1.0, flat, flat, 0.0, S);
 
     // Calculate SVD of smaller object.
-    auto info = LAPACKE_dsyev(LAPACK_ROW_MAJOR, 'V', 'L', R, S.data(), R,
-                              lambda.data());
-    if (info)
-      BTAS_EXCEPTION("Error in computing the tucker SVD");
+    eigenvalue_decomp(S, lambda);
 
     // Find the truncation rank based on the threshold.
     int rank = 0;
@@ -60,10 +60,27 @@ void tucker_compression(Tensor &A, double epsilon_svd,
     // Push the factor matrix back as a transformation.
     transforms.push_back(lambda);
 
+#ifdef BTAS_HAS_INTEL_MKL
     // Contract the factor matrix with the reference tensor, A.
     core_contract(A, lambda, i);
+#else
+    std::vector<int> contract_modes;
+    contract_modes.push_back(i); contract_modes.push_back(ndim);
+    std::vector<int> final_modes;
+    std::vector<int> final_dims;
+    for(int j = 0; j < ndim; ++j){
+      if(j == i){
+        final_modes.push_back(ndim);
+        final_dims.push_back(rank);
+      } else{
+        final_modes.push_back(j);
+        final_dims.push_back(A.extent(j));
+      }
+    }
+    Tensor final(final_dims);
+    btas::contract(1.0, A, A_modes, lambda, {i, ndim}, 0.0, final, final_dims);
+#endif //BTAS_HAS_INTEL_MKL
   }
 }
 } // namespace btas
-#endif //BTAS_HAS_INTEL_MKL
 #endif // BTAS_TUCKER_DECOMP_H

From f9031aceeae9d1cfb959d2da8d6a3e703b4f9a64 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 14:20:12 -0500
Subject: [PATCH 55/66] Added non-MKL core contraction to randomized.h

---
 btas/generic/randomized.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/btas/generic/randomized.h b/btas/generic/randomized.h
index 6f18792a..3f70bd10 100644
--- a/btas/generic/randomized.h
+++ b/btas/generic/randomized.h
@@ -5,6 +5,7 @@
 #include <btas/error.h>
 #include <btas/tensor.h>
 #include <btas/generic/linear_algebra.h>
+#include <btas/generic/contract.h>
 
 #include <random>
 #include <stdlib.h>
@@ -58,6 +59,11 @@ void randomized_decomposition(Tensor &A, std::vector<Tensor> &transforms,
   // Add the oversampling to the desired rank
   auto ndim = A.rank();
   auto rank = des_rank + oversampl;
+  std::vector<int> A_modes;
+  for(int i = 0; i< ndim;++i){
+    A_modes.push_back(i);
+  }
+  std::vector<int> final_modes(A_modes);
 
   // Walk through all the modes of A
   for (int n = 0; n < ndim; n++) {
@@ -102,8 +108,28 @@ void randomized_decomposition(Tensor &A, std::vector<Tensor> &transforms,
 
     transforms.push_back(Y);
   }
+  std::vector<int> contract_modes;
+  contract_modes.push_back(0); contract_modes.push_back(ndim);
   for (int n = 0; n < ndim; n++) {
+#ifdef BTAS_HAS_INTEL_MKL
     core_contract(A, transforms[n], n);
+#else
+    std::vector<int> final_dims;
+    for(int j = 0; j < ndim; ++j){
+      if(j == n){
+        final_dims.push_back(transforms[n].extent(1));
+      } else {
+        final_dims.push_back(A.extent(j));
+      }
+    }
+    contract_modes[0] = n;
+    final_modes[n] = ndim;
+    btas::Range final_range(final_dims);
+    Tensor final(final_range);
+    contract(1.0, A, A_modes, transforms[n], contract_modes, 0.0, final, final_modes);
+    final_modes[n] = n;
+    A = final;
+#endif //BTAS_HAS_INTEL_MKL
   }
 }
 } // namespace btas

From 5a95ae22416a34fded4be1993eb2bb869b1dd005 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 14:27:42 -0500
Subject: [PATCH 56/66] Fix non-mkl core contract in tucker.h

---
 btas/generic/tucker.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h
index bd6086c0..06646f9b 100644
--- a/btas/generic/tucker.h
+++ b/btas/generic/tucker.h
@@ -50,7 +50,8 @@ void tucker_compression(Tensor &A, double epsilon_svd,
     }
 
     // Truncate the column space of the unitary factor matrix.
-    lambda = Tensor(R, R - rank);
+    auto kept_evecs = R - rank;
+    lambda = Tensor(R, kept_evecs);
     auto lower_bound = {0, rank};
     auto upper_bound = {R, R};
     auto view =
@@ -71,14 +72,16 @@ void tucker_compression(Tensor &A, double epsilon_svd,
     for(int j = 0; j < ndim; ++j){
       if(j == i){
         final_modes.push_back(ndim);
-        final_dims.push_back(rank);
+        final_dims.push_back(kept_evecs);
       } else{
         final_modes.push_back(j);
         final_dims.push_back(A.extent(j));
       }
     }
-    Tensor final(final_dims);
-    btas::contract(1.0, A, A_modes, lambda, {i, ndim}, 0.0, final, final_dims);
+    btas::Range final_range(final_dims);
+    Tensor final(final_range);
+    btas::contract(1.0, A, A_modes, lambda, contract_modes, 0.0, final, final_modes);
+    A = final;
 #endif //BTAS_HAS_INTEL_MKL
   }
 }

From c1b502c53f04c2244861624c4ddd235e265cb590 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 14:27:54 -0500
Subject: [PATCH 57/66] Update documentation

---
 btas/generic/cp.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index 50a8a901..2c3dc617 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -419,8 +419,6 @@ namespace btas{
     /// on CP-ALS.
     /// \param[in] calculate_epsilon Should the 2-norm
     /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ .
-    /// \param[in] step
-    /// CP_ALS built from r =1 to r = rank. r increments by step.
     /// \param[in, out] epsilon The 2-norm
     /// error between the exact and approximated reference tensor
     /// \param[in] SVD_initial_guess build inital guess from left singular vectors
@@ -455,9 +453,9 @@ namespace btas{
       return V;
     }
 
-    // Keep track of the Left hand Khatri-Rao product of matrices and
-    // Continues to multiply be right hand products, skipping
-    // the matrix at index n.
+    /// Keep track of the Left hand Khatri-Rao product of matrices and
+    /// Continues to multiply be right hand products, skipping
+    /// the matrix at index n.
     /// \param[in] n The mode being optimized, all other modes held constant
     /// \param[in] rank The current rank, column dimension of the factor matrices
     /// \param[in] forward Should the Khatri-Rao product move through the factor

From a7b2294524f3a2a30fa75b31013c44c38bd7171a Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Tue, 28 Jan 2020 14:57:36 -0500
Subject: [PATCH 58/66] tucker.h uses row major eigenvalue decomposition.

---
 btas/generic/tucker.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h
index 06646f9b..648ecd34 100644
--- a/btas/generic/tucker.h
+++ b/btas/generic/tucker.h
@@ -40,7 +40,14 @@ void tucker_compression(Tensor &A, double epsilon_svd,
     gemm(CblasNoTrans, CblasTrans, 1.0, flat, flat, 0.0, S);
 
     // Calculate SVD of smaller object.
-    eigenvalue_decomp(S, lambda);
+#ifdef BTAS_HAS_LAPACKE
+    auto info = LAPACKE_dsyev(LAPACK_ROW_MAJOR, 'V', 'L', R, S.data(), R,
+                              lambda.data());
+    if (info)
+    BTAS_EXCEPTION("Error in computing the tucker SVD");
+#else
+    BTAS_EXCEPTION("Tucker decomposition requires LAPACKE");
+#endif
 
     // Find the truncation rank based on the threshold.
     int rank = 0;

From 67fe44c7966a57f8b005f85768a39c710caf5314 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Tue, 28 Jan 2020 15:13:34 -0500
Subject: [PATCH 59/66] search for MKL can be disabled

---
 CMakeLists.txt                  |  2 ++
 cmake/modules/FindCBLAS.cmake   | 18 ++++++++++--------
 cmake/modules/FindLAPACKE.cmake | 22 ++++++++++++----------
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7e2db5f4..8f4aa7a3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,8 @@ redefaultable_option(BTAS_ASSERT_THROWS "Whether BTAS_ASSERT should throw; enabl
 add_feature_info(ASSERT_THROWS BTAS_ASSERT_THROWS "BTAS_ASSERT(x) will throw if x is false, and not be affected by NDEBUG")
 redefaultable_option(BTAS_USE_CBLAS_LAPACKE "Whether to use BLAS+LAPACK via CBLAS+LAPACKE" ON)
 add_feature_info(USE_CBLAS_LAPACKE BTAS_USE_CBLAS_LAPACKE "Will use BLAS and LAPACK linear algebra libraries via their CBLAS and LAPACKE interfaces")
+redefaultable_option(BTAS_ENABLE_MKL "Whether to look for Intel MKL" ON)
+add_feature_info(ENABLE_MKL BTAS_ENABLE_MKL "Will look for Intel MKL library")
 
 set(TARGET_MAX_INDEX_RANK 6 CACHE STRING "Determines the rank for which the default BTAS index type will use stack (default: 6); this requires Boost.Container")
 add_feature_info("TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}" TRUE "default BTAS index type will use stack for rank<=${TARGET_MAX_INDEX_RANK}")
diff --git a/cmake/modules/FindCBLAS.cmake b/cmake/modules/FindCBLAS.cmake
index c279e14f..7ea74823 100644
--- a/cmake/modules/FindCBLAS.cmake
+++ b/cmake/modules/FindCBLAS.cmake
@@ -18,13 +18,15 @@ SET(CBLAS_INCLUDE_DIR)
 SET(CBLAS_INCLUDE_FILE)
 
 # CBLAS in Intel MKL
-FIND_PACKAGE(MKL)
-IF (MKL_FOUND AND NOT CBLAS_LIBRARIES)
-  SET(CBLAS_LIBRARIES ${MKL_LIBRARIES})
-  SET(CBLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR})
-  SET(CBLAS_INCLUDE_FILE "mkl_cblas.h")
-  RETURN ()
-ENDIF (MKL_FOUND AND NOT CBLAS_LIBRARIES)
+IF (BTAS_ENABLE_MKL)
+  FIND_PACKAGE(MKL)
+  IF (MKL_FOUND AND NOT CBLAS_LIBRARIES)
+    SET(CBLAS_LIBRARIES ${MKL_LIBRARIES})
+    SET(CBLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR})
+    SET(CBLAS_INCLUDE_FILE "mkl_cblas.h")
+    RETURN ()
+  ENDIF (MKL_FOUND AND NOT CBLAS_LIBRARIES)
+ENDIF(BTAS_ENABLE_MKL)
 
 # Old CBLAS search 
 INCLUDE(CheckLibraryList)
@@ -99,7 +101,7 @@ IF(NOT CBLAS_FOUND AND CBLAS_FIND_REQUIRED)
 ENDIF(NOT CBLAS_FOUND AND CBLAS_FIND_REQUIRED)
 IF(NOT CBLAS_FIND_QUIETLY)
   IF(CBLAS_FOUND)
-    MESSAGE(STATUS "CBLAS library found")
+    MESSAGE(STATUS "CBLAS library found: CBLAS_LIBRARIES=${CBLAS_LIBRARIES}")
   ELSE(CBLAS_FOUND)
     MESSAGE(STATUS "CBLAS library not found.")
   ENDIF(CBLAS_FOUND)
diff --git a/cmake/modules/FindLAPACKE.cmake b/cmake/modules/FindLAPACKE.cmake
index 1501f7a7..0c3fc66c 100644
--- a/cmake/modules/FindLAPACKE.cmake
+++ b/cmake/modules/FindLAPACKE.cmake
@@ -17,15 +17,17 @@ SET(LAPACKE_INCLUDE_DIR)
 SET(LAPACKE_INCLUDE_FILE)
 
 # Unless already found look for LAPACKE in Intel mkl
-IF (NOT CBLAS_FOUND)
-  FIND_PACKAGE(MKL)
-ENDIF (NOT CBLAS_FOUND)
-IF (MKL_FOUND AND NOT LAPACKE_LIBRARIES)
-  SET(LAPACKE_LIBRARIES ${MKL_LIBRARIES})
-  SET(LAPACKE_INCLUDE_DIR ${MKL_INCLUDE_DIR})
-  SET(LAPACKE_INCLUDE_FILE "mkl_lapacke.h")
-  RETURN ()
-ENDIF (MKL_FOUND AND NOT LAPACKE_LIBRARIES)
+IF (BTAS_ENABLE_MKL)
+  IF (NOT CBLAS_FOUND)
+    FIND_PACKAGE(MKL)
+  ENDIF (NOT CBLAS_FOUND)
+  IF (MKL_FOUND AND NOT LAPACKE_LIBRARIES)
+    SET(LAPACKE_LIBRARIES ${MKL_LIBRARIES})
+    SET(LAPACKE_INCLUDE_DIR ${MKL_INCLUDE_DIR})
+    SET(LAPACKE_INCLUDE_FILE "mkl_lapacke.h")
+    RETURN ()
+  ENDIF (MKL_FOUND AND NOT LAPACKE_LIBRARIES)
+ENDIF(BTAS_ENABLE_MKL)
 
 INCLUDE(CheckLibraryList)
 
@@ -66,7 +68,7 @@ IF(NOT LAPACKE_FOUND AND LAPACKE_FIND_REQUIRED)
 ENDIF(NOT LAPACKE_FOUND AND LAPACKE_FIND_REQUIRED)
 IF(NOT LAPACKE_FIND_QUIETLY)
   IF(LAPACKE_FOUND)
-    MESSAGE(STATUS "LAPACKE library found")
+    MESSAGE(STATUS "LAPACKE library found: LAPACKE_LIBRARIES=${LAPACKE_LIBRARIES}")
   ELSE(LAPACKE_FOUND)
     MESSAGE(STATUS "LAPACKE library not found.")
   ENDIF(LAPACKE_FOUND)

From c8da39389e392fe3801d39a242ed13ccc171e35c Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Thu, 30 Jan 2020 12:53:46 -0500
Subject: [PATCH 60/66] Update documentation and use compute_rand in tucker and
 random compression

---
 btas/generic/cp_als.h  | 61 ++++++----------------------------------
 btas/generic/cp_rals.h | 64 ++++++++----------------------------------
 2 files changed, 20 insertions(+), 105 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 62e9e6a7..22aeecbb 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -220,10 +220,6 @@ namespace btas{
     /// \param[in] tcutSVD Truncation threshold for SVD of each mode in Tucker
     /// decomposition.
     /// \param[in] converge_test Test to see if ALS is converged
-    /// \param[in] opt_rank Should the CP decomposition of tucker
-    /// core tensor find the optimal rank with error < tcutCP? Default = true.
-    /// \param[in] tcutCP How small epsilon must be to consider the CP
-    /// decomposition converged. Default = 1e-2.
     /// \param[in] rank If finding CP
     /// decomposition to finite rank, define CP rank. Default 0 will throw error
     /// for compute_rank.
@@ -231,32 +227,18 @@ namespace btas{
     /// without calculating the Khatri-Rao product? Default = true.
     /// \param[in]
     /// calculate_epsilon Should the 2-norm error be calculated \f$ ||T_{exact} -
-    /// T_{approx}|| = \epsilon \f$ . Default = true.
-    /// \param[in] step CP_ALS built
-    /// from r =1 to r = \c rank. r increments by \c step; default = 1.
-    /// \param[in]
-    /// max_rank The highest rank approximation computed before giving up on
-    /// CP-ALS. Default = 1e4.
+    /// T_{approx}|| = \epsilon \f$ . Default = false.
     /// \param[in] max_als If CP decomposition is to finite
     /// error, max_als is the highest rank approximation computed before giving up
     /// on CP-ALS. Default = 1e4.
-    /// \param[in] tcutALS How small difference in
-    /// factor matrices must be to consider ALS of a single rank converged.
-    /// Default = 0.1.
-    /// \param[in] SVD_initial_guess Should the initial factor matrices be
-    /// approximated with left singular values? default = false
-    /// \param[in] SVD_rank if \c
-    /// SVD_initial_guess is true specify the rank of the initial guess such that
-    /// SVD_rank <= rank. default = true
     /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition
     /// default = false
     /// \returns 2-norm error, \f$ \epsilon \f$,
     /// between exact and approximate tensor, -1.0 if calculate_epsilon = false &&
     ///  ConvClass != FitCheck.
-    double compress_compute_tucker(double tcutSVD, ConvClass & converge_test, bool opt_rank = true, double tcutCP = 1e-2, int rank = 0,
-                                   bool direct = true, bool calculate_epsilon = true, int step = 1, int max_rank = 1e4,
-                                   double max_als = 1e4, bool SVD_initial_guess = false,
-                                   int SVD_rank = 0, bool fast_pI = false) {
+    double compress_compute_tucker(double tcutSVD, ConvClass & converge_test, int rank = 0,
+                                   bool direct = true, bool calculate_epsilon = false,
+                                   double max_als = 1e4, bool fast_pI = false) {
       // Tensor compression
       std::vector<Tensor> transforms;
       tucker_compression(tensor_ref, tcutSVD, transforms);
@@ -264,10 +246,7 @@ namespace btas{
       double epsilon = -1.0;
 
       // CP decomposition
-      if (opt_rank)
-        epsilon = this->compute_error(converge_test, tcutCP, step, max_rank, SVD_initial_guess, SVD_rank, max_als, fast_pI, direct);
-      else
-        epsilon = this->compute_rank(rank, converge_test, step, SVD_initial_guess, SVD_rank, max_als, fast_pI, calculate_epsilon, direct);
+      epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct);
 
       // scale factor matrices
       for (int i = 0; i < ndim; i++) {
@@ -299,51 +278,29 @@ namespace btas{
     /// \param[in] powerit Number of power iterations,
     /// as specified in the literature, to scale the spectrum of each mode.
     /// Default = suggested = 2.
-    /// \param[in] opt_rank Should the CP decomposition
-    /// of tucker core tensor find the optimal rank with error < tcutCP? Default =
-    /// true.
-    /// \param[in] tcutCP How small epsilon must be to consider the CP
-    /// decomposition converged. Default = 1e-2.
     /// \param[in] rank If finding CP
     /// decomposition to finite rank, define CP rank. Default 0 will throw error
     /// for compute_rank.
     /// \param[in] direct Should the CP decomposition be
     /// computed without calculating the Khatri-Rao product? Default = true.
     /// \param[in] calculate_epsilon Should the 2-norm error be calculated
-    /// \f$ ||T_exact - T_approx|| = \epsilon \f$. Default = true.
-    /// \param[in] step
-    /// CP_ALS built from r =1 to r = rank. r increments by step; default = 1.
-    /// \param[in] max_rank The highest rank approximation computed before giving
-    /// up on CP-ALS. Default = 1e5.
+    /// \f$ ||T_exact - T_approx|| = \epsilon \f$. Default = false.
     /// \param[in] max_als If CP decomposition is to
     /// finite error, max_als is the highest rank approximation computed before
     /// giving up on CP-ALS. Default = 1e5.
-    /// \param[in] tcutALS How small
-    /// difference in factor matrices must be to consider ALS of a single rank
-    /// converged. Default = 0.1.
-    /// \param[in] SVD_initial_guess Should the initial factor matrices be
-    /// approximated with left singular values? default = false
-    /// \param[in] SVD_rank if \c
-    /// SVD_initial_guess is true specify the rank of the initial guess such that
-    /// SVD_rank <= rank. default = true
     /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition
     /// default = false
     /// \returns 2-norm error, \f$ \epsilon \f$,
     /// between exact and approximate tensor, -1.0 if calculate_epsilon = false &&
     ///  ConvClass != FitCheck.
     double compress_compute_rand(int desired_compression_rank, ConvClass & converge_test, int oversampl = 10, int powerit = 2,
-                                 bool opt_rank = true, double tcutCP = 1e-2, int rank = 0, bool direct = true,
-                                 bool calculate_epsilon = false, int step = 1, int max_rank = 1e5, double max_als = 1e5,
-                                 bool SVD_initial_guess = false, int SVD_rank = 0, bool fast_pI = false) {
+            int rank = 0, bool direct = true, bool calculate_epsilon = false, double max_als = 1e5,
+            bool fast_pI = false) {
       std::vector<Tensor> transforms;
       randomized_decomposition(tensor_ref, transforms, desired_compression_rank, oversampl, powerit);
       size = tensor_ref.size();
-      double epsilon = -1.0;
 
-      if (opt_rank)
-        epsilon = this->compute_error(converge_test, tcutCP, step, max_rank, SVD_initial_guess, SVD_rank, max_als, fast_pI, direct);
-      else
-        epsilon = this->compute_rank(rank, converge_test, step, SVD_initial_guess, SVD_rank, max_als, fast_pI, calculate_epsilon, direct);
+      auto epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct);
 
       // scale factor matrices
       for (int i = 0; i < ndim; i++) {
diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h
index 7a92f2e8..eebe4e5c 100644
--- a/btas/generic/cp_rals.h
+++ b/btas/generic/cp_rals.h
@@ -238,10 +238,6 @@ namespace btas{
     /// \param[in] tcutSVD Truncation threshold for SVD of each mode in Tucker
     /// decomposition.
     /// \param[in] converge_test Test to see if ALS is converged
-    /// \param[in] opt_rank Should the CP decomposition of tucker
-    /// core tensor find the optimal rank with error < tcutCP? Default = true.
-    /// \param[in] tcutCP How small epsilon must be to consider the CP
-    /// decomposition converged. Default = 1e-2.
     /// \param[in] rank If finding CP
     /// decomposition to finite rank, define CP rank. Default 0 will throw error
     /// for compute_rank.
@@ -249,32 +245,18 @@ namespace btas{
     /// without calculating the Khatri-Rao product? Default = true.
     /// \param[in]
     /// calculate_epsilon Should the 2-norm error be calculated \f$ ||T_{exact} -
-    /// T_{approx}|| = \epsilon \f$ . Default = true.
-    /// \param[in] step CP_ALS built
-    /// from r =1 to r = \c rank. r increments by \c step; default = 1.
-    /// \param[in]
-    /// max_rank The highest rank approximation computed before giving up on
-    /// CP-ALS. Default = 1e4.
+    /// T_{approx}|| = \epsilon \f$ . Default = false.
     /// \param[in] max_als If CP decomposition is to finite
     /// error, max_als is the highest rank approximation computed before giving up
     /// on CP-ALS. Default = 1e4.
-    /// \param[in] tcutALS How small difference in
-    /// factor matrices must be to consider ALS of a single rank converged.
-    /// Default = 0.1.
-    /// \param[in] SVD_initial_guess Should the initial factor matrices be
-    /// approximated with left singular values? default = false
-    /// \param[in] SVD_rank if \c
-    /// SVD_initial_guess is true specify the rank of the initial guess such that
-    /// SVD_rank <= rank. default = true
     /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition
     /// default = false
     /// \returns 2-norm error, \f$ \epsilon \f$,
     /// between exact and approximate tensor, -1.0 if calculate_epsilon = false &&
-    //  ConvClass != FitCheck.
-    double compress_compute_tucker(double tcutSVD, ConvClass & converge_test, bool opt_rank = true, double tcutCP = 1e-2, int rank = 0,
-                                   bool direct = true, bool calculate_epsilon = true, int step = 1, int max_rank = 1e4,
-                                   double max_als = 1e4, bool SVD_initial_guess = false,
-                                   int SVD_rank = 0, bool fast_pI = false) {
+    ///  ConvClass != FitCheck.
+    double compress_compute_tucker(double tcutSVD, ConvClass & converge_test, int rank = 0,
+            bool direct = true, bool calculate_epsilon = false, double max_als = 1e4,
+            bool fast_pI = false) {
       // Tensor compression
       std::vector<Tensor> transforms;
       tucker_compression(tensor_ref, tcutSVD, transforms);
@@ -282,10 +264,7 @@ namespace btas{
       double epsilon = -1.0;
 
       // CP decomposition
-      if (opt_rank)
-        epsilon = this->compute_error(converge_test, tcutCP, step, max_rank, SVD_initial_guess, SVD_rank, max_als, fast_pI, direct);
-      else
-        epsilon = this->compute_rank(rank, converge_test, step, SVD_initial_guess, SVD_rank, max_als, fast_pI, calculate_epsilon, direct);
+      epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct);
 
 
       // scale factor matrices
@@ -318,52 +297,31 @@ namespace btas{
     /// \param[in] powerit Number of power iterations,
     /// as specified in the literature, to scale the spectrum of each mode.
     /// Default = suggested = 2.
-    /// \param[in] opt_rank Should the CP decomposition
-    /// of tucker core tensor find the optimal rank with error < tcutCP? Default =
-    /// true.
-    /// \param[in] tcutCP How small epsilon must be to consider the CP
-    /// decomposition converged. Default = 1e-2.
     /// \param[in] rank If finding CP
     /// decomposition to finite rank, define CP rank. Default 0 will throw error
     /// for compute_rank.
     /// \param[in] direct Should the CP decomposition be
     /// computed without calculating the Khatri-Rao product? Default = true.
     /// \param[in] calculate_epsilon Should the 2-norm error be calculated
-    /// \f$ ||T_exact - T_approx|| = \epsilon \f$. Default = true.
-    /// \param[in] step
-    /// CP_ALS built from r =1 to r = rank. r increments by step; default = 1.
-    /// \param[in] max_rank The highest rank approximation computed before giving
-    /// up on CP-ALS. Default = 1e5.
+    /// \f$ ||T_exact - T_approx|| = \epsilon \f$. Default = false.
     /// \param[in] max_als If CP decomposition is to
     /// finite error, max_als is the highest rank approximation computed before
     /// giving up on CP-ALS. Default = 1e5.
-    /// \param[in] tcutALS How small
-    /// difference in factor matrices must be to consider ALS of a single rank
-    /// converged. Default = 0.1.
-    /// \param[in] SVD_initial_guess Should the initial factor matrices be
-    /// approximated with left singular values? default = false
-    /// \param[in] SVD_rank if \c
-    /// SVD_initial_guess is true specify the rank of the initial guess such that
-    /// SVD_rank <= rank. default = true
     /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition
     /// default = false
     /// \returns 2-norm error, \f$ \epsilon \f$,
     /// between exact and approximate tensor, -1.0 if calculate_epsilon = false &&
-    //  ConvClass != FitCheck.
+    ///  ConvClass != FitCheck.
     double compress_compute_rand(int desired_compression_rank, ConvClass & converge_test, int oversampl = 10, int powerit = 2,
-                                 bool opt_rank = true, double tcutCP = 1e-2, int rank = 0, bool direct = true,
-                                 bool calculate_epsilon = false, int step = 1, int max_rank = 1e5, double max_als = 1e5,
-                                 bool SVD_initial_guess = false, int SVD_rank = 0, bool fast_pI = false) {
+                                 int rank = 0, bool direct = true, bool calculate_epsilon = false,
+                                 double max_als = 1e5, bool fast_pI = false) {
       std::vector<Tensor> transforms;
       randomized_decomposition(tensor_ref, transforms, desired_compression_rank, oversampl, powerit);
       size = tensor_ref.size();
       double epsilon = -1.0;
 
       // CP decomposition
-      if (opt_rank)
-        epsilon = this->compute_error(converge_test, tcutCP, step, max_rank, SVD_initial_guess, SVD_rank, max_als, fast_pI, direct);
-      else
-        epsilon = this->compute_rank(rank, converge_test, step, SVD_initial_guess, SVD_rank, max_als, fast_pI, calculate_epsilon, direct);
+      epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct);
 
       // scale factor matrices
       for (int i = 0; i < ndim; i++) {

From 4ceee1a1d92353cf60abf4811eb144fd6f55693b Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Thu, 30 Jan 2020 12:55:48 -0500
Subject: [PATCH 61/66] Update compress_rand and compress_tucker function calls
 in tensor_cp_test.cc

---
 unittest/tensor_cp_test.cc | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/unittest/tensor_cp_test.cc b/unittest/tensor_cp_test.cc
index f0b360f2..6ee9db3f 100644
--- a/unittest/tensor_cp_test.cc
+++ b/unittest/tensor_cp_test.cc
@@ -95,7 +95,7 @@ TEST_CASE("CP")
       CP_ALS<tensor, conv_class> A1(d);
       conv.set_norm(norm3);
       double diff =
-              A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false);
+              A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true);
       CHECK((diff - results(2,0)) <= epsilon);
     }
     SECTION("ALS MODE = 3, Random + CP"){
@@ -103,7 +103,7 @@ TEST_CASE("CP")
       CP_ALS<tensor, conv_class> A1(d);
       conv.set_norm(norm3);
       double diff =
-              A1.compress_compute_rand(2, conv, 0, 2, false, 1e-2, 5);
+              A1.compress_compute_rand(2, conv, 0, 2, 5, true, false, 100, true);
       CHECK((diff - results(3,0)) <= epsilon);
     }
 
@@ -124,14 +124,14 @@ TEST_CASE("CP")
       auto d = D4;
       CP_ALS<tensor, conv_class> A1(d);
       conv.set_norm(norm4);
-      double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5);
+      double diff = A1.compress_compute_tucker(0.1, conv, 5, true, false, 1e4, true);
       CHECK((diff - results(6,0)) <= epsilon);
     }
     SECTION("ALS MODE = 4, Random + CP"){
       auto d = D4;
       CP_ALS<tensor, conv_class> A1(d);
       conv.set_norm(norm4);
-      double diff = A1.compress_compute_rand(3, conv, 0, 2, true, 1e-2, 0, true, false, 1, 20, 100);
+      double diff = A1.compress_compute_rand(3, conv, 0, 2, 1, true, false, 20, true);
       CHECK((diff - results(7,0)) <= epsilon);
     }
     SECTION("ALS MODE = 5, Finite rank"){
@@ -150,14 +150,14 @@ TEST_CASE("CP")
       auto d = D5;
       CP_ALS<tensor, conv_class> A1(d);
       conv.set_norm(norm5);
-      double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false);
+      double diff = A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true);
       CHECK((diff - results(10,0)) <= epsilon);
     }
     SECTION("ALS MODE = 5, Random + CP"){
       auto d = D5;
       CP_ALS<tensor, conv_class> A1(d);
       conv.set_norm(norm5);
-      double diff = A1.compress_compute_rand(1, conv, 0, 2, false, 1e-2, 5);
+      double diff = A1. compress_compute_rand(1, conv, 0, 2, 5, true, false, 100, false);
       CHECK((diff - results(11,0)) <= epsilon);
     }
   }
@@ -181,7 +181,7 @@ TEST_CASE("CP")
       CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm3);
       double diff =
-              A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false);
+              A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true);
       CHECK((diff - results(14,0)) <= epsilon);
     }
     SECTION("RALS MODE = 3, Random + CP"){
@@ -189,7 +189,7 @@ TEST_CASE("CP")
       CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm3);
       double diff =
-              A1.compress_compute_rand(2, conv, 0, 2, false, 1e-2, 5);
+              A1.compress_compute_rand(2, conv, 0, 2, 5, true, false, 100, true);
       CHECK((diff - results(15,0)) <= epsilon);
     }
     SECTION("RALS MODE = 4, Finite rank"){
@@ -209,14 +209,14 @@ TEST_CASE("CP")
       auto d = D4;
       CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm4);
-      double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5);
+      double diff = A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true);
       CHECK((diff - results(18,0)) <= epsilon);
     }
     SECTION("RALS MODE = 4, Random + CP"){
       auto d = D4;
       CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm4);
-      double diff = A1.compress_compute_rand(3, conv, 0, 2, true, 1e-2, 0, true, false, 1, 20, 100);
+      double diff = A1.compress_compute_rand(3, conv, 0, 2, 1, true, false, 20, true);
       CHECK((diff - results(19,0)) <= epsilon);
     }
     SECTION("RALS MODE = 5, Finite rank"){
@@ -235,14 +235,14 @@ TEST_CASE("CP")
       auto d = D5;
       CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm5);
-      double diff = A1.compress_compute_tucker(0.1, conv, false, 1e-2, 5, true, false);
+      double diff = A1.compress_compute_tucker(0.1, conv, 5, true, false, 100, true);
       CHECK((diff - results(22,0)) <= epsilon);
     }
     SECTION("RALS MODE = 5, Random + CP"){
       auto d = D5;
       CP_RALS<tensor, conv_class> A1(d);
       conv.set_norm(norm5);
-      double diff = A1.compress_compute_rand(1, conv, 0, 2, false, 1e-2, 5);
+      double diff = A1.compress_compute_rand(1, conv, 0, 2, 5, true, false, 100, true);
       CHECK((diff - results(23,0)) <= epsilon);
     }
   }

From 8832130621d97d00d0f5cd77305e7d42587a1a95 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Thu, 30 Jan 2020 12:57:48 -0500
Subject: [PATCH 62/66] Remove BTAS_HAS_CBLAS guard around linear algebra
 functions

---
 btas/generic/linear_algebra.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index 8bfbf980..c83b274d 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -6,8 +6,6 @@
 #define BTAS_LINEAR_ALGEBRA_H
 #include <btas/error.h>
 
-#ifdef BTAS_HAS_CBLAS
-
 namespace btas{
   /// Computes L of the LU decomposition of tensor \c A
 /// \param[in, out] A In: A reference matrix to be LU decomposed.  Out:
@@ -288,5 +286,4 @@ Tensor pseudoInverse(Tensor & A, bool & fast_pI) {
   }
 
 } // namespace btas
-#endif // BTAS_HAS_CBLAS
 #endif //BTAS_LINEAR_ALGEBRA_H

From edc63f4bcd94e62b7e44a2ddcd72912934529e8f Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Thu, 30 Jan 2020 13:02:33 -0500
Subject: [PATCH 63/66] Update results using different initializtion function

---
 unittest/cp_test_results.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/unittest/cp_test_results.txt b/unittest/cp_test_results.txt
index a6650b58..488c07e1 100644
--- a/unittest/cp_test_results.txt
+++ b/unittest/cp_test_results.txt
@@ -1,6 +1,6 @@
 0.8022121792543945
 0.9983681298768465
-0.7909930183444412
+0.8013967619361885
 0.5365678558201658
 0.6011111112728442
 0.9932249481447798
@@ -8,11 +8,11 @@
 0.5862586400695924
 0.0562044207525656
 0.183409953450329
-0.0008026970682262213
+0.00197202949114994
 0.5092600887442312
 0.8023893427056535
 0.9998165185189172
-0.7965146841534007
+0.8000632869557247
 0.5365998545867845
 0.5946792918362551
 0.9902479480481341

From ca567c2dd53f08564912d861de9f1164ea2e4c11 Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Thu, 30 Jan 2020 13:55:54 -0500
Subject: [PATCH 64/66] Add a verbose option to printing in converge check

---
 btas/generic/converge_class.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/btas/generic/converge_class.h b/btas/generic/converge_class.h
index a550c503..ef23b34f 100644
--- a/btas/generic/converge_class.h
+++ b/btas/generic/converge_class.h
@@ -112,7 +112,9 @@ namespace btas {
 
       double fitChange = abs(fitOld_ - fit);
       fitOld_ = fit;
-      //std::cout << fit << "\t" << fitChange << std::endl;
+      if(verbose_) {
+        std::cout << fit << "\t" << fitChange << std::endl;
+      }
       if(fitChange < tol_) {
         converged_num++;
         if(converged_num == 2){
@@ -140,6 +142,10 @@ namespace btas {
       return final_fit_;
     }
 
+    void verbose(bool verb){
+      verbose_ = verb;
+    }
+
   private:
     double tol_;
     double fitOld_ = -1.0;
@@ -148,6 +154,7 @@ namespace btas {
     int iter_ = 0;
     int converged_num = 0;
     Tensor MtKRP_;
+    bool verbose_ = false;
 
     double norm(const std::vector<Tensor> & btas_factors){
       auto rank = btas_factors[0].extent(1);

From 6f374b2b575b5beaffd281c65ab50b05cbc7ebef Mon Sep 17 00:00:00 2001
From: kmp5VT <kmp5@vt.edu>
Date: Thu, 30 Jan 2020 14:29:56 -0500
Subject: [PATCH 65/66] Compute all the tucker transformations before
 transforming the core tensor.

---
 btas/generic/tucker.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/btas/generic/tucker.h b/btas/generic/tucker.h
index 648ecd34..ed0776b4 100644
--- a/btas/generic/tucker.h
+++ b/btas/generic/tucker.h
@@ -67,7 +67,11 @@ void tucker_compression(Tensor &A, double epsilon_svd,
 
     // Push the factor matrix back as a transformation.
     transforms.push_back(lambda);
+  }
 
+  for(int i = 0; i < ndim; ++i){
+    auto & lambda = transforms[i];
+    auto kept_evecs = lambda.extent(1);
 #ifdef BTAS_HAS_INTEL_MKL
     // Contract the factor matrix with the reference tensor, A.
     core_contract(A, lambda, i);

From a072fd7eb9078b9009037fba9bda44d449291e18 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Thu, 30 Jan 2020 15:31:30 -0500
Subject: [PATCH 66/66] release id -> alpha.1 [skip ci]

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8f4aa7a3..368a9419 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ add_feature_info("TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}" TRUE "default
 set(BTAS_MAJOR_VERSION 1)
 set(BTAS_MINOR_VERSION 0)
 set(BTAS_MICRO_VERSION 0)
-set(BTAS_PRERELEASE_ID alpha)
+set(BTAS_PRERELEASE_ID alpha.1)
 set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}")
 if (BTAS_PRERELEASE_ID)
   set(BTAS_EXT_VERSION "${BTAS_VERSION}-${BTAS_PRERELEASE_ID}")