From 8ff92b1747b954a21fed42c6e5e5501e76245095 Mon Sep 17 00:00:00 2001
From: asistradition <asistradition@users.noreply.github.com>
Date: Tue, 4 Jan 2022 15:43:43 -0500
Subject: [PATCH 1/4] Complex support

---
 CHANGELOG.md                               |   4 +
 README.md                                  |  12 +-
 demo.ipynb                                 |  87 +++--
 setup.py                                   |   2 +-
 sparse_dot_mkl/_dense_dense.py             |  38 +-
 sparse_dot_mkl/_mkl_interface.py           | 432 +++++++++++++--------
 sparse_dot_mkl/_sparse_dense.py            |  47 ++-
 sparse_dot_mkl/_sparse_qr_solver.py        |   2 +-
 sparse_dot_mkl/_sparse_sparse.py           |  24 +-
 sparse_dot_mkl/_sparse_sypr.py             | 122 ++++++
 sparse_dot_mkl/_sparse_vector.py           |  26 +-
 sparse_dot_mkl/tests/test_dense_dense.py   |  70 +++-
 sparse_dot_mkl/tests/test_mkl.py           |  41 +-
 sparse_dot_mkl/tests/test_sparse_dense.py  | 137 ++++---
 sparse_dot_mkl/tests/test_sparse_sparse.py | 114 ++++--
 sparse_dot_mkl/tests/test_sparse_vector.py | 132 +++++--
 16 files changed, 884 insertions(+), 406 deletions(-)
 create mode 100644 sparse_dot_mkl/_sparse_sypr.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 89f0405..3aa1e9b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+### Version 0.8.0
+
+* Added support for complex data types
+
 ### Version 0.7.3
 
 * Fixed a memory leak when a CSC matrix was multiplied by a dense matrix in column-major format
diff --git a/README.md b/README.md
index 5f35502..3bf065d 100644
--- a/README.md
+++ b/README.md
@@ -19,12 +19,13 @@ Three functions are explicitly available - `dot_product_mkl`, `gram_matrix_mkl`,
 `matrix_a` and `matrix_b` are either numpy arrays (1d or 2d) or scipy sparse matrices (CSR, CSC, or BSR).
 BSR matrices are supported for matrix-matrix multiplication only if one matrix is a dense array or both sparse matrices are BSR.
 Sparse COO matrices are not supported. 
-Numpy arrays must be contiguous. Non-contiguous arrays should be copied to a contiguous array prior to calling this 
+Numpy arrays must be contiguous. 
+Non-contiguous arrays should be copied to a contiguous array prior to calling this 
 function.
 
-This package only works with float data.
-`cast=True` will convert data to double-precision floats by making an internal copy if necessary.
-If A and B are both single-precision floats they will be used as is.
+This package only works with float or complex float data.
+`cast=True` will convert data to double-precision floats or complex floats by making an internal copy if necessary.
+If A and B are both single-precision floats or complex floats they will be used as is.
 `cast=False` will raise a ValueError if the input arrays are not both double-precision or both single-precision.
 This defaults to `False` on the principle that potentially unsafe dtype conversions should not occur without explicit
 instruction.
@@ -45,7 +46,8 @@ Scipy matrix multiplication does not produce ordered outputs, so this defaults t
 
 `out` is an optional reference to a dense output array to which the product of the matrix multiplication will be added. 
 This must be identical in attributes to the array that would be returned if it was not used.
-Specifically it must have the correct shape, dtype, and column- or row-major order and it must be contiguous. A ValueError will be raised if any attribute of this array is incorrect.
+Specifically it must have the correct shape, dtype, and column- or row-major order and it must be contiguous. 
+A ValueError will be raised if any attribute of this array is incorrect.
 This function will return a reference to the same array object when `out` is set.
 
 `out_scalar` is an optional element-wise scaling of `out`, if `out` is provided.
diff --git a/demo.ipynb b/demo.ipynb
index 2a89c2a..0c63c56 100644
--- a/demo.ipynb
+++ b/demo.ipynb
@@ -4,28 +4,39 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "import scipy.sparse as sps\n",
+    "import numpy as np\n",
+    "\n",
+    "try:\n",
+    "    import sparse_dot_mkl\n",
+    "    dot_product = sparse_dot_mkl.dot_product_mkl\n",
+    "except ImportError:\n",
+    "    def dot_product(x, y, dense=False, **kwargs):\n",
+    "        z = x @ y\n",
+    "        return z.A if dense and sps.issparse(z) else z "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Using matplotlib backend: Qt5Agg\n",
-      "Populating the interactive namespace from numpy and matplotlib\n"
+      "X sparsity: 78.80 %\n"
      ]
     }
    ],
    "source": [
-    "%pylab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import scipy.sparse\n",
-    "import sparse_dot_mkl"
+    "np.random.seed(0)\n",
+    "X = np.random.randn(500, 5000)\n",
+    "X[X < 0.8] = 0\n",
+    "X = sps.csr_matrix(X)\n",
+    "print(f'X sparsity: {100 * (1 - X.count_nonzero() / np.prod(X.shape)):5.2f} %')"
    ]
   },
   {
@@ -34,19 +45,19 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "X sparsity: 78.80 %\n"
-     ]
+     "data": {
+      "text/plain": [
+       "<500x500 sparse matrix of type '<class 'numpy.float64'>'\n",
+       "\twith 250000 stored elements in Compressed Sparse Row format>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "random.seed(0)\n",
-    "X = random.randn(500, 5000)\n",
-    "X[X < 0.8] = 0\n",
-    "X = scipy.sparse.csr_matrix(X)\n",
-    "print(f'X sparsity: {100 * (1 - X.count_nonzero() / prod(X.shape)):5.2f} %')"
+    "X @ X.T"
    ]
   },
   {
@@ -57,7 +68,7 @@
    "source": [
     "expected_result = (X @ X.T).toarray()\n",
     "expected_result_tril = expected_result.copy()\n",
-    "expected_result_tril[tril_indices(expected_result.shape[0], k=-1)] = 0"
+    "expected_result_tril[np.tril_indices(expected_result.shape[0], k=-1)] = 0"
    ]
   },
   {
@@ -78,7 +89,7 @@
    ],
    "source": [
     "mkl_result1 = sparse_dot_mkl.dot_product_mkl(X, X.T)\n",
-    "allclose(mkl_result1.toarray(), expected_result)"
+    "np.allclose(mkl_result1.toarray(), expected_result)"
    ]
   },
   {
@@ -98,8 +109,8 @@
     }
    ],
    "source": [
-    "mkl_result2 = sparse_dot_mkl.dot_product_transpose_mkl(X)\n",
-    "allclose(mkl_result2.toarray(), expected_result_tril)"
+    "mkl_result2 = sparse_dot_mkl.gram_matrix_mkl(X, transpose=True)\n",
+    "np.allclose(mkl_result2.toarray(), expected_result_tril)"
    ]
   },
   {
@@ -111,24 +122,32 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "197 ms ± 5.21 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
-      "70.6 ms ± 593 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
-      "34.2 ms ± 421 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+      "Scipy Matrix Multiplication Product:\n",
+      "204 ms ± 8.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
+      "MKL Matrix Multiplication Product:\n",
+      "52.5 ms ± 579 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
+      "MKL Gram Matrix Product:\n",
+      "28.1 ms ± 213 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
      ]
     }
    ],
    "source": [
+    "print(\"Scipy Matrix Multiplication Product:\")\n",
     "%timeit X @ X.T\n",
+    "\n",
+    "print(\"MKL Matrix Multiplication Product:\")\n",
     "%timeit sparse_dot_mkl.dot_product_mkl(X, X.T)\n",
-    "%timeit sparse_dot_mkl.dot_product_transpose_mkl(X)"
+    "\n",
+    "print(\"MKL Gram Matrix Product:\")\n",
+    "%timeit sparse_dot_mkl.dot_product_transpose_mkl(X, transpose=True)"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python [conda env:sparse-test]",
    "language": "python",
-   "name": "python3"
+   "name": "conda-env-sparse-test-py"
   },
   "language_info": {
    "codemirror_mode": {
@@ -140,7 +159,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.10"
+   "version": "3.8.3"
   }
  },
  "nbformat": 4,
diff --git a/setup.py b/setup.py
index 8b98c8d..56eee3c 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 from setuptools import setup, find_packages
 
 DISTNAME = 'sparse_dot_mkl'
-VERSION = '0.7.3'
+VERSION = '0.8.0'
 DESCRIPTION = "Intel MKL wrapper for sparse matrix multiplication"
 MAINTAINER = 'Chris Jackson'
 MAINTAINER_EMAIL = 'cj59@nyu.edu'
diff --git a/sparse_dot_mkl/_dense_dense.py b/sparse_dot_mkl/_dense_dense.py
index f249dc6..7730f3e 100644
--- a/sparse_dot_mkl/_dense_dense.py
+++ b/sparse_dot_mkl/_dense_dense.py
@@ -1,11 +1,19 @@
 from sparse_dot_mkl._mkl_interface import (MKL, _type_check, _sanity_check, _empty_output_check, _get_numpy_layout,
-                                           LAYOUT_CODE_C, LAYOUT_CODE_F, _out_matrix, debug_print)
+                                           LAYOUT_CODE_C, LAYOUT_CODE_F, _out_matrix, debug_print, _is_double, 
+                                           _output_dtypes, _mkl_scalar)
 
 import numpy as np
 import ctypes as _ctypes
 
+# Dict keyed by ('double_precision_bool', 'complex_bool')
+_mkl_gemm_funcs = {(False, False): MKL._cblas_sgemm,
+                   (True, False): MKL._cblas_dgemm,
+                   (False, True): MKL._cblas_cgemm,
+                   (True, True): MKL._cblas_zgemm}
 
-def _dense_matmul(matrix_a, matrix_b, double_precision, scalar=1., out=None, out_scalar=None):
+def _dense_matmul(matrix_a, matrix_b, scalar=1., out=None, out_scalar=None):
+
+    double_precision, complex_type = _is_double(matrix_a)
 
     # Reshape matrix_b to a column instead of a vector if it's 1d
     flatten_output = matrix_b.ndim == 1
@@ -16,7 +24,7 @@ def _dense_matmul(matrix_a, matrix_b, double_precision, scalar=1., out=None, out
     output_shape = (m, n)
 
     # Set the MKL function for precision
-    func = MKL._cblas_dgemm if double_precision else MKL._cblas_sgemm
+    func = _mkl_gemm_funcs[(double_precision, complex_type)]
 
     # Get the memory order for arrays
     layout_a, ld_a = _get_numpy_layout(matrix_a)
@@ -29,21 +37,31 @@ def _dense_matmul(matrix_a, matrix_b, double_precision, scalar=1., out=None, out
     out_order, ld_out = ("C", output_shape[1]) if layout_a == LAYOUT_CODE_C else ("F", output_shape[0])
 
     # Allocate an array for outputs and set functions and types for float or doubles
-    output_arr = _out_matrix(output_shape, np.float64 if double_precision else np.float32, order=out_order, out_arr=out)
+    output_arr = _out_matrix(output_shape, _output_dtypes[(double_precision, complex_type)],
+                             order=out_order, out_arr=out)
+
     output_ctype = _ctypes.c_double if double_precision else _ctypes.c_float
 
+    # The complex functions all take void pointers
+    output_ctype = _ctypes.c_void_p if complex_type else output_ctype
+
+    # The complex versions of these functions take void pointers instead of passed structs
+    # So create a C struct if necessary to be passed by reference
+    scalar = _mkl_scalar(scalar, complex_type, double_precision)
+    out_scalar = _mkl_scalar(out_scalar, complex_type, double_precision)
+
     func(layout_a,
          111,
          op_b,
          m,
          n,
          k,
-         scalar,
-         matrix_a,
+         scalar if not complex_type else _ctypes.byref(scalar),
+         matrix_a.ctypes.data_as(_ctypes.POINTER(output_ctype)),
          ld_a,
-         matrix_b,
+         matrix_b.ctypes.data_as(_ctypes.POINTER(output_ctype)),
          ld_b,
-         float(out_scalar) if out_scalar is not None else 1.,
+         out_scalar if not complex_type else _ctypes.byref(out_scalar),
          output_arr.ctypes.data_as(_ctypes.POINTER(output_ctype)),
          ld_out)
 
@@ -62,6 +80,4 @@ def _dense_dot_dense(matrix_a, matrix_b, cast=False, scalar=1., out=None, out_sc
 
     matrix_a, matrix_b = _type_check(matrix_a, matrix_b, cast=cast)
 
-    a_dbl, b_dbl = matrix_a.dtype == np.float64, matrix_b.dtype == np.float64
-
-    return _dense_matmul(matrix_a, matrix_b, a_dbl or b_dbl, scalar=scalar, out=out, out_scalar=out_scalar)
+    return _dense_matmul(matrix_a, matrix_b, scalar=scalar, out=out, out_scalar=out_scalar)
diff --git a/sparse_dot_mkl/_mkl_interface.py b/sparse_dot_mkl/_mkl_interface.py
index a7c5c7c..a450f6b 100644
--- a/sparse_dot_mkl/_mkl_interface.py
+++ b/sparse_dot_mkl/_mkl_interface.py
@@ -66,7 +66,7 @@ def get_max_threads():
 from numpy.ctypeslib import ndpointer, as_array
 
 NUMPY_FLOAT_DTYPES = [np.float32, np.float64]
-
+NUMPY_COMPLEX_DTYPES = [np.csingle, np.cdouble]
 
 class MKL:
     """ This class holds shared object references to C functions with arg and returntypes that can be adjusted"""
@@ -76,139 +76,101 @@ class MKL:
     MKL_DEBUG = False
 
     # Import function for creating a MKL CSR object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-create-csr
     _mkl_sparse_d_create_csr = _libmkl.mkl_sparse_d_create_csr
-
-    # Import function for creating a MKL CSR object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-create-csr
     _mkl_sparse_s_create_csr = _libmkl.mkl_sparse_s_create_csr
+    _mkl_sparse_c_create_csr = _libmkl.mkl_sparse_c_create_csr
+    _mkl_sparse_z_create_csr = _libmkl.mkl_sparse_z_create_csr
 
     # Import function for creating a MKL CSC object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-create-csc
     _mkl_sparse_d_create_csc = _libmkl.mkl_sparse_d_create_csc
-
-    # Import function for creating a MKL CSC object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-create-csc
     _mkl_sparse_s_create_csc = _libmkl.mkl_sparse_s_create_csc
+    _mkl_sparse_c_create_csc = _libmkl.mkl_sparse_c_create_csc
+    _mkl_sparse_z_create_csc = _libmkl.mkl_sparse_z_create_csc
 
     # Import function for creating a MKL BSR object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-create-bsr
     _mkl_sparse_d_create_bsr = _libmkl.mkl_sparse_d_create_bsr
-
-    # Import function for creating a MKL BSR object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-create-bsr
     _mkl_sparse_s_create_bsr = _libmkl.mkl_sparse_s_create_bsr
+    _mkl_sparse_c_create_bsr = _libmkl.mkl_sparse_c_create_bsr
+    _mkl_sparse_z_create_bsr = _libmkl.mkl_sparse_z_create_bsr
 
     # Export function for exporting a MKL CSR object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-export-csr
     _mkl_sparse_d_export_csr = _libmkl.mkl_sparse_d_export_csr
-
-    # Export function for exporting a MKL CSR object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-export-csr
     _mkl_sparse_s_export_csr = _libmkl.mkl_sparse_s_export_csr
+    _mkl_sparse_c_export_csr = _libmkl.mkl_sparse_c_export_csr
+    _mkl_sparse_z_export_csr = _libmkl.mkl_sparse_z_export_csr
 
     # Export function for exporting a MKL CSC object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-export-csc
     _mkl_sparse_d_export_csc = _libmkl.mkl_sparse_d_export_csc
-
-    # Export function for exporting a MKL CSC object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-export-csc
     _mkl_sparse_s_export_csc = _libmkl.mkl_sparse_s_export_csc
+    _mkl_sparse_z_export_csc = _libmkl.mkl_sparse_z_export_csc
+    _mkl_sparse_c_export_csc = _libmkl.mkl_sparse_c_export_csc
 
     # Export function for exporting a MKL BSR object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-export-bsr
     _mkl_sparse_d_export_bsr = _libmkl.mkl_sparse_d_export_bsr
-
-    # Export function for exporting a MKL BSR object
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-export-bsr
     _mkl_sparse_s_export_bsr = _libmkl.mkl_sparse_s_export_bsr
+    _mkl_sparse_c_export_bsr = _libmkl.mkl_sparse_c_export_bsr
+    _mkl_sparse_z_export_bsr = _libmkl.mkl_sparse_z_export_bsr
 
     # Import function for matmul
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-spmm
     _mkl_sparse_spmm = _libmkl.mkl_sparse_spmm
 
     # Import function for cleaning up MKL objects
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-destroy
     _mkl_sparse_destroy = _libmkl.mkl_sparse_destroy
 
     # Import function for ordering MKL objects
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-order
     _mkl_sparse_order = _libmkl.mkl_sparse_order
 
     # Import function for coverting to CSR
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-convert-csr
     _mkl_sparse_convert_csr = _libmkl.mkl_sparse_convert_csr
 
     # Import function for matmul single dense
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-spmm
     _mkl_sparse_s_spmmd = _libmkl.mkl_sparse_s_spmmd
-
-    # Import function for matmul double dense
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-spmm
     _mkl_sparse_d_spmmd = _libmkl.mkl_sparse_d_spmmd
+    _mkl_sparse_c_spmmd = _libmkl.mkl_sparse_c_spmmd
+    _mkl_sparse_z_spmmd = _libmkl.mkl_sparse_z_spmmd
 
     # Import function for matmul single sparse*dense
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-mm
     _mkl_sparse_s_mm = _libmkl.mkl_sparse_s_mm
-
-    # Import function for matmul double sparse*dense
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-mm
     _mkl_sparse_d_mm = _libmkl.mkl_sparse_d_mm
+    _mkl_sparse_c_mm = _libmkl.mkl_sparse_c_mm
+    _mkl_sparse_z_mm = _libmkl.mkl_sparse_z_mm
 
-    # Import function for matmul single dense*dense
-    # https://software.intel.com/en-us/mkl-developer-reference-c-cblas-gemm
+    # Import function for matmul dense*dense
     _cblas_sgemm = _libmkl.cblas_sgemm
-
-    # Import function for matmul double dense*dense
-    # https://software.intel.com/en-us/mkl-developer-reference-c-cblas-gemm
     _cblas_dgemm = _libmkl.cblas_dgemm
+    _cblas_cgemm = _libmkl.cblas_cgemm
+    _cblas_zgemm = _libmkl.cblas_zgemm
 
     # Import function for matrix * vector
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-mv
     _mkl_sparse_s_mv = _libmkl.mkl_sparse_s_mv
-
-    # Import function for matrix * vector
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-mv
     _mkl_sparse_d_mv = _libmkl.mkl_sparse_d_mv
-
+    _mkl_sparse_c_mv = _libmkl.mkl_sparse_c_mv
+    _mkl_sparse_z_mv = _libmkl.mkl_sparse_z_mv
+    
     # Import function for sparse gram matrix
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-syrk
     _mkl_sparse_syrk = _libmkl.mkl_sparse_syrk
 
     # Import function for dense single gram matrix from sparse
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-syrkd
     _mkl_sparse_s_syrkd = _libmkl.mkl_sparse_s_syrkd
-
-    # Import function for dense double gram matrix from sparse
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-syrkd
     _mkl_sparse_d_syrkd = _libmkl.mkl_sparse_d_syrkd
+    _mkl_sparse_c_syrkd = _libmkl.mkl_sparse_c_syrkd
+    _mkl_sparse_z_syrkd = _libmkl.mkl_sparse_z_syrkd
 
     # Import function for dense single gram matrix
-    # https://software.intel.com/en-us/mkl-developer-reference-c-cblas-syrk
     _cblas_ssyrk = _libmkl.cblas_ssyrk
-
-    # Import function for dense double gram matrix
-    # https://software.intel.com/en-us/mkl-developer-reference-c-cblas-syrk
     _cblas_dsyrk = _libmkl.cblas_dsyrk
+    _cblas_csyrk = _libmkl.cblas_csyrk
+    _cblas_zsyrk = _libmkl.cblas_zsyrk
 
     # Import function for QR solver - reorder
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-qr-reorder
     _mkl_sparse_qr_reorder = _libmkl.mkl_sparse_qr_reorder
 
     # Import function for QR solver - factorize
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-qr-factorize
     _mkl_sparse_d_qr_factorize = _libmkl.mkl_sparse_d_qr_factorize
-
-    # Import function for QR solver - factorize
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-qr-factorize
     _mkl_sparse_s_qr_factorize = _libmkl.mkl_sparse_s_qr_factorize
 
     # Import function for QR solver - solve
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-qr-solve
     _mkl_sparse_d_qr_solve = _libmkl.mkl_sparse_d_qr_solve
-
-    # Import function for QR solver - solve
-    # https://software.intel.com/en-us/mkl-developer-reference-c-mkl-sparse-qr-solve
     _mkl_sparse_s_qr_solve = _libmkl.mkl_sparse_s_qr_solve
 
     @classmethod
@@ -216,39 +178,77 @@ def _set_int_type(cls, c_type, np_type):
         cls.MKL_INT = c_type
         cls.MKL_INT_NUMPY = np_type
 
+        cls._set_int_type_create()
+        cls._set_int_type_export()
+        cls._set_int_type_sparse_matmul()
+        cls._set_int_type_dense_matmul()
+        cls._set_int_type_vector_mul()
+        cls._set_int_type_qr_solver()
+        cls._set_int_type_misc()
+
+    @classmethod
+    def _set_int_type_create(cls):
+        """Set the correct argtypes for handle creation functions"""
         cls._mkl_sparse_d_create_csr.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_double)
         cls._mkl_sparse_d_create_csr.restypes = _ctypes.c_int
-
         cls._mkl_sparse_s_create_csr.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_float)
         cls._mkl_sparse_s_create_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_create_csr.argtypes = cls._mkl_sparse_create_argtypes(np.csingle)
+        cls._mkl_sparse_c_create_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_create_csr.argtypes = cls._mkl_sparse_create_argtypes(np.cdouble)
+        cls._mkl_sparse_z_create_csr.restypes = _ctypes.c_int
 
         cls._mkl_sparse_d_create_csc.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_double)
         cls._mkl_sparse_d_create_csc.restypes = _ctypes.c_int
-
         cls._mkl_sparse_s_create_csc.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_float)
         cls._mkl_sparse_s_create_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_create_csc.argtypes = cls._mkl_sparse_create_argtypes(np.csingle)
+        cls._mkl_sparse_c_create_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_create_csc.argtypes = cls._mkl_sparse_create_argtypes(np.cdouble)
+        cls._mkl_sparse_z_create_csc.restypes = _ctypes.c_int
 
         cls._mkl_sparse_d_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(_ctypes.c_double)
         cls._mkl_sparse_d_create_bsr.restypes = _ctypes.c_int
-
         cls._mkl_sparse_s_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(_ctypes.c_float)
         cls._mkl_sparse_s_create_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(np.csingle)
+        cls._mkl_sparse_c_create_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(np.cdouble)
+        cls._mkl_sparse_z_create_bsr.restypes = _ctypes.c_int
 
+    @classmethod
+    def _set_int_type_export(cls):
+        """Set the correct argtypes for handle export functions"""
         cls._mkl_sparse_d_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
         cls._mkl_sparse_d_export_csr.restypes = _ctypes.c_int
-
         cls._mkl_sparse_s_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
         cls._mkl_sparse_s_export_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_z_export_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_c_export_csr.restypes = _ctypes.c_int
 
+        cls._mkl_sparse_s_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_export_csc.restypes = _ctypes.c_int
         cls._mkl_sparse_d_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
         cls._mkl_sparse_d_export_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_c_export_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_z_export_csc.restypes = _ctypes.c_int
 
         cls._mkl_sparse_s_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_float)
         cls._mkl_sparse_s_export_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_export_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_c_export_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_z_export_bsr.restypes = _ctypes.c_int
 
-        cls._mkl_sparse_s_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_s_export_bsr.restypes = _ctypes.c_int
-
+    @classmethod
+    def _set_int_type_sparse_matmul(cls):
+        """Set the correct argtypes for sparse (*) sparse functions and sparse (*) dense functions"""
         cls._mkl_sparse_spmm.argtypes = [_ctypes.c_int,
                                          sparse_matrix_t,
                                          sparse_matrix_t,
@@ -257,63 +257,92 @@ def _set_int_type(cls, c_type, np_type):
 
         cls._mkl_sparse_s_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_float)
         cls._mkl_sparse_s_spmmd.restypes = _ctypes.c_int
-
         cls._mkl_sparse_d_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_double)
         cls._mkl_sparse_d_spmmd.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_c_spmmd.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_z_spmmd.restypes = _ctypes.c_int
 
-        cls._mkl_sparse_s_mm.argtypes = cls._mkl_sparse_mm_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_mm.argtypes = cls._mkl_sparse_mm_argtypes(_ctypes.c_float, _ctypes.c_float, _ctypes.c_float)
         cls._mkl_sparse_s_mm.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_d_mm.argtypes = cls._mkl_sparse_mm_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_mm.argtypes = cls._mkl_sparse_mm_argtypes(_ctypes.c_double, _ctypes.c_double, _ctypes.c_double)
         cls._mkl_sparse_d_mm.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_mm.argtypes = cls._mkl_sparse_mm_argtypes(MKL_Complex8, _ctypes.c_float, np.csingle)
+        cls._mkl_sparse_c_mm.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_mm.argtypes = cls._mkl_sparse_mm_argtypes(MKL_Complex16, _ctypes.c_double, np.cdouble)
+        cls._mkl_sparse_z_mm.restypes = _ctypes.c_int
 
+    @classmethod
+    def _set_int_type_dense_matmul(cls):
+        """Set the correct argtypes for dense (*) dense functions"""
         cls._cblas_sgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_float)
         cls._cblas_sgemm.restypes = None
-
         cls._cblas_dgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_double)
         cls._cblas_dgemm.restypes = None
+        cls._cblas_cgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_void_p)
+        cls._cblas_cgemm.restypes = None
+        cls._cblas_zgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_void_p)
+        cls._cblas_zgemm.restypes = None
+    
+    @classmethod
+    def _set_int_type_vector_mul(cls):
+        """Set the correct argtypes for sparse (*) vector functions"""
+        cls._mkl_sparse_s_mv.argtypes = cls._mkl_sparse_mv_argtypes(_ctypes.c_float, np.float32)
+        cls._mkl_sparse_s_mv.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_mv.argtypes = cls._mkl_sparse_mv_argtypes(_ctypes.c_double, np.float64)
+        cls._mkl_sparse_d_mv.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_mv.argtypes = cls._mkl_sparse_mv_argtypes(MKL_Complex8, np.csingle)
+        cls._mkl_sparse_c_mv.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_mv.argtypes = cls._mkl_sparse_mv_argtypes(MKL_Complex16, np.cdouble)
+        cls._mkl_sparse_z_mv.restypes = _ctypes.c_int
 
+    @classmethod
+    def _set_int_type_misc(cls):
         cls._mkl_sparse_destroy.argtypes = [sparse_matrix_t]
         cls._mkl_sparse_destroy.restypes = _ctypes.c_int
 
         cls._mkl_sparse_order.argtypes = [sparse_matrix_t]
         cls._mkl_sparse_order.restypes = _ctypes.c_int
 
-        cls._mkl_sparse_s_mv.argtypes = cls._mkl_sparse_mv_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_s_mv.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_d_mv.argtypes = cls._mkl_sparse_mv_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_d_mv.restypes = _ctypes.c_int
 
+    @classmethod
+    def _set_int_type_misc(cls):
         cls._mkl_sparse_syrk.argtypes = [_ctypes.c_int,
                                          sparse_matrix_t,
                                          _ctypes.POINTER(sparse_matrix_t)]
         cls._mkl_sparse_syrk.restypes = _ctypes.c_int
 
-        cls._mkl_sparse_s_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(_ctypes.c_float, np.float32)
         cls._mkl_sparse_s_syrkd.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_d_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(_ctypes.c_double, np.float64)
         cls._mkl_sparse_d_syrkd.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(MKL_Complex8, np.csingle)
+        cls._mkl_sparse_c_syrkd.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(MKL_Complex16, np.cdouble)
+        cls._mkl_sparse_z_syrkd.restypes = _ctypes.c_int
 
-        cls._cblas_ssyrk.argtypes = cls._cblas_syrk_argtypes(_ctypes.c_float)
+        cls._cblas_ssyrk.argtypes = cls._cblas_syrk_argtypes(_ctypes.c_float, np.float32)
         cls._cblas_ssyrk.restypes = None
-
-        cls._cblas_dsyrk.argtypes = cls._cblas_syrk_argtypes(_ctypes.c_double)
+        cls._cblas_dsyrk.argtypes = cls._cblas_syrk_argtypes(_ctypes.c_double, np.float64)
         cls._cblas_dsyrk.restypes = None
+        cls._cblas_csyrk.argtypes = cls._cblas_syrk_argtypes(MKL_Complex8, np.csingle, scalar_pointers=True)
+        cls._cblas_csyrk.restypes = None
+        cls._cblas_zsyrk.argtypes = cls._cblas_syrk_argtypes(MKL_Complex16, np.cdouble, scalar_pointers=True)
+        cls._cblas_zsyrk.restypes = None
+
 
+    @classmethod
+    def _set_int_type_qr_solver(cls):
+        """Set the correct argtypes for QR solver functions"""
         cls._mkl_sparse_qr_reorder.argtypes = [sparse_matrix_t, matrix_descr]
         cls._mkl_sparse_qr_reorder.restypes = _ctypes.c_int
-
         cls._mkl_sparse_d_qr_factorize.argtypes = [sparse_matrix_t, _ctypes.POINTER(_ctypes.c_double)]
         cls._mkl_sparse_d_qr_factorize.restypes = _ctypes.c_int
-
         cls._mkl_sparse_s_qr_factorize.argtypes = [sparse_matrix_t, _ctypes.POINTER(_ctypes.c_float)]
         cls._mkl_sparse_s_qr_factorize.restypes = _ctypes.c_int
-
         cls._mkl_sparse_d_qr_solve.argtypes = cls._mkl_sparse_qr_solve(_ctypes.c_double)
         cls._mkl_sparse_d_qr_solve.restypes = _ctypes.c_int
-
         cls._mkl_sparse_s_qr_solve.argtypes = cls._mkl_sparse_qr_solve(_ctypes.c_float)
         cls._mkl_sparse_s_qr_solve.restypes = _ctypes.c_int
 
@@ -371,19 +400,19 @@ def _mkl_sparse_export_bsr_argtypes(prec_type):
                 _ctypes.POINTER(_ctypes.POINTER(prec_type))]
 
     @staticmethod
-    def _cblas_gemm_argtypes(prec_type):
+    def _cblas_gemm_argtypes(prec_type, scalar_pointers=False):
         return [_ctypes.c_int,
                 _ctypes.c_int,
                 _ctypes.c_int,
                 MKL.MKL_INT,
                 MKL.MKL_INT,
                 MKL.MKL_INT,
-                prec_type,
-                ndpointer(dtype=prec_type, ndim=2),
+                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
+                _ctypes.POINTER(prec_type),
                 MKL.MKL_INT,
-                ndpointer(dtype=prec_type, ndim=2),
+                _ctypes.POINTER(prec_type),
                 MKL.MKL_INT,
-                prec_type,
+                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
                 _ctypes.POINTER(prec_type),
                 MKL.MKL_INT]
 
@@ -393,54 +422,55 @@ def _mkl_sparse_spmmd_argtypes(prec_type):
                 sparse_matrix_t,
                 sparse_matrix_t,
                 _ctypes.c_int,
-                _ctypes.POINTER(prec_type), MKL.MKL_INT]
+                _ctypes.POINTER(prec_type), 
+                MKL.MKL_INT]
 
     @staticmethod
-    def _mkl_sparse_mm_argtypes(prec_type):
+    def _mkl_sparse_mm_argtypes(scalar_type, prec_type, np_prec_type):
         return [_ctypes.c_int,
-                prec_type,
+                scalar_type,
                 sparse_matrix_t,
                 matrix_descr,
                 _ctypes.c_int,
-                ndpointer(dtype=prec_type, ndim=2),
+                ndpointer(dtype=np_prec_type, ndim=2),
                 MKL.MKL_INT,
                 MKL.MKL_INT,
-                prec_type,
+                scalar_type,
                 _ctypes.POINTER(prec_type),
                 MKL.MKL_INT]
 
     @staticmethod
-    def _mkl_sparse_mv_argtypes(prec_type):
+    def _mkl_sparse_mv_argtypes(prec_type, np_type):
         return [_ctypes.c_int,
                 prec_type,
                 sparse_matrix_t,
                 matrix_descr,
-                ndpointer(dtype=prec_type, ndim=1),
+                ndpointer(dtype=np_type, ndim=1),
                 prec_type,
-                _ctypes.POINTER(prec_type)]
+                ndpointer(dtype=np_type)]
 
     @staticmethod
-    def _mkl_sparse_syrkd_argtypes(prec_type):
+    def _mkl_sparse_syrkd_argtypes(prec_type, np_type):
         return [_ctypes.c_int,
                 sparse_matrix_t,
                 prec_type,
                 prec_type,
-                _ctypes.POINTER(prec_type),
+                ndpointer(dtype=np_type),
                 _ctypes.c_int,
                 MKL.MKL_INT]
 
     @staticmethod
-    def _cblas_syrk_argtypes(prec_type):
+    def _cblas_syrk_argtypes(prec_type, np_type, scalar_pointers=False):
         return [_ctypes.c_int,
                 _ctypes.c_int,
                 _ctypes.c_int,
                 MKL.MKL_INT,
                 MKL.MKL_INT,
-                prec_type,
-                ndpointer(dtype=prec_type, ndim=2),
+                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
+                ndpointer(dtype=np_type, ndim=2),
                 MKL.MKL_INT,
-                prec_type,
-                _ctypes.POINTER(prec_type),
+                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
+                ndpointer(dtype=np_type, ndim=2),
                 MKL.MKL_INT]
 
     @staticmethod
@@ -474,6 +504,25 @@ def __init__(self, sparse_matrix_type_t=20, sparse_fill_mode_t=0, sparse_diag_ty
         super(matrix_descr, self).__init__(sparse_matrix_type_t, sparse_fill_mode_t, sparse_diag_type_t)
 
 
+# Complex type structs
+# These are the same as the np.csingle and np.cdouble structs
+# They're defined to allow passing complex scalars of specific precisions directly
+class MKL_Complex8(_ctypes.Structure):
+    _fields_ = [("real", _ctypes.c_float),
+                ("imag", _ctypes.c_float)]
+
+    def __init__(self, cplx):
+        super(MKL_Complex8, self).__init__(cplx.real, cplx.imag)
+
+
+class MKL_Complex16(_ctypes.Structure):
+    _fields_ = [("real", _ctypes.c_double),
+                ("imag", _ctypes.c_double)]
+
+    def __init__(self, cplx):
+        super(MKL_Complex16, self).__init__(cplx.real, cplx.imag)
+
+
 # Define standard return codes
 RETURN_CODES = {0: "SPARSE_STATUS_SUCCESS",
                 1: "SPARSE_STATUS_NOT_INITIALIZED",
@@ -498,6 +547,39 @@ def __init__(self, sparse_matrix_type_t=20, sparse_fill_mode_t=0, sparse_diag_ty
 # ILP64 message
 ILP64_MSG = " Try changing MKL to int64 with the environment variable MKL_INTERFACE_LAYER=ILP64"
 
+# Dict keyed by ('sparse_type_str', 'double_precision_bool', 'complex_bool')
+_create_functions = {('csr', False, False): MKL._mkl_sparse_s_create_csr,
+                     ('csr', True, False): MKL._mkl_sparse_d_create_csr,
+                     ('csr', False, True): MKL._mkl_sparse_c_create_csr,
+                     ('csr', True, True): MKL._mkl_sparse_z_create_csr,
+                     ('csc', False, False): MKL._mkl_sparse_s_create_csc,
+                     ('csc', True, False): MKL._mkl_sparse_d_create_csc,
+                     ('csc', False, True): MKL._mkl_sparse_c_create_csc,
+                     ('csc', True, True): MKL._mkl_sparse_z_create_csc,
+                     ('bsr', False, False): MKL._mkl_sparse_s_create_bsr,
+                     ('bsr', True, False): MKL._mkl_sparse_d_create_bsr,
+                     ('bsr', False, True): MKL._mkl_sparse_c_create_bsr,
+                     ('bsr', True, True): MKL._mkl_sparse_z_create_bsr}
+
+# Dict keyed by ('sparse_type_str', 'double_precision_bool', 'complex_bool')
+_export_functions = {('csr', False, False): MKL._mkl_sparse_s_export_csr,
+                     ('csr', True, False): MKL._mkl_sparse_d_export_csr,
+                     ('csr', False, True): MKL._mkl_sparse_c_export_csr,
+                     ('csr', True, True): MKL._mkl_sparse_z_export_csr,
+                     ('csc', False, False): MKL._mkl_sparse_s_export_csc,
+                     ('csc', True, False): MKL._mkl_sparse_d_export_csc,
+                     ('csc', False, True): MKL._mkl_sparse_c_export_csc,
+                     ('csc', True, True): MKL._mkl_sparse_z_export_csc,
+                     ('bsr', False, False): MKL._mkl_sparse_s_export_bsr,
+                     ('bsr', True, False): MKL._mkl_sparse_d_export_bsr,
+                     ('bsr', False, True): MKL._mkl_sparse_c_export_bsr,
+                     ('bsr', True, True): MKL._mkl_sparse_z_export_bsr}
+
+# Dict keyed by ('double_precision_bool', 'complex_bool')
+_output_dtypes = {(False, False): np.float32,
+                 (True, False): np.float64,
+                 (False, True): np.csingle,
+                 (True, True): np.cdouble}
 
 def set_debug_mode(debug_bool):
     """
@@ -622,33 +704,34 @@ def _create_mkl_sparse(matrix):
     :param matrix: Sparse data in CSR or CSC format
     :type matrix: scipy.sparse.spmatrix
 
-    :return ref, double_precision: Handle for the MKL internal representation and boolean for double precision
-    :rtype: sparse_matrix_t, float
+    :return ref, double_precision: Handle for the MKL internal representation and boolean for 
+    double precision and for complex dtype
+    :rtype: sparse_matrix_t, bool, bool
     """
 
-    double_precision = _is_double(matrix)
+    double_precision, complex_type = _is_double(matrix)
 
     # Figure out which matrix creation function to use
     if _spsparse.isspmatrix_csr(matrix):
         _check_scipy_index_typing(matrix)
         assert matrix.data.shape[0] == matrix.indices.shape[0]
         assert matrix.indptr.shape[0] == matrix.shape[0] + 1
-        handle_func = MKL._mkl_sparse_d_create_csr if double_precision else MKL._mkl_sparse_s_create_csr
+        handle_func = _create_functions[('csr', double_precision, complex_type)]
 
     elif _spsparse.isspmatrix_csc(matrix):
         _check_scipy_index_typing(matrix)
         assert matrix.data.shape[0] == matrix.indices.shape[0]
         assert matrix.indptr.shape[0] == matrix.shape[1] + 1
-        handle_func = MKL._mkl_sparse_d_create_csc if double_precision else MKL._mkl_sparse_s_create_csc
+        handle_func = _create_functions[('csc', double_precision, complex_type)]
 
     elif _spsparse.isspmatrix_bsr(matrix):
         _check_scipy_index_typing(matrix)
-        return _create_mkl_sparse_bsr(matrix), double_precision
+        return _create_mkl_sparse_bsr(matrix), double_precision, complex_type
 
     else:
         raise ValueError("Matrix is not CSC, CSR, or BSR")
 
-    return _pass_mkl_handle_csr_csc(matrix, handle_func), double_precision
+    return _pass_mkl_handle_csr_csc(matrix, handle_func), double_precision, complex_type
 
 
 def _pass_mkl_handle_csr_csc(data, handle_func):
@@ -690,8 +773,8 @@ def _create_mkl_sparse_bsr(matrix):
     :rtype: sparse_matrix_t
     """
 
-    double_precision = _is_double(matrix)
-    handle_func = MKL._mkl_sparse_d_create_bsr if double_precision else MKL._mkl_sparse_s_create_bsr
+    double_precision, complex_type = _is_double(matrix)
+    handle_func = _create_functions[('bsr', double_precision, complex_type)]
 
     # Get the blocksize and check that the blocks are square
     _blocksize = matrix.blocksize[0]
@@ -731,7 +814,7 @@ def _create_mkl_sparse_bsr(matrix):
     return ref
 
 
-def _export_mkl(csr_mkl_handle, double_precision, output_type="csr"):
+def _export_mkl(csr_mkl_handle, double_precision, complex_type=False, output_type="csr"):
     """
     Export a MKL sparse handle of CSR or CSC type
 
@@ -749,20 +832,17 @@ def _export_mkl(csr_mkl_handle, double_precision, output_type="csr"):
 
     output_type = output_type.lower()
 
-    if output_type == "csr":
-        out_func = MKL._mkl_sparse_d_export_csr if double_precision else MKL._mkl_sparse_s_export_csr
-        sp_matrix_constructor = _spsparse.csr_matrix
-    elif output_type == "csc":
-        out_func = MKL._mkl_sparse_d_export_csc if double_precision else MKL._mkl_sparse_s_export_csc
-        sp_matrix_constructor = _spsparse.csc_matrix
-    elif output_type == "bsr":
-        return _export_mkl_sparse_bsr(csr_mkl_handle, double_precision)
+    if output_type == "bsr":
+        return _export_mkl_sparse_bsr(csr_mkl_handle, double_precision, complex_type=complex_type)
+    elif output_type == "csr" or output_type == "csc":
+        out_func = _export_functions[(output_type, double_precision, complex_type)]
+        sp_matrix_constructor = _spsparse.csr_matrix if output_type == "csr" else _spsparse.csc_matrix
     else:
         raise ValueError("Only CSR, CSC, and BSR output types are supported")
 
     # Allocate for output
     ordering, nrows, ncols, indptrb, indptren, indices, data = _allocate_for_export(double_precision)
-    final_dtype = np.float64 if double_precision else np.float32
+    final_dtype = _output_dtypes[(double_precision, complex_type)]
 
     ret_val = out_func(csr_mkl_handle,
                        _ctypes.byref(ordering),
@@ -805,14 +885,15 @@ def _export_mkl(csr_mkl_handle, double_precision, output_type="csr"):
         raise ValueError("Matrix ({m} x {n}) is attempting to index {z} elements".format(m=nrows, n=ncols, z=nnz))
 
     # Construct numpy arrays from data pointer and from indicies pointer
-    data = np.array(as_array(data, shape=(nnz,)), copy=True)
+    data = np.array(as_array(data, shape=(nnz * 2 if complex_type else nnz,)), copy=True)
     indices = np.array(as_array(indices, shape=(nnz,)), copy=True)
-
+    
     # Pack and return the matrix
-    return sp_matrix_constructor((data, indices, indptren), shape=(nrows, ncols))
+    return sp_matrix_constructor((data.view(final_dtype) if complex_type else data, indices, indptren),
+                                 shape=(nrows, ncols))
 
 
-def _export_mkl_sparse_bsr(bsr_mkl_handle, double_precision):
+def _export_mkl_sparse_bsr(bsr_mkl_handle, double_precision, complex_type=False):
     """
     Export a BSR matrix from MKL's internal representation to scipy
 
@@ -831,8 +912,8 @@ def _export_mkl_sparse_bsr(bsr_mkl_handle, double_precision):
     block_size = MKL.MKL_INT()
 
     # Set output
-    out_func = MKL._mkl_sparse_d_export_bsr if double_precision else MKL._mkl_sparse_s_export_bsr
-    final_dtype = np.float64 if double_precision else np.float32
+    out_func = _export_functions[('bsr', double_precision, complex_type)]
+    final_dtype = _output_dtypes[(double_precision, complex_type)]
 
     ret_val = out_func(bsr_mkl_handle,
                        _ctypes.byref(ordering),
@@ -866,21 +947,23 @@ def _export_mkl_sparse_bsr(bsr_mkl_handle, double_precision):
     indptren = np.insert(indptren, 0, indptrb[0])
 
     nnz_blocks = (indptren[-1] - indptrb[0])
+    nnz = nnz_blocks * (block_size ** 2)
 
     # If there's no non-zero data, return an empty matrix
     if nnz_blocks == 0:
         return _spsparse.bsr_matrix((nrows, ncols), dtype=final_dtype, blocksize=block_dims)
-    elif nnz_blocks < 0 or (nnz_blocks * (block_size ** 2)) > ncols * nrows:
-        nnz = nnz_blocks * (block_size ** 2)
+    
+    elif nnz_blocks < 0 or nnz > ncols * nrows:
         _err = "Matrix ({m} x {n}) is attempting to index {z} elements as {b} {bs} blocks"
         _err = _err.format(m=nrows, n=ncols, z=nnz, b=nnz_blocks, bs=(block_size, block_size))
         raise ValueError(_err)
 
-    data = np.array(as_array(data, shape=(nnz_blocks, block_size, block_size)), copy=True, order=ordering,
-                    dtype=final_dtype)
+    nnz_row_block = block_size if not complex_type else block_size * 2
+    data = np.array(as_array(data, shape=(nnz_blocks, block_size, nnz_row_block)), copy=True, order=ordering)
     indices = np.array(as_array(indices, shape=(nnz_blocks,)), copy=True)
 
-    return _spsparse.bsr_matrix((data, indices, indptren), shape=(nrows, ncols), blocksize=block_dims)
+    return _spsparse.bsr_matrix((data.view(final_dtype) if complex_type else data, indices, indptren),
+                                shape=(nrows, ncols), blocksize=block_dims)
 
 
 def _allocate_for_export(double_precision):
@@ -1005,6 +1088,9 @@ def _cast_to_float64(matrix):
     """ Make a copy of the array as double precision floats or return the reference if it already is"""
     return matrix.astype(np.float64) if matrix.dtype != np.float64 else matrix
 
+def _cast_to_cdouble(matrix):
+    """ Make a copy of the array as double precision complex floats or return the reference if it already is"""
+    return matrix.astype(np.cdouble) if matrix.dtype != np.cdouble else matrix
 
 def _type_check(matrix_a, matrix_b=None, cast=False):
     """
@@ -1012,30 +1098,56 @@ def _type_check(matrix_a, matrix_b=None, cast=False):
     If not, convert to double precision floats if cast is True, or raise an error if cast is False
     """
 
-    if matrix_b is None and matrix_a.dtype in NUMPY_FLOAT_DTYPES:
+    all_dtypes = NUMPY_FLOAT_DTYPES + NUMPY_COMPLEX_DTYPES
+    _n_complex = np.iscomplexobj(matrix_a) + np.iscomplexobj(matrix_b)
+
+    # If there's no matrix B and matrix A is valid dtype, return it
+    if matrix_b is None and matrix_a.dtype in all_dtypes:
         return matrix_a
+    # If matrix A is complex but not csingle or cdouble, and cast is True, convert it to a cdouble
+    elif matrix_b is None and cast and _n_complex == 1:
+        return _cast_to_cdouble(matrix_a)
+    # If matrix A is real but not float32 or float64, and cast is True, convert it to a float64
     elif matrix_b is None and cast:
         return _cast_to_float64(matrix_a)
+    # Raise an error - the dtype is invalid and cast is False
     elif matrix_b is None:
-        err_msg = "Matrix data type must be float32 or float64; {a} provided".format(a=matrix_a.dtype)
+        err_msg = "Matrix data type must be float32, float64, csingle, or cdouble; {a} provided".format(a=matrix_a.dtype)
         raise ValueError(err_msg)
 
-    # Check dtypes
-    if matrix_a.dtype == np.float32 and matrix_b.dtype == np.float32:
+    # If Matrix A & B have the same valid dtype, return them
+    if matrix_a.dtype in all_dtypes and matrix_a.dtype == matrix_b.dtype:
         return matrix_a, matrix_b
-
-    elif matrix_a.dtype == np.float64 and matrix_b.dtype == np.float64:
-        return matrix_a, matrix_b
-
-    elif (matrix_a.dtype != np.float64 or matrix_b.dtype != np.float64) and cast:
+    # If neither matrix is complex and cast is True, convert to float64s and return them
+    elif cast and _n_complex == 0:
         debug_print("Recasting matrix data types {a} and {b} to np.float64".format(a=matrix_a.dtype, b=matrix_b.dtype))
         return _cast_to_float64(matrix_a), _cast_to_float64(matrix_b)
-
-    elif matrix_a.dtype != np.float64 or matrix_b.dtype != np.float64:
+    # If both matrices are complex and cast is True, convert to cdoubles and return them
+    elif cast and _n_complex == 2:
+        debug_print("Recasting matrix data types {a} and {b} to np.cdouble".format(a=matrix_a.dtype, b=matrix_b.dtype))
+        return _cast_to_cdouble(matrix_a), _cast_to_cdouble(matrix_b)
+    # Can't cast reals and complex matrices together
+    elif cast and _n_complex == 1:
+        err_msg = "Cannot cast reals and complexes together; {a} and {b} provided".format(a=matrix_a.dtype,
+                                                                                          b=matrix_b.dtype)
+        raise ValueError(err_msg)
+    # If cast is False, can't cast anything together
+    elif not cast:
         err_msg = "Matrix data types must be in concordance; {a} and {b} provided".format(a=matrix_a.dtype,
                                                                                           b=matrix_b.dtype)
         raise ValueError(err_msg)
 
+def _mkl_scalar(scalar, complex_type, double_precision):
+    """Turn complex scalars into appropriate precision MKL scalars or leave floats as floats"""
+
+    scalar = 1. if scalar is None else scalar
+
+    if complex_type and double_precision:
+        return MKL_Complex16(complex(scalar))
+    elif complex_type:
+        return MKL_Complex8(complex(scalar))
+    else:
+        return float(scalar)
 
 def _out_matrix(shape, dtype, order="C", out_arr=None, out_t=False):
     """
@@ -1109,11 +1221,15 @@ def _is_double(arr):
 
     # Figure out which dtype for data
     if arr.dtype == np.float32:
-        return False
+        return False, False
     elif arr.dtype == np.float64:
-        return True
-    else:
-        raise ValueError("Only float32 or float64 dtypes are supported")
+        return True, False
+    elif arr.dtype == np.csingle:
+        return False, True
+    elif arr.dtype == np.cdouble:
+        return True, True
+    else: 
+        raise ValueError("Only float32, float64, csingle, and cdouble dtypes are supported")
 
 
 def _is_allowed_sparse_format(matrix):
@@ -1157,7 +1273,7 @@ def _validate_dtype():
     test_array = _spsparse.random(5, 5, density=0.5, format="csc", dtype=np.float32, random_state=50)
     test_comparison = test_array.A
 
-    csc_ref, precision_flag = _create_mkl_sparse(test_array)
+    csc_ref, precision_flag, _ = _create_mkl_sparse(test_array)
 
     try:
         csr_ref = _convert_to_csr(csc_ref)
diff --git a/sparse_dot_mkl/_sparse_dense.py b/sparse_dot_mkl/_sparse_dense.py
index 5080bac..ab61081 100644
--- a/sparse_dot_mkl/_sparse_dense.py
+++ b/sparse_dot_mkl/_sparse_dense.py
@@ -1,12 +1,18 @@
 from sparse_dot_mkl._mkl_interface import (MKL, _sanity_check, _empty_output_check, _type_check, _create_mkl_sparse,
                                            _destroy_mkl_handle, matrix_descr, debug_print, _convert_to_csr,
                                            _get_numpy_layout, _check_return_value, LAYOUT_CODE_C, LAYOUT_CODE_F,
-                                           _out_matrix)
+                                           _out_matrix, _output_dtypes, _mkl_scalar)
 import numpy as np
 import ctypes as _ctypes
 import scipy.sparse as _spsparse
 
 
+# Dict keyed by ('double_precision_bool', 'complex_bool')
+_mkl_sp_mm_funcs = {(False, False): MKL._mkl_sparse_s_mm,
+                    (True, False): MKL._mkl_sparse_d_mm,
+                    (False, True): MKL._mkl_sparse_c_mm,
+                    (True, True): MKL._mkl_sparse_z_mm}
+
 def _sparse_dense_matmul(matrix_a, matrix_b, scalar=1., transpose=False, out=None, out_scalar=None, out_t=None):
     """
     Multiply together a sparse and a dense matrix
@@ -39,36 +45,41 @@ def _sparse_dense_matmul(matrix_a, matrix_b, scalar=1., transpose=False, out=Non
         # Prep MKL handles and check that matrixes are compatible types
         # MKL requires CSR format if the dense array is column-major
         if layout_b == LAYOUT_CODE_F and not _spsparse.isspmatrix_csr(matrix_a):
-            mkl_non_csr, dbl = _create_mkl_sparse(matrix_a)
+            mkl_non_csr, dbl, cplx = _create_mkl_sparse(matrix_a)
             _mkl_handles.append(mkl_non_csr)
             mkl_a = _convert_to_csr(mkl_non_csr)
         else:
-            mkl_a, dbl = _create_mkl_sparse(matrix_a)
+            mkl_a, dbl, cplx = _create_mkl_sparse(matrix_a)
 
         _mkl_handles.append(mkl_a)
 
         # Set functions and types for float or doubles
         output_ctype = _ctypes.c_double if dbl else _ctypes.c_float
-        output_dtype = np.float64 if dbl else np.float32
-        func = MKL._mkl_sparse_d_mm if dbl else MKL._mkl_sparse_s_mm
+        output_dtype = _output_dtypes[(dbl, cplx)]
+        func = _mkl_sp_mm_funcs[(dbl, cplx)]
 
         # Allocate an output array
-        output_arr = _out_matrix(output_shape, output_dtype, order="C" if layout_b == LAYOUT_CODE_C else "F",
-                                out_arr=out, out_t=out_t)
-
+        output_order = "C" if layout_b == LAYOUT_CODE_C else "F"
+        output_arr = _out_matrix(output_shape, output_dtype, output_order,
+                                 out_arr=out, out_t=out_t)
+        
         output_layout, output_ld = _get_numpy_layout(output_arr, second_arr=matrix_b)
 
+        # Create a C struct if necessary to be passed
+        scalar = _mkl_scalar(scalar, cplx, dbl)
+        out_scalar = _mkl_scalar(out_scalar, cplx, dbl)
+
         ret_val = func(11 if transpose else 10,
-                    scalar,
-                    mkl_a,
-                    matrix_descr(),
-                    layout_b,
-                    matrix_b,
-                    output_shape[1],
-                    ld_b,
-                    float(out_scalar) if out_scalar is not None else 1.,
-                    output_arr.ctypes.data_as(_ctypes.POINTER(output_ctype)),
-                    output_ld)
+                       scalar,
+                       mkl_a,
+                       matrix_descr(),
+                       layout_b,
+                       matrix_b,
+                       output_shape[1],
+                       ld_b,
+                       out_scalar,
+                       output_arr.ctypes.data_as(_ctypes.POINTER(output_ctype)),
+                       output_ld)
 
         # Check return
         _check_return_value(ret_val, func.__name__)
diff --git a/sparse_dot_mkl/_sparse_qr_solver.py b/sparse_dot_mkl/_sparse_qr_solver.py
index e45a5e5..2f1e48c 100644
--- a/sparse_dot_mkl/_sparse_qr_solver.py
+++ b/sparse_dot_mkl/_sparse_qr_solver.py
@@ -19,7 +19,7 @@ def _sparse_qr(matrix_a, matrix_b):
     :rtype: numpy.ndarray
     """
 
-    mkl_a, dbl = _create_mkl_sparse(matrix_a)
+    mkl_a, dbl, cplx = _create_mkl_sparse(matrix_a)
     layout_b, ld_b = _get_numpy_layout(matrix_b)
 
     output_shape = matrix_a.shape[1], matrix_b.shape[1]
diff --git a/sparse_dot_mkl/_sparse_sparse.py b/sparse_dot_mkl/_sparse_sparse.py
index 0ef47cd..22dc108 100644
--- a/sparse_dot_mkl/_sparse_sparse.py
+++ b/sparse_dot_mkl/_sparse_sparse.py
@@ -1,7 +1,7 @@
 from sparse_dot_mkl._mkl_interface import (MKL, sparse_matrix_t, _create_mkl_sparse, debug_print, debug_timer,
                                            _export_mkl, _order_mkl_handle, _destroy_mkl_handle, _type_check,
                                            _empty_output_check, _sanity_check, _is_allowed_sparse_format,
-                                           _check_return_value)
+                                           _check_return_value, _output_dtypes)
 import ctypes as _ctypes
 import numpy as np
 import scipy.sparse as _spsparse
@@ -32,7 +32,13 @@ def _matmul_mkl(sp_ref_a, sp_ref_b):
     return ref_handle
 
 
-def _matmul_mkl_dense(sp_ref_a, sp_ref_b, output_shape, double_precision):
+# Dict keyed by ('double_precision_bool', 'complex_bool')
+_mkl_spmmd_funcs = {(False, False): MKL._mkl_sparse_s_spmmd,
+                    (True, False): MKL._mkl_sparse_d_spmmd,
+                    (False, True): MKL._mkl_sparse_c_spmmd,
+                    (True, True): MKL._mkl_sparse_z_spmmd}
+
+def _matmul_mkl_dense(sp_ref_a, sp_ref_b, output_shape, double_precision, complex_type=False):
     """
     Dot product two MKL objects together into a dense numpy array and return the result
 
@@ -51,9 +57,10 @@ def _matmul_mkl_dense(sp_ref_a, sp_ref_b, output_shape, double_precision):
     """
 
     # Allocate an array for outputs and set functions and types for float or doubles
-    output_arr = np.zeros(output_shape, dtype=np.float64 if double_precision else np.float32)
+    output_arr = np.empty(output_shape, dtype=_output_dtypes[(double_precision, complex_type)])
     output_ctype = _ctypes.c_double if double_precision else _ctypes.c_float
-    func = MKL._mkl_sparse_d_spmmd if double_precision else MKL._mkl_sparse_s_spmmd
+
+    func = _mkl_spmmd_funcs[(double_precision, complex_type)]
 
     ret_val = func(10,
                    sp_ref_a,
@@ -124,14 +131,15 @@ def _sparse_dot_sparse(matrix_a, matrix_b, cast=False, reorder_output=False, den
     t = debug_timer()
 
     # Create intel MKL objects
-    mkl_a, a_dbl = _create_mkl_sparse(matrix_a)
-    mkl_b, b_dbl = _create_mkl_sparse(matrix_b)
+    mkl_a, a_dbl, a_cplx = _create_mkl_sparse(matrix_a)
+    mkl_b, b_dbl, b_cplx = _create_mkl_sparse(matrix_b)
 
     t = debug_timer("Created MKL sparse handles", t)
 
     # Call spmmd for dense output directly if the dense flag is set
     if dense:
-        dense_arr = _matmul_mkl_dense(mkl_a, mkl_b, (matrix_a.shape[0], matrix_b.shape[1]), a_dbl or b_dbl)
+        dense_arr = _matmul_mkl_dense(mkl_a, mkl_b, (matrix_a.shape[0], matrix_b.shape[1]), a_dbl or b_dbl,
+                                      complex_type=a_cplx)
 
         debug_timer("Multiplied matrices", t)
 
@@ -157,7 +165,7 @@ def _sparse_dot_sparse(matrix_a, matrix_b, cast=False, reorder_output=False, den
             t = debug_timer("Reordered output indices", t)
 
         # Extract
-        python_c = _export_mkl(mkl_c, a_dbl or b_dbl, output_type=output_type)
+        python_c = _export_mkl(mkl_c, a_dbl or b_dbl, complex_type=a_cplx, output_type=output_type)
         _destroy_mkl_handle(mkl_c)
 
         debug_timer("Created python handle", t)
diff --git a/sparse_dot_mkl/_sparse_sypr.py b/sparse_dot_mkl/_sparse_sypr.py
new file mode 100644
index 0000000..d630d78
--- /dev/null
+++ b/sparse_dot_mkl/_sparse_sypr.py
@@ -0,0 +1,122 @@
+import warnings
+from sparse_dot_mkl._mkl_interface import (MKL, sparse_matrix_t, _create_mkl_sparse, _get_numpy_layout, debug_timer,
+                                           _export_mkl, _order_mkl_handle, _destroy_mkl_handle, _type_check,
+                                           _empty_output_check, _sanity_check, _is_allowed_sparse_format,
+                                           _check_return_value, matrix_descr, debug_print, _convert_to_csr,
+                                           SPARSE_MATRIX_TYPE_SYMMETRIC, SPARSE_FILL_MODE_UPPER, SPARSE_FILL_MODE_LOWER,
+                                           SPARSE_STAGE_FULL_MULT, SPARSE_FILL_MODE_FULL, SPARSE_DIAG_NON_UNIT,
+                                           _get_numpy_layout, _check_return_value, LAYOUT_CODE_C, LAYOUT_CODE_F,
+                                           _out_matrix, SPARSE_OPERATION_NON_TRANSPOSE, SPARSE_OPERATION_TRANSPOSE)
+import ctypes as _ctypes
+import numpy as np
+import scipy.sparse as _spsparse
+from scipy.sparse import (isspmatrix_csr as is_csr, isspmatrix_csc as is_csc, isspmatrix_bsr as is_bsr,
+                          isspmatrix as is_sparse)
+
+def _sypr_sparse_A_dense_B(matrix_a, matrix_b, transpose_a=False, out=None, out_scalar=None, a_scalar=None):
+
+    _output_dim = 1 if transpose_a else 0
+    output_shape = (matrix_b.shape[_output_dim], matrix_b.shape[_output_dim])
+    layout_b, ld_b = _get_numpy_layout(matrix_b, second_arr=out)
+
+    mkl_a, dbl, cplx = _create_mkl_sparse(matrix_a)
+
+    # Set functions and types for float or doubles
+    output_dtype = np.float64 if dbl else np.float32
+    func = MKL._mkl_sparse_d_syprd if dbl else MKL._mkl_sparse_s_syprd
+
+    # Allocate an output array
+    output_arr = _out_matrix(output_shape, output_dtype, order="C" if layout_b == LAYOUT_CODE_C else "F",
+                             out_arr=out, out_t=False)
+
+    output_layout, output_ld = _get_numpy_layout(output_arr, second_arr=matrix_b)
+
+    ret_val = func(SPARSE_OPERATION_TRANSPOSE if transpose_a else SPARSE_OPERATION_NON_TRANSPOSE,
+                   mkl_a,
+                   matrix_b,
+                   layout_b,
+                   ld_b,
+                   float(out_scalar) if a_scalar is not None else 1.,
+                   float(out_scalar) if out_scalar is not None else 1.,
+                   output_arr,
+                   output_layout,
+                   output_ld)
+
+    # Check return
+    _check_return_value(ret_val, func.__name__)
+
+    _destroy_mkl_handle(mkl_a)
+
+    return output_arr
+
+def _sypr_sparse_A_sparse_B(matrix_a, matrix_b, transpose_a=False):
+
+    mkl_c = sparse_matrix_t()
+
+    mkl_a, a_dbl, a_cplx = _create_mkl_sparse(matrix_a)
+    mkl_b, b_dbl, b_cplx = _create_mkl_sparse(matrix_b)
+
+    _order_mkl_handle(mkl_a)
+    _order_mkl_handle(mkl_b)
+
+    if is_csr(matrix_b):
+        output_type = "csr"
+    elif is_bsr(matrix_b):
+        output_type = "bsr"
+    else:
+        raise ValueError("matrix B must be CSR or BSR")
+
+    descrB = matrix_descr(sparse_matrix_type_t=SPARSE_MATRIX_TYPE_SYMMETRIC,
+                          sparse_fill_mode_t=SPARSE_FILL_MODE_UPPER,
+                          sparse_diag_type_t=SPARSE_DIAG_NON_UNIT)
+
+    try:
+        ret_val = MKL._mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE if transpose_a else SPARSE_OPERATION_NON_TRANSPOSE,
+                                    mkl_a,
+                                    mkl_b,
+                                    descrB,
+                                    _ctypes.byref(mkl_c),
+                                    SPARSE_STAGE_FULL_MULT)
+
+        _check_return_value(ret_val, "mkl_sparse_sypr")
+    
+    finally:
+        _destroy_mkl_handle(mkl_a)
+        _destroy_mkl_handle(mkl_b)
+
+    # Extract
+    try:
+        python_c = _export_mkl(mkl_c, b_dbl, output_type=output_type)
+    finally:
+        _destroy_mkl_handle(mkl_c)
+
+    return python_c
+
+
+def _sparse_sypr(matrix_a, matrix_b, transpose_a=False, cast=False, out=None, out_scalar=None, scalar=None):
+
+     # Check dtypes
+    matrix_a, matrix_b = _type_check(matrix_a, matrix_b, cast=cast)
+
+    if is_csr(matrix_b):
+        default_output, output_type = _spsparse.csr_matrix, "csr"
+    elif is_bsr(matrix_b):
+        default_output, output_type = _spsparse.bsr_matrix, "bsr"
+    elif not is_sparse(matrix_b):
+        default_output, output_type = np.zeros, "dense"
+
+    if not (is_csr(matrix_a) or is_bsr(matrix_a)) or not (is_csr(matrix_b) or is_bsr(matrix_b) or not is_sparse(matrix_b)):
+        raise ValueError("Input matrices to spyr must be CSR or BSR; CSC and COO is not supported")
+
+    # Call sypr if B is sparse
+    if is_sparse(matrix_b):
+        if out is not None or out_scalar is not None or scalar is not None:
+            _msg = "out, out_scalar, and scalar have no effect if matrix B is not sparse"
+            warnings.warn(_msg, RuntimeWarning)
+
+        return _sypr_sparse_A_sparse_B(matrix_a, matrix_b, transpose_a=transpose_a)
+
+    # Call syprd if B is dense
+    else:
+        return _sypr_sparse_A_dense_B(matrix_a, matrix_b, transpose_a=transpose_a, out=out,
+                                      out_scalar=out_scalar, a_scalar=scalar)
diff --git a/sparse_dot_mkl/_sparse_vector.py b/sparse_dot_mkl/_sparse_vector.py
index 28c7f80..b63efbd 100644
--- a/sparse_dot_mkl/_sparse_vector.py
+++ b/sparse_dot_mkl/_sparse_vector.py
@@ -1,10 +1,15 @@
 from sparse_dot_mkl._mkl_interface import (MKL, _sanity_check, _empty_output_check, _type_check, _create_mkl_sparse,
-                                           _destroy_mkl_handle, matrix_descr, RETURN_CODES, _is_dense_vector,
-                                           _out_matrix, _check_return_value, _is_allowed_sparse_format)
+                                           _destroy_mkl_handle, matrix_descr, _is_dense_vector, _out_matrix, 
+                                           _check_return_value, _is_allowed_sparse_format, _output_dtypes, _mkl_scalar)
 
 import numpy as np
 import ctypes as _ctypes
 
+# Dict keyed by ('double_precision_bool', 'complex_bool')
+_mkl_sp_mv_funcs = {(False, False): MKL._mkl_sparse_s_mv,
+                    (True, False): MKL._mkl_sparse_d_mv,
+                    (False, True): MKL._mkl_sparse_c_mv,
+                    (True, True): MKL._mkl_sparse_z_mv}
 
 def _sparse_dense_vector_mult(matrix_a, vector_b, scalar=1., transpose=False, out=None, out_scalar=None, out_t=None):
     """
@@ -33,13 +38,18 @@ def _sparse_dense_vector_mult(matrix_a, vector_b, scalar=1., transpose=False, ou
         final_dtype = np.float64 if matrix_a.dtype != vector_b.dtype or matrix_a.dtype != np.float32 else np.float32
         return _out_matrix(output_shape, final_dtype, out_arr=out)
 
-    mkl_a, dbl = _create_mkl_sparse(matrix_a)
+    mkl_a, dbl, cplx = _create_mkl_sparse(matrix_a)
     vector_b = vector_b.ravel()
 
     # Set functions and types for float or doubles
-    output_ctype = _ctypes.c_double if dbl else _ctypes.c_float
-    output_dtype = np.float64 if dbl else np.float32
-    func = MKL._mkl_sparse_d_mv if dbl else MKL._mkl_sparse_s_mv
+    output_dtype = _output_dtypes[(dbl, cplx)]
+
+    # Set the MKL function for precision
+    func = _mkl_sp_mv_funcs[(dbl, cplx)]
+
+    # Create a C struct if necessary to be passed
+    scalar = _mkl_scalar(scalar, cplx, dbl)
+    out_scalar = _mkl_scalar(out_scalar, cplx, dbl)
 
     output_arr = _out_matrix(output_shape, output_dtype, out_arr=out, out_t=out_t)
 
@@ -48,8 +58,8 @@ def _sparse_dense_vector_mult(matrix_a, vector_b, scalar=1., transpose=False, ou
                    mkl_a,
                    matrix_descr(),
                    vector_b,
-                   float(out_scalar) if out_scalar is not None else 1.,
-                   output_arr.ctypes.data_as(_ctypes.POINTER(output_ctype)))
+                   out_scalar,
+                   output_arr)
 
     # Check return
     _check_return_value(ret_val, func.__name__)
diff --git a/sparse_dot_mkl/tests/test_dense_dense.py b/sparse_dot_mkl/tests/test_dense_dense.py
index c4f4b01..dd2c3b2 100644
--- a/sparse_dot_mkl/tests/test_dense_dense.py
+++ b/sparse_dot_mkl/tests/test_dense_dense.py
@@ -2,26 +2,33 @@
 import numpy as np
 import numpy.testing as npt
 from sparse_dot_mkl import dot_product_mkl
-from sparse_dot_mkl.tests.test_mkl import MATRIX_1, MATRIX_2
+from sparse_dot_mkl.tests.test_mkl import MATRIX_1, MATRIX_2, make_matrixes
 
 
 class TestDenseDenseMultiplication(unittest.TestCase):
 
     order = "C"
 
+    double_dtype = np.float64
+    single_dtype = np.float32
+
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.MATRIX_2 = MATRIX_1.copy().A, MATRIX_2.copy().A
+
     def setUp(self):
-        self.mat1 = MATRIX_1.copy().A
-        self.mat2 = MATRIX_2.copy().A
+        self.mat1 = self.MATRIX_1.copy()
+        self.mat2 = self.MATRIX_2.copy()
 
     def test_float32(self):
-        d1, d2 = self.mat1.astype(np.float32), self.mat2.astype(np.float32)
+        d1, d2 = self.mat1.astype(self.single_dtype), self.mat2.astype(self.single_dtype)
         mat3_np = np.dot(d1, d2)
 
         mat3 = dot_product_mkl(d1, d2)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
-        mat3 = dot_product_mkl(d1, d2, out=np.ones(mat3_np.shape, dtype=np.float32, order=self.order), out_scalar=3)
+        mat3 = dot_product_mkl(d1, d2, out=np.ones(mat3_np.shape, dtype=self.single_dtype, order=self.order), out_scalar=3)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
 
     def test_float64(self):
@@ -33,21 +40,21 @@ def test_float64(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
 
         mat3_np += 3.
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3)
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(mat3_np, out)
         self.assertEqual(id(mat3), id(out))
 
     def test_float64_cast(self):
-        d1, d2 = self.mat1.astype(np.float32), self.mat2
+        d1, d2 = self.mat1.astype(self.single_dtype), self.mat2
         mat3_np = np.dot(d1, d2)
 
         mat3 = dot_product_mkl(d1, d2, cast=True)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
-        mat3 = dot_product_mkl(d1, d2, out=np.ones(mat3_np.shape, dtype=np.float64, order=self.order), out_scalar=3,
+        mat3 = dot_product_mkl(d1, d2, out=np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order), out_scalar=3,
                                cast=True)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
 
@@ -59,7 +66,7 @@ def test_outer_product(self):
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
-        mat3 = dot_product_mkl(d1, d2, out=np.ones(mat3_np.shape, dtype=np.float64, order="C"), out_scalar=3)
+        mat3 = dot_product_mkl(d1, d2, out=np.ones(mat3_np.shape, dtype=self.double_dtype, order="C"), out_scalar=3)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
 
     def test_fails(self):
@@ -87,8 +94,8 @@ class TestDenseDenseFCMultiplication(TestDenseDenseMultiplication):
     order = "F"
 
     def setUp(self):
-        self.mat1 = np.asarray(MATRIX_1.copy().A, order='F')
-        self.mat2 = np.asarray(MATRIX_2.copy().A, order='C')
+        self.mat1 = np.asarray(self.MATRIX_1.copy(), order='F')
+        self.mat2 = np.asarray(self.MATRIX_2.copy(), order='C')
 
 
 class TestDenseDenseFFMultiplication(TestDenseDenseMultiplication):
@@ -96,8 +103,8 @@ class TestDenseDenseFFMultiplication(TestDenseDenseMultiplication):
     order = "F"
 
     def setUp(self):
-        self.mat1 = np.asarray(MATRIX_1.copy().A, order='F')
-        self.mat2 = np.asarray(MATRIX_2.copy().A, order='F')
+        self.mat1 = np.asarray(self.MATRIX_1.copy(), order='F')
+        self.mat2 = np.asarray(self.MATRIX_2.copy(), order='F')
 
 
 class TestDenseDenseCFMultiplication(TestDenseDenseMultiplication):
@@ -105,5 +112,38 @@ class TestDenseDenseCFMultiplication(TestDenseDenseMultiplication):
     order = "C"
 
     def setUp(self):
-        self.mat1 = np.asarray(MATRIX_1.copy().A, order='C')
-        self.mat2 = np.asarray(MATRIX_2.copy().A, order='F')
+        self.mat1 = np.asarray(self.MATRIX_1.copy(), order='C')
+        self.mat2 = np.asarray(self.MATRIX_2.copy(), order='F')
+
+class _ComplexMixin:
+
+    double_dtype = np.cdouble
+    single_dtype = np.csingle
+
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.MATRIX_2 = make_matrixes(200, 100, 300, 0.05, dtype=np.cdouble)
+        cls.MATRIX_1 = cls.MATRIX_1.A
+        cls.MATRIX_2 = cls.MATRIX_2.A
+
+    def test_inherits(self):
+
+        self.assertIs(self.double_dtype, np.cdouble)
+        self.assertIs(self.single_dtype, np.csingle)
+        self.assertTrue(self.MATRIX_1.dtype == np.cdouble)
+
+class TestDDComplexMultiplication(_ComplexMixin, TestDenseDenseMultiplication):
+
+    pass
+
+class TestDDFCComplexMultiplication(_ComplexMixin, TestDenseDenseFCMultiplication):
+
+    pass
+
+class TestDDCFComplexMultiplication(_ComplexMixin, TestDenseDenseCFMultiplication):
+
+    pass
+
+class TestDDFFComplexMultiplication(_ComplexMixin, TestDenseDenseFFMultiplication):
+
+    pass
\ No newline at end of file
diff --git a/sparse_dot_mkl/tests/test_mkl.py b/sparse_dot_mkl/tests/test_mkl.py
index be7b700..cfead93 100644
--- a/sparse_dot_mkl/tests/test_mkl.py
+++ b/sparse_dot_mkl/tests/test_mkl.py
@@ -18,16 +18,21 @@
 
 SEED = 86
 
-
-def make_matrixes(a, b, n, density):
-    m1 = _spsparse.random(a, n, density=density, format="csr", dtype=np.float64, random_state=SEED)
-    m2 = _spsparse.random(n, b, density=density, format="csr", dtype=np.float64, random_state=SEED + 1)
+def make_matrixes(a, b, n, density, dtype=np.float64):
+    m1 = _spsparse.random(a, n, density=density, format="csr", dtype=dtype, random_state=SEED)
+    m2 = _spsparse.random(n, b, density=density, format="csr", dtype=dtype, random_state=SEED + 1)
     return m1, m2
 
+def make_vector(n, complex=False):
+    rng = np.random.default_rng(SEED + 2)
+    if not complex:
+        return rng.random(n).astype(np.float64)
+    else:
+        return rng.random(n) + rng.random(n) * 1j
 
 MATRIX_1, MATRIX_2 = make_matrixes(200, 100, 300, 0.05)
-VECTOR = np.random.rand(300).astype(np.float64)
 MATRIX_1_EMPTY = _spsparse.csr_matrix((200, 300), dtype=np.float64)
+VECTOR = make_vector(300)
 
 
 class TestEmptyConditions(unittest.TestCase):
@@ -81,7 +86,7 @@ def test_make_mkl_bad_type(self):
             _create_mkl_sparse(self.mat1.astype(np.int64))
 
     def test_export_mkl_bad_type(self):
-        mkl_handle, dbl = _create_mkl_sparse(self.mat1)
+        mkl_handle, dbl, cplx = _create_mkl_sparse(self.mat1)
 
         with self.assertRaises(ValueError):
             _export_mkl(mkl_handle, dbl, output_type="coo")
@@ -161,10 +166,10 @@ def test_create_export(self):
         mat3 = mat1.astype(np.float32).copy()
         mat4 = self.mat2.astype(np.float32).copy()
 
-        ref_1, precision_1 = _create_mkl_sparse(mat1)
-        ref_2, precision_2 = _create_mkl_sparse(mat2)
-        ref_3, precision_3 = _create_mkl_sparse(mat3)
-        ref_4, precision_4 = _create_mkl_sparse(mat4)
+        ref_1, precision_1, cplx_1 = _create_mkl_sparse(mat1)
+        ref_2, precision_2, cplx_2 = _create_mkl_sparse(mat2)
+        ref_3, precision_3, cplx_3 = _create_mkl_sparse(mat3)
+        ref_4, precision_4, cplx_4 = _create_mkl_sparse(mat4)
 
         self.assertTrue(precision_1)
         self.assertTrue(precision_2)
@@ -185,14 +190,14 @@ def test_create_bsr(self):
         mat1 = _spsparse.bsr_matrix(self.mat1, blocksize=(2, 2))
         mat3 = mat1.astype(np.float32).copy()
 
-        ref_1, precision_1 = _create_mkl_sparse(mat1)
-        ref_3, precision_3 = _create_mkl_sparse(mat3)
+        ref_1, precision_1, cplx_1 = _create_mkl_sparse(mat1)
+        ref_3, precision_3, cplx_3 = _create_mkl_sparse(mat3)
 
         self.assertTrue(precision_1)
         self.assertFalse(precision_3)
 
-        cycle_1 = _export_mkl(ref_1, precision_1, output_type="bsr")
-        cycle_3 = _export_mkl(ref_3, precision_3, output_type="bsr")
+        cycle_1 = _export_mkl(ref_1, precision_1, complex_type=cplx_1, output_type="bsr")
+        cycle_3 = _export_mkl(ref_3, precision_3, complex_type=cplx_3, output_type="bsr")
 
         self.is_sparse_identical_A(self.mat1, cycle_1)
         self.is_sparse_identical_internal(mat1, cycle_1)
@@ -206,8 +211,8 @@ def test_create_convert_bsr(self):
         mat1 = _spsparse.bsr_matrix(self.mat1, blocksize=(2, 2))
         mat3 = mat1.astype(np.float32).copy()
 
-        ref_1, precision_1 = _create_mkl_sparse(mat1)
-        ref_3, precision_3 = _create_mkl_sparse(mat3)
+        ref_1, precision_1, cplx_1 = _create_mkl_sparse(mat1)
+        ref_3, precision_3, cplx_3 = _create_mkl_sparse(mat3)
 
         cref_1 = _convert_to_csr(ref_1)
         cref_3 = _convert_to_csr(ref_3)
@@ -215,8 +220,8 @@ def test_create_convert_bsr(self):
         self.assertTrue(precision_1)
         self.assertFalse(precision_3)
 
-        cycle_1 = _export_mkl(cref_1, precision_1, output_type="csr")
-        cycle_3 = _export_mkl(cref_3, precision_3, output_type="csr")
+        cycle_1 = _export_mkl(cref_1, precision_1, complex_type=cplx_1, output_type="csr")
+        cycle_3 = _export_mkl(cref_3, precision_3, complex_type=cplx_3, output_type="csr")
 
         self.is_sparse_identical_A(self.mat1, cycle_1)
         self.is_sparse_identical_A(self.mat1.astype(np.float32), cycle_3)
diff --git a/sparse_dot_mkl/tests/test_sparse_dense.py b/sparse_dot_mkl/tests/test_sparse_dense.py
index cd3479c..9521fbc 100644
--- a/sparse_dot_mkl/tests/test_sparse_dense.py
+++ b/sparse_dot_mkl/tests/test_sparse_dense.py
@@ -1,23 +1,31 @@
 import unittest
+from unittest.case import skip
 import numpy as np
 import numpy.testing as npt
+from scipy.sparse.csr import csr_matrix
 from sparse_dot_mkl import dot_product_mkl
-from sparse_dot_mkl.tests.test_mkl import MATRIX_1, MATRIX_2
+from sparse_dot_mkl.tests.test_mkl import MATRIX_1, MATRIX_2, make_matrixes
 
 
 class TestSparseDenseMultiplication(unittest.TestCase):
 
     order = "C"
 
-    def setUp(self):
-        self.mat1 = MATRIX_1.copy()
-        self.mat2 = MATRIX_2.copy()
+    double_dtype = np.float64
+    single_dtype = np.float32
+
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.MATRIX_2 = MATRIX_1.copy(), MATRIX_2.copy()
 
-        self.mat1_d = MATRIX_1.A
-        self.mat2_d = MATRIX_2.A
+    def setUp(self):
+        self.mat1 = self.MATRIX_1.copy()
+        self.mat2 = self.MATRIX_2.copy()
+        self.mat1_d = self.mat1.A
+        self.mat2_d = self.mat2.A
 
     def test_float32_b_sparse(self):
-        d1, d2 = self.mat1_d.astype(np.float32), self.mat2.astype(np.float32)
+        d1, d2 = self.mat1_d.astype(self.single_dtype), self.mat2.astype(self.single_dtype)
 
         mat3 = dot_product_mkl(d1, d2, debug=True)
         mat3_np = np.dot(d1, d2.A)
@@ -25,7 +33,7 @@ def test_float32_b_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
 
         mat3_np += 3.
-        out = np.ones(mat3_np.shape, dtype=np.float32, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.single_dtype, order=self.order)
 
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3., debug=True)
         npt.assert_array_almost_equal(mat3_np, mat3)
@@ -37,31 +45,31 @@ def test_float32_b_sparse(self):
         self.assertEqual(id(mat3), id(out))
 
     def test_float64_b_sparse(self):
-        d1, d2 = self.mat1_d, self.mat2
+        d1, d2 = self.mat1_d.astype(self.double_dtype), self.mat2.astype(self.double_dtype)
 
         mat3 = dot_product_mkl(d1, d2, debug=True)
         mat3_np = np.dot(d1, d2.A)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
-        mat3 = dot_product_mkl(d1, d2, out=np.ones(mat3_np.shape, dtype=np.float64, order=self.order), out_scalar=3.)
+        mat3 = dot_product_mkl(d1, d2, out=np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order), out_scalar=3.)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
 
     def test_float64_cast_b_sparse(self):
-        d1, d2 = self.mat1_d.astype(np.float32), self.mat2
+        d1, d2 = self.mat1_d.astype(self.single_dtype), self.mat2
 
         mat3 = dot_product_mkl(d1, d2, cast=True)
         mat3_np = np.dot(d1, d2.A)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3., cast=True)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
 
     def test_float32_csc_sparse(self):
-        d1, d2 = self.mat1_d.astype(np.float32), self.mat2.astype(np.float32).tocsc()
+        d1, d2 = self.mat1_d.astype(self.single_dtype), self.mat2.astype(self.single_dtype).tocsc()
         mat3_np = np.dot(d1, d2.A)
 
         mat3 = dot_product_mkl(d1, d2)
@@ -69,7 +77,7 @@ def test_float32_csc_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d2.A, self.mat2_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float32, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.single_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3.)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
@@ -77,7 +85,7 @@ def test_float32_csc_sparse(self):
     def test_float32_bsr_sparse(self):
 
         bs = min(*self.mat2.shape, 10)
-        d1, d2 = self.mat1_d.astype(np.float32), self.mat2.astype(np.float32).tobsr(blocksize=(bs, bs))
+        d1, d2 = self.mat1_d.astype(self.single_dtype), self.mat2.astype(self.single_dtype).tobsr(blocksize=(bs, bs))
         mat3_np = np.dot(d1, d2.A)
 
         mat3 = dot_product_mkl(d1, d2)
@@ -85,7 +93,7 @@ def test_float32_bsr_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d2.A, self.mat2_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float32, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.single_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3.)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
@@ -95,11 +103,10 @@ def test_float64_csc_sparse(self):
 
         mat3 = dot_product_mkl(d1, d2)
         mat3_np = np.dot(d1, d2.A)
-
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d2.A, self.mat2_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3.)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
@@ -114,13 +121,13 @@ def test_float64_bsr_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d2.A, self.mat2_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3.)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
 
     def test_float64_cast_csc_sparse(self):
-        d1, d2 = self.mat1_d.astype(np.float32), self.mat2.tocsc()
+        d1, d2 = self.mat1_d.astype(self.single_dtype), self.mat2.tocsc()
 
         mat3 = dot_product_mkl(d1, d2, cast=True)
         mat3_np = np.dot(d1, d2.A)
@@ -128,7 +135,7 @@ def test_float64_cast_csc_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d2.A, self.mat2_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3., cast=True)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
@@ -136,7 +143,7 @@ def test_float64_cast_csc_sparse(self):
     def test_float64_cast_bsr_sparse(self):
 
         bs = min(*self.mat2.shape, 10)
-        d1, d2 = self.mat1_d.astype(np.float32), self.mat2.tobsr(blocksize=(bs, bs))
+        d1, d2 = self.mat1_d.astype(self.single_dtype), self.mat2.tobsr(blocksize=(bs, bs))
 
         mat3 = dot_product_mkl(d1, d2, cast=True)
         mat3_np = np.dot(d1, d2.A)
@@ -144,20 +151,20 @@ def test_float64_cast_bsr_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d2.A, self.mat2_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3., cast=True)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
 
     def test_float32_a_sparse(self):
-        d1, d2 = self.mat1.astype(np.float32), self.mat2_d.astype(np.float32)
+        d1, d2 = self.mat1.astype(self.single_dtype), self.mat2_d.astype(self.single_dtype)
 
         mat3 = dot_product_mkl(d1, d2)
         mat3_np = np.dot(d1.A, d2)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
-        out = np.ones(mat3_np.shape, dtype=np.float32, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.single_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3.)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
@@ -170,7 +177,7 @@ def test_float64_a_sparse(self):
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3.)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
@@ -184,7 +191,7 @@ def test_float64_a_csc_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d1.A, self.mat1_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
@@ -200,13 +207,13 @@ def test_float64_a_bsr_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d1.A, self.mat1_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
 
     def test_float32_a_csc_sparse(self):
-        d1, d2 = self.mat1.astype(np.float32).tocsc(), self.mat2_d.astype(np.float32)
+        d1, d2 = self.mat1.astype(self.single_dtype).tocsc(), self.mat2_d.astype(self.single_dtype)
 
         mat3 = dot_product_mkl(d1, d2)
         mat3_np = np.dot(d1.A, d2)
@@ -214,7 +221,7 @@ def test_float32_a_csc_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d1.A, self.mat1_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float32, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.single_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3.)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
@@ -222,7 +229,7 @@ def test_float32_a_csc_sparse(self):
     def test_float32_a_bsr_sparse(self):
 
         bs = min(*self.mat1.shape, 10)
-        d1, d2 = self.mat1.astype(np.float32).tobsr(blocksize=(bs, bs)), self.mat2_d.astype(np.float32)
+        d1, d2 = self.mat1.astype(self.single_dtype).tobsr(blocksize=(bs, bs)), self.mat2_d.astype(self.single_dtype)
 
         mat3 = dot_product_mkl(d1, d2)
         mat3_np = np.dot(d1.A, d2)
@@ -230,20 +237,20 @@ def test_float32_a_bsr_sparse(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
         npt.assert_array_almost_equal(d1.A, self.mat1_d)
 
-        out = np.ones(mat3_np.shape, dtype=np.float32, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.single_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3.)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
 
     def test_float64_cast_a_sparse(self):
-        d1, d2 = self.mat1.astype(np.float32), self.mat2_d
+        d1, d2 = self.mat1.astype(self.single_dtype), self.mat2_d
 
         mat3 = dot_product_mkl(d1, d2, cast=True)
         mat3_np = np.dot(d1.A, d2)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
-        out = np.ones(mat3_np.shape, dtype=np.float64, order=self.order)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype, order=self.order)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=3., cast=True)
         npt.assert_array_almost_equal(mat3_np + 3., mat3)
         self.assertEqual(id(mat3), id(out))
@@ -254,11 +261,11 @@ class TestSparseDenseFMultiplication(TestSparseDenseMultiplication):
     order = "F"
 
     def setUp(self):
-        self.mat1 = MATRIX_1.copy()
-        self.mat2 = MATRIX_2.copy()
+        self.mat1 = self.MATRIX_1.copy()
+        self.mat2 = self.MATRIX_2.copy()
 
-        self.mat1_d = np.asarray(MATRIX_1.A, order="F")
-        self.mat2_d = np.asarray(MATRIX_2.A, order="F")
+        self.mat1_d = np.asarray(self.MATRIX_1.A, order=self.order)
+        self.mat2_d = np.asarray(self.MATRIX_2.A, order=self.order)
 
 
 class TestSparseVectorDenseCMultiplication(TestSparseDenseMultiplication):
@@ -266,11 +273,11 @@ class TestSparseVectorDenseCMultiplication(TestSparseDenseMultiplication):
     order = "C"
 
     def setUp(self):
-        self.mat1 = MATRIX_1.copy()[0, :]
-        self.mat2 = MATRIX_2.copy()
+        self.mat1 = self.MATRIX_1.copy()[0, :]
+        self.mat2 = self.MATRIX_2.copy()
 
-        self.mat1_d = np.asarray(MATRIX_1.A, order="C")[0, :].reshape(1, -1)
-        self.mat2_d = np.asarray(MATRIX_2.A, order="C")
+        self.mat1_d = np.asarray(self.MATRIX_1.A, order=self.order)[0, :].reshape(1, -1)
+        self.mat2_d = np.asarray(self.MATRIX_2.A, order=self.order)
 
 
 class TestSparseVector2DenseCMultiplication(TestSparseDenseMultiplication):
@@ -278,11 +285,11 @@ class TestSparseVector2DenseCMultiplication(TestSparseDenseMultiplication):
     order = "C"
 
     def setUp(self):
-        self.mat1 = MATRIX_1.copy()
-        self.mat2 = MATRIX_2.copy()[:, 0]
+        self.mat1 = self.MATRIX_1.copy()
+        self.mat2 = self.MATRIX_2.copy()[:, 0]
 
-        self.mat1_d = np.asarray(MATRIX_1.A, order="C")
-        self.mat2_d = np.asarray(MATRIX_2.A, order="C")[:, 0].reshape(-1, 1)
+        self.mat1_d = np.asarray(self.MATRIX_1.A, order=self.order)
+        self.mat2_d = np.asarray(self.MATRIX_2.A, order=self.order)[:, 0].reshape(-1, 1)
 
 
 class TestSparseVectorDenseFMultiplication(TestSparseDenseMultiplication):
@@ -290,11 +297,11 @@ class TestSparseVectorDenseFMultiplication(TestSparseDenseMultiplication):
     order = "F"
 
     def setUp(self):
-        self.mat1 = MATRIX_1.copy()[0, :]
-        self.mat2 = MATRIX_2.copy()
+        self.mat1 = self.MATRIX_1.copy()[0, :]
+        self.mat2 = self.MATRIX_2.copy()
 
-        self.mat1_d = np.asarray(MATRIX_1.A, order="F")[0, :].reshape(1, -1)
-        self.mat2_d = np.asarray(MATRIX_2.A, order="F")
+        self.mat1_d = np.asarray(self.MATRIX_1.A, order=self.order)[0, :].reshape(1, -1)
+        self.mat2_d = np.asarray(self.MATRIX_2.A, order=self.order)
 
 
 class TestSparseVector2DenseFMultiplication(TestSparseDenseMultiplication):
@@ -302,8 +309,30 @@ class TestSparseVector2DenseFMultiplication(TestSparseDenseMultiplication):
     order = "F"
 
     def setUp(self):
-        self.mat1 = MATRIX_1.copy()
-        self.mat2 = MATRIX_2.copy()[:, 0]
+        self.mat1 = self.MATRIX_1.copy()
+        self.mat2 = self.MATRIX_2.copy()[:, 0]
+
+        self.mat1_d = np.asarray(self.MATRIX_1.A, order=self.order)
+        self.mat2_d = np.asarray(self.MATRIX_2.A, order=self.order)[:, 0].reshape(-1, 1)
+
+class _ComplexMixin:
+
+    double_dtype = np.cdouble
+    single_dtype = np.csingle
+
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.MATRIX_2 = make_matrixes(200, 100, 300, 0.05, dtype=np.cdouble)
+
+
+class TestSparseDenseMultiplicationComplex(_ComplexMixin, TestSparseDenseMultiplication):
+
+    pass
+
+class TestSparseDenseFMultiplicationComplex(_ComplexMixin, TestSparseDenseFMultiplication):
+
+    pass
+
+class TestSparseVectorDenseCMultiplicationComplex(_ComplexMixin, TestSparseVectorDenseCMultiplication):
 
-        self.mat1_d = np.asarray(MATRIX_1.A, order="F")
-        self.mat2_d = np.asarray(MATRIX_2.A, order="F")[:, 0].reshape(-1, 1)
+    pass
\ No newline at end of file
diff --git a/sparse_dot_mkl/tests/test_sparse_sparse.py b/sparse_dot_mkl/tests/test_sparse_sparse.py
index 9383647..0f8e63d 100644
--- a/sparse_dot_mkl/tests/test_sparse_sparse.py
+++ b/sparse_dot_mkl/tests/test_sparse_sparse.py
@@ -1,4 +1,5 @@
 import unittest
+from unittest.case import skip
 import numpy as np
 import numpy.testing as npt
 import scipy.sparse as _spsparse
@@ -14,21 +15,30 @@ class TestMultiplicationCSR(unittest.TestCase):
     sparse_args = {}
     sparse_output = "csr"
 
+    double_dtype = np.float64
+    single_dtype = np.float32
+
+    export_complex = False
+
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.MATRIX_2 = MATRIX_1.copy(), MATRIX_2.copy()
+
     def setUp(self):
-        self.mat1 = self.sparse_func(MATRIX_1, **self.sparse_args).copy()
-        self.mat2 = self.sparse_func(MATRIX_2, **self.sparse_args).copy()
+        self.mat1 = self.sparse_func(self.MATRIX_1, **self.sparse_args).copy()
+        self.mat2 = self.sparse_func(self.MATRIX_2, **self.sparse_args).copy()
 
     def test_spmm_success(self):
         set_debug_mode(True)
 
-        ref_1, precision_1 = _create_mkl_sparse(self.mat1)
-        ref_2, precision_2 = _create_mkl_sparse(self.mat2)
+        ref_1, precision_1, cplx_1 = _create_mkl_sparse(self.mat1)
+        ref_2, precision_2, cplx_2 = _create_mkl_sparse(self.mat2)
 
         self.assertTrue(precision_1)
         self.assertTrue(precision_2)
 
         ref_3 = _matmul_mkl(ref_1, ref_2)
-        mat3 = _export_mkl(ref_3, precision_1 or precision_2, output_type=self.sparse_output)
+        mat3 = _export_mkl(ref_3, precision_1 or precision_2, complex_type=self.export_complex, output_type=self.sparse_output)
 
         mat3_sp = self.mat1.dot(self.mat2)
         mat3_np = np.dot(self.mat1.A, self.mat2.A)
@@ -39,17 +49,17 @@ def test_spmm_success(self):
         set_debug_mode(False)
 
     def test_spmm_success_float32(self):
-        self.mat1.data = self.mat1.data.astype(np.float32)
-        self.mat2.data = self.mat2.data.astype(np.float32)
+        self.mat1.data = self.mat1.data.astype(self.single_dtype)
+        self.mat2.data = self.mat2.data.astype(self.single_dtype)
 
-        ref_1, precision_1 = _create_mkl_sparse(self.mat1)
-        ref_2, precision_2 = _create_mkl_sparse(self.mat2)
+        ref_1, precision_1, cplx_1 = _create_mkl_sparse(self.mat1)
+        ref_2, precision_2, cplx_2 = _create_mkl_sparse(self.mat2)
 
         self.assertFalse(precision_1)
         self.assertFalse(precision_2)
 
         ref_3 = _matmul_mkl(ref_1, ref_2)
-        mat3 = _export_mkl(ref_3, precision_1 or precision_2, output_type=self.sparse_output)
+        mat3 = _export_mkl(ref_3, precision_1 or precision_2, complex_type=self.export_complex, output_type=self.sparse_output)
 
         mat3_sp = self.mat1.dot(self.mat2)
         mat3_np = np.dot(self.mat1.A, self.mat2.A)
@@ -58,8 +68,8 @@ def test_spmm_success_float32(self):
         npt.assert_array_almost_equal(mat3_np, mat3.A)
 
     def test_spmm_error_bad_dims(self):
-        ref_1, prec_1 = _create_mkl_sparse(self.mat1.transpose())
-        ref_2, prec_2 = _create_mkl_sparse(self.mat2)
+        ref_1, prec_1, cplx_1 = _create_mkl_sparse(self.mat1.transpose())
+        ref_2, prec_2, cplx_2 = _create_mkl_sparse(self.mat2)
 
         with self.assertRaises(ValueError):
             _matmul_mkl(ref_1, ref_2)
@@ -92,7 +102,7 @@ def test_all_zeros(self):
         self.assertEqual(len(zm_mkl.data), 0)
 
     def test_highly_sparse(self):
-        hsp1, hsp2 = make_matrixes(2000, 1000, 3000, 0.0005)
+        hsp1, hsp2 = make_matrixes(2000, 1000, 3000, 0.0005, dtype=self.double_dtype)
         hsp1 = self.sparse_func(hsp1, **self.sparse_args)
         hsp2 = self.sparse_func(hsp2, **self.sparse_args)
 
@@ -102,7 +112,7 @@ def test_highly_sparse(self):
         npt.assert_array_almost_equal(hsp3.A, hsp3_sp.A)
 
     def test_highly_highly_sparse(self):
-        hsp1, hsp2 = make_matrixes(2000, 1000, 3000, 0.000005)
+        hsp1, hsp2 = make_matrixes(2000, 1000, 3000, 0.000005, dtype=self.double_dtype)
         hsp1 = self.sparse_func(hsp1, **self.sparse_args)
         hsp2 = self.sparse_func(hsp2, **self.sparse_args)
 
@@ -112,7 +122,7 @@ def test_highly_highly_sparse(self):
         npt.assert_array_almost_equal(hsp3.A, hsp3_sp.A)
 
     def test_dense(self):
-        d1, d2 = make_matrixes(10, 20, 50, 1)
+        d1, d2 = make_matrixes(10, 20, 50, 1, dtype=self.double_dtype)
         d1 = self.sparse_func(d1, **self.sparse_args)
         d2 = self.sparse_func(d2, **self.sparse_args)
 
@@ -120,7 +130,7 @@ def test_dense(self):
         hsp3 = dot_product_mkl(d1, d2)
 
         npt.assert_array_almost_equal(hsp3.A, hsp3_sp.A)
-        self.assertTrue(hsp3.dtype == np.float64)
+        self.assertTrue(hsp3.dtype == self.double_dtype)
 
     def test_CSC(self):
         d1, d2 = self.mat1, _spsparse.csc_matrix(self.mat2)
@@ -129,7 +139,7 @@ def test_CSC(self):
         hsp3 = dot_product_mkl(d1, d2)
 
         npt.assert_array_almost_equal(hsp3.A, hsp3_sp.A)
-        self.assertTrue(hsp3.dtype == np.float64)
+        self.assertTrue(hsp3.dtype == self.double_dtype)
 
     def test_CSR(self):
         d1, d2 = self.mat1, _spsparse.csc_matrix(self.mat2)
@@ -138,7 +148,7 @@ def test_CSR(self):
         hsp3 = dot_product_mkl(d1, d2)
 
         npt.assert_array_almost_equal(hsp3.A, hsp3_sp.A)
-        self.assertTrue(hsp3.dtype == np.float64)
+        self.assertTrue(hsp3.dtype == self.double_dtype)
 
     @unittest.skip
     def test_BSR(self):
@@ -148,7 +158,7 @@ def test_BSR(self):
         hsp3 = dot_product_mkl(d1, d2, debug=True)
 
         npt.assert_array_almost_equal(hsp3.A, hsp3_sp.A)
-        self.assertTrue(hsp3.dtype == np.float64)
+        self.assertTrue(hsp3.dtype == self.double_dtype)
 
     def test_COO(self):
         d1, d2 = self.mat1, _spsparse.coo_matrix(self.mat2)
@@ -157,37 +167,37 @@ def test_COO(self):
             hsp3 = dot_product_mkl(d1, d2)
 
     def test_mixed(self):
-        d1, d2 = self.mat1.astype(np.float32), self.mat2
+        d1, d2 = self.mat1.astype(self.single_dtype), self.mat2
 
         hsp3_sp = d1.dot(d2)
         hsp3 = dot_product_mkl(d1, d2, cast=True)
 
         npt.assert_array_almost_equal(hsp3.A, hsp3_sp.A)
-        self.assertTrue(hsp3.dtype == np.float64)
+        self.assertTrue(hsp3.dtype == self.double_dtype)
 
     def test_mixed_2(self):
-        d1, d2 = self.mat1, self.mat2.astype(np.float32)
+        d1, d2 = self.mat1, self.mat2.astype(self.single_dtype)
 
         hsp3_sp = d1.dot(d2)
         hsp3 = dot_product_mkl(d1, d2, cast=True)
 
         npt.assert_array_almost_equal(hsp3.A, hsp3_sp.A)
-        self.assertTrue(hsp3.dtype == np.float64)
+        self.assertTrue(hsp3.dtype == self.double_dtype)
 
     def test_mixed_nocast(self):
-        d1, d2 = self.mat1, self.mat2.astype(np.float32)
+        d1, d2 = self.mat1, self.mat2.astype(self.single_dtype)
 
         with self.assertRaises(ValueError):
             hsp3 = dot_product_mkl(d1, d2, cast=False)
 
     def test_float32(self):
-        d1, d2 = self.mat1.astype(np.float32), self.mat2.astype(np.float32)
+        d1, d2 = self.mat1.astype(self.single_dtype), self.mat2.astype(self.single_dtype)
 
         hsp3_sp = d1.dot(d2)
         hsp3 = dot_product_mkl(d1, d2)
 
         npt.assert_array_almost_equal(hsp3.A, hsp3_sp.A)
-        self.assertTrue(hsp3.dtype == np.float32)
+        self.assertTrue(hsp3.dtype == self.single_dtype)
 
     def test_dot_product_mkl_copy(self):
         mat3 = dot_product_mkl(self.mat1, self.mat2, copy=True)
@@ -231,12 +241,19 @@ def test_CSR(self):
 
 class TestSparseToDenseMultiplication(unittest.TestCase):
 
+    double_dtype = np.float64
+    single_dtype = np.float32
+    
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.MATRIX_2 = MATRIX_1.copy(), MATRIX_2.copy()
+
     def setUp(self):
-        self.mat1 = MATRIX_1.copy()
-        self.mat2 = MATRIX_2.copy()
+        self.mat1 = self.MATRIX_1.copy()
+        self.mat2 = self.MATRIX_2.copy()
 
     def test_float32_CSR(self):
-        d1, d2 = self.mat1.astype(np.float32), self.mat2.astype(np.float32)
+        d1, d2 = self.mat1.astype(self.single_dtype), self.mat2.astype(self.single_dtype)
         mat3_np = np.dot(d1.A, d2.A)
 
         mat3 = dot_product_mkl(d1, d2, copy=True, dense=True)
@@ -244,7 +261,7 @@ def test_float32_CSR(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
 
     def test_float32_CSC(self):
-        d1, d2 = self.mat1.astype(np.float32).tocsc(), self.mat2.astype(np.float32).tocsc()
+        d1, d2 = self.mat1.astype(self.single_dtype).tocsc(), self.mat2.astype(self.single_dtype).tocsc()
         mat3_np = np.dot(d1.A, d2.A)
 
         mat3 = dot_product_mkl(d1, d2, copy=True, dense=True)
@@ -276,10 +293,43 @@ def test_float64_BSR(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
 
     def test_float32_BSR(self):
-        d1 = self.mat1.astype(np.float32).tobsr(blocksize=(10, 10))
-        d2 = self.mat2.astype(np.float32).tobsr(blocksize=(10, 10))
+        d1 = self.mat1.astype(self.single_dtype).tobsr(blocksize=(10, 10))
+        d2 = self.mat2.astype(self.single_dtype).tobsr(blocksize=(10, 10))
         mat3_np = np.dot(d1.A, d2.A)
 
         mat3 = dot_product_mkl(d1, d2, copy=True, dense=True)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
+
+
+class _ComplexMixin:
+
+    double_dtype = np.cdouble
+    single_dtype = np.csingle
+    export_complex = True
+
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.MATRIX_2 = make_matrixes(200, 100, 300, 0.05, dtype=np.cdouble)
+
+
+
+class TestMultiplicationCSRComplex(_ComplexMixin, TestMultiplicationCSR):
+
+    pass
+
+
+class TestMultiplicationCSCComplex(_ComplexMixin, TestMultiplicationCSC):
+
+    pass
+
+
+class TestMultiplicationBSRComplex(_ComplexMixin, TestMultiplicationBSR):
+
+    pass
+
+
+class TestSparseToDenseMultiplicationComplex(_ComplexMixin, TestSparseToDenseMultiplication):
+
+    pass
+    
\ No newline at end of file
diff --git a/sparse_dot_mkl/tests/test_sparse_vector.py b/sparse_dot_mkl/tests/test_sparse_vector.py
index 7a43281..6e9d555 100644
--- a/sparse_dot_mkl/tests/test_sparse_vector.py
+++ b/sparse_dot_mkl/tests/test_sparse_vector.py
@@ -3,23 +3,34 @@
 import numpy.testing as npt
 import scipy.sparse as _spsparse
 from sparse_dot_mkl import dot_product_mkl
-from sparse_dot_mkl.tests.test_mkl import MATRIX_1, MATRIX_2, VECTOR
+from sparse_dot_mkl.tests.test_mkl import MATRIX_1, MATRIX_2, VECTOR, make_matrixes, make_vector
 
 
 class TestSparseVectorMultiplication(unittest.TestCase):
 
+    double_dtype = np.float64
+    single_dtype = np.float32
+    export_complex = True
+
+    sparse_func = _spsparse.csr_matrix
+    sparse_args = {}
+
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.MATRIX_2, cls.VECTOR = MATRIX_1.copy(), MATRIX_2.copy(), VECTOR.copy() 
+
     def make_2d(self, arr):
         return arr.reshape(-1, 1) if arr.ndim == 1 else arr
 
     def setUp(self):
-        self.mat1 = MATRIX_1.copy()
-        self.mat2 = VECTOR.copy()
+        self.mat1 = self.sparse_func(self.MATRIX_1.copy(), **self.sparse_args)
+        self.mat2 = self.VECTOR.copy()
 
-        self.mat1_d = np.asarray(MATRIX_1.A, order="C")
-        self.mat2_d = VECTOR.copy()
+        self.mat1_d = np.asarray(self.MATRIX_1.A, order="C")
+        self.mat2_d = self.VECTOR.copy()
 
     def test_mult_1d(self):
-        mat3 = dot_product_mkl(self.mat1.astype(np.float64), self.mat2, cast=True)
+        mat3 = dot_product_mkl(self.mat1.astype(self.double_dtype), self.mat2, cast=True)
         mat3_np = np.dot(self.mat1_d, self.mat2_d)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
@@ -28,7 +39,7 @@ def test_mult_1d_out(self):
         mat3_np = np.dot(self.mat1_d, self.mat2_d)
         mat3_np += 2
 
-        out = np.ones(mat3_np.shape)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype)
         mat3 = dot_product_mkl(self.mat1, self.mat2, cast=True, out=out, out_scalar=2)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
@@ -36,7 +47,7 @@ def test_mult_1d_out(self):
         self.assertEqual(id(mat3), id(out))
 
     def test_mult_1d_float32(self):
-        d1, d2 = self.mat1.astype(np.float32), self.mat2
+        d1, d2 = self.mat1.astype(self.single_dtype), self.mat2
 
         mat3 = dot_product_mkl(d1, d2, cast=True)
         mat3_np = np.dot(self.mat1_d, self.mat2_d)
@@ -47,25 +58,25 @@ def test_mult_1d_float32_out(self):
         mat3_np = np.dot(self.mat1_d, self.mat2_d)
         mat3_np += 2
 
-        out = np.ones(mat3_np.shape, dtype=np.float32)
-        mat3 = dot_product_mkl(self.mat1.astype(np.float32), self.mat2.astype(np.float32), cast=False,
+        out = np.ones(mat3_np.shape, dtype=self.single_dtype)
+        mat3 = dot_product_mkl(self.mat1.astype(self.single_dtype), self.mat2.astype(self.single_dtype), cast=False,
                                out=out, out_scalar=2)
 
         npt.assert_array_almost_equal(mat3_np, out, decimal=5)
         self.assertEqual(id(out), id(mat3))
 
         with self.assertRaises(ValueError):
-            mat3 = dot_product_mkl(self.mat1.astype(np.float32), self.mat2.astype(np.float32), cast=True,
-                                   out=np.ones(mat3_np.shape).astype(np.float64), out_scalar=2)
+            mat3 = dot_product_mkl(self.mat1.astype(self.single_dtype), self.mat2.astype(self.single_dtype), cast=True,
+                                   out=np.ones(mat3_np.shape).astype(self.double_dtype), out_scalar=2)
 
     def test_mult_1d_both_float32(self):
-        mat3 = dot_product_mkl(self.mat1.astype(np.float32), self.mat2.astype(np.float32), cast=True)
+        mat3 = dot_product_mkl(self.mat1.astype(self.single_dtype), self.mat2.astype(self.single_dtype), cast=True)
         mat3_np = np.dot(self.mat1_d, self.mat2_d)
 
-        npt.assert_array_almost_equal(mat3_np, mat3)
+        npt.assert_array_almost_equal(mat3_np, mat3, decimal=5)
 
     def test_mult_2d(self):
-        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(np.float64)), self.make_2d(self.mat2), cast=True)
+        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(self.double_dtype)), self.make_2d(self.mat2), cast=True)
         mat3_np = np.dot(self.make_2d(self.mat1_d), self.make_2d(self.mat2_d))
 
         npt.assert_array_almost_equal(mat3_np, mat3)
@@ -74,31 +85,31 @@ def test_mult_2d_out(self):
         mat3_np = np.dot(self.make_2d(self.mat1_d), self.make_2d(self.mat2_d))
         mat3_np += 2
 
-        out = np.ones(mat3_np.shape, dtype=np.float64)
-        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(np.float64)), self.make_2d(self.mat2), cast=True,
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype)
+        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(self.double_dtype)), self.make_2d(self.mat2), cast=True,
                                out=out, out_scalar=2)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
         self.assertEqual(id(out), id(mat3))
 
         with self.assertRaises(ValueError):
-            mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(np.float64)), self.make_2d(self.mat2), cast=True,
-                                   out=np.ones(mat3_np.shape).astype(np.float32), out_scalar=2)
+            mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(self.double_dtype)), self.make_2d(self.mat2), cast=True,
+                                   out=np.ones(mat3_np.shape).astype(self.single_dtype), out_scalar=2)
 
     def test_mult_2d_float32(self):
-        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(np.float32)), self.make_2d(self.mat2), cast=True)
+        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(self.single_dtype)), self.make_2d(self.mat2), cast=True)
         mat3_np = np.dot(self.make_2d(self.mat1_d), self.make_2d(self.mat2_d))
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
     def test_mult_2d_other_float32(self):
-        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(np.float32)), self.make_2d(self.mat2), cast=True)
+        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(self.single_dtype)), self.make_2d(self.mat2), cast=True)
         mat3_np = np.dot(self.make_2d(self.mat1_d), self.make_2d(self.mat2_d))
 
         npt.assert_array_almost_equal(mat3_np, mat3)
 
     def test_mult_2d_both_float32(self):
-        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(np.float32)), self.make_2d(self.mat2.astype(np.float32)))
+        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(self.single_dtype)), self.make_2d(self.mat2.astype(self.single_dtype)))
         mat3_np = np.dot(self.make_2d(self.mat1_d), self.make_2d(self.mat2_d))
 
         npt.assert_array_almost_equal(mat3_np, mat3, decimal=5)
@@ -107,8 +118,8 @@ def test_mult_2d_both_float32_out(self):
         mat3_np = np.dot(self.make_2d(self.mat1_d), self.make_2d(self.mat2_d))
         mat3_np += 2
 
-        out = np.ones(mat3_np.shape, dtype=np.float32)
-        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(np.float32)), self.make_2d(self.mat2.astype(np.float32)),
+        out = np.ones(mat3_np.shape, dtype=self.single_dtype)
+        mat3 = dot_product_mkl(self.make_2d(self.mat1.astype(self.single_dtype)), self.make_2d(self.mat2.astype(self.single_dtype)),
                                out=out, out_scalar=2)
 
         npt.assert_array_almost_equal(mat3_np, mat3, decimal=5)
@@ -117,23 +128,14 @@ def test_mult_2d_both_float32_out(self):
 
 class TestSparseVectorMultiplicationCSC(TestSparseVectorMultiplication):
 
-    def setUp(self):
-        self.mat1 = _spsparse.csc_matrix(MATRIX_1).copy()
-        self.mat2 = VECTOR.copy()
-
-        self.mat1_d = np.asarray(MATRIX_1.A, order="C")
-        self.mat2_d = VECTOR.copy()
+    sparse_func = _spsparse.csc_matrix
+    sparse_args = {}
 
 
 class TestSparseVectorMultiplicationBSR(TestSparseVectorMultiplication):
 
-    def setUp(self):
-        self.mat1 = _spsparse.bsr_matrix(MATRIX_1, blocksize=(10, 10)).copy()
-        self.mat2 = VECTOR.copy()
-
-        self.mat1_d = np.asarray(MATRIX_1.A, order="C")
-        self.mat2_d = VECTOR.copy()
-
+    sparse_func = _spsparse.bsr_matrix
+    sparse_args = {"blocksize": (10, 10)}
 
 class TestSparseVectorMultiplicationCOO(unittest.TestCase):
 
@@ -161,11 +163,11 @@ class TestVectorSparseMultiplication(TestSparseVectorMultiplication):
     sparse_args = {}
 
     def setUp(self):
-        self.mat2 = MATRIX_2.copy()
-        self.mat1 = VECTOR.copy()
+        self.mat2 = self.MATRIX_2.copy()
+        self.mat1 = self.VECTOR.copy()
 
-        self.mat2_d = np.asarray(MATRIX_2.A, order="C")
-        self.mat1_d = VECTOR.copy()
+        self.mat2_d = np.asarray(self.MATRIX_2.A, order="C")
+        self.mat1_d = self.VECTOR.copy()
 
     def make_2d(self, arr):
         return arr.reshape(1, -1) if arr.ndim == 1 else arr
@@ -179,7 +181,7 @@ def test_mult_outer_product_ds(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
 
         mat3_np += 2.
-        out = np.ones(mat3_np.shape)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=2.)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
@@ -194,7 +196,7 @@ def test_mult_outer_product_sd(self):
         npt.assert_array_almost_equal(mat3_np, mat3)
 
         mat3_np += 2.
-        out = np.ones(mat3_np.shape)
+        out = np.ones(mat3_np.shape, dtype=self.double_dtype)
         mat3 = dot_product_mkl(d1, d2, out=out, out_scalar=2.)
 
         npt.assert_array_almost_equal(mat3_np, mat3)
@@ -213,6 +215,10 @@ class TestVectorSparseMultiplicationBSR(TestVectorSparseMultiplication):
 
 class TestVectorVectorMultplication(unittest.TestCase):
 
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.VECTOR = MATRIX_1.copy(), VECTOR.copy() 
+
     def test_1d_1d(self):
         mat3 = dot_product_mkl(VECTOR, VECTOR)
         mat3_np = np.dot(VECTOR, VECTOR)
@@ -236,3 +242,43 @@ def test_2d_2d(self):
         mat3_np = np.dot(VECTOR.reshape(1, -1), VECTOR.reshape(-1, 1))
 
         npt.assert_array_almost_equal(mat3_np, mat3)
+
+
+class _ComplexMixin:
+
+    double_dtype = np.cdouble
+    single_dtype = np.csingle
+    export_complex = True
+
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, cls.MATRIX_2 = make_matrixes(200, 100, 300, 0.05, dtype=np.cdouble)
+        cls.VECTOR = make_vector(300, complex=True)
+
+
+class TestSparseVectorMultiplicationComplex(_ComplexMixin, TestSparseVectorMultiplication):
+    pass
+
+
+class TestSparseVectorMultiplicationCSCComplex(_ComplexMixin, TestSparseVectorMultiplicationCSC):
+    pass
+
+
+class TestSparseVectorMultiplicationBSRComplex(_ComplexMixin, TestSparseVectorMultiplicationBSR):
+    pass
+
+
+class TestVectorSparseMultiplicationComplex(_ComplexMixin, TestVectorSparseMultiplication):
+    pass
+
+
+class TestVectorSparseMultiplicationCSCComplex(_ComplexMixin, TestVectorSparseMultiplicationCSC):
+    pass
+
+
+class TestVectorSparseMultiplicationBSRComplex(_ComplexMixin, TestVectorSparseMultiplicationBSR):
+    pass
+
+
+class TestVectorVectorMultplicationComplex(_ComplexMixin, TestVectorVectorMultplication):
+    pass
\ No newline at end of file

From 193319b3211becab7f8a95df19e38b1ab4d19430 Mon Sep 17 00:00:00 2001
From: asistradition <asistradition@users.noreply.github.com>
Date: Tue, 4 Jan 2022 15:54:32 -0500
Subject: [PATCH 2/4] Can't figure out why sparse syrk isn't working for
 complex dtypes but blas is

---
 sparse_dot_mkl/_gram_matrix.py           | 62 ++++++++++++-------
 sparse_dot_mkl/tests/test_gram_matrix.py | 76 +++++++++++++++---------
 2 files changed, 89 insertions(+), 49 deletions(-)

diff --git a/sparse_dot_mkl/_gram_matrix.py b/sparse_dot_mkl/_gram_matrix.py
index 5808b13..a74da97 100644
--- a/sparse_dot_mkl/_gram_matrix.py
+++ b/sparse_dot_mkl/_gram_matrix.py
@@ -1,7 +1,8 @@
 from sparse_dot_mkl._mkl_interface import (MKL, sparse_matrix_t, _create_mkl_sparse,
                                            _export_mkl, _order_mkl_handle, _destroy_mkl_handle, _type_check,
                                            _get_numpy_layout, _convert_to_csr, _empty_output_check, LAYOUT_CODE_C,
-                                           _out_matrix, _check_return_value, debug_print)
+                                           _out_matrix, _check_return_value, debug_print, _output_dtypes, _mkl_scalar,
+                                           _is_double)
 
 import scipy.sparse as _sps
 import ctypes as _ctypes
@@ -22,7 +23,7 @@ def _gram_matrix_sparse(matrix_a, aat=False, reorder_output=False):
     :rtype: scipy.sparse.csr_matrix
     """
 
-    sp_ref_a, double_prec = _create_mkl_sparse(matrix_a)
+    sp_ref_a, double_prec, complex_type = _create_mkl_sparse(matrix_a)
 
     if _sps.isspmatrix_csc(matrix_a):
         sp_ref_a = _convert_to_csr(sp_ref_a)
@@ -41,13 +42,18 @@ def _gram_matrix_sparse(matrix_a, aat=False, reorder_output=False):
     if reorder_output:
         _order_mkl_handle(ref_handle)
 
-    output_arr = _export_mkl(ref_handle, double_prec, output_type="csr")
+    output_arr = _export_mkl(ref_handle, double_prec, complex_type=complex_type, output_type="csr")
 
     _destroy_mkl_handle(sp_ref_a)
     _destroy_mkl_handle(ref_handle)
 
     return output_arr
 
+# Dict keyed by ('double_precision_bool', 'complex_bool')
+_mkl_skryd_funcs = {(False, False): MKL._mkl_sparse_s_syrkd,
+                    (True, False): MKL._mkl_sparse_d_syrkd,
+                    (False, True): MKL._mkl_sparse_c_syrkd,
+                    (True, True): MKL._mkl_sparse_z_syrkd}
 
 def _gram_matrix_sparse_to_dense(matrix_a, aat=False, scalar=1., out=None, out_scalar=None):
     """
@@ -63,30 +69,33 @@ def _gram_matrix_sparse_to_dense(matrix_a, aat=False, scalar=1., out=None, out_s
     :rtype: numpy.ndarray
     """
 
-    sp_ref_a, double_prec = _create_mkl_sparse(matrix_a)
+    sp_ref_a, double_prec, complex_type = _create_mkl_sparse(matrix_a)
 
     if _sps.isspmatrix_csc(matrix_a):
         sp_ref_a = _convert_to_csr(sp_ref_a, destroy_original=True)
 
     _order_mkl_handle(sp_ref_a)
 
-    out_dtype = np.float64 if double_prec else np.float32
-    output_ctype = _ctypes.c_double if double_prec else _ctypes.c_float
-    out_dim = matrix_a.shape[0] if aat else matrix_a.shape[1]
+    out_dtype = _output_dtypes[(double_prec, complex_type)]
+    func = _mkl_skryd_funcs[(double_prec, complex_type)]
+
+    out_dim = matrix_a.shape[0 if aat else 1]
 
     output_arr = _out_matrix((out_dim, out_dim), out_dtype, order="C", out_arr=out)
     _, output_ld = _get_numpy_layout(output_arr)
 
     if _empty_output_check(matrix_a, matrix_a):
+        _destroy_mkl_handle(sp_ref_a)
         return output_arr
 
-    func = MKL._mkl_sparse_d_syrkd if double_prec else MKL._mkl_sparse_s_syrkd
+    scalar = _mkl_scalar(scalar, complex_type, double_prec)
+    out_scalar = _mkl_scalar(out_scalar, complex_type, double_prec)
 
     ret_val = func(10 if aat else 11,
                    sp_ref_a,
                    scalar,
-                   float(out_scalar) if out_scalar is not None else 1.,
-                   output_arr.ctypes.data_as(_ctypes.POINTER(output_ctype)),
+                   out_scalar,
+                   output_arr,
                    LAYOUT_CODE_C,
                    output_ld)
 
@@ -98,11 +107,17 @@ def _gram_matrix_sparse_to_dense(matrix_a, aat=False, scalar=1., out=None, out_s
     # This fixes a specific bug in mkl_sparse_d_syrkd which returns a full matrix
     # This stupid thing only happens with specific flags
     # I could probably leave it but it's pretty annoying
-    if not aat and out is None:
+    
+    if not aat and out is None and not complex_type:
         output_arr[np.tril_indices(output_arr.shape[0], k=-1)] = 0.
 
     return output_arr
 
+# Dict keyed by ('double_precision_bool', 'complex_bool')
+_mkl_blas_skry_funcs = {(False, False): MKL._cblas_ssyrk,
+                        (True, False): MKL._cblas_dsyrk,
+                        (False, True): MKL._cblas_csyrk,
+                        (True, True): MKL._cblas_zsyrk}
 
 def _gram_matrix_dense_to_dense(matrix_a, aat=False, scalar=1., out=None, out_scalar=None):
     """
@@ -127,25 +142,29 @@ def _gram_matrix_dense_to_dense(matrix_a, aat=False, scalar=1., out=None, out_sc
 
     # Get the memory order for arrays
     layout_a, ld_a = _get_numpy_layout(matrix_a)
-    double_precision = matrix_a.dtype == np.float64
+    double_precision, complex_type = _is_double(matrix_a)
 
-    # Set the MKL function for precision
-    func = MKL._cblas_dsyrk if double_precision else MKL._cblas_ssyrk
-    output_ctype = _ctypes.c_double if double_precision else _ctypes.c_float
+    out_dtype = _output_dtypes[(double_precision, complex_type)]
+    func = _mkl_blas_skry_funcs[(double_precision, complex_type)]
 
     # Allocate an array for outputs and set functions and types for float or doubles
-    output_arr = _out_matrix((n, n), matrix_a.dtype, order="C" if layout_a == LAYOUT_CODE_C else "F", out_arr=out)
+    output_arr = _out_matrix((n, n), out_dtype, order="C" if layout_a == LAYOUT_CODE_C else "F", out_arr=out)
+
+    # The complex versions of these functions take void pointers instead of passed structs
+    # So create a C struct if necessary to be passed by reference
+    scalar = _mkl_scalar(scalar, complex_type, double_precision)
+    out_scalar = _mkl_scalar(out_scalar, complex_type, double_precision)
 
     func(layout_a,
          121,
          111 if aat else 112,
          n,
          k,
-         scalar,
+         scalar if not complex_type else _ctypes.byref(scalar),
          matrix_a,
          ld_a,
-         float(out_scalar) if out_scalar is not None else 1.,
-         output_arr.ctypes.data_as(_ctypes.POINTER(output_ctype)),
+         out_scalar if not complex_type else _ctypes.byref(scalar),
+         output_arr,
          n)
 
     return output_arr
@@ -178,11 +197,14 @@ def _gram_matrix(matrix, transpose=False, cast=False, dense=False, reorder_outpu
         output_func = _sps.csr_matrix if _sps.isspmatrix(matrix) else np.zeros
         return output_func(output_shape, dtype=matrix.dtype)
 
+    if np.iscomplexobj(matrix):
+        raise ValueError("gram_matrix_mkl does not support complex datatypes")
+
     matrix = _type_check(matrix, cast=cast)
 
     if _sps.isspmatrix(matrix) and not (_sps.isspmatrix_csr(matrix) or _sps.isspmatrix_csc(matrix)):
         raise ValueError("gram_matrix requires sparse matrix to be CSR or CSC format")
-    if _sps.isspmatrix_csc(matrix) and not cast:
+    elif _sps.isspmatrix_csc(matrix) and not cast:
         raise ValueError("gram_matrix cannot use a CSC matrix unless cast=True")
     elif not _sps.isspmatrix(matrix):
         return _gram_matrix_dense_to_dense(matrix, aat=transpose, out=out, out_scalar=out_scalar)
diff --git a/sparse_dot_mkl/tests/test_gram_matrix.py b/sparse_dot_mkl/tests/test_gram_matrix.py
index f75a614..03a9c47 100644
--- a/sparse_dot_mkl/tests/test_gram_matrix.py
+++ b/sparse_dot_mkl/tests/test_gram_matrix.py
@@ -2,47 +2,52 @@
 import numpy as np
 import numpy.testing as npt
 from sparse_dot_mkl import gram_matrix_mkl
-from sparse_dot_mkl.tests.test_mkl import MATRIX_1
+from sparse_dot_mkl.tests.test_mkl import MATRIX_1, make_matrixes
 
 
 class TestGramMatrix(unittest.TestCase):
 
+    double_dtype = np.float64
+    single_dtype = np.float32
+
     @classmethod
     def setUpClass(cls):
-        gram_ut = np.dot(MATRIX_1.A.T, MATRIX_1.A)
+        cls.MATRIX_1 = MATRIX_1.copy()
+
+    def setUp(self):
+        self.mat1 = self.MATRIX_1.copy()
+        self.mat1_d = self.MATRIX_1.A
+
+        gram_ut = np.dot(self.MATRIX_1.A.T, self.MATRIX_1.A)
         gram_ut[np.tril_indices(gram_ut.shape[0], k=-1)] = 0.
-        cls.gram_ut = gram_ut
+        self.gram_ut = gram_ut
 
-        gram_ut_t = np.dot(MATRIX_1.A, MATRIX_1.A.T)
+        gram_ut_t = np.dot(self.MATRIX_1.A, self.MATRIX_1.A.T)
         gram_ut_t[np.tril_indices(gram_ut_t.shape[0], k=-1)] = 0.
-        cls.gram_ut_t = gram_ut_t
-
-    def setUp(self):
-        self.mat1 = MATRIX_1.copy()
-        self.mat1_d = MATRIX_1.A
+        self.gram_ut_t = gram_ut_t
 
     def test_gram_matrix_sp(self):
         mat2 = gram_matrix_mkl(self.mat1)
         npt.assert_array_almost_equal(mat2.A, self.gram_ut)
 
         with self.assertRaises(ValueError):
-            gram_matrix_mkl(self.mat1, out=np.zeros((self.mat1.shape[0], self.mat1.shape[0])))
+            gram_matrix_mkl(self.mat1, out=np.zeros((self.mat1.shape[0], self.mat1.shape[0]), dtype=self.double_dtype))
 
     def test_gram_matrix_sp_single(self):
-        mat2 = gram_matrix_mkl(self.mat1.astype(np.float32))
+        mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype))
         npt.assert_array_almost_equal(mat2.A, self.gram_ut)
 
     def test_gram_matrix_d_single(self):
-        mat2 = gram_matrix_mkl(self.mat1.astype(np.float32), dense=True)
+        mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype), dense=True)
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
-        mat2 = gram_matrix_mkl(self.mat1.astype(np.float32), dense=True,
-                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=np.float32),  out_scalar=1.)
+        mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype), dense=True,
+                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=self.single_dtype),  out_scalar=1.)
         mat2[np.tril_indices(mat2.shape[0], k=-1)] = 0.
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
         with self.assertRaises(ValueError):
-            mat2 = gram_matrix_mkl(self.mat1.astype(np.float32), dense=True,
+            mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype), dense=True,
                                    out=np.zeros((self.mat1.shape[1], self.mat1.shape[1])),
                                    out_scalar=1.)
 
@@ -51,7 +56,7 @@ def test_gram_matrix_d(self):
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
         mat2 = gram_matrix_mkl(self.mat1, dense=True,
-                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=np.float64),  out_scalar=1.)
+                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=self.double_dtype),  out_scalar=1.)
         mat2[np.tril_indices(mat2.shape[0], k=-1)] = 0.
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
@@ -68,7 +73,9 @@ def test_gram_matrix_csc_sp(self):
         npt.assert_array_almost_equal(mat2.A, self.gram_ut)
 
     def test_gram_matrix_csc_d(self):
-        mat2 = gram_matrix_mkl(self.mat1.tocsc(), dense=True, cast=True)
+        mat = self.mat1.tocsc()
+        mat2 = gram_matrix_mkl(mat, dense=True, cast=True)
+        npt.assert_array_almost_equal(mat.A, self.mat1.A)
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
     def test_gram_matrix_dd_double(self):
@@ -76,31 +83,42 @@ def test_gram_matrix_dd_double(self):
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
         mat2 = gram_matrix_mkl(self.mat1.A, dense=True,
-                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=np.float64),  out_scalar=1.)
+                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=self.double_dtype),  out_scalar=1.)
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
     def test_gram_matrix_dd_single(self):
-        mat2 = gram_matrix_mkl(self.mat1.astype(np.float32).A, dense=True)
-        npt.assert_array_almost_equal(mat2, self.gram_ut)
+        mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype).A, dense=True)
+        npt.assert_array_almost_equal(mat2, self.gram_ut, decimal=5)
 
-        mat2 = gram_matrix_mkl(self.mat1.astype(np.float32).A, dense=True,
-                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=np.float32),  out_scalar=1.)
-        npt.assert_array_almost_equal(mat2, self.gram_ut)
+        mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype).A, dense=True,
+                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=self.single_dtype),  out_scalar=1.)
+        npt.assert_array_almost_equal(mat2, self.gram_ut, decimal=5)
 
     def test_gram_matrix_dd_double_F(self):
         mat2 = gram_matrix_mkl(np.asarray(self.mat1.A, order="F"), dense=True)
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
         mat2 = gram_matrix_mkl(np.asarray(self.mat1.A, order="F"), dense=True,
-                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=np.float64, order="F"),
+                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=self.double_dtype, order="F"),
                                out_scalar=1.)
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
     def test_gram_matrix_dd_single_F(self):
-        mat2 = gram_matrix_mkl(np.asarray(self.mat1.astype(np.float32).A, order="F"), dense=True)
-        npt.assert_array_almost_equal(mat2, self.gram_ut)
+        mat2 = gram_matrix_mkl(np.asarray(self.mat1.astype(self.single_dtype).A, order="F"), dense=True)
+        npt.assert_array_almost_equal(mat2, self.gram_ut, decimal=5)
 
-        mat2 = gram_matrix_mkl(np.asarray(self.mat1.astype(np.float32).A, order="F"), dense=True,
-                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=np.float32, order="F"),
+        mat2 = gram_matrix_mkl(np.asarray(self.mat1.astype(self.single_dtype).A, order="F"), dense=True,
+                               out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=self.single_dtype, order="F"),
                                out_scalar=1.)
-        npt.assert_array_almost_equal(mat2, self.gram_ut)
+        npt.assert_array_almost_equal(mat2, self.gram_ut, decimal=5)
+
+
+@unittest.skip
+class TestGramMatrixComplex(TestGramMatrix):
+
+    double_dtype = np.cdouble
+    single_dtype = np.csingle
+
+    @classmethod
+    def setUpClass(cls):
+        cls.MATRIX_1, _ = make_matrixes(200, 100, 300, 0.05, dtype=np.cdouble)
\ No newline at end of file

From 5d3f854c26a8a5eaf61587c760adffd6e510a64e Mon Sep 17 00:00:00 2001
From: asistradition <asistradition@users.noreply.github.com>
Date: Thu, 6 Jan 2022 11:12:56 -0500
Subject: [PATCH 3/4] Refactored _mkl_interface.py into a subpackage

---
 CHANGELOG.md                                  |   1 +
 sparse_dot_mkl/_mkl_interface/__init__.py     |  67 ++
 sparse_dot_mkl/_mkl_interface/_cfunctions.py  | 456 +++++++++++++
 .../_common.py}                               | 645 ++----------------
 sparse_dot_mkl/_mkl_interface/_constants.py   |  34 +
 sparse_dot_mkl/_mkl_interface/_structs.py     |  37 +
 6 files changed, 637 insertions(+), 603 deletions(-)
 create mode 100644 sparse_dot_mkl/_mkl_interface/__init__.py
 create mode 100644 sparse_dot_mkl/_mkl_interface/_cfunctions.py
 rename sparse_dot_mkl/{_mkl_interface.py => _mkl_interface/_common.py} (50%)
 create mode 100644 sparse_dot_mkl/_mkl_interface/_constants.py
 create mode 100644 sparse_dot_mkl/_mkl_interface/_structs.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3aa1e9b..dde6a2d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 ### Version 0.8.0
 
 * Added support for complex data types
+* Refactored _mkl_interface.py into a subpackage
 
 ### Version 0.7.3
 
diff --git a/sparse_dot_mkl/_mkl_interface/__init__.py b/sparse_dot_mkl/_mkl_interface/__init__.py
new file mode 100644
index 0000000..492d011
--- /dev/null
+++ b/sparse_dot_mkl/_mkl_interface/__init__.py
@@ -0,0 +1,67 @@
+import os
+
+# Workaround for this stupid sklearn thing
+if 'KMP_INIT_AT_FORK' in os.environ:
+    _sklearn_env = os.environ['KMP_INIT_AT_FORK']
+    del os.environ['KMP_INIT_AT_FORK']
+else:
+    _sklearn_env = None
+
+from scipy import sparse as _spsparse
+import numpy as _np
+import ctypes as _ctypes
+from ._constants import *
+from ._structs import *
+from ._cfunctions import MKL
+from ._common import (_create_mkl_sparse, _export_mkl, _check_return_value, _convert_to_csr, _is_allowed_sparse_format,
+                      _is_double, _destroy_mkl_handle, _order_mkl_handle, debug_print, debug_timer, _type_check,
+                      _empty_output_check, _sanity_check, _output_dtypes, _get_numpy_layout, _out_matrix, _mkl_scalar,
+                      _is_dense_vector, print_mkl_debug, set_debug_mode, get_version_string)
+
+def _validate_dtype():
+    """
+    Test to make sure that this library works by creating a random sparse array in CSC format,
+    then converting it to CSR format and making sure is has not raised an exception.
+
+    """
+
+    test_array = _spsparse.random(5, 5, density=0.5, format="csc", dtype=_np.float32, random_state=50)
+    test_comparison = test_array.A
+
+    csc_ref, precision_flag, _ = _create_mkl_sparse(test_array)
+
+    try:
+        csr_ref = _convert_to_csr(csc_ref)
+        final_array = _export_mkl(csr_ref, precision_flag)
+        if not _np.allclose(test_comparison, final_array.A):
+            raise ValueError("Match failed after matrix conversion")
+        _destroy_mkl_handle(csr_ref)
+    finally:
+        _destroy_mkl_handle(csc_ref)
+
+
+def _empirical_set_dtype():
+    """
+    Define dtypes empirically
+    Basically just try with int64s and if that doesn't work try with int32s
+    There's a way to do this with intel's mkl helper package but I don't want to add the dependency
+    """
+    MKL._set_int_type(_ctypes.c_longlong, _np.int64)
+
+    try:
+        _validate_dtype()
+    except ValueError as err:
+
+        MKL._set_int_type(_ctypes.c_int, _np.int32)
+
+        try:
+            _validate_dtype()
+        except ValueError:
+            raise ImportError("Unable to set MKL numeric type")
+
+
+if MKL.MKL_INT is None:
+    _empirical_set_dtype()
+
+if _sklearn_env is not None:
+    os.environ['KMP_INIT_AT_FORK'] = _sklearn_env
diff --git a/sparse_dot_mkl/_mkl_interface/_cfunctions.py b/sparse_dot_mkl/_mkl_interface/_cfunctions.py
new file mode 100644
index 0000000..9336b39
--- /dev/null
+++ b/sparse_dot_mkl/_mkl_interface/_cfunctions.py
@@ -0,0 +1,456 @@
+import ctypes as _ctypes
+import ctypes.util as _ctypes_util
+
+from sparse_dot_mkl._mkl_interface._structs import sparse_matrix_t, matrix_descr, MKL_Complex8, MKL_Complex16
+
+# Load mkl_spblas through the libmkl_rt common interface
+_libmkl = None
+try:
+    _so_file = _ctypes_util.find_library('mkl_rt')
+
+    if _so_file is None:
+        _so_file = _ctypes_util.find_library('mkl_rt.1')
+
+    if _so_file is None:
+        # For some reason, find_library is not checking LD_LIBRARY_PATH
+        # If the ctypes.util approach doesn't work, try this (crude) approach
+
+        # Check each of these library names
+        # Also include derivatives because windows find_library implementation won't match partials
+        for so_file in ["libmkl_rt.so", "libmkl_rt.dylib", "mkl_rt.dll", "mkl_rt.1.dll"]:
+            try:
+                _libmkl = _ctypes.cdll.LoadLibrary(so_file)
+                break
+            except (OSError, ImportError) as err:
+                pass
+
+        if _libmkl is None:
+            raise ImportError("mkl_rt not found.")
+    else:
+        _libmkl = _ctypes.cdll.LoadLibrary(_so_file)
+except (OSError, ImportError) as err:
+    _ierr_msg = "Unable to load the MKL libraries through libmkl_rt. Try setting $LD_LIBRARY_PATH. " + str(err)
+    raise ImportError(_ierr_msg)
+
+import numpy as _np
+from numpy.ctypeslib import ndpointer
+
+def mkl_library_name():
+    return _libmkl._name
+
+class MKL:
+    """ This class holds shared object references to C functions with arg and returntypes that can be adjusted"""
+
+    MKL_INT = None
+    MKL_INT_NUMPY = None
+    MKL_DEBUG = False
+
+    # Import function for creating a MKL CSR object
+    _mkl_sparse_d_create_csr = _libmkl.mkl_sparse_d_create_csr
+    _mkl_sparse_s_create_csr = _libmkl.mkl_sparse_s_create_csr
+    _mkl_sparse_c_create_csr = _libmkl.mkl_sparse_c_create_csr
+    _mkl_sparse_z_create_csr = _libmkl.mkl_sparse_z_create_csr
+
+    # Import function for creating a MKL CSC object
+    _mkl_sparse_d_create_csc = _libmkl.mkl_sparse_d_create_csc
+    _mkl_sparse_s_create_csc = _libmkl.mkl_sparse_s_create_csc
+    _mkl_sparse_c_create_csc = _libmkl.mkl_sparse_c_create_csc
+    _mkl_sparse_z_create_csc = _libmkl.mkl_sparse_z_create_csc
+
+    # Import function for creating a MKL BSR object
+    _mkl_sparse_d_create_bsr = _libmkl.mkl_sparse_d_create_bsr
+    _mkl_sparse_s_create_bsr = _libmkl.mkl_sparse_s_create_bsr
+    _mkl_sparse_c_create_bsr = _libmkl.mkl_sparse_c_create_bsr
+    _mkl_sparse_z_create_bsr = _libmkl.mkl_sparse_z_create_bsr
+
+    # Export function for exporting a MKL CSR object
+    _mkl_sparse_d_export_csr = _libmkl.mkl_sparse_d_export_csr
+    _mkl_sparse_s_export_csr = _libmkl.mkl_sparse_s_export_csr
+    _mkl_sparse_c_export_csr = _libmkl.mkl_sparse_c_export_csr
+    _mkl_sparse_z_export_csr = _libmkl.mkl_sparse_z_export_csr
+
+    # Export function for exporting a MKL CSC object
+    _mkl_sparse_d_export_csc = _libmkl.mkl_sparse_d_export_csc
+    _mkl_sparse_s_export_csc = _libmkl.mkl_sparse_s_export_csc
+    _mkl_sparse_z_export_csc = _libmkl.mkl_sparse_z_export_csc
+    _mkl_sparse_c_export_csc = _libmkl.mkl_sparse_c_export_csc
+
+    # Export function for exporting a MKL BSR object
+    _mkl_sparse_d_export_bsr = _libmkl.mkl_sparse_d_export_bsr
+    _mkl_sparse_s_export_bsr = _libmkl.mkl_sparse_s_export_bsr
+    _mkl_sparse_c_export_bsr = _libmkl.mkl_sparse_c_export_bsr
+    _mkl_sparse_z_export_bsr = _libmkl.mkl_sparse_z_export_bsr
+
+    # Import function for matmul
+    _mkl_sparse_spmm = _libmkl.mkl_sparse_spmm
+
+    # Import function for cleaning up MKL objects
+    _mkl_sparse_destroy = _libmkl.mkl_sparse_destroy
+
+    # Import function for ordering MKL objects
+    _mkl_sparse_order = _libmkl.mkl_sparse_order
+
+    # Import function for coverting to CSR
+    _mkl_sparse_convert_csr = _libmkl.mkl_sparse_convert_csr
+
+    # Import function for matmul single dense
+    _mkl_sparse_s_spmmd = _libmkl.mkl_sparse_s_spmmd
+    _mkl_sparse_d_spmmd = _libmkl.mkl_sparse_d_spmmd
+    _mkl_sparse_c_spmmd = _libmkl.mkl_sparse_c_spmmd
+    _mkl_sparse_z_spmmd = _libmkl.mkl_sparse_z_spmmd
+
+    # Import function for matmul single sparse*dense
+    _mkl_sparse_s_mm = _libmkl.mkl_sparse_s_mm
+    _mkl_sparse_d_mm = _libmkl.mkl_sparse_d_mm
+    _mkl_sparse_c_mm = _libmkl.mkl_sparse_c_mm
+    _mkl_sparse_z_mm = _libmkl.mkl_sparse_z_mm
+
+    # Import function for matmul dense*dense
+    _cblas_sgemm = _libmkl.cblas_sgemm
+    _cblas_dgemm = _libmkl.cblas_dgemm
+    _cblas_cgemm = _libmkl.cblas_cgemm
+    _cblas_zgemm = _libmkl.cblas_zgemm
+
+    # Import function for matrix * vector
+    _mkl_sparse_s_mv = _libmkl.mkl_sparse_s_mv
+    _mkl_sparse_d_mv = _libmkl.mkl_sparse_d_mv
+    _mkl_sparse_c_mv = _libmkl.mkl_sparse_c_mv
+    _mkl_sparse_z_mv = _libmkl.mkl_sparse_z_mv
+    
+    # Import function for sparse gram matrix
+    _mkl_sparse_syrk = _libmkl.mkl_sparse_syrk
+
+    # Import function for dense single gram matrix from sparse
+    _mkl_sparse_s_syrkd = _libmkl.mkl_sparse_s_syrkd
+    _mkl_sparse_d_syrkd = _libmkl.mkl_sparse_d_syrkd
+    _mkl_sparse_c_syrkd = _libmkl.mkl_sparse_c_syrkd
+    _mkl_sparse_z_syrkd = _libmkl.mkl_sparse_z_syrkd
+
+    # Import function for dense single gram matrix
+    _cblas_ssyrk = _libmkl.cblas_ssyrk
+    _cblas_dsyrk = _libmkl.cblas_dsyrk
+    _cblas_csyrk = _libmkl.cblas_csyrk
+    _cblas_zsyrk = _libmkl.cblas_zsyrk
+
+    # Import function for QR solver - reorder
+    _mkl_sparse_qr_reorder = _libmkl.mkl_sparse_qr_reorder
+
+    # Import function for QR solver - factorize
+    _mkl_sparse_d_qr_factorize = _libmkl.mkl_sparse_d_qr_factorize
+    _mkl_sparse_s_qr_factorize = _libmkl.mkl_sparse_s_qr_factorize
+
+    # Import function for QR solver - solve
+    _mkl_sparse_d_qr_solve = _libmkl.mkl_sparse_d_qr_solve
+    _mkl_sparse_s_qr_solve = _libmkl.mkl_sparse_s_qr_solve
+
+    @classmethod
+    def _set_int_type(cls, c_type, _np_type):
+        cls.MKL_INT = c_type
+        cls.MKL_INT_NUMPY = _np_type
+
+        cls._set_int_type_create()
+        cls._set_int_type_export()
+        cls._set_int_type_sparse_matmul()
+        cls._set_int_type_dense_matmul()
+        cls._set_int_type_vector_mul()
+        cls._set_int_type_qr_solver()
+        cls._set_int_type_misc()
+
+    @classmethod
+    def _set_int_type_create(cls):
+        """Set the correct argtypes for handle creation functions"""
+        cls._mkl_sparse_d_create_csr.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_create_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_s_create_csr.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_create_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_create_csr.argtypes = cls._mkl_sparse_create_argtypes(_np.csingle)
+        cls._mkl_sparse_c_create_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_create_csr.argtypes = cls._mkl_sparse_create_argtypes(_np.cdouble)
+        cls._mkl_sparse_z_create_csr.restypes = _ctypes.c_int
+
+        cls._mkl_sparse_d_create_csc.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_create_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_s_create_csc.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_create_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_create_csc.argtypes = cls._mkl_sparse_create_argtypes(_np.csingle)
+        cls._mkl_sparse_c_create_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_create_csc.argtypes = cls._mkl_sparse_create_argtypes(_np.cdouble)
+        cls._mkl_sparse_z_create_csc.restypes = _ctypes.c_int
+
+        cls._mkl_sparse_d_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_create_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_s_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_create_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(_np.csingle)
+        cls._mkl_sparse_c_create_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(_np.cdouble)
+        cls._mkl_sparse_z_create_bsr.restypes = _ctypes.c_int
+
+    @classmethod
+    def _set_int_type_export(cls):
+        """Set the correct argtypes for handle export functions"""
+        cls._mkl_sparse_d_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_export_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_s_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_export_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_z_export_csr.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_c_export_csr.restypes = _ctypes.c_int
+
+        cls._mkl_sparse_s_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_export_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_export_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_c_export_csc.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_z_export_csc.restypes = _ctypes.c_int
+
+        cls._mkl_sparse_s_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_export_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_export_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_c_export_bsr.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_z_export_bsr.restypes = _ctypes.c_int
+
+    @classmethod
+    def _set_int_type_sparse_matmul(cls):
+        """Set the correct argtypes for sparse (*) sparse functions and sparse (*) dense functions"""
+        cls._mkl_sparse_spmm.argtypes = [_ctypes.c_int,
+                                         sparse_matrix_t,
+                                         sparse_matrix_t,
+                                         _ctypes.POINTER(sparse_matrix_t)]
+        cls._mkl_sparse_spmm.restypes = _ctypes.c_int
+
+        cls._mkl_sparse_s_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_s_spmmd.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_d_spmmd.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_float)
+        cls._mkl_sparse_c_spmmd.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_double)
+        cls._mkl_sparse_z_spmmd.restypes = _ctypes.c_int
+
+        cls._mkl_sparse_s_mm.argtypes = cls._mkl_sparse_mm_argtypes(_ctypes.c_float, _ctypes.c_float, _ctypes.c_float)
+        cls._mkl_sparse_s_mm.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_mm.argtypes = cls._mkl_sparse_mm_argtypes(_ctypes.c_double, _ctypes.c_double, _ctypes.c_double)
+        cls._mkl_sparse_d_mm.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_mm.argtypes = cls._mkl_sparse_mm_argtypes(MKL_Complex8, _ctypes.c_float, _np.csingle)
+        cls._mkl_sparse_c_mm.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_mm.argtypes = cls._mkl_sparse_mm_argtypes(MKL_Complex16, _ctypes.c_double, _np.cdouble)
+        cls._mkl_sparse_z_mm.restypes = _ctypes.c_int
+
+    @classmethod
+    def _set_int_type_dense_matmul(cls):
+        """Set the correct argtypes for dense (*) dense functions"""
+        cls._cblas_sgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_float)
+        cls._cblas_sgemm.restypes = None
+        cls._cblas_dgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_double)
+        cls._cblas_dgemm.restypes = None
+        cls._cblas_cgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_void_p)
+        cls._cblas_cgemm.restypes = None
+        cls._cblas_zgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_void_p)
+        cls._cblas_zgemm.restypes = None
+    
+    @classmethod
+    def _set_int_type_vector_mul(cls):
+        """Set the correct argtypes for sparse (*) vector functions"""
+        cls._mkl_sparse_s_mv.argtypes = cls._mkl_sparse_mv_argtypes(_ctypes.c_float, _np.float32)
+        cls._mkl_sparse_s_mv.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_mv.argtypes = cls._mkl_sparse_mv_argtypes(_ctypes.c_double, _np.float64)
+        cls._mkl_sparse_d_mv.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_mv.argtypes = cls._mkl_sparse_mv_argtypes(MKL_Complex8, _np.csingle)
+        cls._mkl_sparse_c_mv.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_mv.argtypes = cls._mkl_sparse_mv_argtypes(MKL_Complex16, _np.cdouble)
+        cls._mkl_sparse_z_mv.restypes = _ctypes.c_int
+
+    @classmethod
+    def _set_int_type_misc(cls):
+        cls._mkl_sparse_destroy.argtypes = [sparse_matrix_t]
+        cls._mkl_sparse_destroy.restypes = _ctypes.c_int
+
+        cls._mkl_sparse_order.argtypes = [sparse_matrix_t]
+        cls._mkl_sparse_order.restypes = _ctypes.c_int
+
+
+    @classmethod
+    def _set_int_type_misc(cls):
+        cls._mkl_sparse_syrk.argtypes = [_ctypes.c_int,
+                                         sparse_matrix_t,
+                                         _ctypes.POINTER(sparse_matrix_t)]
+        cls._mkl_sparse_syrk.restypes = _ctypes.c_int
+
+        cls._mkl_sparse_s_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(_ctypes.c_float, _np.float32)
+        cls._mkl_sparse_s_syrkd.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(_ctypes.c_double, _np.float64)
+        cls._mkl_sparse_d_syrkd.restypes = _ctypes.c_int
+        cls._mkl_sparse_c_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(MKL_Complex8, _np.csingle)
+        cls._mkl_sparse_c_syrkd.restypes = _ctypes.c_int
+        cls._mkl_sparse_z_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(MKL_Complex16, _np.cdouble)
+        cls._mkl_sparse_z_syrkd.restypes = _ctypes.c_int
+
+        cls._cblas_ssyrk.argtypes = cls._cblas_syrk_argtypes(_ctypes.c_float, _np.float32)
+        cls._cblas_ssyrk.restypes = None
+        cls._cblas_dsyrk.argtypes = cls._cblas_syrk_argtypes(_ctypes.c_double, _np.float64)
+        cls._cblas_dsyrk.restypes = None
+        cls._cblas_csyrk.argtypes = cls._cblas_syrk_argtypes(MKL_Complex8, _np.csingle, scalar_pointers=True)
+        cls._cblas_csyrk.restypes = None
+        cls._cblas_zsyrk.argtypes = cls._cblas_syrk_argtypes(MKL_Complex16, _np.cdouble, scalar_pointers=True)
+        cls._cblas_zsyrk.restypes = None
+
+
+    @classmethod
+    def _set_int_type_qr_solver(cls):
+        """Set the correct argtypes for QR solver functions"""
+        cls._mkl_sparse_qr_reorder.argtypes = [sparse_matrix_t, matrix_descr]
+        cls._mkl_sparse_qr_reorder.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_qr_factorize.argtypes = [sparse_matrix_t, _ctypes.POINTER(_ctypes.c_double)]
+        cls._mkl_sparse_d_qr_factorize.restypes = _ctypes.c_int
+        cls._mkl_sparse_s_qr_factorize.argtypes = [sparse_matrix_t, _ctypes.POINTER(_ctypes.c_float)]
+        cls._mkl_sparse_s_qr_factorize.restypes = _ctypes.c_int
+        cls._mkl_sparse_d_qr_solve.argtypes = cls._mkl_sparse_qr_solve(_ctypes.c_double)
+        cls._mkl_sparse_d_qr_solve.restypes = _ctypes.c_int
+        cls._mkl_sparse_s_qr_solve.argtypes = cls._mkl_sparse_qr_solve(_ctypes.c_float)
+        cls._mkl_sparse_s_qr_solve.restypes = _ctypes.c_int
+
+    def __init__(self):
+        raise NotImplementedError("This class is not intended to be instanced")
+
+    """ The following methods return the argtype lists for each MKL function that has s and d variants"""
+
+    @staticmethod
+    def _mkl_sparse_create_argtypes(prec_type):
+        return [_ctypes.POINTER(sparse_matrix_t),
+                _ctypes.c_int,
+                MKL.MKL_INT,
+                MKL.MKL_INT,
+                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
+                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
+                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
+                ndpointer(dtype=prec_type, ndim=1, flags='C_CONTIGUOUS')]
+
+    @staticmethod
+    def _mkl_sparse_create_bsr_argtypes(prec_type):
+        return [_ctypes.POINTER(sparse_matrix_t),
+                _ctypes.c_int,
+                _ctypes.c_int,
+                MKL.MKL_INT,
+                MKL.MKL_INT,
+                MKL.MKL_INT,
+                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
+                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
+                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
+                ndpointer(dtype=prec_type, ndim=3, flags='C_CONTIGUOUS')]
+
+    @staticmethod
+    def _mkl_sparse_export_argtypes(prec_type):
+        return [sparse_matrix_t,
+                _ctypes.POINTER(_ctypes.c_int),
+                _ctypes.POINTER(MKL.MKL_INT),
+                _ctypes.POINTER(MKL.MKL_INT),
+                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
+                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
+                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
+                _ctypes.POINTER(_ctypes.POINTER(prec_type))]
+
+    @staticmethod
+    def _mkl_sparse_export_bsr_argtypes(prec_type):
+        return [sparse_matrix_t,
+                _ctypes.POINTER(_ctypes.c_int),
+                _ctypes.POINTER(_ctypes.c_int),
+                _ctypes.POINTER(MKL.MKL_INT),
+                _ctypes.POINTER(MKL.MKL_INT),
+                _ctypes.POINTER(MKL.MKL_INT),
+                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
+                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
+                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
+                _ctypes.POINTER(_ctypes.POINTER(prec_type))]
+
+    @staticmethod
+    def _cblas_gemm_argtypes(prec_type, scalar_pointers=False):
+        return [_ctypes.c_int,
+                _ctypes.c_int,
+                _ctypes.c_int,
+                MKL.MKL_INT,
+                MKL.MKL_INT,
+                MKL.MKL_INT,
+                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
+                _ctypes.POINTER(prec_type),
+                MKL.MKL_INT,
+                _ctypes.POINTER(prec_type),
+                MKL.MKL_INT,
+                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
+                _ctypes.POINTER(prec_type),
+                MKL.MKL_INT]
+
+    @staticmethod
+    def _mkl_sparse_spmmd_argtypes(prec_type):
+        return [_ctypes.c_int,
+                sparse_matrix_t,
+                sparse_matrix_t,
+                _ctypes.c_int,
+                _ctypes.POINTER(prec_type), 
+                MKL.MKL_INT]
+
+    @staticmethod
+    def _mkl_sparse_mm_argtypes(scalar_type, prec_type, _np_prec_type):
+        return [_ctypes.c_int,
+                scalar_type,
+                sparse_matrix_t,
+                matrix_descr,
+                _ctypes.c_int,
+                ndpointer(dtype=_np_prec_type, ndim=2),
+                MKL.MKL_INT,
+                MKL.MKL_INT,
+                scalar_type,
+                _ctypes.POINTER(prec_type),
+                MKL.MKL_INT]
+
+    @staticmethod
+    def _mkl_sparse_mv_argtypes(prec_type, _np_type):
+        return [_ctypes.c_int,
+                prec_type,
+                sparse_matrix_t,
+                matrix_descr,
+                ndpointer(dtype=_np_type, ndim=1),
+                prec_type,
+                ndpointer(dtype=_np_type)]
+
+    @staticmethod
+    def _mkl_sparse_syrkd_argtypes(prec_type, _np_type):
+        return [_ctypes.c_int,
+                sparse_matrix_t,
+                prec_type,
+                prec_type,
+                ndpointer(dtype=_np_type),
+                _ctypes.c_int,
+                MKL.MKL_INT]
+
+    @staticmethod
+    def _cblas_syrk_argtypes(prec_type, _np_type, scalar_pointers=False):
+        return [_ctypes.c_int,
+                _ctypes.c_int,
+                _ctypes.c_int,
+                MKL.MKL_INT,
+                MKL.MKL_INT,
+                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
+                ndpointer(dtype=_np_type, ndim=2),
+                MKL.MKL_INT,
+                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
+                ndpointer(dtype=_np_type, ndim=2),
+                MKL.MKL_INT]
+
+    @staticmethod
+    def _mkl_sparse_qr_solve(prec_type):
+        return [_ctypes.c_int,
+                sparse_matrix_t,
+                _ctypes.POINTER(prec_type),
+                _ctypes.c_int,
+                MKL.MKL_INT,
+                _ctypes.POINTER(prec_type),
+                MKL.MKL_INT,
+                ndpointer(dtype=prec_type, ndim=2),
+                MKL.MKL_INT]
diff --git a/sparse_dot_mkl/_mkl_interface.py b/sparse_dot_mkl/_mkl_interface/_common.py
similarity index 50%
rename from sparse_dot_mkl/_mkl_interface.py
rename to sparse_dot_mkl/_mkl_interface/_common.py
index a450f6b..f3cf5fe 100644
--- a/sparse_dot_mkl/_mkl_interface.py
+++ b/sparse_dot_mkl/_mkl_interface/_common.py
@@ -1,44 +1,14 @@
-import os
-import time
-import warnings
-import ctypes as _ctypes
-import ctypes.util as _ctypes_util
+from ._cfunctions import MKL, mkl_library_name
+from ._constants import *
+from ._structs import *
 
-# Workaround for this stupid sklearn thing
-if 'KMP_INIT_AT_FORK' in os.environ:
-    _sklearn_env = os.environ['KMP_INIT_AT_FORK']
-    del os.environ['KMP_INIT_AT_FORK']
-else:
-    _sklearn_env = None
+import numpy as _np
+import ctypes as _ctypes
+from scipy import sparse as _spsparse
+import warnings
+import time
 
-# Load mkl_spblas through the libmkl_rt common interface
-_libmkl = None
-try:
-    _so_file = _ctypes_util.find_library('mkl_rt')
-
-    if _so_file is None:
-        _so_file = _ctypes_util.find_library('mkl_rt.1')
-
-    if _so_file is None:
-        # For some reason, find_library is not checking LD_LIBRARY_PATH
-        # If the ctypes.util approach doesn't work, try this (crude) approach
-
-        # Check each of these library names
-        # Also include derivatives because windows find_library implementation won't match partials
-        for so_file in ["libmkl_rt.so", "libmkl_rt.dylib", "mkl_rt.dll", "mkl_rt.1.dll"]:
-            try:
-                _libmkl = _ctypes.cdll.LoadLibrary(so_file)
-                break
-            except (OSError, ImportError) as err:
-                pass
-
-        if _libmkl is None:
-            raise ImportError("mkl_rt not found.")
-    else:
-        _libmkl = _ctypes.cdll.LoadLibrary(_so_file)
-except (OSError, ImportError) as err:
-    _ierr_msg = "Unable to load the MKL libraries through libmkl_rt. Try setting $LD_LIBRARY_PATH. " + str(err)
-    raise ImportError(_ierr_msg)
+from numpy.ctypeslib import as_array
 
 # Use mkl-service to check version if it's installed
 # Since it's not on PyPi I don't want to make this an actual package dependency
@@ -61,491 +31,6 @@ def get_max_threads():
     _verr_msg = "Loaded version of MKL is out of date: {v}".format(v=get_version_string())
     warnings.warn(_verr_msg)
 
-import numpy as np
-import scipy.sparse as _spsparse
-from numpy.ctypeslib import ndpointer, as_array
-
-NUMPY_FLOAT_DTYPES = [np.float32, np.float64]
-NUMPY_COMPLEX_DTYPES = [np.csingle, np.cdouble]
-
-class MKL:
-    """ This class holds shared object references to C functions with arg and returntypes that can be adjusted"""
-
-    MKL_INT = None
-    MKL_INT_NUMPY = None
-    MKL_DEBUG = False
-
-    # Import function for creating a MKL CSR object
-    _mkl_sparse_d_create_csr = _libmkl.mkl_sparse_d_create_csr
-    _mkl_sparse_s_create_csr = _libmkl.mkl_sparse_s_create_csr
-    _mkl_sparse_c_create_csr = _libmkl.mkl_sparse_c_create_csr
-    _mkl_sparse_z_create_csr = _libmkl.mkl_sparse_z_create_csr
-
-    # Import function for creating a MKL CSC object
-    _mkl_sparse_d_create_csc = _libmkl.mkl_sparse_d_create_csc
-    _mkl_sparse_s_create_csc = _libmkl.mkl_sparse_s_create_csc
-    _mkl_sparse_c_create_csc = _libmkl.mkl_sparse_c_create_csc
-    _mkl_sparse_z_create_csc = _libmkl.mkl_sparse_z_create_csc
-
-    # Import function for creating a MKL BSR object
-    _mkl_sparse_d_create_bsr = _libmkl.mkl_sparse_d_create_bsr
-    _mkl_sparse_s_create_bsr = _libmkl.mkl_sparse_s_create_bsr
-    _mkl_sparse_c_create_bsr = _libmkl.mkl_sparse_c_create_bsr
-    _mkl_sparse_z_create_bsr = _libmkl.mkl_sparse_z_create_bsr
-
-    # Export function for exporting a MKL CSR object
-    _mkl_sparse_d_export_csr = _libmkl.mkl_sparse_d_export_csr
-    _mkl_sparse_s_export_csr = _libmkl.mkl_sparse_s_export_csr
-    _mkl_sparse_c_export_csr = _libmkl.mkl_sparse_c_export_csr
-    _mkl_sparse_z_export_csr = _libmkl.mkl_sparse_z_export_csr
-
-    # Export function for exporting a MKL CSC object
-    _mkl_sparse_d_export_csc = _libmkl.mkl_sparse_d_export_csc
-    _mkl_sparse_s_export_csc = _libmkl.mkl_sparse_s_export_csc
-    _mkl_sparse_z_export_csc = _libmkl.mkl_sparse_z_export_csc
-    _mkl_sparse_c_export_csc = _libmkl.mkl_sparse_c_export_csc
-
-    # Export function for exporting a MKL BSR object
-    _mkl_sparse_d_export_bsr = _libmkl.mkl_sparse_d_export_bsr
-    _mkl_sparse_s_export_bsr = _libmkl.mkl_sparse_s_export_bsr
-    _mkl_sparse_c_export_bsr = _libmkl.mkl_sparse_c_export_bsr
-    _mkl_sparse_z_export_bsr = _libmkl.mkl_sparse_z_export_bsr
-
-    # Import function for matmul
-    _mkl_sparse_spmm = _libmkl.mkl_sparse_spmm
-
-    # Import function for cleaning up MKL objects
-    _mkl_sparse_destroy = _libmkl.mkl_sparse_destroy
-
-    # Import function for ordering MKL objects
-    _mkl_sparse_order = _libmkl.mkl_sparse_order
-
-    # Import function for coverting to CSR
-    _mkl_sparse_convert_csr = _libmkl.mkl_sparse_convert_csr
-
-    # Import function for matmul single dense
-    _mkl_sparse_s_spmmd = _libmkl.mkl_sparse_s_spmmd
-    _mkl_sparse_d_spmmd = _libmkl.mkl_sparse_d_spmmd
-    _mkl_sparse_c_spmmd = _libmkl.mkl_sparse_c_spmmd
-    _mkl_sparse_z_spmmd = _libmkl.mkl_sparse_z_spmmd
-
-    # Import function for matmul single sparse*dense
-    _mkl_sparse_s_mm = _libmkl.mkl_sparse_s_mm
-    _mkl_sparse_d_mm = _libmkl.mkl_sparse_d_mm
-    _mkl_sparse_c_mm = _libmkl.mkl_sparse_c_mm
-    _mkl_sparse_z_mm = _libmkl.mkl_sparse_z_mm
-
-    # Import function for matmul dense*dense
-    _cblas_sgemm = _libmkl.cblas_sgemm
-    _cblas_dgemm = _libmkl.cblas_dgemm
-    _cblas_cgemm = _libmkl.cblas_cgemm
-    _cblas_zgemm = _libmkl.cblas_zgemm
-
-    # Import function for matrix * vector
-    _mkl_sparse_s_mv = _libmkl.mkl_sparse_s_mv
-    _mkl_sparse_d_mv = _libmkl.mkl_sparse_d_mv
-    _mkl_sparse_c_mv = _libmkl.mkl_sparse_c_mv
-    _mkl_sparse_z_mv = _libmkl.mkl_sparse_z_mv
-    
-    # Import function for sparse gram matrix
-    _mkl_sparse_syrk = _libmkl.mkl_sparse_syrk
-
-    # Import function for dense single gram matrix from sparse
-    _mkl_sparse_s_syrkd = _libmkl.mkl_sparse_s_syrkd
-    _mkl_sparse_d_syrkd = _libmkl.mkl_sparse_d_syrkd
-    _mkl_sparse_c_syrkd = _libmkl.mkl_sparse_c_syrkd
-    _mkl_sparse_z_syrkd = _libmkl.mkl_sparse_z_syrkd
-
-    # Import function for dense single gram matrix
-    _cblas_ssyrk = _libmkl.cblas_ssyrk
-    _cblas_dsyrk = _libmkl.cblas_dsyrk
-    _cblas_csyrk = _libmkl.cblas_csyrk
-    _cblas_zsyrk = _libmkl.cblas_zsyrk
-
-    # Import function for QR solver - reorder
-    _mkl_sparse_qr_reorder = _libmkl.mkl_sparse_qr_reorder
-
-    # Import function for QR solver - factorize
-    _mkl_sparse_d_qr_factorize = _libmkl.mkl_sparse_d_qr_factorize
-    _mkl_sparse_s_qr_factorize = _libmkl.mkl_sparse_s_qr_factorize
-
-    # Import function for QR solver - solve
-    _mkl_sparse_d_qr_solve = _libmkl.mkl_sparse_d_qr_solve
-    _mkl_sparse_s_qr_solve = _libmkl.mkl_sparse_s_qr_solve
-
-    @classmethod
-    def _set_int_type(cls, c_type, np_type):
-        cls.MKL_INT = c_type
-        cls.MKL_INT_NUMPY = np_type
-
-        cls._set_int_type_create()
-        cls._set_int_type_export()
-        cls._set_int_type_sparse_matmul()
-        cls._set_int_type_dense_matmul()
-        cls._set_int_type_vector_mul()
-        cls._set_int_type_qr_solver()
-        cls._set_int_type_misc()
-
-    @classmethod
-    def _set_int_type_create(cls):
-        """Set the correct argtypes for handle creation functions"""
-        cls._mkl_sparse_d_create_csr.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_d_create_csr.restypes = _ctypes.c_int
-        cls._mkl_sparse_s_create_csr.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_s_create_csr.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_create_csr.argtypes = cls._mkl_sparse_create_argtypes(np.csingle)
-        cls._mkl_sparse_c_create_csr.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_create_csr.argtypes = cls._mkl_sparse_create_argtypes(np.cdouble)
-        cls._mkl_sparse_z_create_csr.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_d_create_csc.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_d_create_csc.restypes = _ctypes.c_int
-        cls._mkl_sparse_s_create_csc.argtypes = cls._mkl_sparse_create_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_s_create_csc.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_create_csc.argtypes = cls._mkl_sparse_create_argtypes(np.csingle)
-        cls._mkl_sparse_c_create_csc.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_create_csc.argtypes = cls._mkl_sparse_create_argtypes(np.cdouble)
-        cls._mkl_sparse_z_create_csc.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_d_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_d_create_bsr.restypes = _ctypes.c_int
-        cls._mkl_sparse_s_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_s_create_bsr.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(np.csingle)
-        cls._mkl_sparse_c_create_bsr.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_create_bsr.argtypes = cls._mkl_sparse_create_bsr_argtypes(np.cdouble)
-        cls._mkl_sparse_z_create_bsr.restypes = _ctypes.c_int
-
-    @classmethod
-    def _set_int_type_export(cls):
-        """Set the correct argtypes for handle export functions"""
-        cls._mkl_sparse_d_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_d_export_csr.restypes = _ctypes.c_int
-        cls._mkl_sparse_s_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_s_export_csr.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_z_export_csr.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_export_csr.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_c_export_csr.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_s_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_s_export_csc.restypes = _ctypes.c_int
-        cls._mkl_sparse_d_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_d_export_csc.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_c_export_csc.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_export_csc.argtypes = cls._mkl_sparse_export_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_z_export_csc.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_s_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_s_export_bsr.restypes = _ctypes.c_int
-        cls._mkl_sparse_d_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_d_export_bsr.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_c_export_bsr.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_export_bsr.argtypes = cls._mkl_sparse_export_bsr_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_z_export_bsr.restypes = _ctypes.c_int
-
-    @classmethod
-    def _set_int_type_sparse_matmul(cls):
-        """Set the correct argtypes for sparse (*) sparse functions and sparse (*) dense functions"""
-        cls._mkl_sparse_spmm.argtypes = [_ctypes.c_int,
-                                         sparse_matrix_t,
-                                         sparse_matrix_t,
-                                         _ctypes.POINTER(sparse_matrix_t)]
-        cls._mkl_sparse_spmm.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_s_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_s_spmmd.restypes = _ctypes.c_int
-        cls._mkl_sparse_d_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_d_spmmd.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_float)
-        cls._mkl_sparse_c_spmmd.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_spmmd.argtypes = cls._mkl_sparse_spmmd_argtypes(_ctypes.c_double)
-        cls._mkl_sparse_z_spmmd.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_s_mm.argtypes = cls._mkl_sparse_mm_argtypes(_ctypes.c_float, _ctypes.c_float, _ctypes.c_float)
-        cls._mkl_sparse_s_mm.restypes = _ctypes.c_int
-        cls._mkl_sparse_d_mm.argtypes = cls._mkl_sparse_mm_argtypes(_ctypes.c_double, _ctypes.c_double, _ctypes.c_double)
-        cls._mkl_sparse_d_mm.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_mm.argtypes = cls._mkl_sparse_mm_argtypes(MKL_Complex8, _ctypes.c_float, np.csingle)
-        cls._mkl_sparse_c_mm.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_mm.argtypes = cls._mkl_sparse_mm_argtypes(MKL_Complex16, _ctypes.c_double, np.cdouble)
-        cls._mkl_sparse_z_mm.restypes = _ctypes.c_int
-
-    @classmethod
-    def _set_int_type_dense_matmul(cls):
-        """Set the correct argtypes for dense (*) dense functions"""
-        cls._cblas_sgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_float)
-        cls._cblas_sgemm.restypes = None
-        cls._cblas_dgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_double)
-        cls._cblas_dgemm.restypes = None
-        cls._cblas_cgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_void_p)
-        cls._cblas_cgemm.restypes = None
-        cls._cblas_zgemm.argtypes = cls._cblas_gemm_argtypes(_ctypes.c_void_p)
-        cls._cblas_zgemm.restypes = None
-    
-    @classmethod
-    def _set_int_type_vector_mul(cls):
-        """Set the correct argtypes for sparse (*) vector functions"""
-        cls._mkl_sparse_s_mv.argtypes = cls._mkl_sparse_mv_argtypes(_ctypes.c_float, np.float32)
-        cls._mkl_sparse_s_mv.restypes = _ctypes.c_int
-        cls._mkl_sparse_d_mv.argtypes = cls._mkl_sparse_mv_argtypes(_ctypes.c_double, np.float64)
-        cls._mkl_sparse_d_mv.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_mv.argtypes = cls._mkl_sparse_mv_argtypes(MKL_Complex8, np.csingle)
-        cls._mkl_sparse_c_mv.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_mv.argtypes = cls._mkl_sparse_mv_argtypes(MKL_Complex16, np.cdouble)
-        cls._mkl_sparse_z_mv.restypes = _ctypes.c_int
-
-    @classmethod
-    def _set_int_type_misc(cls):
-        cls._mkl_sparse_destroy.argtypes = [sparse_matrix_t]
-        cls._mkl_sparse_destroy.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_order.argtypes = [sparse_matrix_t]
-        cls._mkl_sparse_order.restypes = _ctypes.c_int
-
-
-    @classmethod
-    def _set_int_type_misc(cls):
-        cls._mkl_sparse_syrk.argtypes = [_ctypes.c_int,
-                                         sparse_matrix_t,
-                                         _ctypes.POINTER(sparse_matrix_t)]
-        cls._mkl_sparse_syrk.restypes = _ctypes.c_int
-
-        cls._mkl_sparse_s_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(_ctypes.c_float, np.float32)
-        cls._mkl_sparse_s_syrkd.restypes = _ctypes.c_int
-        cls._mkl_sparse_d_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(_ctypes.c_double, np.float64)
-        cls._mkl_sparse_d_syrkd.restypes = _ctypes.c_int
-        cls._mkl_sparse_c_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(MKL_Complex8, np.csingle)
-        cls._mkl_sparse_c_syrkd.restypes = _ctypes.c_int
-        cls._mkl_sparse_z_syrkd.argtypes = cls._mkl_sparse_syrkd_argtypes(MKL_Complex16, np.cdouble)
-        cls._mkl_sparse_z_syrkd.restypes = _ctypes.c_int
-
-        cls._cblas_ssyrk.argtypes = cls._cblas_syrk_argtypes(_ctypes.c_float, np.float32)
-        cls._cblas_ssyrk.restypes = None
-        cls._cblas_dsyrk.argtypes = cls._cblas_syrk_argtypes(_ctypes.c_double, np.float64)
-        cls._cblas_dsyrk.restypes = None
-        cls._cblas_csyrk.argtypes = cls._cblas_syrk_argtypes(MKL_Complex8, np.csingle, scalar_pointers=True)
-        cls._cblas_csyrk.restypes = None
-        cls._cblas_zsyrk.argtypes = cls._cblas_syrk_argtypes(MKL_Complex16, np.cdouble, scalar_pointers=True)
-        cls._cblas_zsyrk.restypes = None
-
-
-    @classmethod
-    def _set_int_type_qr_solver(cls):
-        """Set the correct argtypes for QR solver functions"""
-        cls._mkl_sparse_qr_reorder.argtypes = [sparse_matrix_t, matrix_descr]
-        cls._mkl_sparse_qr_reorder.restypes = _ctypes.c_int
-        cls._mkl_sparse_d_qr_factorize.argtypes = [sparse_matrix_t, _ctypes.POINTER(_ctypes.c_double)]
-        cls._mkl_sparse_d_qr_factorize.restypes = _ctypes.c_int
-        cls._mkl_sparse_s_qr_factorize.argtypes = [sparse_matrix_t, _ctypes.POINTER(_ctypes.c_float)]
-        cls._mkl_sparse_s_qr_factorize.restypes = _ctypes.c_int
-        cls._mkl_sparse_d_qr_solve.argtypes = cls._mkl_sparse_qr_solve(_ctypes.c_double)
-        cls._mkl_sparse_d_qr_solve.restypes = _ctypes.c_int
-        cls._mkl_sparse_s_qr_solve.argtypes = cls._mkl_sparse_qr_solve(_ctypes.c_float)
-        cls._mkl_sparse_s_qr_solve.restypes = _ctypes.c_int
-
-    def __init__(self):
-        raise NotImplementedError("This class is not intended to be instanced")
-
-    """ The following methods return the argtype lists for each MKL function that has s and d variants"""
-
-    @staticmethod
-    def _mkl_sparse_create_argtypes(prec_type):
-        return [_ctypes.POINTER(sparse_matrix_t),
-                _ctypes.c_int,
-                MKL.MKL_INT,
-                MKL.MKL_INT,
-                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
-                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
-                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
-                ndpointer(dtype=prec_type, ndim=1, flags='C_CONTIGUOUS')]
-
-    @staticmethod
-    def _mkl_sparse_create_bsr_argtypes(prec_type):
-        return [_ctypes.POINTER(sparse_matrix_t),
-                _ctypes.c_int,
-                _ctypes.c_int,
-                MKL.MKL_INT,
-                MKL.MKL_INT,
-                MKL.MKL_INT,
-                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
-                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
-                ndpointer(dtype=MKL.MKL_INT, ndim=1, flags='C_CONTIGUOUS'),
-                ndpointer(dtype=prec_type, ndim=3, flags='C_CONTIGUOUS')]
-
-    @staticmethod
-    def _mkl_sparse_export_argtypes(prec_type):
-        return [sparse_matrix_t,
-                _ctypes.POINTER(_ctypes.c_int),
-                _ctypes.POINTER(MKL.MKL_INT),
-                _ctypes.POINTER(MKL.MKL_INT),
-                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
-                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
-                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
-                _ctypes.POINTER(_ctypes.POINTER(prec_type))]
-
-    @staticmethod
-    def _mkl_sparse_export_bsr_argtypes(prec_type):
-        return [sparse_matrix_t,
-                _ctypes.POINTER(_ctypes.c_int),
-                _ctypes.POINTER(_ctypes.c_int),
-                _ctypes.POINTER(MKL.MKL_INT),
-                _ctypes.POINTER(MKL.MKL_INT),
-                _ctypes.POINTER(MKL.MKL_INT),
-                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
-                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
-                _ctypes.POINTER(_ctypes.POINTER(MKL.MKL_INT)),
-                _ctypes.POINTER(_ctypes.POINTER(prec_type))]
-
-    @staticmethod
-    def _cblas_gemm_argtypes(prec_type, scalar_pointers=False):
-        return [_ctypes.c_int,
-                _ctypes.c_int,
-                _ctypes.c_int,
-                MKL.MKL_INT,
-                MKL.MKL_INT,
-                MKL.MKL_INT,
-                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
-                _ctypes.POINTER(prec_type),
-                MKL.MKL_INT,
-                _ctypes.POINTER(prec_type),
-                MKL.MKL_INT,
-                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
-                _ctypes.POINTER(prec_type),
-                MKL.MKL_INT]
-
-    @staticmethod
-    def _mkl_sparse_spmmd_argtypes(prec_type):
-        return [_ctypes.c_int,
-                sparse_matrix_t,
-                sparse_matrix_t,
-                _ctypes.c_int,
-                _ctypes.POINTER(prec_type), 
-                MKL.MKL_INT]
-
-    @staticmethod
-    def _mkl_sparse_mm_argtypes(scalar_type, prec_type, np_prec_type):
-        return [_ctypes.c_int,
-                scalar_type,
-                sparse_matrix_t,
-                matrix_descr,
-                _ctypes.c_int,
-                ndpointer(dtype=np_prec_type, ndim=2),
-                MKL.MKL_INT,
-                MKL.MKL_INT,
-                scalar_type,
-                _ctypes.POINTER(prec_type),
-                MKL.MKL_INT]
-
-    @staticmethod
-    def _mkl_sparse_mv_argtypes(prec_type, np_type):
-        return [_ctypes.c_int,
-                prec_type,
-                sparse_matrix_t,
-                matrix_descr,
-                ndpointer(dtype=np_type, ndim=1),
-                prec_type,
-                ndpointer(dtype=np_type)]
-
-    @staticmethod
-    def _mkl_sparse_syrkd_argtypes(prec_type, np_type):
-        return [_ctypes.c_int,
-                sparse_matrix_t,
-                prec_type,
-                prec_type,
-                ndpointer(dtype=np_type),
-                _ctypes.c_int,
-                MKL.MKL_INT]
-
-    @staticmethod
-    def _cblas_syrk_argtypes(prec_type, np_type, scalar_pointers=False):
-        return [_ctypes.c_int,
-                _ctypes.c_int,
-                _ctypes.c_int,
-                MKL.MKL_INT,
-                MKL.MKL_INT,
-                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
-                ndpointer(dtype=np_type, ndim=2),
-                MKL.MKL_INT,
-                _ctypes.POINTER(prec_type) if scalar_pointers else prec_type,
-                ndpointer(dtype=np_type, ndim=2),
-                MKL.MKL_INT]
-
-    @staticmethod
-    def _mkl_sparse_qr_solve(prec_type):
-        return [_ctypes.c_int,
-                sparse_matrix_t,
-                _ctypes.POINTER(prec_type),
-                _ctypes.c_int,
-                MKL.MKL_INT,
-                _ctypes.POINTER(prec_type),
-                MKL.MKL_INT,
-                ndpointer(dtype=prec_type, ndim=2),
-                MKL.MKL_INT]
-
-
-# Construct opaque struct & type
-class _sparse_matrix(_ctypes.Structure):
-    pass
-
-
-sparse_matrix_t = _ctypes.POINTER(_sparse_matrix)
-
-
-# Matrix description struct
-class matrix_descr(_ctypes.Structure):
-    _fields_ = [("sparse_matrix_type_t", _ctypes.c_int),
-                ("sparse_fill_mode_t", _ctypes.c_int),
-                ("sparse_diag_type_t", _ctypes.c_int)]
-
-    def __init__(self, sparse_matrix_type_t=20, sparse_fill_mode_t=0, sparse_diag_type_t=0):
-        super(matrix_descr, self).__init__(sparse_matrix_type_t, sparse_fill_mode_t, sparse_diag_type_t)
-
-
-# Complex type structs
-# These are the same as the np.csingle and np.cdouble structs
-# They're defined to allow passing complex scalars of specific precisions directly
-class MKL_Complex8(_ctypes.Structure):
-    _fields_ = [("real", _ctypes.c_float),
-                ("imag", _ctypes.c_float)]
-
-    def __init__(self, cplx):
-        super(MKL_Complex8, self).__init__(cplx.real, cplx.imag)
-
-
-class MKL_Complex16(_ctypes.Structure):
-    _fields_ = [("real", _ctypes.c_double),
-                ("imag", _ctypes.c_double)]
-
-    def __init__(self, cplx):
-        super(MKL_Complex16, self).__init__(cplx.real, cplx.imag)
-
-
-# Define standard return codes
-RETURN_CODES = {0: "SPARSE_STATUS_SUCCESS",
-                1: "SPARSE_STATUS_NOT_INITIALIZED",
-                2: "SPARSE_STATUS_ALLOC_FAILED",
-                3: "SPARSE_STATUS_INVALID_VALUE",
-                4: "SPARSE_STATUS_EXECUTION_FAILED",
-                5: "SPARSE_STATUS_INTERNAL_ERROR",
-                6: "SPARSE_STATUS_NOT_SUPPORTED"}
-
-# Define order codes
-LAYOUT_CODE_C = 101
-LAYOUT_CODE_F = 102
-
-# Define transpose codes
-SPARSE_OPERATION_NON_TRANSPOSE = 10
-SPARSE_OPERATION_TRANSPOSE = 11
-
-# Define index codes
-SPARSE_INDEX_BASE_ZERO = 0
-SPARSE_INDEX_BASE_ONE = 1
-
-# ILP64 message
-ILP64_MSG = " Try changing MKL to int64 with the environment variable MKL_INTERFACE_LAYER=ILP64"
 
 # Dict keyed by ('sparse_type_str', 'double_precision_bool', 'complex_bool')
 _create_functions = {('csr', False, False): MKL._mkl_sparse_s_create_csr,
@@ -576,10 +61,13 @@ def __init__(self, cplx):
                      ('bsr', True, True): MKL._mkl_sparse_z_export_bsr}
 
 # Dict keyed by ('double_precision_bool', 'complex_bool')
-_output_dtypes = {(False, False): np.float32,
-                 (True, False): np.float64,
-                 (False, True): np.csingle,
-                 (True, True): np.cdouble}
+_output_dtypes = {(False, False): _np.float32,
+                 (True, False): _np.float64,
+                 (False, True): _np.csingle,
+                 (True, True): _np.cdouble}
+
+NUMPY_FLOAT_DTYPES = [_np.float32, _np.float64]
+NUMPY_COMPLEX_DTYPES = [_np.csingle, _np.cdouble]
 
 def set_debug_mode(debug_bool):
     """
@@ -606,8 +94,8 @@ def print_mkl_debug():
         print(get_version_string())
         print("MKL Number of Threads: {n}".format(n=get_max_threads()))
 
-    print("MKL linked: {fn}".format(fn=_libmkl._name))
-    print("MKL interface {np} | {c}".format(np=MKL.MKL_INT_NUMPY, c=MKL.MKL_INT))
+    print("MKL linked: {fn}".format(fn=mkl_library_name()))
+    print("MKL interface {_np} | {c}".format(_np=MKL.MKL_INT_NUMPY, c=MKL.MKL_INT))
     print("Set int32 interface with env MKL_INTERFACE_LAYER=LP64")
     print("Set int64 interface with env MKL_INTERFACE_LAYER=ILP64")
 
@@ -653,7 +141,7 @@ def _check_scipy_index_typing(sparse_matrix):
     :type sparse_matrix: scipy.sparse.spmatrix
     """
 
-    int_max = np.iinfo(MKL.MKL_INT_NUMPY).max
+    int_max = _np.iinfo(MKL.MKL_INT_NUMPY).max
     if (sparse_matrix.nnz > int_max) or (max(sparse_matrix.shape) > int_max):
         msg = "MKL interface is {t} and cannot hold matrix {m}\n".format(m=repr(sparse_matrix), t=MKL.MKL_INT_NUMPY)
         msg += "Try changing MKL to int64 with the environment variable MKL_INTERFACE_LAYER=ILP64"
@@ -672,9 +160,9 @@ def _get_numpy_layout(numpy_arr, second_arr=None):
     Raises a ValueError if the array is not contiguous.
 
     :param numpy_arr: Numpy dense array
-    :type numpy_arr: np.ndarray
+    :type numpy_arr: _np.ndarray
     :param second_arr: Numpy dense array; if numpy_arr is 1d (and therefore both C and F order), use this order
-    :type second_arr: np.ndarray, None
+    :type second_arr: _np.ndarray, None
     :return: The layout code for MKL and the leading dimension
     :rtype: int, int
     """
@@ -874,7 +362,7 @@ def _export_mkl(csr_mkl_handle, double_precision, complex_type=False, output_typ
     indptrb = as_array(indptrb, shape=(index_dim,))
     indptren = as_array(indptren, shape=(index_dim,))
 
-    indptren = np.insert(indptren, 0, indptrb[0])
+    indptren = _np.insert(indptren, 0, indptrb[0])
     nnz = indptren[-1] - indptrb[0]
 
     # If there are no non-zeros, return an empty matrix
@@ -885,8 +373,8 @@ def _export_mkl(csr_mkl_handle, double_precision, complex_type=False, output_typ
         raise ValueError("Matrix ({m} x {n}) is attempting to index {z} elements".format(m=nrows, n=ncols, z=nnz))
 
     # Construct numpy arrays from data pointer and from indicies pointer
-    data = np.array(as_array(data, shape=(nnz * 2 if complex_type else nnz,)), copy=True)
-    indices = np.array(as_array(indices, shape=(nnz,)), copy=True)
+    data = _np.array(as_array(data, shape=(nnz * 2 if complex_type else nnz,)), copy=True)
+    indices = _np.array(as_array(indices, shape=(nnz,)), copy=True)
     
     # Pack and return the matrix
     return sp_matrix_constructor((data.view(final_dtype) if complex_type else data, indices, indptren),
@@ -944,7 +432,7 @@ def _export_mkl_sparse_bsr(bsr_mkl_handle, double_precision, complex_type=False)
     indptrb = as_array(indptrb, shape=(index_dim,))
     indptren = as_array(indptren, shape=(index_dim,))
 
-    indptren = np.insert(indptren, 0, indptrb[0])
+    indptren = _np.insert(indptren, 0, indptrb[0])
 
     nnz_blocks = (indptren[-1] - indptrb[0])
     nnz = nnz_blocks * (block_size ** 2)
@@ -959,8 +447,8 @@ def _export_mkl_sparse_bsr(bsr_mkl_handle, double_precision, complex_type=False)
         raise ValueError(_err)
 
     nnz_row_block = block_size if not complex_type else block_size * 2
-    data = np.array(as_array(data, shape=(nnz_blocks, block_size, nnz_row_block)), copy=True, order=ordering)
-    indices = np.array(as_array(indices, shape=(nnz_blocks,)), copy=True)
+    data = _np.array(as_array(data, shape=(nnz_blocks, block_size, nnz_row_block)), copy=True, order=ordering)
+    indices = _np.array(as_array(indices, shape=(nnz_blocks,)), copy=True)
 
     return _spsparse.bsr_matrix((data.view(final_dtype) if complex_type else data, indices, indptren),
                                 shape=(nrows, ncols), blocksize=block_dims)
@@ -1086,11 +574,11 @@ def _sanity_check(matrix_a, matrix_b, allow_vector=False):
 
 def _cast_to_float64(matrix):
     """ Make a copy of the array as double precision floats or return the reference if it already is"""
-    return matrix.astype(np.float64) if matrix.dtype != np.float64 else matrix
+    return matrix.astype(_np.float64) if matrix.dtype != _np.float64 else matrix
 
 def _cast_to_cdouble(matrix):
     """ Make a copy of the array as double precision complex floats or return the reference if it already is"""
-    return matrix.astype(np.cdouble) if matrix.dtype != np.cdouble else matrix
+    return matrix.astype(_np.cdouble) if matrix.dtype != _np.cdouble else matrix
 
 def _type_check(matrix_a, matrix_b=None, cast=False):
     """
@@ -1099,7 +587,7 @@ def _type_check(matrix_a, matrix_b=None, cast=False):
     """
 
     all_dtypes = NUMPY_FLOAT_DTYPES + NUMPY_COMPLEX_DTYPES
-    _n_complex = np.iscomplexobj(matrix_a) + np.iscomplexobj(matrix_b)
+    _n_complex = _np.iscomplexobj(matrix_a) + _np.iscomplexobj(matrix_b)
 
     # If there's no matrix B and matrix A is valid dtype, return it
     if matrix_b is None and matrix_a.dtype in all_dtypes:
@@ -1120,11 +608,11 @@ def _type_check(matrix_a, matrix_b=None, cast=False):
         return matrix_a, matrix_b
     # If neither matrix is complex and cast is True, convert to float64s and return them
     elif cast and _n_complex == 0:
-        debug_print("Recasting matrix data types {a} and {b} to np.float64".format(a=matrix_a.dtype, b=matrix_b.dtype))
+        debug_print("Recasting matrix data types {a} and {b} to _np.float64".format(a=matrix_a.dtype, b=matrix_b.dtype))
         return _cast_to_float64(matrix_a), _cast_to_float64(matrix_b)
     # If both matrices are complex and cast is True, convert to cdoubles and return them
     elif cast and _n_complex == 2:
-        debug_print("Recasting matrix data types {a} and {b} to np.cdouble".format(a=matrix_a.dtype, b=matrix_b.dtype))
+        debug_print("Recasting matrix data types {a} and {b} to _np.cdouble".format(a=matrix_a.dtype, b=matrix_b.dtype))
         return _cast_to_cdouble(matrix_a), _cast_to_cdouble(matrix_b)
     # Can't cast reals and complex matrices together
     elif cast and _n_complex == 1:
@@ -1156,22 +644,22 @@ def _out_matrix(shape, dtype, order="C", out_arr=None, out_t=False):
     :param shape: Required output shape
     :type shape: tuple(int)
     :param dtype: Required output data type
-    :type dtype: np.dtype
+    :type dtype: _np.dtype
     :param order: Array order (row or column-major)
     :type order: str
     :param out_arr: Provided output array
-    :type out_arr: np.ndarray
+    :type out_arr: _np.ndarray
     :param out_t: Out array has been transposed 
     :type out_t: bool
     :return: Array
-    :rtype: np.ndarray
+    :rtype: _np.ndarray
     """
 
     out_t = False if out_t is None else out_t
 
     # If there's no output array allocate a new array and return it
     if out_arr is None:
-        return np.zeros(shape, dtype=dtype, order=order)
+        return _np.zeros(shape, dtype=dtype, order=order)
 
     # Check and make sure the order is correct
     # Note 1d arrays have both flags set
@@ -1214,19 +702,19 @@ def _is_double(arr):
     Return true if the array is doubles, false if singles, and raise an error if it's neither.
 
     :param arr:
-    :type arr: np.ndarray, scipy.sparse.spmatrix
+    :type arr: _np.ndarray, scipy.sparse.spmatrix
     :return:
     :rtype: bool
     """
 
     # Figure out which dtype for data
-    if arr.dtype == np.float32:
+    if arr.dtype == _np.float32:
         return False, False
-    elif arr.dtype == np.float64:
+    elif arr.dtype == _np.float64:
         return True, False
-    elif arr.dtype == np.csingle:
+    elif arr.dtype == _np.csingle:
         return False, True
-    elif arr.dtype == np.cdouble:
+    elif arr.dtype == _np.cdouble:
         return True, True
     else: 
         raise ValueError("Only float32, float64, csingle, and cdouble dtypes are supported")
@@ -1260,53 +748,4 @@ def _empty_output_check(matrix_a, matrix_b):
 
     # Neither trivial condition
     else:
-        return False
-
-
-def _validate_dtype():
-    """
-    Test to make sure that this library works by creating a random sparse array in CSC format,
-    then converting it to CSR format and making sure is has not raised an exception.
-
-    """
-
-    test_array = _spsparse.random(5, 5, density=0.5, format="csc", dtype=np.float32, random_state=50)
-    test_comparison = test_array.A
-
-    csc_ref, precision_flag, _ = _create_mkl_sparse(test_array)
-
-    try:
-        csr_ref = _convert_to_csr(csc_ref)
-        final_array = _export_mkl(csr_ref, precision_flag)
-        if not np.allclose(test_comparison, final_array.A):
-            raise ValueError("Match failed after matrix conversion")
-        _destroy_mkl_handle(csr_ref)
-    finally:
-        _destroy_mkl_handle(csc_ref)
-
-
-def _empirical_set_dtype():
-    """
-    Define dtypes empirically
-    Basically just try with int64s and if that doesn't work try with int32s
-    There's a way to do this with intel's mkl helper package but I don't want to add the dependency
-    """
-    MKL._set_int_type(_ctypes.c_longlong, np.int64)
-
-    try:
-        _validate_dtype()
-    except ValueError as err:
-
-        MKL._set_int_type(_ctypes.c_int, np.int32)
-
-        try:
-            _validate_dtype()
-        except ValueError:
-            raise ImportError("Unable to set MKL numeric type")
-
-
-if MKL.MKL_INT is None:
-    _empirical_set_dtype()
-
-if _sklearn_env is not None:
-    os.environ['KMP_INIT_AT_FORK'] = _sklearn_env
+        return False
\ No newline at end of file
diff --git a/sparse_dot_mkl/_mkl_interface/_constants.py b/sparse_dot_mkl/_mkl_interface/_constants.py
new file mode 100644
index 0000000..1b89ada
--- /dev/null
+++ b/sparse_dot_mkl/_mkl_interface/_constants.py
@@ -0,0 +1,34 @@
+# Define standard return codes
+RETURN_CODES = {0: "SPARSE_STATUS_SUCCESS",
+                1: "SPARSE_STATUS_NOT_INITIALIZED",
+                2: "SPARSE_STATUS_ALLOC_FAILED",
+                3: "SPARSE_STATUS_INVALID_VALUE",
+                4: "SPARSE_STATUS_EXECUTION_FAILED",
+                5: "SPARSE_STATUS_INTERNAL_ERROR",
+                6: "SPARSE_STATUS_NOT_SUPPORTED"}
+
+# Define order codes
+LAYOUT_CODE_C = 101
+LAYOUT_CODE_F = 102
+
+# Define transpose codes
+SPARSE_OPERATION_NON_TRANSPOSE = 10
+SPARSE_OPERATION_TRANSPOSE = 11
+SPARSE_OPERATION_CONJUGATE_TRANSPOSE = 12
+
+# Define cblas transpose codes
+CBLAS_NO_TRANS = 111
+CBLAS_TRANS = 112
+CBLAS_CONJ_TRANS = 113
+
+# Define index codes
+SPARSE_INDEX_BASE_ZERO = 0
+SPARSE_INDEX_BASE_ONE = 1
+
+# Define sparse & cblas upper or lower triangle code
+MKL_UPPER = 121,
+MKL_LOWER = 122
+
+# ILP64 message
+ILP64_MSG = " Try changing MKL to int64 with the environment variable MKL_INTERFACE_LAYER=ILP64"
+
diff --git a/sparse_dot_mkl/_mkl_interface/_structs.py b/sparse_dot_mkl/_mkl_interface/_structs.py
new file mode 100644
index 0000000..fe105bc
--- /dev/null
+++ b/sparse_dot_mkl/_mkl_interface/_structs.py
@@ -0,0 +1,37 @@
+import ctypes as _ctypes
+
+# Construct opaque struct & type
+class _sparse_matrix(_ctypes.Structure):
+    pass
+
+
+sparse_matrix_t = _ctypes.POINTER(_sparse_matrix)
+
+
+# Matrix description struct
+class matrix_descr(_ctypes.Structure):
+    _fields_ = [("sparse_matrix_type_t", _ctypes.c_int),
+                ("sparse_fill_mode_t", _ctypes.c_int),
+                ("sparse_diag_type_t", _ctypes.c_int)]
+
+    def __init__(self, sparse_matrix_type_t=20, sparse_fill_mode_t=0, sparse_diag_type_t=0):
+        super(matrix_descr, self).__init__(sparse_matrix_type_t, sparse_fill_mode_t, sparse_diag_type_t)
+
+
+# Complex type structs
+# These are the same as the np.csingle and np.cdouble structs
+# They're defined to allow passing complex scalars of specific precisions directly
+class MKL_Complex8(_ctypes.Structure):
+    _fields_ = [("real", _ctypes.c_float),
+                ("imag", _ctypes.c_float)]
+
+    def __init__(self, cplx):
+        super(MKL_Complex8, self).__init__(cplx.real, cplx.imag)
+
+
+class MKL_Complex16(_ctypes.Structure):
+    _fields_ = [("real", _ctypes.c_double),
+                ("imag", _ctypes.c_double)]
+
+    def __init__(self, cplx):
+        super(MKL_Complex16, self).__init__(cplx.real, cplx.imag)
\ No newline at end of file

From 7bd7a74c7d0dd9c1dd0071b0581ed543903c9fc2 Mon Sep 17 00:00:00 2001
From: asistradition <asistradition@users.noreply.github.com>
Date: Thu, 6 Jan 2022 12:27:19 -0500
Subject: [PATCH 4/4] Clean up gram_matrix

---
 sparse_dot_mkl/_gram_matrix.py              | 23 ++++++++---
 sparse_dot_mkl/_mkl_interface/_constants.py | 20 +++++++++-
 sparse_dot_mkl/tests/test_gram_matrix.py    | 42 ++++++++++++++++++---
 3 files changed, 73 insertions(+), 12 deletions(-)

diff --git a/sparse_dot_mkl/_gram_matrix.py b/sparse_dot_mkl/_gram_matrix.py
index a74da97..df693ab 100644
--- a/sparse_dot_mkl/_gram_matrix.py
+++ b/sparse_dot_mkl/_gram_matrix.py
@@ -1,13 +1,20 @@
 from sparse_dot_mkl._mkl_interface import (MKL, sparse_matrix_t, _create_mkl_sparse,
                                            _export_mkl, _order_mkl_handle, _destroy_mkl_handle, _type_check,
-                                           _get_numpy_layout, _convert_to_csr, _empty_output_check, LAYOUT_CODE_C,
+                                           _get_numpy_layout, _convert_to_csr, _empty_output_check,
                                            _out_matrix, _check_return_value, debug_print, _output_dtypes, _mkl_scalar,
                                            _is_double)
+                                           
+from sparse_dot_mkl._mkl_interface._constants import *
 
 import scipy.sparse as _sps
 import ctypes as _ctypes
 import numpy as np
 
+# Dict keyed by ('transpose_bool', 'complex_bool')
+_mkl_sp_transpose_ops = {(False, False): SPARSE_OPERATION_NON_TRANSPOSE,
+                         (True, False): SPARSE_OPERATION_TRANSPOSE,
+                         (False, True): SPARSE_OPERATION_NON_TRANSPOSE,
+                         (True, True): SPARSE_OPERATION_CONJUGATE_TRANSPOSE}
 
 def _gram_matrix_sparse(matrix_a, aat=False, reorder_output=False):
     """
@@ -32,7 +39,7 @@ def _gram_matrix_sparse(matrix_a, aat=False, reorder_output=False):
 
     ref_handle = sparse_matrix_t()
 
-    ret_val = MKL._mkl_sparse_syrk(10 if aat else 11,
+    ret_val = MKL._mkl_sparse_syrk(_mkl_sp_transpose_ops[(not aat, complex_type)],
                                    sp_ref_a,
                                    _ctypes.byref(ref_handle))
 
@@ -91,7 +98,7 @@ def _gram_matrix_sparse_to_dense(matrix_a, aat=False, scalar=1., out=None, out_s
     scalar = _mkl_scalar(scalar, complex_type, double_prec)
     out_scalar = _mkl_scalar(out_scalar, complex_type, double_prec)
 
-    ret_val = func(10 if aat else 11,
+    ret_val = func(_mkl_sp_transpose_ops[(not aat, complex_type)],
                    sp_ref_a,
                    scalar,
                    out_scalar,
@@ -119,6 +126,12 @@ def _gram_matrix_sparse_to_dense(matrix_a, aat=False, scalar=1., out=None, out_s
                         (False, True): MKL._cblas_csyrk,
                         (True, True): MKL._cblas_zsyrk}
 
+# Dict keyed by ('transpose_bool', 'complex_bool')
+_mkl_cblas_transpose_ops = {(False, False): CBLAS_NO_TRANS,
+                            (True, False): CBLAS_TRANS,
+                            (False, True): CBLAS_NO_TRANS,
+                            (True, True): CBLAS_CONJ_TRANS}
+
 def _gram_matrix_dense_to_dense(matrix_a, aat=False, scalar=1., out=None, out_scalar=None):
     """
     Calculate the gram matrix aTa for dense matrix and return a dense matrix
@@ -156,8 +169,8 @@ def _gram_matrix_dense_to_dense(matrix_a, aat=False, scalar=1., out=None, out_sc
     out_scalar = _mkl_scalar(out_scalar, complex_type, double_precision)
 
     func(layout_a,
-         121,
-         111 if aat else 112,
+         MKL_UPPER,
+         _mkl_cblas_transpose_ops[(not aat, complex_type)],
          n,
          k,
          scalar if not complex_type else _ctypes.byref(scalar),
diff --git a/sparse_dot_mkl/_mkl_interface/_constants.py b/sparse_dot_mkl/_mkl_interface/_constants.py
index 1b89ada..c703c31 100644
--- a/sparse_dot_mkl/_mkl_interface/_constants.py
+++ b/sparse_dot_mkl/_mkl_interface/_constants.py
@@ -25,8 +25,26 @@
 SPARSE_INDEX_BASE_ZERO = 0
 SPARSE_INDEX_BASE_ONE = 1
 
+# Define matrix types
+SPARSE_MATRIX_TYPE_GENERAL            = 20   #    General case                    
+SPARSE_MATRIX_TYPE_SYMMETRIC          = 21   #    Triangular part of              
+SPARSE_MATRIX_TYPE_HERMITIAN          = 22   #    the matrix is to be processed   
+SPARSE_MATRIX_TYPE_TRIANGULAR         = 23
+SPARSE_MATRIX_TYPE_DIAGONAL           = 24   # diagonal matrix; only diagonal elements will be processed 
+SPARSE_MATRIX_TYPE_BLOCK_TRIANGULAR   = 25
+SPARSE_MATRIX_TYPE_BLOCK_DIAGONAL     = 26    # block-diagonal matrix; only diagonal blocks will be processed 
+
+# Fill types for SYMMETRIC & HERMITIAN
+SPARSE_FILL_MODE_LOWER  = 40           # lower triangular part of the matrix is stored 
+SPARSE_FILL_MODE_UPPER  = 41           # upper triangular part of the matrix is stored 
+SPARSE_FILL_MODE_FULL   = 42           # upper triangular part of the matrix is stored 
+
+SPARSE_DIAG_NON_UNIT    = 50           # triangular matrix with non-unit diagonal
+SPARSE_DIAG_UNIT        = 51           # triangular matrix with unit diagonal
+
+
 # Define sparse & cblas upper or lower triangle code
-MKL_UPPER = 121,
+MKL_UPPER = 121
 MKL_LOWER = 122
 
 # ILP64 message
diff --git a/sparse_dot_mkl/tests/test_gram_matrix.py b/sparse_dot_mkl/tests/test_gram_matrix.py
index 03a9c47..c3526a2 100644
--- a/sparse_dot_mkl/tests/test_gram_matrix.py
+++ b/sparse_dot_mkl/tests/test_gram_matrix.py
@@ -26,6 +26,9 @@ def setUp(self):
         gram_ut_t[np.tril_indices(gram_ut_t.shape[0], k=-1)] = 0.
         self.gram_ut_t = gram_ut_t
 
+
+class TestGramMatrixSparse(TestGramMatrix):
+
     def test_gram_matrix_sp(self):
         mat2 = gram_matrix_mkl(self.mat1)
         npt.assert_array_almost_equal(mat2.A, self.gram_ut)
@@ -35,16 +38,16 @@ def test_gram_matrix_sp(self):
 
     def test_gram_matrix_sp_single(self):
         mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype))
-        npt.assert_array_almost_equal(mat2.A, self.gram_ut)
+        npt.assert_array_almost_equal(mat2.A, self.gram_ut, decimal=5)
 
     def test_gram_matrix_d_single(self):
         mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype), dense=True)
-        npt.assert_array_almost_equal(mat2, self.gram_ut)
+        npt.assert_array_almost_equal(mat2, self.gram_ut, decimal=5)
 
         mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype), dense=True,
                                out=np.zeros((self.mat1.shape[1], self.mat1.shape[1]), dtype=self.single_dtype),  out_scalar=1.)
         mat2[np.tril_indices(mat2.shape[0], k=-1)] = 0.
-        npt.assert_array_almost_equal(mat2, self.gram_ut)
+        npt.assert_array_almost_equal(mat2, self.gram_ut, decimal=5)
 
         with self.assertRaises(ValueError):
             mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype), dense=True,
@@ -52,7 +55,12 @@ def test_gram_matrix_d_single(self):
                                    out_scalar=1.)
 
     def test_gram_matrix_d(self):
+        print(self.mat1)
+
         mat2 = gram_matrix_mkl(self.mat1, dense=True)
+        print(mat2 - self.gram_ut)
+        print(mat2[np.tril_indices(mat2.shape[0], k=1)])
+
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
         mat2 = gram_matrix_mkl(self.mat1, dense=True,
@@ -78,6 +86,9 @@ def test_gram_matrix_csc_d(self):
         npt.assert_array_almost_equal(mat.A, self.mat1.A)
         npt.assert_array_almost_equal(mat2, self.gram_ut)
 
+
+class TestGramMatrixDense(TestGramMatrix):
+
     def test_gram_matrix_dd_double(self):
         mat2 = gram_matrix_mkl(self.mat1.A, dense=True)
         npt.assert_array_almost_equal(mat2, self.gram_ut)
@@ -113,12 +124,31 @@ def test_gram_matrix_dd_single_F(self):
         npt.assert_array_almost_equal(mat2, self.gram_ut, decimal=5)
 
 
-@unittest.skip
-class TestGramMatrixComplex(TestGramMatrix):
+class _TestGramMatrixComplex:
 
     double_dtype = np.cdouble
     single_dtype = np.csingle
 
     @classmethod
     def setUpClass(cls):
-        cls.MATRIX_1, _ = make_matrixes(200, 100, 300, 0.05, dtype=np.cdouble)
\ No newline at end of file
+        cls.MATRIX_1, _ = make_matrixes(200, 100, 300, 0.05, dtype=np.cdouble)
+
+    def setUp(self):
+        self.mat1 = self.MATRIX_1.copy()
+        self.mat1_d = self.MATRIX_1.A
+
+        gram_ut = np.dot(self.MATRIX_1.A.conj().T, self.MATRIX_1.A)
+        gram_ut[np.tril_indices(gram_ut.shape[0], k=-1)] = 0.
+        self.gram_ut = gram_ut
+
+        gram_ut_t = np.dot(self.MATRIX_1.A, self.MATRIX_1.A.conj().T)
+        gram_ut_t[np.tril_indices(gram_ut_t.shape[0], k=-1)] = 0.
+        self.gram_ut_t = gram_ut_t
+
+@unittest.skip
+class TestGramMatrixSparseComplex(_TestGramMatrixComplex, TestGramMatrixSparse):
+    pass
+
+@unittest.skip
+class TestGramMatrixDenseComplex(_TestGramMatrixComplex, TestGramMatrixDense):
+    pass