Merge pull request #19 from flatironinstitute/complex
Complex float dtype support
asistradition authored Jan 6, 2022
2 parents d04a277 + 7bd7a74 commit cad4216
Showing 23 changed files with 2,113 additions and 1,500 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,8 @@
+### Version 0.8.0
+
+* Added support for complex data types
+* Refactored _mkl_interface.py into a subpackage
+
 ### Version 0.7.3
 
 * Fixed a memory leak when a CSC matrix was multiplied by a dense matrix in column-major format
12 changes: 7 additions & 5 deletions README.md
@@ -19,12 +19,13 @@ Three functions are explicitly available - `dot_product_mkl`, `gram_matrix_mkl`,
 `matrix_a` and `matrix_b` are either numpy arrays (1d or 2d) or scipy sparse matrices (CSR, CSC, or BSR).
 BSR matrices are supported for matrix-matrix multiplication only if one matrix is a dense array or both sparse matrices are BSR.
 Sparse COO matrices are not supported.
-Numpy arrays must be contiguous. Non-contiguous arrays should be copied to a contiguous array prior to calling this
+Numpy arrays must be contiguous.
+Non-contiguous arrays should be copied to a contiguous array prior to calling this
 function.
 
-This package only works with float data.
-`cast=True` will convert data to double-precision floats by making an internal copy if necessary.
-If A and B are both single-precision floats they will be used as is.
+This package only works with float or complex float data.
+`cast=True` will convert data to double-precision floats or complex floats by making an internal copy if necessary.
+If A and B are both single-precision floats or complex floats they will be used as is.
 `cast=False` will raise a ValueError if the input arrays are not both double-precision or both single-precision.
 This defaults to `False` on the principle that potentially unsafe dtype conversions should not occur without explicit
 instruction.
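
For illustration, a sketch of the casting rules described above (not part of the committed diff; the matrix shapes, density, and values here are arbitrary test data):

```python
import numpy as np
import scipy.sparse as sps
from sparse_dot_mkl import dot_product_mkl

# One single-precision and one double-precision operand
a = sps.random(100, 100, density=0.1, format="csr", dtype=np.float32)
b = np.random.randn(100, 20)  # float64 by default

# The default cast=False refuses the unsafe dtype conversion
try:
    dot_product_mkl(a, b)
except ValueError:
    print("mixed precision rejected without cast=True")

# cast=True makes an internal double-precision copy instead
c = dot_product_mkl(a, b, cast=True)

# Complex dtypes follow the same rules as of this release
z = sps.random(100, 100, density=0.1, format="csr").astype(np.complex128)
w = dot_product_mkl(z, z)
```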
@@ -45,7 +46,8 @@ Scipy matrix multiplication does not produce ordered outputs, so this defaults t
 
 `out` is an optional reference to a dense output array to which the product of the matrix multiplication will be added.
 This must be identical in attributes to the array that would be returned if it was not used.
-Specifically it must have the correct shape, dtype, and column- or row-major order and it must be contiguous. A ValueError will be raised if any attribute of this array is incorrect.
+Specifically it must have the correct shape, dtype, and column- or row-major order and it must be contiguous.
+A ValueError will be raised if any attribute of this array is incorrect.
 This function will return a reference to the same array object when `out` is set.
 
 `out_scalar` is an optional element-wise scaling of `out`, if `out` is provided.
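
Similarly, a sketch of the `out` / `out_scalar` semantics described above (not part of the committed diff): the call computes `matrix_a @ matrix_b + out_scalar * out`, accumulating into `out` in place.

```python
import numpy as np
import scipy.sparse as sps
from sparse_dot_mkl import dot_product_mkl

a = sps.random(500, 200, density=0.1, format="csr", dtype=np.float64)
b = np.random.randn(200, 50)

# `out` must match the shape, dtype, ordering, and contiguity of the
# array the function would otherwise return
out = np.ones((500, 50), dtype=np.float64)
result = dot_product_mkl(a, b, out=out, out_scalar=2.0)

assert result is out  # the same array object is returned
```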
87 changes: 53 additions & 34 deletions demo.ipynb
@@ -4,28 +4,39 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "import scipy.sparse as sps\n",
+    "import numpy as np\n",
+    "\n",
+    "try:\n",
+    "    import sparse_dot_mkl\n",
+    "    dot_product = sparse_dot_mkl.dot_product_mkl\n",
+    "except ImportError:\n",
+    "    def dot_product(x, y, dense=False, **kwargs):\n",
+    "        z = x @ y\n",
+    "        return z.A if dense and sps.issparse(z) else z "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Using matplotlib backend: Qt5Agg\n",
-      "Populating the interactive namespace from numpy and matplotlib\n"
+      "X sparsity: 78.80 %\n"
      ]
     }
    ],
    "source": [
-    "%pylab"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import scipy.sparse\n",
-    "import sparse_dot_mkl"
+    "np.random.seed(0)\n",
+    "X = np.random.randn(500, 5000)\n",
+    "X[X < 0.8] = 0\n",
+    "X = sps.csr_matrix(X)\n",
+    "print(f'X sparsity: {100 * (1 - X.count_nonzero() / np.prod(X.shape)):5.2f} %')"
    ]
   },
   {
@@ -34,19 +45,19 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "X sparsity: 78.80 %\n"
-     ]
+     "data": {
+      "text/plain": [
+       "<500x500 sparse matrix of type '<class 'numpy.float64'>'\n",
+       "\twith 250000 stored elements in Compressed Sparse Row format>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "random.seed(0)\n",
-    "X = random.randn(500, 5000)\n",
-    "X[X < 0.8] = 0\n",
-    "X = scipy.sparse.csr_matrix(X)\n",
-    "print(f'X sparsity: {100 * (1 - X.count_nonzero() / prod(X.shape)):5.2f} %')"
+    "X @ X.T"
    ]
   },
   {
@@ -57,7 +68,7 @@
   "source": [
    "expected_result = (X @ X.T).toarray()\n",
    "expected_result_tril = expected_result.copy()\n",
-   "expected_result_tril[tril_indices(expected_result.shape[0], k=-1)] = 0"
+   "expected_result_tril[np.tril_indices(expected_result.shape[0], k=-1)] = 0"
   ]
  },
 {
@@ -78,7 +89,7 @@
   ],
   "source": [
    "mkl_result1 = sparse_dot_mkl.dot_product_mkl(X, X.T)\n",
-   "allclose(mkl_result1.toarray(), expected_result)"
+   "np.allclose(mkl_result1.toarray(), expected_result)"
   ]
  },
 {
@@ -98,8 +109,8 @@
    }
   ],
   "source": [
-   "mkl_result2 = sparse_dot_mkl.dot_product_transpose_mkl(X)\n",
-   "allclose(mkl_result2.toarray(), expected_result_tril)"
+   "mkl_result2 = sparse_dot_mkl.gram_matrix_mkl(X, transpose=True)\n",
+   "np.allclose(mkl_result2.toarray(), expected_result_tril)"
   ]
  },
 {
@@ -111,24 +122,32 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "197 ms ± 5.21 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
-     "70.6 ms ± 593 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
-     "34.2 ms ± 421 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+     "Scipy Matrix Multiplication Product:\n",
+     "204 ms ± 8.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
+     "MKL Matrix Multiplication Product:\n",
+     "52.5 ms ± 579 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
+     "MKL Gram Matrix Product:\n",
+     "28.1 ms ± 213 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
+   "print(\"Scipy Matrix Multiplication Product:\")\n",
    "%timeit X @ X.T\n",
+   "\n",
+   "print(\"MKL Matrix Multiplication Product:\")\n",
    "%timeit sparse_dot_mkl.dot_product_mkl(X, X.T)\n",
-   "%timeit sparse_dot_mkl.dot_product_transpose_mkl(X)"
+   "\n",
+   "print(\"MKL Gram Matrix Product:\")\n",
+   "%timeit sparse_dot_mkl.gram_matrix_mkl(X, transpose=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
-  "display_name": "Python 3",
+  "display_name": "Python [conda env:sparse-test]",
   "language": "python",
-  "name": "python3"
+  "name": "conda-env-sparse-test-py"
  },
  "language_info": {
   "codemirror_mode": {
@@ -140,7 +159,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.10"
+   "version": "3.8.3"
   }
  },
 "nbformat": 4,
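One point worth noting from the notebook diff above: the reference result zeroes the strictly lower triangle with `np.tril_indices(..., k=-1)` because `gram_matrix_mkl(X, transpose=True)` computes `X @ X.T` but fills only the upper triangle of the symmetric result, which is what the `np.allclose` cell then verifies. A condensed restatement of those cells (assuming the same `X` as in the notebook):

```python
import numpy as np

# Full symmetric product, then zero the strictly lower triangle to match
# the upper-triangular output of gram_matrix_mkl
expected_result_tril = (X @ X.T).toarray()
expected_result_tril[np.tril_indices(expected_result_tril.shape[0], k=-1)] = 0
```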
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@
 from setuptools import setup, find_packages
 
 DISTNAME = 'sparse_dot_mkl'
-VERSION = '0.7.3'
+VERSION = '0.8.0'
 DESCRIPTION = "Intel MKL wrapper for sparse matrix multiplication"
 MAINTAINER = 'Chris Jackson'
 MAINTAINER_EMAIL = '[email protected]'
38 changes: 27 additions & 11 deletions sparse_dot_mkl/_dense_dense.py
@@ -1,11 +1,19 @@
 from sparse_dot_mkl._mkl_interface import (MKL, _type_check, _sanity_check, _empty_output_check, _get_numpy_layout,
-                                           LAYOUT_CODE_C, LAYOUT_CODE_F, _out_matrix, debug_print)
+                                           LAYOUT_CODE_C, LAYOUT_CODE_F, _out_matrix, debug_print, _is_double,
+                                           _output_dtypes, _mkl_scalar)
 
 import numpy as np
 import ctypes as _ctypes
 
+# Dict keyed by ('double_precision_bool', 'complex_bool')
+_mkl_gemm_funcs = {(False, False): MKL._cblas_sgemm,
+                   (True, False): MKL._cblas_dgemm,
+                   (False, True): MKL._cblas_cgemm,
+                   (True, True): MKL._cblas_zgemm}
 
-def _dense_matmul(matrix_a, matrix_b, double_precision, scalar=1., out=None, out_scalar=None):
+def _dense_matmul(matrix_a, matrix_b, scalar=1., out=None, out_scalar=None):
 
+    double_precision, complex_type = _is_double(matrix_a)
+
     # Reshape matrix_b to a column instead of a vector if it's 1d
     flatten_output = matrix_b.ndim == 1
@@ -16,7 +24,7 @@ def _dense_matmul(matrix_a, matrix_b, double_precision, scalar=1., out=None, out
     output_shape = (m, n)
 
     # Set the MKL function for precision
-    func = MKL._cblas_dgemm if double_precision else MKL._cblas_sgemm
+    func = _mkl_gemm_funcs[(double_precision, complex_type)]
 
     # Get the memory order for arrays
     layout_a, ld_a = _get_numpy_layout(matrix_a)
@@ -29,21 +37,31 @@ def _dense_matmul(matrix_a, matrix_b, double_precision, scalar=1., out=None, out
     out_order, ld_out = ("C", output_shape[1]) if layout_a == LAYOUT_CODE_C else ("F", output_shape[0])
 
     # Allocate an array for outputs and set functions and types for float or doubles
-    output_arr = _out_matrix(output_shape, np.float64 if double_precision else np.float32, order=out_order, out_arr=out)
+    output_arr = _out_matrix(output_shape, _output_dtypes[(double_precision, complex_type)],
+                             order=out_order, out_arr=out)
 
     output_ctype = _ctypes.c_double if double_precision else _ctypes.c_float
 
+    # The complex functions all take void pointers
+    output_ctype = _ctypes.c_void_p if complex_type else output_ctype
+
+    # The complex versions of these functions take void pointers instead of passed structs
+    # So create a C struct if necessary to be passed by reference
+    scalar = _mkl_scalar(scalar, complex_type, double_precision)
+    out_scalar = _mkl_scalar(out_scalar, complex_type, double_precision)
+
     func(layout_a,
          111,  # CblasNoTrans for matrix_a
          op_b,
          m,
          n,
          k,
-         scalar,
-         matrix_a,
+         scalar if not complex_type else _ctypes.byref(scalar),
+         matrix_a.ctypes.data_as(_ctypes.POINTER(output_ctype)),
          ld_a,
-         matrix_b,
+         matrix_b.ctypes.data_as(_ctypes.POINTER(output_ctype)),
          ld_b,
-         float(out_scalar) if out_scalar is not None else 1.,
+         out_scalar if not complex_type else _ctypes.byref(out_scalar),
          output_arr.ctypes.data_as(_ctypes.POINTER(output_ctype)),
          ld_out)

@@ -62,6 +80,4 @@ def _dense_dot_dense(matrix_a, matrix_b, cast=False, scalar=1., out=None, out_sc
 
     matrix_a, matrix_b = _type_check(matrix_a, matrix_b, cast=cast)
 
-    a_dbl, b_dbl = matrix_a.dtype == np.float64, matrix_b.dtype == np.float64
-
-    return _dense_matmul(matrix_a, matrix_b, a_dbl or b_dbl, scalar=scalar, out=out, out_scalar=out_scalar)
+    return _dense_matmul(matrix_a, matrix_b, scalar=scalar, out=out, out_scalar=out_scalar)
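
The `_mkl_scalar` helper referenced in this diff lives in the refactored `_mkl_interface` subpackage, which is not among the files shown on this page. A minimal sketch of the idea, with hypothetical struct and helper definitions (the actual implementation may differ): cgemm/zgemm receive `alpha` and `beta` as void pointers, so complex scalars must be wrapped in C structs that can be passed with `_ctypes.byref`, while real scalars are passed by value.

```python
import ctypes as _ctypes

# Hypothetical layouts matching MKL's C complex types (real, imag pairs)
class MKL_Complex8(_ctypes.Structure):
    """Single-precision complex scalar."""
    _fields_ = [("real", _ctypes.c_float), ("imag", _ctypes.c_float)]

class MKL_Complex16(_ctypes.Structure):
    """Double-precision complex scalar."""
    _fields_ = [("real", _ctypes.c_double), ("imag", _ctypes.c_double)]

def _mkl_scalar(scalar, complex_type, double_precision):
    """Wrap a python scalar for a cblas gemm call (sketch)."""
    scalar = 1. if scalar is None else scalar  # beta defaults to 1.
    if complex_type:
        c_struct = MKL_Complex16 if double_precision else MKL_Complex8
        scalar = complex(scalar)
        return c_struct(scalar.real, scalar.imag)  # caller passes via byref
    return float(scalar)
```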
(Diffs for the remaining 18 changed files are not shown.)
