diff --git a/CHANGELOG.md b/CHANGELOG.md index 7544880..d635c03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### Version 0.9.1 + +* Support for `out` parameter with sparse-sparse multiplication when `dense=True` + ### Version 0.9.0 * Support for scipy sparse arrays (introduced in scipy 1.11) diff --git a/setup.py b/setup.py index 4ea8940..efe7ae4 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages DISTNAME = 'sparse_dot_mkl' -VERSION = '0.9.0' +VERSION = '0.9.1' DESCRIPTION = "Intel MKL wrapper for sparse matrix multiplication" MAINTAINER = 'Chris Jackson' MAINTAINER_EMAIL = 'cj59@nyu.edu' diff --git a/sparse_dot_mkl/_sparse_sparse.py b/sparse_dot_mkl/_sparse_sparse.py index e34cfae..5fd70ab 100644 --- a/sparse_dot_mkl/_sparse_sparse.py +++ b/sparse_dot_mkl/_sparse_sparse.py @@ -2,7 +2,6 @@ MKL, sparse_matrix_t, _create_mkl_sparse, - debug_print, debug_timer, _export_mkl, _order_mkl_handle, @@ -13,10 +12,10 @@ _is_allowed_sparse_format, _check_return_value, _output_dtypes, - sparse_output_type + sparse_output_type, + _out_matrix ) import ctypes as _ctypes -import numpy as np def _matmul_mkl(sp_ref_a, sp_ref_b): @@ -59,6 +58,7 @@ def _matmul_mkl_dense( sp_ref_b, output_shape, double_precision, + out=None, complex_type=False ): """ @@ -81,9 +81,10 @@ def _matmul_mkl_dense( """ # Allocate an array for outputs - output_arr = np.empty( + output_arr = _out_matrix( output_shape, - dtype=_output_dtypes[(double_precision, complex_type)] + _output_dtypes[(double_precision, complex_type)], + out_arr=out ) # Set types @@ -110,7 +111,8 @@ def _sparse_dot_sparse( matrix_b, cast=False, reorder_output=False, - dense=False + dense=False, + out=None ): """ Multiply together two scipy sparse matrixes using the intel @@ -153,30 +155,31 @@ def _sparse_dot_sparse( "COO is not supported" ) - default_output, output_type = sparse_output_type(matrix_a) + if out is not None and not dense: + raise ValueError( + "out argument cannot be used with sparse (dot) sparse " + "matrix multiplication unless dense=True" + ) - # Override output if dense flag is set - default_output = default_output if not dense else np.zeros + default_output, output_type = sparse_output_type(matrix_a) # Check to make sure that this multiplication can work and check dtypes _sanity_check(matrix_a, matrix_b) # Check for edge condition inputs which result in empty outputs if _empty_output_check(matrix_a, matrix_b): - debug_print( - "Skipping multiplication because A (dot) B must yield an " - "empty matrix" - ) - if matrix_a.dtype != matrix_b.dtype or matrix_a.dtype != np.float32: - final_dtype = np.float64 + if dense: + return _out_matrix( + (matrix_a.shape[0], matrix_b.shape[1]), + matrix_a.dtype, + out_arr=out + ) else: - final_dtype = np.float32 - - return default_output( - (matrix_a.shape[0], matrix_b.shape[1]), - dtype=final_dtype - ) + return default_output( + (matrix_a.shape[0], matrix_b.shape[1]), + dtype=matrix_a.dtype + ) # Check dtypes matrix_a, matrix_b = _type_check(matrix_a, matrix_b, cast=cast) @@ -191,18 +194,21 @@ def _sparse_dot_sparse( # Call spmmd for dense output directly if the dense flag is set if dense: - dense_arr = _matmul_mkl_dense( - mkl_a, - mkl_b, - (matrix_a.shape[0], matrix_b.shape[1]), - a_dbl or b_dbl, - complex_type=a_cplx, - ) - debug_timer("Multiplied matrices", t) + try: + dense_arr = _matmul_mkl_dense( + mkl_a, + mkl_b, + (matrix_a.shape[0], matrix_b.shape[1]), + a_dbl or b_dbl, + complex_type=a_cplx, + out=out + ) + finally: + _destroy_mkl_handle(mkl_a) + _destroy_mkl_handle(mkl_b) - _destroy_mkl_handle(mkl_a) - _destroy_mkl_handle(mkl_b) + debug_timer("Multiplied matrices", t) return dense_arr diff --git a/sparse_dot_mkl/sparse_dot.py b/sparse_dot_mkl/sparse_dot.py index 0221a29..450e30a 100644 --- a/sparse_dot_mkl/sparse_dot.py +++ b/sparse_dot_mkl/sparse_dot.py @@ -82,19 +82,14 @@ def dot_product_mkl( )) # SPARSE (DOT) SPARSE # - if num_sparse == 2 and out is not None: - raise ValueError( - "out argument cannot be used with sparse (dot) sparse " - "matrix multiplication" - ) - - elif num_sparse == 2: + if num_sparse == 2: return _sds( matrix_a, matrix_b, cast=cast, reorder_output=reorder_output, - dense=dense + dense=dense, + out=out ) # SPARSE (DOT) VECTOR # diff --git a/sparse_dot_mkl/tests/test_sparse_sparse.py b/sparse_dot_mkl/tests/test_sparse_sparse.py index 90dd836..73a47a7 100644 --- a/sparse_dot_mkl/tests/test_sparse_sparse.py +++ b/sparse_dot_mkl/tests/test_sparse_sparse.py @@ -279,10 +279,23 @@ def test_float32_CSR(self): ) mat3_np = np.dot(d1.toarray(), d2.toarray()) - mat3 = dot_product_mkl(d1, d2, copy=True, dense=True) + mat3 = dot_product_mkl(d1, d2, dense=True) np_almost_equal(mat3_np, mat3) + def test_float32_CSR_out(self): + d1, d2 = self.mat1.astype(self.single_dtype), self.mat2.astype( + self.single_dtype + ) + mat3_np = np.dot(d1.toarray(), d2.toarray()) + out_arr = np.empty_like(mat3_np) + mat3 = dot_product_mkl(d1, d2, dense=True, out=out_arr) + + np_almost_equal(mat3_np, out_arr) + self.assertEqual( + id(mat3), id(out_arr) + ) + def test_float32_CSC(self): d1, d2 = ( self.mat1.astype(self.single_dtype).tocsc(), @@ -290,45 +303,123 @@ def test_float32_CSC(self): ) mat3_np = np.dot(d1.toarray(), d2.toarray()) - mat3 = dot_product_mkl(d1, d2, copy=True, dense=True) + mat3 = dot_product_mkl(d1, d2, dense=True) np_almost_equal(mat3_np, mat3) + def test_float32_CSC_out(self): + d1, d2 = ( + self.mat1.astype(self.single_dtype).tocsc(), + self.mat2.astype(self.single_dtype).tocsc(), + ) + mat3_np = np.dot(d1.toarray(), d2.toarray()) + out_arr = np.empty_like(mat3_np) + mat3 = dot_product_mkl(d1, d2, dense=True, out=out_arr) + + np_almost_equal(mat3_np, out_arr) + self.assertEqual( + id(mat3), id(out_arr) + ) + def test_float64_CSR(self): d1, d2 = self.mat1, self.mat2 mat3_np = np.dot(d1.toarray(), d2.toarray()) - mat3 = dot_product_mkl(d1, d2, copy=True, dense=True) + mat3 = dot_product_mkl(d1, d2, dense=True) np_almost_equal(mat3_np, mat3) + def test_float64_CSR_out(self): + d1, d2 = self.mat1, self.mat2 + mat3_np = np.dot(d1.toarray(), d2.toarray()) + out_arr = np.empty_like(mat3_np) + mat3 = dot_product_mkl(d1, d2, dense=True, out=out_arr) + + np_almost_equal(mat3_np, out_arr) + self.assertEqual( + id(mat3), id(out_arr) + ) + def test_float64_CSC(self): d1, d2 = self.mat1.tocsc(), self.mat2.tocsc() mat3_np = np.dot(d1.toarray(), d2.toarray()) - mat3 = dot_product_mkl(d1, d2, copy=True, dense=True) + mat3 = dot_product_mkl(d1, d2, dense=True) np_almost_equal(mat3_np, mat3) + def test_float64_CSC_out(self): + d1, d2 = self.mat1.tocsc(), self.mat2.tocsc() + mat3_np = np.dot(d1.toarray(), d2.toarray()) + out_arr = np.empty_like(mat3_np) + mat3 = dot_product_mkl(d1, d2, dense=True, out=out_arr) + + np_almost_equal(mat3_np, out_arr) + self.assertEqual( + id(mat3), id(out_arr) + ) + def test_float64_BSR(self): d1, d2 = self.mat1.tobsr(blocksize=(10, 10)), self.mat2.tobsr( blocksize=(10, 10) ) mat3_np = np.dot(d1.toarray(), d2.toarray()) - mat3 = dot_product_mkl(d1, d2, copy=True, dense=True) + mat3 = dot_product_mkl(d1, d2, dense=True) np_almost_equal(mat3_np, mat3) + def test_float64_BSR_out(self): + d1, d2 = self.mat1.tobsr(blocksize=(10, 10)), self.mat2.tobsr( + blocksize=(10, 10) + ) + mat3_np = np.dot(d1.toarray(), d2.toarray()) + out_arr = np.empty_like(mat3_np) + mat3 = dot_product_mkl(d1, d2, dense=True, out=out_arr) + + np_almost_equal(mat3_np, out_arr) + self.assertEqual( + id(mat3), id(out_arr) + ) + def test_float32_BSR(self): d1 = self.mat1.astype(self.single_dtype).tobsr(blocksize=(10, 10)) d2 = self.mat2.astype(self.single_dtype).tobsr(blocksize=(10, 10)) mat3_np = np.dot(d1.toarray(), d2.toarray()) - mat3 = dot_product_mkl(d1, d2, copy=True, dense=True) + mat3 = dot_product_mkl(d1, d2, dense=True) np_almost_equal(mat3_np, mat3) + def test_float32_BSR_out(self): + d1 = self.mat1.astype(self.single_dtype).tobsr(blocksize=(10, 10)) + d2 = self.mat2.astype(self.single_dtype).tobsr(blocksize=(10, 10)) + mat3_np = np.dot(d1.toarray(), d2.toarray()) + out_arr = np.empty_like(mat3_np) + mat3 = dot_product_mkl(d1, d2, dense=True, out=out_arr) + + np_almost_equal(mat3_np, out_arr) + self.assertEqual( + id(mat3), id(out_arr) + ) + + def test_CSR_bad_outs(self): + d1, d2 = self.mat1, self.mat2 + mat3_np = np.dot(d1.toarray(), d2.toarray()) + + out_arr = np.empty_like(mat3_np, dtype=np.float32) + + with self.assertRaises(ValueError): + _ = dot_product_mkl(d1, d2, dense=True, out=out_arr) + + out_arr = np.empty_like(mat3_np, order='F') + with self.assertRaises(ValueError): + _ = dot_product_mkl(d1, d2, dense=True, out=out_arr) + + out_arr = np.empty((1, 1), dtype=np.float64) + with self.assertRaises(ValueError): + _ = dot_product_mkl(d1, d2, dense=True, out=out_arr) + class _ComplexMixin: double_dtype = np.cdouble