diff --git a/h5sparse/h5sparse.py b/h5sparse/h5sparse.py index b06022d..9699cf0 100644 --- a/h5sparse/h5sparse.py +++ b/h5sparse/h5sparse.py @@ -10,10 +10,11 @@ 'coo': ss.coo_matrix, } -indptr_dtype = np.int64 +indptr_dtype = np.int64 indices_dtype = np.int64 -row_dtype = np.int64 -col_dtype = np.int64 +row_dtype = np.int64 +col_dtype = np.int64 + def get_format_str(data): for format_str, format_class in six.viewitems(FORMAT_DICT): @@ -30,6 +31,7 @@ def get_format_class(format_str): .format(format_str)) return format_class + def is_compressed_format(format_str): return format_str in ('csc', 'csr') @@ -52,27 +54,27 @@ def __getitem__(self, key): raise ValueError("Unexpected item type.") def create_dataset_compressed(self, name, sparse_format, shape, data, indices, indptr, - dtype, **kwargs): + dtype, **kwargs): """Create a dataset in csc or csr format""" assert sparse_format in ("csc", "csr") group = self.create_group(name) group.attrs['h5sparse_format'] = sparse_format - group.attrs['h5sparse_shape'] = shape - group.create_dataset('data', data=data, dtype=dtype, **kwargs) + group.attrs['h5sparse_shape'] = shape + group.create_dataset('data', data=data, dtype=dtype, **kwargs) group.create_dataset('indices', data=indices, dtype=indices_dtype, **kwargs) - group.create_dataset('indptr', data=indptr, dtype=indptr_dtype, **kwargs) + group.create_dataset('indptr', data=indptr, dtype=indptr_dtype, **kwargs) return group def create_dataset_coo(self, name, sparse_format, shape, data, row, col, - dtype, **kwargs): + dtype, **kwargs): """Create a dataset in csc or csr format""" assert sparse_format == "coo" group = self.create_group(name) group.attrs['h5sparse_format'] = sparse_format - group.attrs['h5sparse_shape'] = shape - group.create_dataset('data', data=data, dtype=dtype, **kwargs) + group.attrs['h5sparse_shape'] = shape + group.create_dataset('data', data=data, dtype=dtype, **kwargs) group.create_dataset('row', data=row, dtype=row_dtype, **kwargs) group.create_dataset('col', data=col, dtype=col_dtype, **kwargs) return group @@ -81,13 +83,13 @@ def create_dataset_from_dataset(self, name, data, dtype, **kwargs): sparse_format = data.attrs['h5sparse_format'] if (is_compressed_format(sparse_format)): group = self.create_dataset_compressed(name, - data.attrs['h5sparse_format'], - data.attrs['h5sparse_shape'], - data.h5py_group['data'], - data.h5py_group['indices'], - data.h5py_group['indptr'], - dtype, - **kwargs) + data.attrs['h5sparse_format'], + data.attrs['h5sparse_shape'], + data.h5py_group['data'], + data.h5py_group['indices'], + data.h5py_group['indptr'], + dtype, + **kwargs) else: group = self.create_dataset_coo(name, data.attrs['h5sparse_format'], @@ -100,7 +102,7 @@ def create_dataset_from_dataset(self, name, data, dtype, **kwargs): return group def create_dataset_from_scipy(self, name, data, dtype, **kwargs): - sparse_format = get_format_str(data) + sparse_format = get_format_str(data) if (is_compressed_format(sparse_format)): group = self.create_dataset_compressed(name, sparse_format, @@ -112,13 +114,13 @@ def create_dataset_from_scipy(self, name, data, dtype, **kwargs): **kwargs) else: group = self.create_dataset_coo(name, - sparse_format, - data.shape, - data.data, - data.row, - data.col, - dtype, - **kwargs) + sparse_format, + data.shape, + data.data, + data.row, + data.col, + dtype, + **kwargs) return group def create_dataset(self, name, shape=None, dtype=None, data=None, @@ -137,9 +139,9 @@ def create_dataset(self, name, shape=None, dtype=None, data=None, format_class = get_format_class(sparse_format) data = format_class(data) group = self.create_dataset_from_scipy(name, - data, - dtype, - **kwargs) + data, + dtype, + **kwargs) elif data is None and sparse_format is not None: format_class = get_format_class(sparse_format) if dtype is None: @@ -148,9 +150,9 @@ def create_dataset(self, name, shape=None, dtype=None, data=None, shape = (0, 0) data = format_class(shape, dtype=dtype) group = self.create_dataset_from_scipy(name, - data, - dtype, - **kwargs) + data, + dtype, + **kwargs) else: # forward the arguments to h5py assert sparse_format is None diff --git a/h5sparse/tests.py b/h5sparse/tests.py index e6ce78b..2a39f0a 100644 --- a/h5sparse/tests.py +++ b/h5sparse/tests.py @@ -8,10 +8,11 @@ import h5sparse + class AbstractTestH5Sparse(): def test_create_empty_sparse_dataset(self): h5_path = mkstemp(suffix=".h5")[1] - format_str = h5sparse.get_format_str(self.sparse_class((0,0))) + format_str = h5sparse.get_format_str(self.sparse_class((0, 0))) with h5sparse.File(h5_path, 'w') as h5f: h5f.create_dataset('sparse/matrix', sparse_format=format_str) with h5sparse.File(h5_path, 'r') as h5f: @@ -27,15 +28,14 @@ def test_create_empty_sparse_dataset(self): os.remove(h5_path) - def test_create_dataset_from_dataset(self): from_h5_path = mkstemp(suffix=".h5")[1] to_h5_path = mkstemp(suffix=".h5")[1] sparse_matrix = self.sparse_class([[0, 1, 0], - [0, 0, 1], - [0, 0, 0], - [1, 1, 0]], - dtype=np.float64) + [0, 0, 1], + [0, 0, 0], + [1, 1, 0]], + dtype=np.float64) with h5sparse.File(from_h5_path, 'w') as from_h5f: from_dset = from_h5f.create_dataset('sparse/matrix', data=sparse_matrix) @@ -57,7 +57,6 @@ def test_numpy_array(self): np.testing.assert_equal(h5f['matrix'][()], matrix) os.remove(h5_path) - def test_bytestring(self): h5_path = mkstemp(suffix=".h5")[1] strings = [str(i) for i in range(100)] @@ -68,7 +67,6 @@ def test_bytestring(self): assert strings == json.loads(h5f['strings'][()].decode('utf8')) os.remove(h5_path) - def test_create_empty_dataset(self): h5_path = mkstemp(suffix=".h5")[1] with h5sparse.File(h5_path, 'w') as h5f: @@ -84,10 +82,10 @@ class Test5HCSR(unittest.TestCase, AbstractTestH5Sparse): def test_create_and_read_dataset(self): h5_path = mkstemp(suffix=".h5")[1] sparse_matrix = self.sparse_class([[0, 1, 0], - [0, 0, 1], - [0, 0, 0], - [1, 1, 0]], - dtype=np.float64) + [0, 0, 1], + [0, 0, 0], + [1, 1, 0]], + dtype=np.float64) with h5sparse.File(h5_path, 'w') as h5f: h5f.create_dataset('sparse/matrix', data=sparse_matrix) with h5sparse.File(h5_path, 'r') as h5f: @@ -105,18 +103,18 @@ def test_create_and_read_dataset(self): def test_dataset_append(self): h5_path = mkstemp(suffix=".h5")[1] sparse_matrix = self.sparse_class([[0, 1, 0], - [0, 0, 1], - [0, 0, 0], - [1, 1, 0]], - dtype=np.float64) + [0, 0, 1], + [0, 0, 0], + [1, 1, 0]], + dtype=np.float64) to_append = self.sparse_class([[0, 1, 1], - [1, 0, 0]], - dtype=np.float64) + [1, 0, 0]], + dtype=np.float64) appended_matrix = ss.vstack((sparse_matrix, to_append)) with h5sparse.File(h5_path, 'w') as h5f: h5f.create_dataset('matrix', data=sparse_matrix, chunks=(100000,), - maxshape=(None,)) + maxshape=(None,)) h5f['matrix'].append(to_append) assert (h5f['matrix'][()] != appended_matrix).size == 0 @@ -125,10 +123,10 @@ def test_dataset_append(self): def test_create_dataset_with_format_change(self): h5_path = mkstemp(suffix=".h5")[1] sparse_matrix = self.sparse_class([[0, 1, 0, 1], - [0, 0, 1, 0], - [0, 0, 0, 1], - [1, 1, 0, 1]], - dtype=np.float64) + [0, 0, 1, 0], + [0, 0, 0, 1], + [1, 1, 0, 1]], + dtype=np.float64) with h5sparse.File(h5_path, 'w') as h5f: h5f.create_dataset('sparse/matrix', data=sparse_matrix, sparse_format='csc') with h5sparse.File(h5_path, 'r') as h5f: @@ -146,8 +144,10 @@ def test_create_dataset_with_format_change(self): os.remove(h5_path) + class Test5HCSC(unittest.TestCase, AbstractTestH5Sparse): sparse_class = ss.csc_matrix + class Test5HCOO(unittest.TestCase, AbstractTestH5Sparse): - sparse_class = ss.coo_matrix \ No newline at end of file + sparse_class = ss.coo_matrix