From f8dba0774104aeb950029d98b3ebdc0b5d942b66 Mon Sep 17 00:00:00 2001 From: jreadey Date: Tue, 26 Mar 2024 12:26:03 -0700 Subject: [PATCH] support for multi-dim hyperchunking --- h5pyd/_apps/utillib.py | 23 ++++++++++++++++------- h5pyd/_hl/dataset.py | 3 +-- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/h5pyd/_apps/utillib.py b/h5pyd/_apps/utillib.py index eb7a2dd..b10ed96 100755 --- a/h5pyd/_apps/utillib.py +++ b/h5pyd/_apps/utillib.py @@ -732,7 +732,6 @@ def create_chunktable(dset, dset_dims, ctx): chunks["file_uri"] = ctx["s3path"] chunks["dims"] = chunk_dims chunks["chunk_table"] = anon_dset.id.id - chunks["hyper_dims"] = dset.chunks elif num_chunks <= 1 and dset.chunks is None: # use contiguous mapping @@ -1085,7 +1084,7 @@ def create_dataset(dobj, ctx): np.prod(dobj.shape) > MIN_DSET_ELEMENTS_FOR_LINKING)): chunks = create_chunktable(dobj, tgt_shape, ctx) - logging.info(f"using chunk layout: {chunks}") + logging.debug(f"using chunk layout for link option: {chunks}") # use the source object layout if we are not using reference mapping if chunks is None and dobj.shape is not None and len(dobj.shape) > 0: @@ -1105,15 +1104,25 @@ def create_dataset(dobj, ctx): new_chunks = [1,] new_chunks.extend(chunks) chunks = tuple(new_chunks) + logging.debug("extend chunks for preappend:", chunks) else: if isinstance(chunks, dict): if "dims" in chunks: chunk_dims = chunks["dims"] - if len(chunk_dims) == 1: - # currently hyperchunks only supported for 1d datasets - chunk_dims = expandChunk(chunk_dims, dobj.shape, dobj.dtype.itemsize) - logging.debug(f"expanded chunks: {chunk_dims}") - chunks["dims"] = chunk_dims + layout_class = chunks.get("class") + server_version = fout.serverver + if server_version and server_version.startswith("0.9"): + + if layout_class == "H5D_CHUNKED_REF_INDIRECT": + logging.debug("expand chunks for hyperchunksing") + # currently hyperchunks only supported for 1d datasets + logging.debug(f"hdf5 chunk dims: {chunk_dims}") + chunks["hyper_dims"] = chunk_dims + chunk_dims = expandChunk(chunk_dims, dobj.shape, dobj.dtype.itemsize) + logging.debug(f"expanded chunks: {chunk_dims}") + logging.debug(f"expanded chunks: {chunk_dims}") + chunks["dims"] = chunk_dims + logging.debug(f"updating for hyper_dims: {chunks}") else: # contiguous or compact, using dataset shape pass diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py index 97439bb..eaa0296 100644 --- a/h5pyd/_hl/dataset.py +++ b/h5pyd/_hl/dataset.py @@ -255,7 +255,6 @@ def make_new_dset( layout=layout, initializer=initializer, initializer_opts=initializer_opts - ) if fillvalue is not None: @@ -778,7 +777,7 @@ def _getVerboseInfo(self): if "num_chunks" in rsp_json: self._num_chunks = rsp_json["num_chunks"] else: - # not avaailable yet, set to 0 + # not available yet, set to 0 self._num_chunks = 0 if "allocated_size" in rsp_json: self._allocated_size = rsp_json["allocated_size"]