Skip to content

Commit

Permalink
support for multi-dim hyperchunking (#176)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jreadey authored Mar 27, 2024
1 parent b61fa0e commit 15c97f3
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 9 deletions.
23 changes: 16 additions & 7 deletions h5pyd/_apps/utillib.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,6 @@ def create_chunktable(dset, dset_dims, ctx):
chunks["file_uri"] = ctx["s3path"]
chunks["dims"] = chunk_dims
chunks["chunk_table"] = anon_dset.id.id
chunks["hyper_dims"] = dset.chunks

elif num_chunks <= 1 and dset.chunks is None:
# use contiguous mapping
Expand Down Expand Up @@ -1085,7 +1084,7 @@ def create_dataset(dobj, ctx):
np.prod(dobj.shape) > MIN_DSET_ELEMENTS_FOR_LINKING)):

chunks = create_chunktable(dobj, tgt_shape, ctx)
logging.info(f"using chunk layout: {chunks}")
logging.debug(f"using chunk layout for link option: {chunks}")

# use the source object layout if we are not using reference mapping
if chunks is None and dobj.shape is not None and len(dobj.shape) > 0:
Expand All @@ -1105,15 +1104,25 @@ def create_dataset(dobj, ctx):
new_chunks = [1,]
new_chunks.extend(chunks)
chunks = tuple(new_chunks)
logging.debug("extend chunks for preappend:", chunks)
else:
if isinstance(chunks, dict):
if "dims" in chunks:
chunk_dims = chunks["dims"]
if len(chunk_dims) == 1:
# currently hyperchunks only supported for 1d datasets
chunk_dims = expandChunk(chunk_dims, dobj.shape, dobj.dtype.itemsize)
logging.debug(f"expanded chunks: {chunk_dims}")
chunks["dims"] = chunk_dims
layout_class = chunks.get("class")
server_version = fout.serverver
if server_version and server_version.startswith("0.9"):

if layout_class == "H5D_CHUNKED_REF_INDIRECT":
logging.debug("expand chunks for hyperchunksing")
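# NOTE(review): hyperchunks were previously limited to 1d datasets; this branch no longer checks the rank - confirm expandChunk handles multi-dim chunk_dims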
logging.debug(f"hdf5 chunk dims: {chunk_dims}")
chunks["hyper_dims"] = chunk_dims
chunk_dims = expandChunk(chunk_dims, dobj.shape, dobj.dtype.itemsize)
logging.debug(f"expanded chunks: {chunk_dims}")
logging.debug(f"expanded chunks: {chunk_dims}")
chunks["dims"] = chunk_dims
logging.debug(f"updating for hyper_dims: {chunks}")
else:
# contiguous or compact, using dataset shape
pass
Expand Down
3 changes: 1 addition & 2 deletions h5pyd/_hl/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,6 @@ def make_new_dset(
layout=layout,
initializer=initializer,
initializer_opts=initializer_opts

)

if fillvalue is not None:
Expand Down Expand Up @@ -778,7 +777,7 @@ def _getVerboseInfo(self):
if "num_chunks" in rsp_json:
self._num_chunks = rsp_json["num_chunks"]
else:
# not avaailable yet, set to 0
# not available yet, set to 0
self._num_chunks = 0
if "allocated_size" in rsp_json:
self._allocated_size = rsp_json["allocated_size"]
Expand Down

0 comments on commit 15c97f3

Please sign in to comment.