Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support for multi-dim hyperchunking #176

Merged
merged 1 commit into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions h5pyd/_apps/utillib.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,6 @@ def create_chunktable(dset, dset_dims, ctx):
chunks["file_uri"] = ctx["s3path"]
chunks["dims"] = chunk_dims
chunks["chunk_table"] = anon_dset.id.id
chunks["hyper_dims"] = dset.chunks

elif num_chunks <= 1 and dset.chunks is None:
# use contiguous mapping
Expand Down Expand Up @@ -1085,7 +1084,7 @@ def create_dataset(dobj, ctx):
np.prod(dobj.shape) > MIN_DSET_ELEMENTS_FOR_LINKING)):

chunks = create_chunktable(dobj, tgt_shape, ctx)
logging.info(f"using chunk layout: {chunks}")
logging.debug(f"using chunk layout for link option: {chunks}")

# use the source object layout if we are not using reference mapping
if chunks is None and dobj.shape is not None and len(dobj.shape) > 0:
Expand All @@ -1105,15 +1104,25 @@ def create_dataset(dobj, ctx):
new_chunks = [1,]
new_chunks.extend(chunks)
chunks = tuple(new_chunks)
logging.debug("extend chunks for preappend:", chunks)
else:
if isinstance(chunks, dict):
if "dims" in chunks:
chunk_dims = chunks["dims"]
if len(chunk_dims) == 1:
# currently hyperchunks only supported for 1d datasets
chunk_dims = expandChunk(chunk_dims, dobj.shape, dobj.dtype.itemsize)
logging.debug(f"expanded chunks: {chunk_dims}")
chunks["dims"] = chunk_dims
layout_class = chunks.get("class")
server_version = fout.serverver
if server_version and server_version.startswith("0.9"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This version check should probably be implemented as a greater-than comparison on tuples so it doesn't break if HSDS switches to a 1.X.X version


if layout_class == "H5D_CHUNKED_REF_INDIRECT":
logging.debug("expand chunks for hyperchunking")
# currently hyperchunks only supported for 1d datasets
logging.debug(f"hdf5 chunk dims: {chunk_dims}")
chunks["hyper_dims"] = chunk_dims
chunk_dims = expandChunk(chunk_dims, dobj.shape, dobj.dtype.itemsize)
logging.debug(f"expanded chunks: {chunk_dims}")
logging.debug(f"expanded chunks: {chunk_dims}")
chunks["dims"] = chunk_dims
logging.debug(f"updating for hyper_dims: {chunks}")
else:
# contiguous or compact, using dataset shape
pass
Expand Down
3 changes: 1 addition & 2 deletions h5pyd/_hl/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,6 @@ def make_new_dset(
layout=layout,
initializer=initializer,
initializer_opts=initializer_opts

)

if fillvalue is not None:
Expand Down Expand Up @@ -778,7 +777,7 @@ def _getVerboseInfo(self):
if "num_chunks" in rsp_json:
self._num_chunks = rsp_json["num_chunks"]
else:
# not avaailable yet, set to 0
# not available yet, set to 0
self._num_chunks = 0
if "allocated_size" in rsp_json:
self._allocated_size = rsp_json["allocated_size"]
Expand Down
Loading