From f97455d83bce9b25ec0b4fea061ed29e18992933 Mon Sep 17 00:00:00 2001 From: John Readey Date: Mon, 9 Oct 2023 18:38:59 -0700 Subject: [PATCH] fix async errors in getting dset layout (#266) --- hsds/async_lib.py | 20 +++++++++---- hsds/chunk_sn.py | 9 +++--- hsds/dset_dn.py | 8 +---- hsds/dset_sn.py | 15 +++++++--- hsds/util/dsetUtil.py | 60 ++++++++++++++++++------------------- tests/integ/dataset_test.py | 22 ++++++++++---- 6 files changed, 76 insertions(+), 58 deletions(-) diff --git a/hsds/async_lib.py b/hsds/async_lib.py index 92e788f5..9ebfa099 100755 --- a/hsds/async_lib.py +++ b/hsds/async_lib.py @@ -22,7 +22,7 @@ from .util.hdf5dtype import getItemSize, createDataType from .util.arrayUtil import getShapeDims, getNumElements, bytesToArray from .util.dsetUtil import getHyperslabSelection, getFilterOps, getChunkDims -from .util.dsetUtil import getDatasetLayoutClass, getDatasetCreationPropertyLayout +from .util.dsetUtil import getDatasetLayoutClass, getDatasetLayout from .util.storUtil import getStorKeys, putStorJSONObj, getStorJSONObj from .util.storUtil import deleteStorObj, getStorBytes, isStorObj @@ -79,9 +79,8 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): msg += f"for {dset_id}" log.warn(msg) return - layout = getDatasetCreationPropertyLayout(dset_json) msg = f"updateDatasetInfo - shape: {shape_json} type: {type_json} " - msg += f"item size: {item_size} layout: {layout}" + msg += f"item size: {item_size}" log.info(msg) dims = getShapeDims(shape_json) # returns None for HS_NULL dsets @@ -120,6 +119,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): linked_bytes = chunk_size * num_chunks num_linked_chunks = num_chunks elif layout_class == "H5D_CHUNKED_REF": + layout = getDatasetLayout(dset_json) if "chunks" not in layout: log.error("Expected to find 'chunks' key in H5D_CHUNKED_REF layout") return @@ -130,7 +130,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): linked_bytes += chunk_info[1] num_linked_chunks = len(chunks) elif layout_class == "H5D_CHUNKED_REF_INDIRECT": - log.debug("chunk ref indirect") + layout = getDatasetLayout(dset_json) if "chunk_table" not in layout: msg = "Expected to find chunk_table in dataset layout for " msg += f"{dset_id}" @@ -147,7 +147,7 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): msg += f"for {dset_id}" log.warn(msg) return - chunktable_layout = getDatasetCreationPropertyLayout(chunktable_json) + chunktable_layout = getDatasetLayout(chunktable_json) log.debug(f"chunktable_layout: {chunktable_layout}") if not isinstance(chunktable_layout, dict): log.warn(f"unexpected chunktable_layout: {chunktable_id}") @@ -234,7 +234,15 @@ async def updateDatasetInfo(app, dset_id, dataset_info, bucket=None): elif layout_class == "H5D_CHUNKED": msg = "updateDatasetInfo - no linked bytes/chunks for " msg += "H5D_CHUNKED layout" - log.debug(msg) + log.info(msg) + elif layout_class == "H5D_CONTIGUOUS": + msg = "updateDatasetInfo - no linked bytes/chunks for " + msg += "H5D_CONTIGUOUS layout" + log.info(msg) + elif layout_class == "H5D_COMPACT": + msg = "updateDatasetInfo - no linked bytes/chunks for " + msg += "H5D_COMPACT layout" + log.info(msg) else: log.error(f"unexpected chunk layout: {layout_class}") diff --git a/hsds/chunk_sn.py b/hsds/chunk_sn.py index 73f0e5ed..df4d7476 100755 --- a/hsds/chunk_sn.py +++ b/hsds/chunk_sn.py @@ -30,10 +30,9 @@ from .util.domainUtil import getDomainFromRequest, isValidDomain from .util.domainUtil import getBucketForDomain from .util.hdf5dtype import getItemSize, createDataType -from .util.dsetUtil import getSelectionList, isNullSpace, getDatasetLayoutClass +from .util.dsetUtil import getSelectionList, isNullSpace, getDatasetLayout, getDatasetLayoutClass from .util.dsetUtil import isExtensible, getSelectionPagination from .util.dsetUtil import getSelectionShape, getDsetMaxDims, getChunkLayout -from .util.dsetUtil import getDatasetCreationPropertyLayout from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId from .util.chunkUtil import getChunkIndex, getChunkSuffix from .util.chunkUtil import getChunkCoverage, getDataCoverage @@ -177,7 +176,7 @@ def getChunkItem(chunkid): return chunk_item if layout_class == "H5D_CONTIGUOUS_REF": - layout = getDatasetCreationPropertyLayout(dset_json) + layout = getDatasetLayout(dset_json) log.debug(f"cpl layout: {layout}") s3path = layout["file_uri"] s3size = layout["size"] @@ -229,7 +228,7 @@ def getChunkItem(chunkid): chunk_item["s3offset"] = s3offset chunk_item["s3size"] = chunk_size elif layout_class == "H5D_CHUNKED_REF": - layout = getDatasetCreationPropertyLayout(dset_json) + layout = getDatasetLayout(dset_json) log.debug(f"cpl layout: {layout}") s3path = layout["file_uri"] chunks = layout["chunks"] @@ -248,7 +247,7 @@ def getChunkItem(chunkid): chunk_item["s3size"] = s3size elif layout_class == "H5D_CHUNKED_REF_INDIRECT": - layout = getDatasetCreationPropertyLayout(dset_json) + layout = getDatasetLayout(dset_json) log.debug(f"cpl layout: {layout}") if "chunk_table" not in layout: log.error("Expected to find chunk_table in dataset layout") diff --git a/hsds/dset_dn.py b/hsds/dset_dn.py index beac5a1b..e250bde9 100755 --- a/hsds/dset_dn.py +++ b/hsds/dset_dn.py @@ -273,13 +273,7 @@ async def PUT_DatasetShape(request): # e.g. another client has already extended the shape since the SN # verified it shape_update = body["shape"] - log.debug("shape_update: {}".format(shape_update)) - - for i in range(len(dims)): - if shape_update[i] < dims[i]: - msg = "Dataspace can not be made smaller" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + log.debug(f"shape_update: {shape_update}") # Update the shape! for i in range(len(dims)): diff --git a/hsds/dset_sn.py b/hsds/dset_sn.py index e9e8729c..8f69a9fc 100755 --- a/hsds/dset_sn.py +++ b/hsds/dset_sn.py @@ -621,15 +621,22 @@ async def PUT_DatasetShape(request): msg = "Extent of update shape request does not match dataset sahpe" log.warn(msg) raise HTTPBadRequest(reason=msg) + shape_reduction = False for i in range(rank): if shape_update and shape_update[i] < dims[i]: - msg = "Dataspace can not be made smaller" - log.warn(msg) - raise HTTPBadRequest(reason=msg) + shape_reduction = True + if shape_update[i] < 0: + msg = "Extension dimension can not be made less than zero" + log.warn(msg) + raise HTTPBadRequest(reason=msg) if shape_update and maxdims[i] != 0 and shape_update[i] > maxdims[i]: - msg = "Database can not be extended past max extent" + msg = "Extension dimension can not be extended past max extent" log.warn(msg) raise HTTPConflict() + if shape_reduction: + log.info("Shape extent reduced for dataset") + # TBD - ensure any chunks that are outside the new shape region are + # deleted if extend_dim < 0 or extend_dim >= rank: msg = "Extension dimension must be less than rank and non-negative" log.warn(msg) diff --git a/hsds/util/dsetUtil.py b/hsds/util/dsetUtil.py index 9df8c0fe..da8dbdff 100644 --- a/hsds/util/dsetUtil.py +++ b/hsds/util/dsetUtil.py @@ -855,48 +855,46 @@ def isExtensible(dims, maxdims): return False -def getDatasetCreationPropertyLayout(dset_json): - """ return layout json from creation property list """ - cpl = None +def getDatasetLayout(dset_json): + """ Return layout json from creation property list or layout json """ + layout = None + if "creationProperties" in dset_json: cp = dset_json["creationProperties"] if "layout" in cp: - cpl = cp["layout"] - if not cpl and "layout" in dset_json: - # fallback to dset_json layout - cpl = dset_json["layout"] - if cpl is None: - log.warn(f"no layout found for {dset_json}") - return cpl + layout = cp["layout"] + if not layout and "layout" in dset_json: + layout = dset_json["layout"] + if not layout: + log.warn(f"no layout for {dset_json}") + return layout def getDatasetLayoutClass(dset_json): """ return layout class """ - chunk_layout = None - cp_layout = getDatasetCreationPropertyLayout(dset_json) - # check creation properties first - if cp_layout: - if "class" in cp_layout: - chunk_layout = cp_layout["class"] - # otherwise, get class prop from layout - if chunk_layout is None and "layout" in dset_json: - layout = dset_json["layout"] - if "class" in layout: - chunk_layout = layout["class"] - return chunk_layout + layout = getDatasetLayout(dset_json) + if layout and "class" in layout: + layout_class = layout["class"] + else: + layout_class = None + return layout_class def getChunkDims(dset_json): """ get chunk shape for given dset_json """ - cpl = getDatasetCreationPropertyLayout(dset_json) - if cpl and "dims" in cpl: - return cpl["dims"] - # otherwise, check the 'layout' key - if 'layout' in dset_json: - layout = dset_json["layout"] - if "dims" in layout: - return layout["dims"] - return None # not found + + layout = getDatasetLayout(dset_json) + if layout and "dims" in layout: + return layout["dims"] + else: + # H5D_COMPACT and H5D_CONTIGUOUS will not have a dims key + # Check the layout dict in dset_json to see if it's + # defined there + if "layout" in dset_json: + layout = dset_json["layout"] + if "dims" in layout: + return layout["dims"] + return None class ItemIterator: diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py index 9a729e55..f42dcdb3 100755 --- a/tests/integ/dataset_test.py +++ b/tests/integ/dataset_test.py @@ -681,11 +681,23 @@ def testResizableDataset(self): self.assertEqual(rsp.status_code, 201) rspJson = json.loads(rsp.text) + # verify updated-shape using the GET shape request + rsp = self.session.get(req, headers=headers) + self.assertEqual(rsp.status_code, 200) + rspJson = json.loads(rsp.text) + self.assertTrue("shape" in rspJson) + shape = rspJson["shape"] + self.assertEqual(shape["class"], "H5S_SIMPLE") + self.assertEqual(len(shape["dims"]), 1) + self.assertEqual(shape["dims"][0], 15) # increased to 15 + self.assertTrue("maxdims" in shape) + self.assertEqual(shape["maxdims"][0], 20) + # reduce the size to 5 elements - # payload = {"shape": 5} - # rsp = self.session.put(req, data=json.dumps(payload), headers=headers) - # self.assertEqual(rsp.status_code, 201) - # rspJson = json.loads(rsp.text) + payload = {"shape": 5} + rsp = self.session.put(req, data=json.dumps(payload), headers=headers) + self.assertEqual(rsp.status_code, 201) + rspJson = json.loads(rsp.text) # verify updated-shape using the GET shape request rsp = self.session.get(req, headers=headers) @@ -695,7 +707,7 @@ def testResizableDataset(self): shape = rspJson["shape"] self.assertEqual(shape["class"], "H5S_SIMPLE") self.assertEqual(len(shape["dims"]), 1) - self.assertEqual(shape["dims"][0], 15) # increased to 15 + self.assertEqual(shape["dims"][0], 5) # decreased to 5 self.assertTrue("maxdims" in shape) self.assertEqual(shape["maxdims"][0], 20)