diff --git a/hsds/datanode_lib.py b/hsds/datanode_lib.py index 0045dbc0..7989d2cd 100644 --- a/hsds/datanode_lib.py +++ b/hsds/datanode_lib.py @@ -956,17 +956,6 @@ async def get_chunk( # validate arguments if s3path: - """ - if s3size == 0 and s3offset == 0: - # uninitialized chunk ref - msg = f"reference chunk not set for id: {chunk_id}, returning 404" - log.info(msg) - raise HTTPNotFound() # not found return 404 - if s3size == 0: - msg = f"Unexpected get_chunk parameter - s3path: {s3path} with size 0" - log.error(msg) - raise HTTPInternalServerError() - """ if bucket: msg = "get_chunk - bucket arg should not be used with s3path" log.error(msg) diff --git a/hsds/util/chunkUtil.py b/hsds/util/chunkUtil.py index 487af392..c6ce9655 100644 --- a/hsds/util/chunkUtil.py +++ b/hsds/util/chunkUtil.py @@ -8,6 +8,63 @@ PRIMES = [29, 31, 37, 41, 43, 47, 53, 59, 61, 67] # for chunk partitioning +# compare two numpy arrays. +# return true if the same (exclusive of null vs. empty array) +# false otherwise + + +def ndarray_compare(arr1, arr2): + if not isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): + if not isinstance(arr1, np.void) and not isinstance(arr2, np.void): + return arr1 == arr2 + if isinstance(arr1, np.void) and not isinstance(arr2, np.void): + if arr1.size == 0 and not arr2: + return True + else: + return False + if not isinstance(arr1, np.void) and isinstance(arr2, np.void): + if not arr1 and arr2.size == 0: + return True + else: + return False + # both np.voids + if arr1.size != arr2.size: + return False + + if len(arr1) != len(arr2): + return False + + for i in range(len(arr1)): + if not ndarray_compare(arr1[i], arr2[i]): + return False + return True + + if isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): + # same only if arr1 is empty and arr2 is 0 + if arr1.size == 0 and not arr2: + return True + else: + return False + if not isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray): + # same only if arr1 is empty and 
arr2 size is 0 + if not arr1 and arr2.size == 0: + return True + else: + return False + + # two ndarrays... + if arr1.shape != arr2.shape: + return False + if arr1.dtype != arr2.dtype: + return False + nElements = np.prod(arr1.shape) + arr1 = arr1.reshape((nElements,)) + arr2 = arr2.reshape((nElements,)) + for i in range(nElements): + if not ndarray_compare(arr1[i], arr2[i]): + return False + return True + """ Convert list that may contain bytes type elements to list of string elements @@ -803,27 +860,38 @@ def chunkWriteSelection(chunk_arr=None, slices=None, data=None): """ Write data for requested chunk and selection """ - log.info("chunkWriteSelection") dims = chunk_arr.shape rank = len(dims) if rank == 0: msg = "No dimension passed to chunkReadSelection" + log.error(msg) raise ValueError(msg) if len(slices) != rank: msg = "Selection rank does not match dataset rank" + log.error(msg) raise ValueError(msg) if len(data.shape) != rank: msg = "Input arr does not match dataset rank" + log.error(msg) raise ValueError(msg) updated = False # check if the new data modifies the array or not - if not np.array_equal(chunk_arr[slices], data): - # update chunk array - chunk_arr[slices] = data - updated = True + # TBD - is this worth the cost of comparing two arrays element by element? 
+ try: + if not ndarray_compare(chunk_arr[slices], data): + # if not np.array_equal(chunk_arr[slices], data): + # update chunk array + chunk_arr[slices] = data + updated = True + except ValueError as ve: + msg = f"ndarray_compare ValueError, chunk_arr[{slices}]: {chunk_arr[slices]} " + msg += f"data: {data}, data type: {type(data)} ve: {ve}" + log.error(msg) + raise + return updated diff --git a/hsds/util/storUtil.py b/hsds/util/storUtil.py index a7e5e524..7c832945 100644 --- a/hsds/util/storUtil.py +++ b/hsds/util/storUtil.py @@ -361,18 +361,18 @@ async def getStorBytes(app, # uncompress chunks within the fetched data and store to # chunk bytes if not h5_size: - log.error("getStoreBytes - h5_size not set") + log.error("getStorBytes - h5_size not set") raise HTTPInternalServerError() if not chunk_bytes: - log.error("getStoreBytes - chunk_bytes not set") + log.error("getStorBytes - chunk_bytes not set") raise HTTPInternalServerError() if len(chunk_locations) * h5_size < len(chunk_bytes): - log.error(f"getStoreBytes - invalid chunk_bytes length: {len(chunk_bytes)}") + log.error(f"getStorBytes - invalid chunk_bytes length: {len(chunk_bytes)}") for chunk_location in chunk_locations: log.debug(f"getStoreBytes - processing chunk_location: {chunk_location}") n = chunk_location.offset - offset if n < 0: - log.warn(f"getStoreBytes - unexpected offset for chunk_location: {chunk_location}") + log.warn(f"getStorBytes - unexpected offset for chunk_location: {chunk_location}") continue m = n + chunk_location.length log.debug(f"getStorBytes - extracting chunk from data[{n}:{m}]") diff --git a/tests/integ/query_test.py b/tests/integ/query_test.py index 044ca240..9495390c 100644 --- a/tests/integ/query_test.py +++ b/tests/integ/query_test.py @@ -502,9 +502,8 @@ def testPutQuery(self): params["Limit"] = 1 update_value = {"open": 999} payload = {"value": update_value} - rsp = self.session.put( - req, params=params, data=json.dumps(update_value), headers=headers - ) + + rsp = 
self.session.put(req, params=params, data=json.dumps(update_value), headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) self.assertTrue("hrefs" in rspJson) @@ -537,9 +536,7 @@ def testPutQuery(self): params = {"query": "open == 0"} update_value = {"open": -999} payload = {"value": update_value} - rsp = self.session.put( - req, params=params, data=json.dumps(update_value), headers=headers - ) + rsp = self.session.put(req, params=params, data=json.dumps(update_value), headers=headers) self.assertEqual(rsp.status_code, 200) rspJson = json.loads(rsp.text) self.assertTrue("value" in rspJson) diff --git a/tests/integ/vlen_test.py b/tests/integ/vlen_test.py index 0f16cb00..61cafac4 100755 --- a/tests/integ/vlen_test.py +++ b/tests/integ/vlen_test.py @@ -17,6 +17,7 @@ sys.path.append("../..") from hsds.util.arrayUtil import arrayToBytes, bytesToArray +from hsds.util.chunkUtil import ndarray_compare from hsds.util.hdf5dtype import createDataType @@ -24,6 +25,7 @@ class VlenTest(unittest.TestCase): def __init__(self, *args, **kwargs): super(VlenTest, self).__init__(*args, **kwargs) self.base_domain = helper.getTestDomainName(self.__class__.__name__) + print(self.base_domain) helper.setupDomain(self.base_domain) self.endpoint = helper.getEndpoint() @@ -77,9 +79,7 @@ def testPutVLenInt(self): # write values to dataset data = [ - [ - 1, - ], + [1,], [1, 2], [1, 2, 3], [1, 2, 3, 4], @@ -97,6 +97,8 @@ def testPutVLenInt(self): self.assertTrue("value" in rspJson) value = rspJson["value"] self.assertEqual(len(value), 4) + print("value:", value) + print("data:", data) for i in range(4): self.assertEqual(value[i], data[i]) @@ -221,6 +223,9 @@ def testPutVLenIntBinary(self): for i in range(count): self.assertEqual(value[i], test_values[i]) + print("data:", data) + print("arr:", arr) + # read back a selection params = {"select": "[2:3]"} rsp = self.session.get(req, headers=headers, params=params) @@ -291,6 +296,8 @@ def testPutVLen2DInt(self): 
self.assertTrue("value" in rspJson) value = rspJson["value"] self.assertEqual(len(value), nrow) + print("value:", value) + print("data:", data) for i in range(nrow): for j in range(ncol): diff --git a/tests/unit/array_util_test.py b/tests/unit/array_util_test.py index 40d4a9dd..77125673 100644 --- a/tests/unit/array_util_test.py +++ b/tests/unit/array_util_test.py @@ -29,59 +29,7 @@ from hsds.util.hdf5dtype import special_dtype from hsds.util.hdf5dtype import check_dtype from hsds.util.hdf5dtype import createDataType - -# compare two numpy arrays. -# return true if the same (exclusive of null vs. empty array) -# false otherwise - - -def ndarray_compare(arr1, arr2): - if not isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): - if not isinstance(arr1, np.void) and not isinstance(arr2, np.void): - return arr1 == arr2 - if isinstance(arr1, np.void) and not isinstance(arr2, np.void): - if arr1.size == 0 and not arr2: - return True - else: - return False - if not isinstance(arr1, np.void) and isinstance(arr2, np.void): - if not arr1 and arr2.size == 0: - return True - else: - return False - # both np.voids - if arr1.size != arr2.size: - return False - for i in range(arr1.size): - if not ndarray_compare(arr1[i], arr2[i]): - return False - return True - - if isinstance(arr1, np.ndarray) and not isinstance(arr2, np.ndarray): - # same only if arr1 is empty and arr2 is 0 - if arr1.size == 0 and not arr2: - return True - else: - return False - if not isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray): - # same only if arr1 is empty and arr2 is 0 - if not arr1 and not arr2.size == 0: - return True - else: - return False - - # two ndarrays... 
- if arr1.shape != arr2.shape: - return False - if arr2.dtype != arr2.dtype: - return False - nElements = np.prod(arr1.shape) - arr1 = arr1.reshape((nElements,)) - arr2 = arr2.reshape((nElements,)) - for i in range(nElements): - if not ndarray_compare(arr1[i], arr2[i]): - return False - return True +from hsds.util.chunkUtil import ndarray_compare class ArrayUtilTest(unittest.TestCase):