Skip to content

Commit

Permalink
Fix writing base64 encoded vlen data to point selection (#325)
Browse files Browse the repository at this point in the history
* Fix vlen point write

* add test
  • Loading branch information
mattjala authored Mar 18, 2024
1 parent 2af9faa commit 9297e3a
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 7 deletions.
11 changes: 7 additions & 4 deletions hsds/chunk_crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,10 +569,14 @@ async def write_point_sel(
log.debug("POST chunk req: " + req)

num_points = len(point_list)
log.debug(f"write_point_sel - {num_points}")

# create a numpy array with point_data
data_arr = jsonToArray((num_points,), dset_dtype, point_data)

# if point data was already decoded from binary, don't decode again
if len(point_data) > 0 and isinstance(point_data[0], np.ndarray):
data_arr = point_data
else:
data_arr = jsonToArray((num_points,), dset_dtype, point_data)

# create a numpy array with the following type:
# (coord1, coord2, ...) | dset_dtype
Expand All @@ -592,8 +596,7 @@ async def write_point_sel(
elem = (tuple(point_list[i]), data_arr[i])
np_arr[i] = elem

# TBD - support VLEN data
post_data = np_arr.tobytes()
post_data = arrayToBytes(np_arr)

# pass dset_json as query params
params = {}
Expand Down
6 changes: 3 additions & 3 deletions hsds/chunk_sn.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ async def _getRequestData(request, http_streaming=True):
base64_data = body["value_base64"]
base64_data = base64_data.encode("ascii")
input_data = base64.b64decode(base64_data)
log.debug(f"input_data from base64: {input_data}")
else:
msg = "request has no value or value_base64 key in body"
log.warn(msg)
Expand Down Expand Up @@ -760,7 +761,6 @@ async def PUT_Value(request):
return resp

# regular PUT_Value processing without query update
binary_data = None
np_shape = [] # shape of incoming data
bc_shape = [] # shape of broadcast array (if element_count is set)
input_data = await _getRequestData(request, http_streaming=http_streaming)
Expand Down Expand Up @@ -832,12 +832,12 @@ async def PUT_Value(request):
# fixed item size
if len(input_data) % item_size != 0:
msg = f"Expected request size to be a multiple of {item_size}, "
msg += f"but {len(binary_data)} bytes received"
msg += f"but {len(input_data)} bytes received"
log.warn(msg)
raise HTTPBadRequest(reason=msg)

if len(input_data) // item_size != num_elements:
msg = f"expected {item_size * num_elements} bytes but got {len(binary_data)}"
msg = f"expected {item_size * num_elements} bytes but got {len(input_data)}"
log.warn(msg)
raise HTTPBadRequest(reason=msg)

Expand Down
121 changes: 121 additions & 0 deletions tests/integ/pointsel_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,6 +1154,127 @@ def testPut1DDatasetBinary(self):
else:
self.assertEqual(ret_values[i], 0)

def testPut1DDatasetVlenBinary(self):
    """Write base64-encoded binary vlen data via point selection on a 1D dataset.

    Creates a chunked vlen(int8) dataset, writes a 2-element sequence to each
    prime-numbered index with a PUT value request using ``value_base64``, then
    reads the data back twice and verifies only the prime indexes were set.
    """
    print("testPut1DDatasetVlenBinary", self.base_domain)

    headers = helper.getRequestHeaders(domain=self.base_domain)
    req = self.endpoint + "/"

    # Get root uuid
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)
    rspJson = json.loads(rsp.text)
    root_uuid = rspJson["root"]
    helper.validateId(root_uuid)

    # create dataset
    # pass in layout specification so that we can test selection across chunk boundaries
    vlen_type = {"class": "H5T_VLEN", "base": "H5T_STD_I8LE"}
    data = {"type": vlen_type, "shape": (100,)}
    data["creationProperties"] = {
        "layout": {
            "class": "H5D_CHUNKED",
            "dims": [20, ],
        }
    }

    req = self.endpoint + "/datasets"
    rsp = self.session.post(req, data=json.dumps(data), headers=headers)
    self.assertEqual(rsp.status_code, 201)
    rspJson = json.loads(rsp.text)
    dset_id = rspJson["id"]
    self.assertTrue(helper.validateId(dset_id))

    # link new dataset as 'dset'
    name = "dset"
    req = self.endpoint + "/groups/" + root_uuid + "/links/" + name
    payload = {"id": dset_id}
    rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
    self.assertEqual(rsp.status_code, 201)

    # point-selection targets: the primes below 100
    primes = [
        2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
        53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
    ]
    prime_set = set(primes)  # O(1) membership tests in the verify loops below

    # create binary array for the values
    # each element is a 4-byte little-endian length prefix followed by the
    # sequence payload; here every sequence is 2 one-byte values, so each
    # element occupies 6 bytes total
    bytes_per_element = 6
    num_bytes = len(primes) * bytes_per_element
    byte_array = bytearray(num_bytes)
    for i in range(len(primes)):
        idx = i * bytes_per_element
        byte_array[idx] = 2  # length of this sequence in bytes
        # bytes idx+1 .. idx+3 remain zero - high bytes of the 32-bit length
        byte_array[idx + 4] = 1  # values in this vlen sequence
        byte_array[idx + 5] = 2

    value_base64 = base64.b64encode(bytes(byte_array))
    value_base64 = value_base64.decode("ascii")

    # write to all the prime indexes
    payload = {"points": primes, "value_base64": value_base64}
    req = self.endpoint + "/datasets/" + dset_id + "/value"
    rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
    self.assertEqual(rsp.status_code, 200)

    # read back data
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)
    rspJson = json.loads(rsp.text)
    self.assertTrue("hrefs" in rspJson)
    self.assertTrue("value" in rspJson)

    # verify the correct elements got set
    value = rspJson["value"]
    for i in range(100):
        if i in prime_set:
            seq = value[i]
            self.assertEqual(seq[0], 1)
            self.assertEqual(seq[1], 2)
        else:
            self.assertEqual(value[i], 0)

    # read back data as one big hyperslab selection
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)
    rspJson = json.loads(rsp.text)
    self.assertTrue("hrefs" in rspJson)
    self.assertTrue("value" in rspJson)
    ret_values = rspJson["value"]
    self.assertEqual(len(ret_values), 100)
    for i in range(100):
        if i in prime_set:
            # BUGFIX: verify the values from THIS read (ret_values), not the
            # earlier read's `value` list, so the second read-back is checked
            seq = ret_values[i]
            self.assertEqual(seq[0], 1)
            self.assertEqual(seq[1], 2)
        else:
            self.assertEqual(ret_values[i], 0)

def testPut2DDatasetBinary(self):
# Test writing with point selection for 2d dataset with binary data
print("testPut2DDatasetBinary", self.base_domain)
Expand Down

0 comments on commit 9297e3a

Please sign in to comment.