Skip to content

Commit

Permalink
feature: (feature) Improved performance of memory-side hyperslab
Browse files Browse the repository at this point in the history
- Improved the performance of memory-side hyperslab selections when
  writing data by taking into account whether the selected data is
  contiguous and where the selection starts in memory.
- Fixed various hyperslab and point tests.
  • Loading branch information
jwsblokland committed Aug 31, 2023
1 parent 49c26ea commit 08a12d8
Show file tree
Hide file tree
Showing 2 changed files with 212 additions and 5 deletions.
178 changes: 177 additions & 1 deletion src/rest_vol_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,15 @@ static herr_t RV_convert_dataset_creation_properties_to_JSON(hid_t dcpl_id, char
static herr_t RV_convert_dataspace_selection_to_string(hid_t space_id, char **selection_string,
size_t *selection_string_len, hbool_t req_param);

/* Helper function for dataspace selection */
static hbool_t RV_dataspace_selection_is_contiguous(hid_t space_id);

/* Conversion function to convert one or more rest_obj_ref_t objects into a binary buffer for data transfer */
static herr_t RV_convert_obj_refs_to_buffer(const rv_obj_ref_t *ref_array, size_t ref_array_len,
char **buf_out, size_t *buf_out_len);
static herr_t RV_convert_buffer_to_obj_refs(char *ref_buf, size_t ref_buf_len, rv_obj_ref_t **buf_out,
size_t *buf_out_len);
static hssize_t RV_convert_start_to_offset(hid_t space_id);

/* H5Dscatter() callback for dataset reads */
static herr_t dataset_read_scatter_op(const void **src_buf, size_t *src_buf_bytes_used, void *op_data);
Expand Down Expand Up @@ -709,7 +713,7 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t mem_spac
upload_info uinfo;
H5T_class_t dtype_class;
curl_off_t write_len;
hssize_t mem_select_npoints, file_select_npoints;
hssize_t mem_select_npoints, file_select_npoints, offset;
hbool_t is_transfer_binary = FALSE;
htri_t is_variable_str;
size_t host_header_len = 0;
Expand Down Expand Up @@ -829,6 +833,20 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t mem_spac
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "memory datatype is invalid");

write_body_len = (size_t)file_select_npoints * dtype_size;
if (!RV_dataspace_selection_is_contiguous(mem_space_id[0])) {
if (NULL == (write_body = (char *)RV_malloc(write_body_len)))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
"can't allocate space for the 'write_body' values");
if (H5Dgather(mem_space_id[0], buf[0], mem_type_id[0], write_body_len, write_body, NULL,
write_body) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "can't gather data to write buffer");
buf[0] = write_body;
}
else {
if ((offset = RV_convert_start_to_offset(mem_space_id[0])) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "Unable to determine memory offset value");
buf[0] = buf[0] + offset * dtype_size;
}
} /* end if */
else {
if (H5T_STD_REF_OBJ == mem_type_id[0]) {
Expand Down Expand Up @@ -897,6 +915,8 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t mem_spac
printf("-> Base64-encoded data buffer: %s\n\n", base64_encoded_value);
#endif

if (write_body)
RV_free(write_body);
write_body_len = (strlen(fmt_string) - 4) + selection_body_len + value_body_len;
if (NULL == (write_body = RV_malloc(write_body_len + 1)))
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for write buffer");
Expand Down Expand Up @@ -3744,3 +3764,159 @@ dataset_read_scatter_op(const void **src_buf, size_t *src_buf_bytes_used, void *

return 0;
} /* end dataset_read_scatter_op() */

/*-------------------------------------------------------------------------
 * Function:    RV_dataspace_selection_is_contiguous
 *
 * Purpose:     Checks whether the selection in the specified dataspace
 *              covers one contiguous run of elements when the dataspace
 *              is laid out in row-major (C) order.
 *
 *              The answer is conservative: FALSE is returned whenever
 *              contiguity cannot be established (point selections,
 *              irregular hyperslabs, strided hyperslabs, or a failure
 *              while querying the dataspace).
 *
 * Return:      TRUE if the selection is contiguous, otherwise FALSE.
 *
 * Programmer:  Jan-Willem Blokland
 *              August, 2023
 */
static hbool_t
RV_dataspace_selection_is_contiguous(hid_t space_id)
{
    hbool_t  ret_value = true;
    hbool_t  whole     = true;
    hsize_t *dims      = NULL;
    hsize_t *start     = NULL;
    hsize_t *stride    = NULL;
    hsize_t *count     = NULL;
    hsize_t *block     = NULL;
    int      i;
    int      ndims;
    hssize_t npoints, nblocks;

    /*
     * Every error path below reports "not contiguous". The return type is a
     * boolean, so a negative error code would be indistinguishable from TRUE.
     */
    if ((npoints = H5Sget_select_npoints(space_id)) < 0)
        FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, false, "can't retrieve number of selected points");

    /* Zero or one selected element is trivially contiguous */
    if (npoints < 2)
        FUNC_GOTO_DONE(true);

    if ((ndims = H5Sget_simple_extent_ndims(space_id)) < 0)
        FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, false, "can't retrieve dataspace dimensionality");
    if (!ndims)
        FUNC_GOTO_DONE(true);

    switch (H5Sget_select_type(space_id)) {
        case H5S_SEL_HYPERSLABS:
            /*
             * Only a regular hyperslab can be described by a single
             * start/stride/count/block tuple. Anything else (or a failed
             * query) is treated as non-contiguous without raising an error.
             */
            if (H5Sis_regular_hyperslab(space_id) <= 0)
                FUNC_GOTO_DONE(false);

            if (NULL == (dims = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*dims))))
                FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, false,
                                "can't allocate space for dimension 'dims' values");

            if (H5Sget_simple_extent_dims(space_id, dims, NULL) < 0)
                FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, false, "can't get dataspace dimension size");

            if (NULL == (start = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*start))))
                FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, false,
                                "can't allocate space for hyperslab selection 'start' values");
            if (NULL == (stride = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*stride))))
                FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, false,
                                "can't allocate space for hyperslab selection 'stride' values");
            if (NULL == (count = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*count))))
                FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, false,
                                "can't allocate space for hyperslab selection 'count' values");
            if (NULL == (block = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*block))))
                FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, false,
                                "can't allocate space for hyperslab selection 'block' values");

            /* The assignment must be parenthesized so that nblocks receives the block count,
             * not the result of the '< 0' comparison. */
            if ((nblocks = H5Sget_select_hyper_nblocks(space_id)) < 0)
                FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, false, "can't get number of hyperslab blocks");

            if (H5Sget_regular_hyperslab(space_id, start, stride, count, block) < 0)
                FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, false, "can't get regular hyperslab selection");

            /* For contiguous, the stride should be 1. */
            for (i = 0; i < ndims; i++) {
                if (stride[i] > 1)
                    FUNC_GOTO_DONE(false);
            }

            if (nblocks > 1) {
                /*
                 * Multiple blocks: count should be 1 except for the last dimension (fastest).
                 * NOTE(review): this is stricter than the box test below requires; it only
                 * ever causes a contiguous selection to be reported as non-contiguous.
                 */
                for (i = 0; i < ndims - 1; i++) {
                    if (count[i] > 1)
                        FUNC_GOTO_DONE(false);
                }
            }

            /*
             * Walk from the fastest-changing dimension towards the slowest one.
             * 'whole' records whether every dimension from i up to the fastest one
             * is selected completely. If that is not the case, the next slower
             * dimension (i - 1) may only contribute a single index; otherwise the
             * selected elements are separated by unselected ones.
             */
            whole = true;
            for (i = ndims - 1; i > 0; i--) {
                whole = whole && (start[i] == 0) && (count[i] * block[i] == dims[i]);
                if ((dims[i - 1] > 1) && (count[i - 1] * block[i - 1] > 1) && !whole)
                    FUNC_GOTO_DONE(false);
            }
            break;

        case H5S_SEL_POINTS:
            /* Two or more individually selected points are not known to be adjacent or in order */
            FUNC_GOTO_DONE(false);

        default:
            /* Remaining selection types keep the default answer (contiguous) */
            break;
    } /* end switch */

done:
    if (block)
        RV_free(block);
    if (count)
        RV_free(count);
    if (dims)
        RV_free(dims);
    if (stride)
        RV_free(stride);
    if (start)
        RV_free(start);

    return ret_value;
} /* end RV_dataspace_selection_is_contiguous() */

/*-------------------------------------------------------------------------
 * Function:    RV_convert_start_to_offset
 *
 * Purpose:     Translates the start coordinate of a hyperslab selection
 *              into a linear, row-major element index within the
 *              dataspace extent. For any selection type other than a
 *              hyperslab the offset is 0.
 *
 * Return:      Offset value on success/Negative value on failure.
 *
 * Programmer:  Jan-Willem Blokland
 *              August, 2023
 */
static hssize_t
RV_convert_start_to_offset(hid_t space_id)
{
    hssize_t ret_value = 0;
    hsize_t *extent    = NULL;
    hsize_t *origin    = NULL;
    int      rank;
    int      d;

    /* Nothing to compute unless the selection is a hyperslab; ret_value is already 0 */
    if (H5S_SEL_HYPERSLABS != H5Sget_select_type(space_id))
        goto done;

    if ((rank = H5Sget_simple_extent_ndims(space_id)) < 0)
        FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, -1, "can't retrieve dataspace dimensionality");

    /* Size of the dataspace in each dimension */
    extent = (hsize_t *)RV_malloc((size_t)rank * sizeof(hsize_t));
    if (NULL == extent)
        FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, -1, "can't allocate space for dimension 'dims' values");

    if (H5Sget_simple_extent_dims(space_id, extent, NULL) < 0)
        FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, -1, "can't get dataspace dimension size");

    /* Start coordinate of the hyperslab; stride, count and block are not requested */
    origin = (hsize_t *)RV_malloc((size_t)rank * sizeof(hsize_t));
    if (NULL == origin)
        FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, -1,
                        "can't allocate space for hyperslab selection 'start' values");

    if (H5Sget_regular_hyperslab(space_id, origin, NULL, NULL, NULL) < 0)
        FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, -1, "can't get regular hyperslab selection");

    /* Fold the coordinate into a single index, slowest dimension first */
    ret_value = origin[0];
    d         = 1;
    while (d < rank) {
        ret_value = ret_value * extent[d] + origin[d];
        d++;
    }

done:
    if (origin)
        RV_free(origin);
    if (extent)
        RV_free(extent);

    return ret_value;
} /* end RV_convert_start_to_offset() */
39 changes: 35 additions & 4 deletions test/test_rest_vol.c
Original file line number Diff line number Diff line change
Expand Up @@ -7464,11 +7464,13 @@ test_write_dataset_small_point_selection(void)
hsize_t points[DATASET_SMALL_WRITE_TEST_POINT_SELECTION_NUM_POINTS *
DATASET_SMALL_WRITE_TEST_POINT_SELECTION_DSET_SPACE_RANK];
hsize_t dims[DATASET_SMALL_WRITE_TEST_POINT_SELECTION_DSET_SPACE_RANK] = {10, 10, 10};
hsize_t mdims[1];
size_t i, data_size;
hid_t file_id = -1, fapl_id = -1;
hid_t container_group = -1;
hid_t dset_id = -1;
hid_t fspace_id = -1;
hid_t mspace_id = -1;
void *data = NULL;

TESTING("small write to dataset w/ point selection")
Expand Down Expand Up @@ -7511,6 +7513,9 @@ test_write_dataset_small_point_selection(void)
if (NULL == (data = malloc(data_size)))
TEST_ERROR

mdims[0] = DATASET_SMALL_WRITE_TEST_POINT_SELECTION_NUM_POINTS;
if ((mspace_id = H5Screate_simple(1, mdims, NULL)) < 0)
TEST_ERROR
for (i = 0; i < data_size / DATASET_SMALL_WRITE_TEST_POINT_SELECTION_DSET_DTYPESIZE; i++)
((int *)data)[i] = (int)i;

Expand All @@ -7532,7 +7537,7 @@ test_write_dataset_small_point_selection(void)
puts("Writing a small amount of data to dataset using a point selection\n");
#endif

if (H5Dwrite(dset_id, DATASET_SMALL_WRITE_TEST_POINT_SELECTION_DSET_DTYPE, H5S_ALL, fspace_id,
if (H5Dwrite(dset_id, DATASET_SMALL_WRITE_TEST_POINT_SELECTION_DSET_DTYPE, mspace_id, fspace_id,
H5P_DEFAULT, data) < 0) {
H5_FAILED();
printf(" couldn't write to dataset\n");
Expand All @@ -7544,6 +7549,8 @@ test_write_dataset_small_point_selection(void)
data = NULL;
}

if (H5Sclose(mspace_id) < 0)
TEST_ERROR
if (H5Sclose(fspace_id) < 0)
TEST_ERROR
if (H5Dclose(dset_id) < 0)
Expand Down Expand Up @@ -8539,6 +8546,7 @@ test_write_dataset_data_verification(void)
{
hssize_t space_npoints;
hsize_t dims[DATASET_DATA_VERIFY_WRITE_TEST_DSET_SPACE_RANK] = {10, 10, 10};
hsize_t mdims[1];
hsize_t start[DATASET_DATA_VERIFY_WRITE_TEST_DSET_SPACE_RANK];
hsize_t stride[DATASET_DATA_VERIFY_WRITE_TEST_DSET_SPACE_RANK];
hsize_t count[DATASET_DATA_VERIFY_WRITE_TEST_DSET_SPACE_RANK];
Expand All @@ -8550,6 +8558,7 @@ test_write_dataset_data_verification(void)
hid_t container_group = -1;
hid_t dset_id = -1;
hid_t fspace_id = -1;
hid_t mspace_id = -1;
void *data = NULL;
void *write_buf = NULL;
void *read_buf = NULL;
Expand Down Expand Up @@ -8697,23 +8706,34 @@ test_write_dataset_data_verification(void)
}

/* Write to first two rows of dataset */
mdims[0] = dims[1] * 2;
start[0] = 0;
stride[0] = 1;
count[0] = dims[1] * 2;
block[0] = 1;
if ((mspace_id = H5Screate_simple(1, mdims, NULL)) < 0)
TEST_ERROR
if (H5Sselect_hyperslab(mspace_id, H5S_SELECT_SET, start, stride, count, block) < 0)
TEST_ERROR

start[0] = start[1] = start[2] = 0;
stride[0] = stride[1] = stride[2] = 1;
count[0] = 2;
count[1] = dims[1];
count[2] = 1;
block[0] = block[1] = block[2] = 1;

if (H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, stride, count, block) < 0)
TEST_ERROR

if (H5Dwrite(dset_id, DATASET_DATA_VERIFY_WRITE_TEST_DSET_DTYPE, H5S_ALL, fspace_id, H5P_DEFAULT,
if (H5Dwrite(dset_id, DATASET_DATA_VERIFY_WRITE_TEST_DSET_DTYPE, mspace_id, fspace_id, H5P_DEFAULT,
write_buf) < 0) {
H5_FAILED();
printf(" couldn't write to dataset\n");
goto error;
}

if (H5Sclose(mspace_id) < 0)
TEST_ERROR
if (H5Sclose(fspace_id) < 0)
TEST_ERROR
if (H5Dclose(dset_id) < 0)
Expand Down Expand Up @@ -8814,6 +8834,15 @@ test_write_dataset_data_verification(void)
}

/* Select a series of 10 points in the dataset */
mdims[0] = DATASET_DATA_VERIFY_WRITE_TEST_NUM_POINTS;
if ((mspace_id = H5Screate_simple(1, mdims, NULL)) < 0)
TEST_ERROR
for (i = 0; i < DATASET_DATA_VERIFY_WRITE_TEST_NUM_POINTS; i++) {
points[i] = i;
}
if (H5Sselect_elements(mspace_id, H5S_SELECT_SET, DATASET_DATA_VERIFY_WRITE_TEST_NUM_POINTS, points) < 0)
TEST_ERROR

for (i = 0; i < DATASET_DATA_VERIFY_WRITE_TEST_NUM_POINTS; i++) {
size_t j;

Expand All @@ -8824,13 +8853,15 @@ test_write_dataset_data_verification(void)
if (H5Sselect_elements(fspace_id, H5S_SELECT_SET, DATASET_DATA_VERIFY_WRITE_TEST_NUM_POINTS, points) < 0)
TEST_ERROR

if (H5Dwrite(dset_id, DATASET_DATA_VERIFY_WRITE_TEST_DSET_DTYPE, H5S_ALL, fspace_id, H5P_DEFAULT,
if (H5Dwrite(dset_id, DATASET_DATA_VERIFY_WRITE_TEST_DSET_DTYPE, mspace_id, fspace_id, H5P_DEFAULT,
write_buf) < 0) {
H5_FAILED();
printf(" couldn't write to dataset\n");
goto error;
}

if (H5Sclose(mspace_id) < 0)
TEST_ERROR
if (H5Sclose(fspace_id) < 0)
TEST_ERROR
if (H5Dclose(dset_id) < 0)
Expand Down

0 comments on commit 08a12d8

Please sign in to comment.