Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rest_vol_dataset: (feature) Fixed memory-side hyperslab #58

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 231 additions & 6 deletions src/rest_vol_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@ static herr_t RV_convert_dataset_creation_properties_to_JSON(hid_t dcpl_id, char
static herr_t RV_convert_dataspace_selection_to_string(hid_t space_id, char **selection_string,
size_t *selection_string_len, hbool_t req_param);

/* Helper function for dataspace selection */
static htri_t RV_dataspace_selection_is_contiguous(hid_t space_id);

/* Conversion function to convert one or more rest_obj_ref_t objects into a binary buffer for data transfer */
static herr_t RV_convert_obj_refs_to_buffer(const rv_obj_ref_t *ref_array, size_t ref_array_len,
char **buf_out, size_t *buf_out_len);
static herr_t RV_convert_buffer_to_obj_refs(char *ref_buf, size_t ref_buf_len, rv_obj_ref_t **buf_out,
size_t *buf_out_len);
static herr_t RV_convert_obj_refs_to_buffer(const rv_obj_ref_t *ref_array, size_t ref_array_len,
char **buf_out, size_t *buf_out_len);
static herr_t RV_convert_buffer_to_obj_refs(char *ref_buf, size_t ref_buf_len, rv_obj_ref_t **buf_out,
size_t *buf_out_len);
static hssize_t RV_convert_start_to_offset(hid_t space_id);

/* Callbacks used for post-processing after a curl request succeeds */
static herr_t rv_dataset_read_cb(hid_t mem_type_id, hid_t mem_space_id, void *buf,
Expand Down Expand Up @@ -804,9 +808,11 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
H5S_sel_type sel_type = H5S_SEL_ALL;
H5T_class_t dtype_class;
hbool_t is_transfer_binary = FALSE;
htri_t contiguous = FALSE;
htri_t is_variable_str;
hssize_t mem_select_npoints = 0;
hssize_t file_select_npoints = 0;
hssize_t offset = 0;
size_t host_header_len = 0;
size_t write_body_len = 0;
size_t selection_body_len = 0;
Expand Down Expand Up @@ -864,7 +870,7 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
transfer_info[i].u.write_info.write_body = NULL;
transfer_info[i].u.write_info.base64_encoded_values = NULL;
transfer_info[i].dataset = (RV_object_t *)dset[i];
transfer_info[i].buf = buf[i];
transfer_info[i].buf = (void *)buf[i];
transfer_info[i].transfer_type = WRITE;

transfer_info[i].mem_space_id = _mem_space_id[i];
Expand Down Expand Up @@ -982,6 +988,24 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "memory datatype is invalid");

write_body_len = (size_t)file_select_npoints * dtype_size;
if ((contiguous = RV_dataspace_selection_is_contiguous(transfer_info[i].mem_space_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL,
"Unable to determine if the dataspace selection is contiguous");
if (!contiguous) {
if (NULL == (transfer_info[i].u.write_info.write_body = (char *)RV_malloc(write_body_len)))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
"can't allocate space for the 'write_body' values");
if (H5Dgather(transfer_info[i].mem_space_id, buf[i], transfer_info[i].mem_type_id,
write_body_len, transfer_info[i].u.write_info.write_body, NULL, NULL) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "can't gather data to write buffer");
buf[i] = transfer_info[i].u.write_info.write_body;
}
else {
if ((offset = RV_convert_start_to_offset(transfer_info[i].mem_space_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL,
"Unable to determine memory offset value");
buf[i] = buf[i] + offset * dtype_size;
}
} /* end if */
else {
if (H5T_STD_REF_OBJ == transfer_info[i].mem_type_id) {
Expand Down Expand Up @@ -1060,6 +1084,10 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
transfer_info[i].u.write_info.base64_encoded_values);
#endif

if (transfer_info[i].u.write_info.write_body) {
RV_free(transfer_info[i].u.write_info.write_body);
jhendersonHDF marked this conversation as resolved.
Show resolved Hide resolved
transfer_info[i].u.write_info.write_body = NULL;
}
write_body_len = (strlen(fmt_string) - 4) + selection_body_len + value_body_len;
if (NULL == (transfer_info[i].u.write_info.write_body = RV_malloc(write_body_len + 1)))
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for write buffer");
Expand Down Expand Up @@ -4137,4 +4165,201 @@ rv_dataset_write_cb(hid_t mem_type_id, hid_t mem_space_id, void *buf, struct res
{
herr_t ret_value = SUCCEED;
return SUCCEED;
}
}

/*-------------------------------------------------------------------------
* Function: RV_dataspace_selection_is_contiguous
*
* Purpose: Checks if the specified dataspace in a contiguous selection.
*
* Return: TRUE or FALSE if the selection is contiguous or
* non-contiguous and FAIL if it is unable to determine it.
*
* Programmer: Jan-Willem Blokland
* August, 2023
*/
static htri_t
RV_dataspace_selection_is_contiguous(hid_t space_id)
{
htri_t ret_value = TRUE;
hbool_t whole = TRUE;
htri_t regular = TRUE;
hsize_t *dims = NULL;
hsize_t *start = NULL;
hsize_t *stride = NULL;
hsize_t *count = NULL;
hsize_t *block = NULL;
int i;
int ndims;
hssize_t npoints, nblocks;

if ((npoints = H5Sget_select_npoints(space_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get number of selected points");
if (npoints < 2)
FUNC_GOTO_DONE(TRUE);

if ((ndims = H5Sget_simple_extent_ndims(space_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get dataspace dimensionality");
if (!ndims)
FUNC_GOTO_DONE(TRUE);

switch (H5Sget_select_type(space_id)) {
case H5S_SEL_HYPERSLABS: {
if ((regular = H5Sis_regular_hyperslab(space_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL,
"can't determine if the hyperslab is regular");
if (!regular)
FUNC_GOTO_DONE(FALSE);

if (NULL == (dims = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*dims))))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
"can't allocate space for dimension 'dims' values");

if (H5Sget_simple_extent_dims(space_id, dims, NULL) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get dataspace dimension size");

if (NULL == (start = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*start))))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
"can't allocate space for hyperslab selection 'start' values");
if (NULL == (stride = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*stride))))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
"can't allocate space for hyperslab selection 'stride' values");
if (NULL == (count = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*count))))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
"can't allocate space for hyperslab selection 'count' values");
if (NULL == (block = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*block))))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL,
"can't allocate space for hyperslab selection 'block' values");

if (nblocks = H5Sget_select_hyper_nblocks(space_id) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get number of hyperslab blocks");

if (H5Sget_regular_hyperslab(space_id, start, stride, count, block) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't get regular hyperslab selection");

/* For contiguous, the stride should be 1. */
for (i = 0; i < ndims; i++) {
if (stride[i] > 1)
FUNC_GOTO_DONE(FALSE);
}

if (nblocks > 1) {
/* Multiple blocks: count should be 1 except for the last dimension (fastest) */
for (i = 0; i < ndims - 1; i++) {
if (count[i] > 1)
FUNC_GOTO_DONE(FALSE);
}
}

/* For contiguous, all faster running dimensions than the current dimension should be selected
* completely */
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to require that the fastest running/last dimension be selected completely, which I don't think is necessary for the selection to be contiguous.

Should the comment say that all slower running dimensions than the last dimension must be contiguous?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had the same thought yesterday until I did a small test which shows it was a wrong thought. Now I read contiguous selection as a selection which is contiguous in memory. If the fastest running/last dimension is not completely selected you need additional jumps to get the right location in memory. A way to visual this, at least for me, is by considering a 2D example and make various selections. For each selection translate it to which memory locations you need to get the selected data.

whole = (start[ndims - 1] == 0) && (count[ndims - 1] * block[ndims - 1] == dims[ndims - 1]);
for (i = ndims - 2; i >= 0; i--) {
if ((dims[i] > 1) && (count[i] * block[i] > 1) && !whole)
FUNC_GOTO_DONE(FALSE);

whole = whole && (start[i] == 0) && (count[i] * block[i] == dims[i]);
}
break;
} /* H5S_SEL_HYPERSLABS */

case H5S_SEL_POINTS:
/* Assumption: any point selection is non-contiguous in memory */
FUNC_GOTO_DONE(FALSE);
break;

case H5S_SEL_ALL:
case H5S_SEL_ERROR:
case H5S_SEL_N:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to get this PR merged, but if you could make the following changes in a future PR I'd appreciate it:

  • H5S_SEL_ALL should return true for is_contiguous
  • H5S_SEL_NONE should return false for is_contiguous
  • H5S_SEL_ERROR and H5S_SEL_N should make this function return FAIL since it shouldn't get called with those

case H5S_SEL_NONE:
default:
FUNC_GOTO_DONE(TRUE);
} /* end switch */

done:
if (block)
RV_free(block);
if (count)
RV_free(count);
if (dims)
RV_free(dims);
if (stride)
RV_free(stride);
if (start)
RV_free(start);

return ret_value;
} /* end RV_dataspace_selection_is_contiguous() */

/*-------------------------------------------------------------------------
* Function: RV_convert_start_to_offset
*
* Purpose: Convert starting position value to an offset value.
*
* Return: Offset value on success/Negative value on failure.
*
* Programmer: Jan-Willem Blokland
* August, 2023
*/
static hssize_t
RV_convert_start_to_offset(hid_t space_id)
{
hsize_t *dims = NULL;
hsize_t *start = NULL;
hsize_t *end = NULL;
hssize_t ret_value = 0;
int ndims, i;

switch (H5Sget_select_type(space_id)) {
case H5S_SEL_HYPERSLABS: {
if ((ndims = H5Sget_simple_extent_ndims(space_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, -1, "can't retrieve dataspace dimensionality");

if (NULL == (dims = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*dims))))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, -1,
"can't allocate space for dimension 'dims' values");

if (H5Sget_simple_extent_dims(space_id, dims, NULL) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, -1, "can't get dataspace dimension size");

if (NULL == (start = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*start))))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, -1,
"can't allocate space for hyperslab selection 'start' values");
if (NULL == (end = (hsize_t *)RV_malloc((size_t)ndims * sizeof(*end))))
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, -1,
"can't allocate space for hyperslab selection 'end' values");

if (H5Sget_select_bounds(space_id, start, end) < 0)
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, -1,
"can't get bounding box of hyperslab selection");

ret_value = start[0];
for (i = 1; i < ndims; i++) {
ret_value = ret_value * dims[i] + start[i];
}
break;
} /* H5S_SEL_HYPERSLABS */

case H5S_SEL_POINTS:
FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, -1,
"for point selection, computing the offset is not supported");
break;

case H5S_SEL_ALL:
case H5S_SEL_ERROR:
case H5S_SEL_N:
case H5S_SEL_NONE:
default:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment here about H5S_SEL_ERROR and H5S_SEL_N making this function return a failure value

ret_value = 0;
} /* end switch */

done:
if (dims)
RV_free(dims);
if (end)
RV_free(end);
if (start)
RV_free(start);

return ret_value;
} /* end RV_convert_start_to_offset() */
Loading
Loading