Skip to content

Commit

Permalink
Implement compound subset support for dset writes
Browse files Browse the repository at this point in the history
  • Loading branch information
mattjala committed Nov 21, 2023
1 parent ea23d39 commit 310ccee
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 76 deletions.
13 changes: 9 additions & 4 deletions src/rest_vol.h
Original file line number Diff line number Diff line change
Expand Up @@ -774,14 +774,19 @@ herr_t RV_tconv_init(hid_t src_type_id, size_t *src_type_size, hid_t dst_type_id
/* Determine if a read from file to mem dtype is a compound subset read */
herr_t RV_get_compound_subset_info(hid_t src_type_id, hid_t dst_type_id, RV_subset_t *subset_info);

/* Helper to get information about fields that are unused due to compound subsetting */
herr_t RV_get_unused_compound_fields(hid_t mem_type_id, hid_t file_type_id,
RV_compound_info_t *compound_info);
/* Helper to get information about members in dst that are omitted in src due to compound subsetting */
herr_t RV_get_omitted_compound_members(hid_t src_type_id, hid_t dst_type_id,
RV_compound_info_t *compound_info);

/* Helper function to handle compound type subsetting during reads. */
/* Helper function to handle compound type subsetting during reads */
herr_t RV_handle_compound_subset_read(hid_t src_type_id, hid_t dst_type_id, hid_t dst_space_id,
const void *src_buf, void *dst_buf);

/* Helper function to handle compound type subsetting during writes */
herr_t RV_handle_compound_subset_write(hid_t src_type_id, hid_t dst_type_id, hid_t src_space_id,
hid_t dst_space_id, RV_object_t *dset, const void *buf_in,
void *buf_out);

/* Evaluates to non-zero when `version` (an object exposing `major`, `minor` and `patch`
 * members) is greater than or equal to major_needed.minor_needed.patch_needed.
 *
 * Every argument and the expansion as a whole are parenthesized so that the macro can be
 * negated or combined with other operators without changing its meaning.
 * Note that each argument may be evaluated more than once. */
#define SERVER_VERSION_MATCHES_OR_EXCEEDS(version, major_needed, minor_needed, patch_needed)                 \
    (((version).major > (major_needed)) ||                                                                   \
     ((version).major == (major_needed) && (version).minor > (minor_needed)) ||                              \
     ((version).major == (major_needed) && (version).minor == (minor_needed) &&                              \
      (version).patch >= (patch_needed)))
Expand Down
184 changes: 172 additions & 12 deletions src/rest_vol_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,8 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
hbool_t fill_bkg = FALSE;
void *buf_to_write = NULL;

RV_subset_t subset_info = H5T_SUBSET_BADVALUE;

if ((transfer_info = RV_calloc(count * sizeof(dataset_transfer_info))) == NULL)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for dataset transfer info");

Expand Down Expand Up @@ -1027,11 +1029,45 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
printf("-> %lld points selected in memory dataspace\n\n", mem_select_npoints);
#endif

/* Handle compound subsetting */
if (RV_get_compound_subset_info(transfer_info[i].mem_type_id, transfer_info[i].file_type_id,
&subset_info) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get compound type subset info");

if (subset_info < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL,
"error while checking if types are compound subsets");

if (subset_info == H5T_SUBSET_DST)
FUNC_GOTO_ERROR(
H5E_DATATYPE, H5E_UNSUPPORTED, FAIL,
"write using a type with more members than file type is unsupported by the REST VOL");

/* If the memory type is a subset of the file type, modify the write buffer to avoid
* modifying omitted members */
if (subset_info == H5T_SUBSET_SRC) {
/* Allocate buffer for output of subsetting */
if ((file_type_size = H5Tget_size(transfer_info[i].file_type_id)) == 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "unable to get size of file datatype");

if ((transfer_info[i].u.write_info.write_body =
RV_calloc(file_type_size * (size_t)file_select_npoints)) == NULL)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
"can't allocate buffer for compound subset write");

if (RV_handle_compound_subset_write(transfer_info[i].mem_type_id, transfer_info[i].file_type_id,
transfer_info[i].mem_space_id, transfer_info[i].file_space_id,
transfer_info[i].dataset, (const void *)transfer_info[i].buf,
transfer_info[i].u.write_info.write_body) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_INTERNAL, FAIL,
"can't populate buffer for compound subset write");
}

/* Handle conversion from memory datatype to file datatype, if necessary */
if ((needs_tconv = RV_need_tconv(transfer_info[i].file_type_id, transfer_info[i].mem_type_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "unable to check if datatypes need conversion");

if (needs_tconv) {
if (!subset_info && needs_tconv) {

#ifdef RV_CONNECTOR_DEBUG
printf("-> Beginning type conversion for write\n");
Expand Down Expand Up @@ -1059,7 +1095,14 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
"failed to convert file datatype to memory datatype");
}

buf_to_write = (transfer_info[i].tconv_buf) ? transfer_info[i].tconv_buf : transfer_info[i].buf;
/* TODO - Simplify pointer management here */
if (transfer_info[i].tconv_buf)
buf_to_write = transfer_info[i].tconv_buf;
else {
buf_to_write = (transfer_info[i].u.write_info.write_body)
? transfer_info[i].u.write_info.write_body
: transfer_info[i].buf;
}

/* Setup the size of the data being transferred and the data buffer itself (for non-simple
* types like object references or variable length types)
Expand Down Expand Up @@ -1141,7 +1184,7 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
#endif

/* If using a point selection, add the selection body
* into the write body sent to server.
* into the write body and base64 encode the write values.
*/
if (H5S_SEL_POINTS == sel_type) {
const char *const fmt_string = "{%s,\"value_base64\": \"%s\"}";
Expand Down Expand Up @@ -1196,8 +1239,6 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
#endif
} /* end if */

// TODO - Compound subset handling on write

transfer_info[i].u.write_info.uinfo.buffer =
is_transfer_binary ? buf_to_write : transfer_info[i].u.write_info.write_body;
transfer_info[i].u.write_info.uinfo.buffer_size = write_body_len;
Expand Down Expand Up @@ -1244,6 +1285,7 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa
RV_free(selection_body);
selection_body = NULL;
}

} /* End iteration over dsets to write to */

#ifdef RV_CONNECTOR_DEBUG
Expand Down Expand Up @@ -4482,7 +4524,13 @@ rv_dataset_read_cb(hid_t mem_type_id, hid_t mem_space_id, hid_t file_type_id, hi
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL,
"error while checking if types are compound subsets");

if (subset_info > 0)
if (subset_info == H5T_SUBSET_SRC)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_UNSUPPORTED, FAIL,
"read to type with more members than file type is unsupported by the REST VOL");

/* If memory type has fewer fields than file type, modify response buffer to avoid
* modifying omitted fields */
if (subset_info == H5T_SUBSET_DST)
if (RV_handle_compound_subset_read(file_type_id, mem_type_id, file_space_id, (const void *)buf,
resp_info.buffer) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "can't handle compound subset read");
Expand Down Expand Up @@ -4959,10 +5007,10 @@ RV_handle_compound_subset_read(hid_t src_type_id, hid_t dst_type_id, hid_t dst_s
compound_info.src_offsets = NULL;
compound_info.lengths = NULL;

/* Copy unselected fields from user buffer to resp info buffer to avoid overwriting fields that
/* Copy unselected members from user buffer to resp info buffer to avoid overwriting members that
* weren't selected */

/* Determine which fields to copy */
/* Determine which members to copy */
if ((src_nmembs = H5Tget_nmembers(src_type_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get number of members in file datatype");

Expand All @@ -4978,8 +5026,8 @@ RV_handle_compound_subset_read(hid_t src_type_id, hid_t dst_type_id, hid_t dst_s
compound_info.nmembers = 0;
compound_info.nalloc = src_nmembs;

if (RV_get_unused_compound_fields(dst_type_id, src_type_id, &compound_info) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get unused fields in compound datatype");
if (RV_get_omitted_compound_members(dst_type_id, src_type_id, &compound_info) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get unused members in compound datatype");

/* Allocate space for gather */
if ((npoints = H5Sget_simple_extent_npoints(dst_space_id)) < 0)
Expand All @@ -4999,7 +5047,7 @@ RV_handle_compound_subset_read(hid_t src_type_id, hid_t dst_type_id, hid_t dst_s
subset_buffer, NULL, NULL) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "can't gather data from read buffer");

/* Copy pre-existing data from src buffer to avoid modifying fields that are omitted via
/* Copy pre-existing data from src buffer to avoid modifying members that are omitted via
* compound subsetting */
for (size_t i = 0; i < npoints; i++) {
for (size_t j = 0; j < compound_info.nmembers; j++) {
Expand All @@ -5022,4 +5070,116 @@ RV_handle_compound_subset_read(hid_t src_type_id, hid_t dst_type_id, hid_t dst_s
if (compound_info.lengths)
RV_free(compound_info.lengths);
}
}
} /* end RV_handle_compound_subset_read */

/* Helper function to handle compound type subsetting during writes.
 *
 * Members of the file type that are omitted from the memory type must not be
 * altered by the write. To preserve them, this routine:
 *   1. reads the elements selected by file_space_id from the dataset, using
 *      the file datatype,
 *   2. gathers both the data that was read and the user-provided write data
 *      into contiguous buffers,
 *   3. copies every omitted member from the gathered read data over the
 *      gathered write data, and
 *   4. scatters the merged data into buf_out according to mem_space_id.
 *
 * mem_type_id   - datatype of the user-provided write buffer
 * file_type_id  - datatype of the dataset
 * mem_space_id  - dataspace used to gather from buf_in and scatter into buf_out
 * file_space_id - dataspace used for the read from the dataset
 * dset          - dataset that is read to obtain the omitted members' values
 * buf_in        - user-provided write buffer
 * buf_out       - caller-allocated buffer that receives the merged data
 *
 * NOTE(review): the scatter into buf_out is performed with file_type_id, so
 * buf_out presumably has to hold file-type-sized elements for mem_space_id;
 * confirm against the caller's allocation.
 *
 * Returns SUCCEED on success and FAIL on error. */
herr_t
RV_handle_compound_subset_write(hid_t mem_type_id, hid_t file_type_id, hid_t mem_space_id,
                                hid_t file_space_id, RV_object_t *dset, const void *buf_in, void *buf_out)
{
    herr_t             ret_value        = SUCCEED;
    hssize_t           npoints          = 0;
    int                nmembers         = 0;
    size_t             nelmts           = 0;
    size_t             file_type_size   = 0;
    size_t             buf_size         = 0;
    size_t             scatter_size     = 0;
    void              *cmpd_read_buf    = NULL;
    void              *cmpd_gather_buf  = NULL;
    void              *write_gather_buf = NULL;
    hid_t              space_select_all = H5S_ALL;
    response_read_info cmpd_scatter_info;
    RV_compound_info_t compound_info;

    compound_info.nalloc      = 0;
    compound_info.nmembers    = 0;
    compound_info.dst_offsets = NULL;
    compound_info.src_offsets = NULL;
    compound_info.lengths     = NULL;

    /* Give the scatter callback info a defined state; only some fields are set below */
    memset(&cmpd_scatter_info, 0, sizeof(cmpd_scatter_info));

    /* Perform a read from the dataset to get values that occupy the omitted members */
    if ((file_type_size = H5Tget_size(file_type_id)) == 0)
        FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get size of file datatype");

    if ((npoints = H5Sget_simple_extent_npoints(file_space_id)) < 0)
        FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get npoints in object dataspace");

    /* npoints is known to be non-negative here, so the conversion is safe */
    nelmts   = (size_t)npoints;
    buf_size = file_type_size * nelmts;

    if ((cmpd_read_buf = RV_calloc(buf_size)) == NULL)
        FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTALLOC, FAIL, "can't allocate space for compound subset buffer");

    if ((RV_dataset_read(1, (void **)&dset, &file_type_id, &space_select_all, &file_space_id, H5P_DEFAULT,
                         &cmpd_read_buf, NULL)) < 0)
        FUNC_GOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL,
                        "can't perform dataset read for compound subset info");

    /* Gather data from the read */
    if ((cmpd_gather_buf = RV_calloc(buf_size)) == NULL)
        FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for compound gather buffer");

    if (H5Dgather(file_space_id, cmpd_read_buf, file_type_id, buf_size, cmpd_gather_buf, NULL, NULL) < 0)
        FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "can't gather data for compound subset write");

    /* Gather data from the user-provided write buffer.
     * NOTE(review): buf_in is gathered using file_type_id rather than mem_type_id;
     * confirm this matches the layout of the user buffer. */
    if ((write_gather_buf = RV_calloc(buf_size)) == NULL)
        FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTALLOC, FAIL, "can't allocate space for compound subset buffer");

    if (H5Dgather(mem_space_id, buf_in, file_type_id, buf_size, write_gather_buf, NULL, NULL) < 0)
        FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTGATHER, FAIL,
                        "can't gather write data from buffer for compound subsetting");

    /* Get information on members omitted by compound subsetting.
     * The member count is checked in a signed local before it is stored, so the
     * failure check does not depend on the signedness of compound_info.nalloc. */
    if ((nmembers = H5Tget_nmembers(file_type_id)) < 0)
        FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get nmembers of file type");

    compound_info.nalloc = nmembers;

    if ((compound_info.src_offsets = RV_calloc((sizeof(size_t) * (size_t)nmembers))) == NULL)
        FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTALLOC, FAIL,
                        "can't allocate memory for compound type subsetting");

    if ((compound_info.dst_offsets = RV_calloc((sizeof(size_t) * (size_t)nmembers))) == NULL)
        FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTALLOC, FAIL,
                        "can't allocate memory for compound type subsetting");

    if ((compound_info.lengths = RV_calloc((sizeof(size_t) * (size_t)nmembers))) == NULL)
        FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTALLOC, FAIL,
                        "can't allocate memory for compound type subsetting");

    if (RV_get_omitted_compound_members(mem_type_id, file_type_id, &compound_info) < 0)
        FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL,
                        "can't get omitted fields from subsetted compound types");

    /* Copy omitted fields from gathered read buffer to gathered write buffer.
     * Both buffers were gathered with the file type, so the same (dst) offset
     * addresses a member in each of them. */
    for (size_t elem_idx = 0; elem_idx < nelmts; elem_idx++) {
        const size_t elem_offset = elem_idx * file_type_size;

        for (size_t member_idx = 0; member_idx < compound_info.nmembers; member_idx++) {
            const size_t member_offset = elem_offset + compound_info.dst_offsets[member_idx];
            const char  *src           = (const char *)cmpd_gather_buf + member_offset;
            char        *dst           = (char *)write_gather_buf + member_offset;

            memcpy(dst, src, compound_info.lengths[member_idx]);
        }
    }

    /* Re-scatter write buffer into output buffer according to memory dataspace */
    scatter_size                = buf_size;
    cmpd_scatter_info.buffer    = write_gather_buf;
    cmpd_scatter_info.read_size = &scatter_size;

    if (H5Dscatter(dataset_read_scatter_op, &cmpd_scatter_info, file_type_id, mem_space_id, buf_out) < 0)
        FUNC_GOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "can't scatter compound data into write buffer");

done:
    /* Every pointer starts out NULL, so the buffers can be released unconditionally */
    RV_free(compound_info.dst_offsets);
    RV_free(compound_info.src_offsets);
    RV_free(compound_info.lengths);

    RV_free(cmpd_read_buf);
    RV_free(cmpd_gather_buf);
    RV_free(write_gather_buf);

    return ret_value;
} /* end RV_handle_compound_subset_write */
64 changes: 4 additions & 60 deletions src/rest_vol_datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -2630,6 +2630,9 @@ RV_get_compound_subset_info(hid_t src_type_id, hid_t dst_type_id, RV_subset_t *s
if ((src_nmembs = H5Tget_nmembers(src_type_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get nmembers of source datatype");

/* The library just compares the number of members to determine if two
* compounds are subsets, so that should suffice here as well. */

if (src_nmembs > dst_nmembs) {
*subset = H5T_SUBSET_DST;
}
Expand All @@ -2640,65 +2643,6 @@ RV_get_compound_subset_info(hid_t src_type_id, hid_t dst_type_id, RV_subset_t *s
*subset = H5T_SUBSET_FALSE;
}

/* TODO - Library just compares the number of fields in each to determine if two
* compounds are subsets, so that should suffice here as well. */

/*
for (unsigned int src_idx = 0; src_idx < src_nmembs; src_idx++) {
match_for_src_member = false;
if ((src_member_name = H5Tget_member_name(src_type_id, src_idx)) == NULL)
FUNC_DONE_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get source datatype member name");
if ((src_member_type = H5Tget_member_type(src_type_id, src_idx)) == H5I_INVALID_HID)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get source datatype member type");
for (unsigned int dst_idx = 0; dst_idx < dst_nmembs; dst_idx++) {
if ((dst_member_name = H5Tget_member_name(dst_type_id, dst_idx)) == NULL)
FUNC_DONE_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get destination datatype member
name");
if ((dst_member_type = H5Tget_member_type(dst_type_id, dst_idx)) == H5I_INVALID_HID)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get destination datatype member
type");
if ((types_same = H5Tequal(src_member_type, dst_member_type)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL,
"can't check if member datatypes are equal");
if (types_same && !strcmp(dst_member_name, src_member_name))
match_for_src_member = true;
if (H5Tclose(dst_member_type) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCLOSEOBJ, FAIL, "can't close datatype member");
dst_member_type = H5I_INVALID_HID;
if (H5free_memory(dst_member_name) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTFREE, FAIL, "can't free destination datatype member
name");
dst_member_name = NULL;
}
if (!match_for_src_member)
FUNC_GOTO_DONE(false);
if (H5Tclose(src_member_type) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCLOSEOBJ, FAIL, "can't close source datatype member");
src_member_type = H5I_INVALID_HID;
if (H5free_memory(src_member_name) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTFREE, FAIL, "can't free member datatype member name");
src_member_name = NULL;
}
ret_value = true;
*/

done:
if (src_member_name) {
if (H5free_memory(src_member_name) < 0)
Expand Down Expand Up @@ -2727,7 +2671,7 @@ RV_get_compound_subset_info(hid_t src_type_id, hid_t dst_type_id, RV_subset_t *s

/* Helper to get information about fields in the destination type that are unused in the source type. */
herr_t
RV_get_unused_compound_fields(hid_t src_type_id, hid_t dst_type_id, RV_compound_info_t *compound_info)
RV_get_omitted_compound_members(hid_t src_type_id, hid_t dst_type_id, RV_compound_info_t *compound_info)
{
herr_t ret_value = SUCCEED;
H5T_class_t dst_type_class = H5T_NO_CLASS, src_type_class = H5T_NO_CLASS;
Expand Down

0 comments on commit 310ccee

Please sign in to comment.