diff --git a/src/rest_vol.h b/src/rest_vol.h index dcdbc1f0..29d0e78a 100644 --- a/src/rest_vol.h +++ b/src/rest_vol.h @@ -440,6 +440,33 @@ extern RV_type_info *RV_type_info_array_g[]; * * **************************/ +/* Values for the optimization of compound data reading and writing. They indicate + * whether the fields of the source and destination are subsets of each other + */ +typedef enum { + H5T_SUBSET_BADVALUE = -1, /* Invalid value */ + H5T_SUBSET_FALSE = 0, /* Source and destination aren't subsets of each other */ + H5T_SUBSET_SRC, /* Source is a subset of dest and no conversion is needed */ + H5T_SUBSET_DST, /* Dest is a subset of source and no conversion is needed */ + H5T_SUBSET_CAP /* Must be the last value */ +} RV_subset_t; + +/* Information about members of a compound type for subsetting */ +typedef struct RV_compound_info_t { + RV_subset_t subset_info; /* Subset relationship between src and dst */ + + int nmembers; /* Number of offset/length pairs in the subset */ + int nalloc; /* Number of offset/length pairs allocated */ + size_t full_type_size; /* Size of compound type with all fields */ + + size_t *src_offsets; /* Offsets of fields within source datatype */ + size_t *lengths; /* Lengths of members (in both datatypes) */ + + size_t subset_buf_size; /* Capacity of subset_buf in bytes */ + void *subset_buf; /* Buffer that receives the packed subset members */ + void *subset_ptr; /* Pointer for writing into the subset buffer */ +} RV_compound_info_t; + /* * A struct which is used to return a link's name or the size of * a link's name when calling H5Lget_name_by_idx.
@@ -604,6 +631,9 @@ typedef struct dataset_transfer_info { void *tconv_buf; void *bkg_buf; + /* Fields for writing to subsets of compound types */ + RV_compound_info_t compound_info; + transfer_type_t transfer_type; union { @@ -764,6 +794,13 @@ herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type /* Helper function to escape control characters for JSON strings */ herr_t RV_JSON_escape_string(const char *in, char *out, size_t *out_size); +/* Determine if a read from file to mem dtype is a compound subset read */ +herr_t RV_get_compound_subset_info(hid_t src_type_id, hid_t dst_type_id, RV_subset_t *subset_info); + +/* Helper to get information about members in dst that are included in src compound */ +herr_t RV_get_compound_subset_members(hid_t src_type_id, hid_t dst_type_id, + RV_compound_info_t *compound_info); + #define SERVER_VERSION_MATCHES_OR_EXCEEDS(version, major_needed, minor_needed, patch_needed) \ (version.major > major_needed) || (version.major == major_needed && version.minor > minor_needed) || \ (version.major == major_needed && version.minor == minor_needed && version.patch >= patch_needed) @@ -777,6 +814,9 @@ herr_t RV_JSON_escape_string(const char *in, char *out, size_t *out_size); #define SERVER_VERSION_SUPPORTS_FIXED_LENGTH_UTF8(version) \ (SERVER_VERSION_MATCHES_OR_EXCEEDS(version, 0, 8, 5)) +#define SERVER_VERSION_SUPPORTS_MEMBER_SELECTION(version) \ + (SERVER_VERSION_MATCHES_OR_EXCEEDS(version, 0, 8, 6)) + #ifdef __cplusplus } #endif diff --git a/src/rest_vol_dataset.c b/src/rest_vol_dataset.c index 73317452..f56298f6 100644 --- a/src/rest_vol_dataset.c +++ b/src/rest_vol_dataset.c @@ -53,11 +53,15 @@ static herr_t RV_convert_buffer_to_obj_refs(char *ref_buf, size_t ref_buf_len, static hssize_t RV_convert_start_to_offset(hid_t space_id); /* Callbacks used for post-processing after a curl request succeeds */ + static herr_t rv_dataset_read_cb(hid_t mem_type_id, hid_t mem_space_id, hid_t file_type_id, hid_t 
file_space_id, void *buf, struct response_buffer resp_buffer); static herr_t rv_dataset_write_cb(hid_t mem_type_id, hid_t mem_space_id, hid_t file_type_id, hid_t file_space_id, void *buf, struct response_buffer resp_buffer); +/* H5Dgather callback that packs selected compound type members from dst buf into a buffer */ +herr_t RV_gather_compound_subset_cb(const void *dst_buf, size_t dst_buf_bytes_used, void *op_data); + /* Struct for H5Dscatter's callback that allows it to scatter from a non-global response buffer */ struct response_read_info { void *buffer; @@ -114,6 +118,12 @@ const char *value_keys[] = {"value", (const char *)0}; */ #define BASE64_ENCODE_DEFAULT_BUFFER_SIZE 33554432 /* 32MB */ +/* Default separator character when specifying compounds members in a URL */ +#define COMPOUND_MEMBER_SEPARATOR ':' + +/* Query for server to operate on a subset of members in a compound type */ +#define COMPOUND_MEMBER_QUERY "fields=" + /*------------------------------------------------------------------------- * Function: RV_dataset_create * @@ -852,8 +862,8 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa { H5S_sel_type sel_type = H5S_SEL_ALL; H5T_class_t dtype_class; - hbool_t is_transfer_binary = FALSE; - htri_t contiguous = FALSE; + hbool_t is_transfer_binary = FALSE; + htri_t is_write_contiguous = FALSE; htri_t is_variable_str; hssize_t mem_select_npoints = 0; hssize_t file_select_npoints = 0; @@ -873,6 +883,13 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa hbool_t fill_bkg = FALSE; void *buf_to_write = NULL; + hbool_t has_selection_body = FALSE; + char cmpd_query[URL_MAX_LENGTH]; + char *member_name = NULL; + char *url_encoded_member_name = NULL; + void *gather_buffer = NULL; + int num_cmpd_members = 0; + if ((transfer_info = RV_calloc(count * sizeof(dataset_transfer_info))) == NULL) FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for dataset transfer info"); @@ -924,16 
+941,26 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa transfer_info[i].buf = (void *)buf[i]; transfer_info[i].transfer_type = WRITE; - transfer_info[i].mem_space_id = _mem_space_id[i]; - transfer_info[i].file_space_id = _file_space_id[i]; - transfer_info[i].mem_type_id = mem_type_id[i]; - transfer_info[i].file_type_id = ((RV_object_t *)dset[i])->u.dataset.dtype_id; - transfer_info[i].curl_headers = NULL; - transfer_info[i].host_headers = NULL; - transfer_info[i].resp_buffer.buffer_size = CURL_RESPONSE_BUFFER_DEFAULT_SIZE; - transfer_info[i].resp_buffer.curr_buf_ptr = transfer_info[i].resp_buffer.buffer; - transfer_info[i].tconv_buf = NULL; - transfer_info[i].bkg_buf = NULL; + transfer_info[i].mem_space_id = _mem_space_id[i]; + transfer_info[i].file_space_id = _file_space_id[i]; + transfer_info[i].mem_type_id = mem_type_id[i]; + transfer_info[i].file_type_id = ((RV_object_t *)dset[i])->u.dataset.dtype_id; + transfer_info[i].curl_headers = NULL; + transfer_info[i].host_headers = NULL; + transfer_info[i].resp_buffer.buffer_size = CURL_RESPONSE_BUFFER_DEFAULT_SIZE; + transfer_info[i].resp_buffer.curr_buf_ptr = transfer_info[i].resp_buffer.buffer; + transfer_info[i].tconv_buf = NULL; + transfer_info[i].bkg_buf = NULL; + transfer_info[i].compound_info.subset_info = H5T_SUBSET_BADVALUE; + + transfer_info[i].compound_info.nalloc = 0; + transfer_info[i].compound_info.nmembers = 0; + transfer_info[i].compound_info.src_offsets = NULL; + transfer_info[i].compound_info.lengths = NULL; + transfer_info[i].compound_info.subset_buf = NULL; + transfer_info[i].compound_info.subset_buf_size = 0; + transfer_info[i].compound_info.subset_ptr = NULL; + transfer_info[i].compound_info.full_type_size = 0; } #ifdef RV_CONNECTOR_DEBUG @@ -1037,7 +1064,6 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "unable to check if datatypes need conversion"); if (needs_tconv) { - 
#ifdef RV_CONNECTOR_DEBUG printf("-> Beginning type conversion for write\n"); #endif @@ -1052,11 +1078,12 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa &file_type_size, (size_t)file_select_npoints, TRUE, FALSE, &transfer_info[i].tconv_buf, &transfer_info[i].bkg_buf, NULL, &fill_bkg); - /* Perform type conversion on response values */ + /* Copy memory to avoid modifying user-provided write buffer */ memset(transfer_info[i].tconv_buf, 0, file_type_size * (size_t)mem_select_npoints); memcpy(transfer_info[i].tconv_buf, transfer_info[i].buf, mem_type_size * (size_t)mem_select_npoints); + /* Perform type conversion on values to write */ if (H5Tconvert(transfer_info[i].mem_type_id, transfer_info[i].file_type_id, (size_t)file_select_npoints, transfer_info[i].tconv_buf, transfer_info[i].bkg_buf, H5P_DEFAULT) < 0) @@ -1070,32 +1097,116 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa * types like object references or variable length types) */ if ((H5T_REFERENCE != dtype_class) && (H5T_VLEN != dtype_class) && !is_variable_str) { - size_t dtype_size; + hid_t dest_dtype = H5I_INVALID_HID; + hbool_t is_compound_subset = FALSE; + size_t write_dtype_size = 0; + + if ((RV_get_compound_subset_info(transfer_info[i].mem_type_id, transfer_info[i].file_type_id, + &transfer_info[i].compound_info.subset_info)) < 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "can't determine if write uses compound subsetting"); - if (0 == (dtype_size = H5Tget_size(transfer_info[i].file_type_id))) - FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "file datatype is invalid"); + is_compound_subset = (transfer_info[i].compound_info.subset_info == H5T_SUBSET_SRC); - write_body_len = (size_t)file_select_npoints * dtype_size; - if ((contiguous = RV_dataspace_selection_is_contiguous(transfer_info[i].mem_space_id)) < 0) + dest_dtype = is_compound_subset ? 
transfer_info[i].mem_type_id : transfer_info[i].file_type_id; + + if (!is_compound_subset) { + if (0 == (write_dtype_size = H5Tget_size(dest_dtype))) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "file datatype is invalid"); + } + else { + /* If compound subset write, size of written dtype is the sum of its member sizes */ + transfer_info[i].compound_info.nmembers = 0; + + if ((num_cmpd_members = H5Tget_nmembers(transfer_info[i].mem_type_id)) < 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "can't get num members in compound datatype"); + + transfer_info[i].compound_info.nalloc = num_cmpd_members; + + if ((transfer_info[i].compound_info.lengths = + RV_calloc(sizeof(size_t) * (size_t)num_cmpd_members)) == NULL) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, + "can't allocate memory for compound info"); + + if ((transfer_info[i].compound_info.src_offsets = + RV_calloc(sizeof(size_t) * (size_t)num_cmpd_members)) == NULL) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, + "can't allocate memory for compound info"); + + if (RV_get_compound_subset_members(transfer_info[i].mem_type_id, + transfer_info[i].file_type_id, + &transfer_info[i].compound_info) < 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "can't get members included in compound subset"); + + for (size_t ii = 0; ii < num_cmpd_members; ii++) { + write_dtype_size += transfer_info[i].compound_info.lengths[ii]; + } + } + + write_body_len = (size_t)file_select_npoints * write_dtype_size; + + if ((is_write_contiguous = RV_dataspace_selection_is_contiguous(transfer_info[i].mem_space_id)) < + 0) FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "Unable to determine if the dataspace selection is contiguous"); - if (!contiguous) { + + is_write_contiguous = (is_compound_subset) ? 
FALSE : is_write_contiguous; + + if (!is_write_contiguous) { if (NULL == (transfer_info[i].u.write_info.write_body = (char *)RV_malloc(write_body_len))) FUNC_GOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate space for the 'write_body' values"); - if (H5Dgather(transfer_info[i].mem_space_id, buf_to_write, transfer_info[i].file_type_id, - write_body_len, transfer_info[i].u.write_info.write_body, NULL, NULL) < 0) - FUNC_GOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "can't gather data to write buffer"); - buf_to_write = transfer_info[i].u.write_info.write_body; + + if (is_compound_subset) { + transfer_info[i].compound_info.full_type_size = file_type_size; + transfer_info[i].compound_info.subset_buf_size = + write_dtype_size * (size_t)file_select_npoints; + + /* Temp buffer for each iteration of gather callback */ + if ((gather_buffer = RV_calloc(file_type_size * (size_t)file_select_npoints)) == NULL) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, + "can't allocate memory for compound buffer"); + + /* Final buffer populated piecemeal by Gather callback */ + /* This may be freely modified, because compound subset implies that type conversion took + * place, so this points to the type conversion allocated buffer */ + transfer_info[i].compound_info.subset_buf = buf_to_write; + + transfer_info[i].compound_info.subset_ptr = transfer_info[i].compound_info.subset_buf; + /* Avoid using the same buffer as both the dst_buf for H5Dgather and the buffer populated + * by callback */ + memcpy(gather_buffer, buf_to_write, file_type_size * (size_t)file_select_npoints); + + if (H5Dgather(transfer_info[i].mem_space_id, gather_buffer, transfer_info[i].file_type_id, + file_type_size * (size_t)file_select_npoints, gather_buffer, + RV_gather_compound_subset_cb, (void *)&transfer_info[i].compound_info) < 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "can't gather data to write buffer for cmpd subset"); + + RV_free(gather_buffer); + gather_buffer = NULL; + } + 
else { + /* Not a compound subset */ + if (H5Dgather(transfer_info[i].mem_space_id, buf_to_write, transfer_info[i].file_type_id, + write_body_len, transfer_info[i].u.write_info.write_body, NULL, NULL) < 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "can't gather data to write buffer"); + + buf_to_write = transfer_info[i].u.write_info.write_body; + } } else { if ((offset = RV_convert_start_to_offset(transfer_info[i].mem_space_id)) < 0) FUNC_GOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "Unable to determine memory offset value"); - buf_to_write = (const void *)((const char *)buf_to_write + (size_t)offset * dtype_size); + buf_to_write = (const void *)((const char *)buf_to_write + (size_t)offset * write_dtype_size); } } /* end if */ else { + if (H5T_STD_REF_OBJ == transfer_info[i].file_type_id) { /* Convert the buffer of rest_obj_ref_t's to a binary buffer */ if (RV_convert_obj_refs_to_buffer( @@ -1128,13 +1239,78 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa transfer_info[i].curl_headers, is_transfer_binary ? "Content-Type: application/octet-stream" : "Content-Type: application/json"); + has_selection_body = is_transfer_binary && selection_body && (H5S_SEL_POINTS != sel_type); + /* Redirect cURL from the base URL to "/datasets//value" to write the value out */ - if ((url_len = snprintf( - transfer_info[i].request_url, URL_MAX_LENGTH, "%s/datasets/%s/value%s%s", - transfer_info[i].dataset->domain->u.file.server_info.base_URL, transfer_info[i].dataset->URI, - is_transfer_binary && selection_body && (H5S_SEL_POINTS != sel_type) ? "?select=" : "", - is_transfer_binary && selection_body && (H5S_SEL_POINTS != sel_type) ? 
selection_body - : "")) < 0) + if (transfer_info[i].compound_info.subset_info == H5T_SUBSET_SRC) { + char *cmpd_query_ptr = cmpd_query; + ptrdiff_t cmpd_query_len = 0; + + /* Construct compound member selection string for request URL */ + if (!(SERVER_VERSION_SUPPORTS_MEMBER_SELECTION( + transfer_info[i].dataset->domain->u.file.server_info.version))) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, + "compound member selection on write requires server >= 0.8.6"); + + /* If using element selection and member selection, preface member query with '&' to join it to + previous queries. Otherwise, it's the first query, so preface it with '?' */ + if (has_selection_body) { + cmpd_query_ptr[0] = '&'; + cmpd_query_ptr++; + } + else { + cmpd_query_ptr[0] = '?'; + cmpd_query_ptr++; + } + + memcpy(cmpd_query_ptr, COMPOUND_MEMBER_QUERY, strlen(COMPOUND_MEMBER_QUERY)); + cmpd_query_ptr += strlen(COMPOUND_MEMBER_QUERY); + + for (size_t j = 0; j < (size_t)num_cmpd_members; j++) { + if ((member_name = H5Tget_member_name(transfer_info[i].mem_type_id, (unsigned)j)) == NULL) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get name of compound member"); + + cmpd_query_len = cmpd_query_ptr - cmpd_query; + + /* URL-encode the member name so reserved characters are safe inside the query string */ + if ((url_encoded_member_name = curl_easy_escape(curl, member_name, strlen(member_name))) == + NULL) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, + "can't URL-encode compound member name"); + + if ((size_t)cmpd_query_len + strlen(url_encoded_member_name) + 1 > URL_MAX_LENGTH) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, + "compound member names too long for URL"); + + /* Copy the URL-encoded name (the form that was bounds-checked above) without its null byte */ + memcpy(cmpd_query_ptr, url_encoded_member_name, strlen(url_encoded_member_name)); + cmpd_query_ptr += strlen(url_encoded_member_name); + + /* If another member name will follow, add separator */ + if (j + 1 < (size_t)num_cmpd_members) { + *cmpd_query_ptr = COMPOUND_MEMBER_SEPARATOR; + cmpd_query_ptr++; + } + + if (H5free_memory((void *)member_name) < 0)
+ FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "can't free dataset member name"); + + member_name = NULL; + + curl_free(url_encoded_member_name); + url_encoded_member_name = NULL; + } + + *cmpd_query_ptr = '\0'; + } + + if ((url_len = snprintf(transfer_info[i].request_url, URL_MAX_LENGTH, "%s/datasets/%s/value%s%s%s", + transfer_info[i].dataset->domain->u.file.server_info.base_URL, + transfer_info[i].dataset->URI, has_selection_body ? "?select=" : "", + has_selection_body ? selection_body : "", + (transfer_info[i].compound_info.subset_info == H5T_SUBSET_SRC) ? cmpd_query + : "")) < 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_SYSERRSTR, FAIL, "snprintf error"); if (url_len >= URL_MAX_LENGTH) @@ -1294,12 +1470,31 @@ RV_dataset_write(size_t count, void *dset[], hid_t mem_type_id[], hid_t _mem_spa if (transfer_info[i].host_headers) RV_free(transfer_info[i].host_headers); + + /* gather_buffer is not per-transfer state: it is released exactly once after this loop, + * because freeing it here on every iteration would free the same pointer repeatedly */ + + if (transfer_info[i].compound_info.src_offsets) + RV_free(transfer_info[i].compound_info.src_offsets); + + if (transfer_info[i].compound_info.lengths) + RV_free(transfer_info[i].compound_info.lengths); } curl_multi_cleanup(curl_multi_handle); RV_free(transfer_info); + if (url_encoded_member_name) + curl_free(url_encoded_member_name); + + if (member_name) + if (H5free_memory((void *)member_name) < 0) + FUNC_DONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "can't free datatype member name"); + + if (gather_buffer) + RV_free(gather_buffer); + PRINT_ERROR_STACK; return ret_value; @@ -4545,6 +4740,8 @@ rv_dataset_read_cb(hid_t mem_type_id, hid_t mem_space_id, hid_t file_type_id, hi RV_tconv_reuse_t reuse = RV_TCONV_REUSE_NONE; hbool_t fill_bkg = FALSE; + RV_subset_t subset_info; + if (H5T_NO_CLASS == (dtype_class = H5Tget_class(mem_type_id))) FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "memory datatype is invalid"); @@ -4595,11 +4792,27 @@ rv_dataset_read_cb(hid_t mem_type_id, hid_t mem_space_id, hid_t file_type_id, hi #ifdef RV_CONNECTOR_DEBUG printf("->
Beginning type conversion\n"); #endif - /* Initialize type conversion */ RV_tconv_init(file_type_id, &file_type_size, mem_type_id, &mem_type_size, (size_t)file_select_npoints, TRUE, FALSE, &tconv_buf, &bkg_buf, &reuse, &fill_bkg); + /* Handle compound subsetting */ + if (RV_get_compound_subset_info(file_type_id, mem_type_id, &subset_info) < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get compound type subset info"); + + if (subset_info < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, + "error while checking if types are compound subsets"); + + if (subset_info == H5T_SUBSET_DST) { + /* Populate background buffer with bytes from user input buffer */ + if (!bkg_buf || !fill_bkg) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_UNSUPPORTED, FAIL, + "compound subset requires bkg buffer"); + + memcpy(bkg_buf, buf, file_type_size * (size_t)file_select_npoints); + } + /* Perform type conversion on response values */ if (reuse == RV_TCONV_REUSE_TCONV) { /* Use read buffer as type conversion buffer */ @@ -5038,3 +5251,176 @@ RV_json_values_to_binary_recursive(yajl_val value_entry, hid_t dtype_id, void *v done: return ret_value; } + +/* Get the offsets and lengths of the compound type members in both destination and source type. + Requires compound_info->src_offsets and compound_info->lengths to be allocated ahead of time, + with the number of allocated slots specified in compound_info->nalloc. 
*/ +herr_t +RV_get_compound_subset_members(hid_t src_type_id, hid_t dst_type_id, RV_compound_info_t *compound_info) +{ + herr_t ret_value = SUCCEED; + H5T_class_t dst_type_class = H5T_NO_CLASS, src_type_class = H5T_NO_CLASS; + hid_t src_member_type = H5I_INVALID_HID, dst_member_type = H5I_INVALID_HID; + int dst_nmembs = 0, src_nmembs = 0; + char *src_member_name = NULL, *dst_member_name = NULL; + htri_t types_same = false; + bool match_for_dst_member = false; + size_t dst_member_offset = 0, src_member_offset = 0; + size_t member_size = 0; + + if (!compound_info) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "provided compound info struct is NULL"); + + if (H5T_NO_CLASS == (src_type_class = H5Tget_class(src_type_id))) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "source datatype is invalid"); + + if (H5T_NO_CLASS == (dst_type_class = H5Tget_class(dst_type_id))) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "destination datatype is invalid"); + + if ((dst_type_class != H5T_COMPOUND) || (src_type_class != H5T_COMPOUND)) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "datatypes must be compound to compare members"); + + if ((dst_nmembs = H5Tget_nmembers(dst_type_id)) < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get nmembers of destination datatype"); + + if ((src_nmembs = H5Tget_nmembers(src_type_id)) < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get nmembers of source datatype"); + + if (src_nmembs > compound_info->nalloc) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "not enough space in compound info"); + + // TODO - Optimize this + for (unsigned int dst_idx = 0; dst_idx < dst_nmembs; dst_idx++) { + match_for_dst_member = false; + src_member_offset = 0; + + if ((dst_member_name = H5Tget_member_name(dst_type_id, dst_idx)) == NULL) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get destination datatype member name"); + + if ((dst_member_type = H5Tget_member_type(dst_type_id, dst_idx)) ==
H5I_INVALID_HID) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get destination datatype member type"); + + for (unsigned int src_idx = 0; src_idx < src_nmembs; src_idx++) { + if ((src_member_name = H5Tget_member_name(src_type_id, src_idx)) == NULL) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get source datatype member name"); + + if ((src_member_type = H5Tget_member_type(src_type_id, src_idx)) == H5I_INVALID_HID) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get source datatype member type"); + + src_member_offset = H5Tget_member_offset(src_type_id, src_idx); + + if ((types_same = H5Tequal(src_member_type, dst_member_type)) < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, + "can't check if member datatypes are equal"); + + if (types_same && !strcmp(dst_member_name, src_member_name)) + match_for_dst_member = true; + + if (H5Tclose(src_member_type) < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCLOSEOBJ, FAIL, "can't close datatype member"); + + src_member_type = H5I_INVALID_HID; + + if (H5free_memory(src_member_name) < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTFREE, FAIL, + "can't free source datatype member name"); + + src_member_name = NULL; + + if (match_for_dst_member) + break; + } + + /* If a source member matched this dst member by name and type, record the source offset and length */ + if (match_for_dst_member) { + dst_member_offset = H5Tget_member_offset(dst_type_id, dst_idx); + + if ((member_size = H5Tget_size(dst_member_type)) == 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, + "can't get size of destination datatype member"); + + if (compound_info->nmembers >= compound_info->nalloc) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "compound info struct is full"); + + compound_info->src_offsets[compound_info->nmembers] = src_member_offset; + compound_info->lengths[compound_info->nmembers] = member_size; + compound_info->nmembers++; + } + + if (H5Tclose(dst_member_type) < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE,
H5E_CANTCLOSEOBJ, FAIL, "can't close destination datatype member"); + + dst_member_type = H5I_INVALID_HID; + + if (H5free_memory(dst_member_name) < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTFREE, FAIL, "can't free member datatype name"); + + dst_member_name = NULL; + } + +done: + if (src_member_name) { + if (H5free_memory(src_member_name) < 0) + FUNC_DONE_ERROR(H5E_DATATYPE, H5E_CANTFREE, FAIL, "can't free member datatype name"); + + src_member_name = NULL; + } + + if (dst_member_name) { + if (H5free_memory(dst_member_name) < 0) + FUNC_DONE_ERROR(H5E_DATATYPE, H5E_CANTFREE, FAIL, "can't free destination datatype member name"); + + dst_member_name = NULL; + } + + if (src_member_type > 0) + if (H5Tclose(src_member_type) < 0) + FUNC_DONE_ERROR(H5E_DATATYPE, H5E_CANTCLOSEOBJ, FAIL, "can't close source datatype member"); + + if (dst_member_type > 0) + if (H5Tclose(dst_member_type) < 0) + FUNC_DONE_ERROR(H5E_DATATYPE, H5E_CANTCLOSEOBJ, FAIL, "can't close destination datatype member"); + + return ret_value; +} + +/* H5Dgather callback: packs the selected compound members of each gathered element into op_data's subset buffer */ +herr_t +RV_gather_compound_subset_cb(const void *dst_buf, size_t dst_buf_bytes_used, void *op_data) +{ + + RV_compound_info_t *cinfo = (RV_compound_info_t *)op_data; + + herr_t ret_value = H5_ITER_CONT; + size_t num_elems = 0; + + if (cinfo->full_type_size == 0 || dst_buf_bytes_used % cinfo->full_type_size != 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, + "provided buffer does not contain multiple of type size bytes"); + + num_elems = (dst_buf_bytes_used / cinfo->full_type_size); + + for (size_t elem_idx = 0; elem_idx < num_elems; elem_idx++) { + const void *src_elem = (const char *)dst_buf + elem_idx * cinfo->full_type_size; + + for (size_t member_idx = 0; member_idx < (size_t)cinfo->nmembers; member_idx++) { + const void *src_member = (const char *)src_elem + cinfo->src_offsets[member_idx]; + ptrdiff_t subset_bytes_used = 0; + + subset_bytes_used = (char *)cinfo->subset_ptr - (char
*)cinfo->subset_buf; + + if (subset_bytes_used < 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_INTERNAL, FAIL, "invalid subset buffer pointer"); + + if ((size_t)subset_bytes_used + cinfo->lengths[member_idx] > cinfo->subset_buf_size) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, + "compound subset members too large for buffer"); + + memcpy(cinfo->subset_ptr, src_member, cinfo->lengths[member_idx]); + cinfo->subset_ptr = (void *)((char *)cinfo->subset_ptr + cinfo->lengths[member_idx]); + } + } + +done: + return (ret_value); +} \ No newline at end of file diff --git a/src/rest_vol_datatype.c b/src/rest_vol_datatype.c index 615e4cf5..c11b061c 100644 --- a/src/rest_vol_datatype.c +++ b/src/rest_vol_datatype.c @@ -2638,3 +2638,48 @@ RV_tconv_init(hid_t src_type_id, size_t *src_type_size, hid_t dst_type_id, size_ return ret_value; } /* end RV_tconv_init() */ + +/* Determine the subset relationship (if any) between src and dst datatypes; the result is stored in *subset */ +herr_t +RV_get_compound_subset_info(hid_t src_type_id, hid_t dst_type_id, RV_subset_t *subset) +{ + herr_t ret_value = SUCCEED; + H5T_class_t dst_type_class = H5T_NO_CLASS, src_type_class = H5T_NO_CLASS; + int dst_nmembs = 0, src_nmembs = 0; + + if (!subset) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "provided subset info pointer is NULL"); + + if (H5T_NO_CLASS == (src_type_class = H5Tget_class(src_type_id))) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "source datatype is invalid"); + + if (H5T_NO_CLASS == (dst_type_class = H5Tget_class(dst_type_id))) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "destination datatype is invalid"); + + if ((dst_type_class != H5T_COMPOUND) || (src_type_class != H5T_COMPOUND)) { + *subset = H5T_SUBSET_FALSE; + FUNC_GOTO_DONE(SUCCEED); + } + + if ((dst_nmembs = H5Tget_nmembers(dst_type_id)) < 0) + FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get nmembers of destination datatype"); + + if ((src_nmembs = H5Tget_nmembers(src_type_id)) < 0) +
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "can't get nmembers of source datatype"); + + /* The library just compares the number of members to determine if two + * compounds are subsets, so that should suffice here as well. */ + + if (src_nmembs > dst_nmembs) { + *subset = H5T_SUBSET_DST; + } + else if (src_nmembs < dst_nmembs) { + *subset = H5T_SUBSET_SRC; + } + else { + *subset = H5T_SUBSET_FALSE; + } + +done: + return ret_value; +}