Skip to content

Commit

Permalink
Hdf5 1 12 merges (#1528)
Browse files Browse the repository at this point in the history
* Use internal version of H5Eprint2 to avoid possible stack overflow (#661)

* Add support for parallel filters to h5repack (#832)

* Allow parallel filters feature for comm size of 1 (#840)

* Avoid popping API context when one wasn't pushed (#848)

* Fix several warnings (#720)

* Don't allow H5Pset(get)_all_coll_metadata_ops for DXPLs (#1201)

* Fix free list tracking and cleanup cast alignment warnings (#1288)

* Fix free list tracking and cleanup cast alignment warnings

* Add free list tracking code to H5FL 'arr' routines

* Fix usage of several HDfprintf format specifiers after HDfprintf removal (#1324)

* Use appropriate printf format specifiers for haddr_t and hsize_t types directly (#1340)

* Fix H5ACmpio dirty bytes creation debugging (#1357)

* Fix documentation for H5D_space_status_t enum values (#1372)

* Parallel rank0 deadlock fixes (#1183)

* Fix several places where rank 0 can skip past collective MPI operations on failure

* Committing clang-format changes

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>

* Fix a few issues noted by LGTM (#1421)

* Fix cache sanity checking code by moving functions to wider scope (#1435)

* Fix metadata cache bug when resizing a pinned/protected entry (v2) (#1463)

* Disable memory alloc sanity checks by default for Autotools debug builds (#1468)

* Committing clang-format changes

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
jhendersonHDF and github-actions[bot] authored Mar 25, 2022
1 parent f73b4c6 commit 15971fb
Show file tree
Hide file tree
Showing 55 changed files with 855 additions and 470 deletions.
17 changes: 9 additions & 8 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2820,9 +2820,11 @@ AC_ARG_ENABLE([memory-alloc-sanity-check],
[Enable this option to turn on internal memory
allocation sanity checking. This could cause
more memory use and somewhat slower allocation.
This option is orthogonal to the
--enable-using-memchecker option.
[default=yes if debug build, otherwise no]
This option may also cause issues with HDF5
filter plugins, so should not be enabled if
filters are to be used. This option is orthogonal
to the --enable-using-memchecker option.
[default=no]
])],
[MEMORYALLOCSANITYCHECK=$enableval])

Expand All @@ -2832,11 +2834,10 @@ AC_SUBST([MEMORYALLOCSANITYCHECK])

## Set default
if test "X-$MEMORYALLOCSANITYCHECK" = X- ; then
if test "X-$BUILD_MODE" = "X-debug" ; then
MEMORYALLOCSANITYCHECK=yes
else
MEMORYALLOCSANITYCHECK=no
fi
# Should consider enabling this option by default for
# 'developer' builds if that build mode is added in
# the future
MEMORYALLOCSANITYCHECK=no
fi

case "X-$MEMORYALLOCSANITYCHECK" in
Expand Down
2 changes: 1 addition & 1 deletion hl/src/H5LT.c
Original file line number Diff line number Diff line change
Expand Up @@ -2180,7 +2180,7 @@ realloc_and_append(hbool_t _no_user_buf, size_t *len, char *buf, const char *str
*/
if (size_str < *len - 1) {
if (size_str + size_str_to_add < *len - 1) {
HDstrncat(buf, str_to_add, size_str_to_add);
HDstrcat(buf, str_to_add);
}
else {
HDstrncat(buf, str_to_add, (*len - 1) - size_str);
Expand Down
19 changes: 3 additions & 16 deletions java/src/jni/h5util.c
Original file line number Diff line number Diff line change
Expand Up @@ -870,11 +870,8 @@ h5str_sprintf(JNIEnv *env, h5str_t *out_str, hid_t container, hid_t tid, void *i
}
else {
if (typeSize > 0) {
if (NULL == (this_str = (char *)HDmalloc(typeSize + 1)))
if (NULL == (this_str = HDstrdup(tmp_str)))
H5_OUT_OF_MEMORY_ERROR(ENVONLY, "h5str_sprintf: failed to allocate string buffer");

HDstrncpy(this_str, tmp_str, typeSize);
this_str[typeSize] = '\0';
}
}

Expand Down Expand Up @@ -3664,19 +3661,14 @@ obj_info_all(hid_t loc_id, const char *name, const H5L_info2_t *info, void *op_d
info_all_t *datainfo = (info_all_t *)op_data;
H5O_info2_t object_info;
htri_t object_exists;
size_t str_len;

datainfo->otype[datainfo->count] = -1;
datainfo->ltype[datainfo->count] = -1;
datainfo->obj_token[datainfo->count] = H5O_TOKEN_UNDEF;

str_len = HDstrlen(name);
if (NULL == (datainfo->objname[datainfo->count] = (char *)HDmalloc(str_len + 1)))
if (NULL == (datainfo->objname[datainfo->count] = HDstrdup(name)))
goto done;

HDstrncpy(datainfo->objname[datainfo->count], name, str_len);
(datainfo->objname[datainfo->count])[str_len] = '\0';

if ((object_exists = H5Oexists_by_name(loc_id, name, H5P_DEFAULT)) < 0)
goto done;

Expand All @@ -3702,21 +3694,16 @@ obj_info_max(hid_t loc_id, const char *name, const H5L_info2_t *info, void *op_d
{
info_all_t *datainfo = (info_all_t *)op_data;
H5O_info2_t object_info;
size_t str_len;

datainfo->otype[datainfo->count] = -1;
datainfo->ltype[datainfo->count] = -1;
datainfo->objname[datainfo->count] = NULL;
datainfo->obj_token[datainfo->count] = H5O_TOKEN_UNDEF;

/* This will be freed by h5str_array_free(oName, n) */
str_len = HDstrlen(name);
if (NULL == (datainfo->objname[datainfo->count] = (char *)HDmalloc(str_len + 1)))
if (NULL == (datainfo->objname[datainfo->count] = HDstrdup(name)))
goto done;

HDstrncpy(datainfo->objname[datainfo->count], name, str_len);
(datainfo->objname[datainfo->count])[str_len] = '\0';

if (H5Oget_info3(loc_id, &object_info, H5O_INFO_ALL) < 0)
goto done;

Expand Down
48 changes: 48 additions & 0 deletions release_docs/RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,25 @@ New Features

(ADB - 2022/03/11)

- HDF5 memory allocation sanity checking is now off by default for
Autotools debug builds

HDF5 can be configured to perform sanity checking on internal memory
allocations by adding heap canaries to these allocations. However,
enabling this option can cause issues with external filter plugins
when working with (reallocating/freeing/allocating and passing back)
buffers.

Previously, this option was off by default for all CMake build types,
but only off by default for non-debug Autotools builds. Since debug
is the default build mode for HDF5 when built from source with
Autotools, this can result in surprising segfaults that don't occur
when an application is built against a release version of HDF5.
Therefore, this option is now off by default for all build types
across both CMake and Autotools.

(JTH - 2022/03/01)

- Refactored the utils folder.

Added subfolder test and moved the 'swmr_check_compat_vfd.c' file
Expand Down Expand Up @@ -224,6 +243,23 @@ Bug Fixes since HDF5-1.12.1 release
===================================
Library
-------
- Fixed a metadata cache bug when resizing a pinned/protected cache entry

When resizing a pinned/protected cache entry, the metadata
cache code previously would wait until after resizing the
entry to attempt to log the newly-dirtied entry. This would
cause H5C_resize_entry to mark the entry as dirty and make
H5AC_resize_entry think that it doesn't need to add the
newly-dirtied entry to the dirty entries skiplist.

Thus, a subsequent H5AC__log_moved_entry would think it
needs to allocate a new entry for insertion into the dirty
entry skip list, since the entry doesn't exist on that list.
This causes an assertion failure, as the code to allocate a
new entry assumes that the entry is not dirty.

(JRM - 2022/02/28)

- Issue #1436 identified a problem with the H5_VERS_RELEASE check in the
H5check_version function.

Expand Down Expand Up @@ -262,6 +298,18 @@ Bug Fixes since HDF5-1.12.1 release

(JTH - 2021/11/16, HDFFV-10501/HDFFV-10562)

- Fixed several potential MPI deadlocks in library failure conditions

In the parallel library, there were several places where MPI rank 0
could end up skipping past collective MPI operations when some failure
occurs in rank 0-specific processing. This would lead to deadlocks
where rank 0 completes an operation while other ranks wait in the
collective operation. These places have been rewritten to have rank 0
push an error and try to cleanup after the failure, then continue to
participate in the collective operation to the best of its ability.

(JTH - 2021/11/09)

- Fixed an issue with collective metadata reads being permanently disabled
after a dataset chunk lookup operation. This would usually cause a
mismatched MPI_Bcast and MPI_ERR_TRUNCATE issue in the library for
Expand Down
84 changes: 75 additions & 9 deletions src/H5AC.c
Original file line number Diff line number Diff line change
Expand Up @@ -1470,21 +1470,82 @@ H5AC_resize_entry(void *thing, size_t new_size)
cache_ptr = entry_ptr->cache_ptr;
HDassert(cache_ptr);

/* Resize the entry */
if (H5C_resize_entry(thing, new_size) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTRESIZE, FAIL, "can't resize entry")

#ifdef H5_HAVE_PARALLEL
{
/* Log the generation of dirty bytes of metadata iff:
*
* 1) The entry is clean on entry, and this resize will dirty it
* (i.e. the current and new sizes are different), and
*
* 2) This is a parallel computation -- which it is if the aux_ptr
* is non-null.
*
* A few points to note about this section of the code:
*
* 1) This call must occur before the call to H5C_resize_entry() since
* H5AC__log_dirtied_entry() expects the target entry to be clean
* on entry.
*
* 2) This code has some basic issues in terms of the number of bytes
* added to the dirty bytes count.
*
* First, it adds the initial entry size to aux_ptr->dirty_bytes,
* not the final size. Note that this code used to use the final
* size, but code to support this has been removed from
* H5AC__log_dirtied_entry() for reasons unknown since I wrote this
* code.
*
* As long as all ranks do the same thing here, this probably doesn't
* matter much, although it will delay initiation of sync points.
*
* A more interesting point is that this code will not increment
* aux_ptr->dirty_bytes if a dirty entry is resized. At first glance
* this seems major, as particularly with the older file formats,
* resizes can be quite large. However, this is probably not an
* issue either, since such resizes will be accompanied by large
* amounts of dirty metadata creation in other areas -- which will
* cause aux_ptr->dirty_bytes to be incremented.
*
* The bottom line is that this code is probably OK, but the above
* points should be kept in mind.
*
* One final observation: This comment is occasioned by a bug caused
* by moving the call to H5AC__log_dirtied_entry() after the call to
* H5C_resize_entry(), and then only calling H5AC__log_dirtied_entry()
* if entry_ptr->is_dirty was false.
*
* Since H5C_resize_entry() marks the target entry dirty unless there
* is no change in size, this had the effect of not calling
* H5AC__log_dirtied_entry() when it should be, and corrupting
* the cleaned and dirtied lists used by rank 0 in the parallel
* version of the metadata cache.
*
* The point here is that you should be very careful when working with
* this code, and not modify it unless you fully understand it.
*
* JRM -- 2/28/22
*/

if ((!entry_ptr->is_dirty) && (entry_ptr->size != new_size)) {

/* the entry is clean, and will be marked dirty in the resize
* operation.
*/
H5AC_aux_t *aux_ptr;

aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
if ((!entry_ptr->is_dirty) && (NULL != aux_ptr))

if (NULL != aux_ptr) {

if (H5AC__log_dirtied_entry(entry_ptr) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTMARKDIRTY, FAIL, "can't log dirtied entry")
}
}
#endif /* H5_HAVE_PARALLEL */

/* Resize the entry */
if (H5C_resize_entry(thing, new_size) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTRESIZE, FAIL, "can't resize entry")

done:
/* If currently logging, generate a message */
if (cache_ptr != NULL && cache_ptr->log_info != NULL)
Expand Down Expand Up @@ -1666,9 +1727,14 @@ H5AC_unprotect(H5F_t *f, const H5AC_class_t *type, haddr_t addr, void *thing, un
if (H5AC__log_dirtied_entry((H5AC_info_t *)thing) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "can't log dirtied entry")

if (deleted && aux_ptr->mpi_rank == 0)
if (H5AC__log_deleted_entry((H5AC_info_t *)thing) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "H5AC__log_deleted_entry() failed")
if (deleted && aux_ptr->mpi_rank == 0) {
if (H5AC__log_deleted_entry((H5AC_info_t *)thing) < 0) {
/* If we fail to log the deleted entry, push an error but still
* participate in a possible sync point ahead
*/
HDONE_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "H5AC__log_deleted_entry() failed")
}
}
} /* end if */
#endif /* H5_HAVE_PARALLEL */

Expand Down
Loading

0 comments on commit 15971fb

Please sign in to comment.